import bs4
import requests
from bs4 import BeautifulSoup
def getHIMLText(url):
    """Download *url* and return the response body as text.

    The request is made with a 30-second timeout. The body is decoded using
    the encoding detected from the content (``apparent_encoding``) rather
    than the one declared in the HTTP headers.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or ``""`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: connection problems, timeouts, malformed URLs and
        # HTTP error statuses all map to an empty page. Unrelated errors
        # (KeyboardInterrupt, programming mistakes) are no longer swallowed.
        return ""
    r.encoding = r.apparent_encoding
    return r.text
def fillUnivList(ulist, html):
    """Extract table rows from *html* and append them to *ulist*.

    The first ``<tbody>`` in the document is scanned. For every child tag
    that has at least five ``<td>`` cells, a three-item list is appended:
    the stripped text of the first cell, the first whitespace-delimited
    token of the second cell, and the stripped text of the fifth cell.

    Args:
        ulist: List that receives one ``[cell0, cell1_token, cell4]`` entry
            per usable row; it is modified in place.
        html: HTML source to parse. An empty string or a page without a
            ``<tbody>`` leaves *ulist* untouched.

    Returns:
        None.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Nothing to parse (failed download or unexpected page layout).
        return
    for tr in tbody.children:
        # Children also include whitespace text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 5:
            # Skip rows that lack the cells read below.
            continue
        name_tokens = tds[1].text.split()
        ulist.append([
            tds[0].text.strip(),
            # Keep only the first token; an empty cell yields "".
            name_tokens[0] if name_tokens else "",
            tds[4].text.strip(),
        ])
def printUnivList(ulist, num):
    """Print up to *num* leading rows of *ulist* as a tab-separated table.

    A header line is printed first, then one line per row, then a final
    ``suc`` line carrying the number of rows that were actually printed.

    Args:
        ulist: Sequence of rows; each row must have at least three items
            usable with string format specs (rank, name, score).
        num: Maximum number of rows to print. Values larger than
            ``len(ulist)`` print every available row instead of raising
            ``IndexError``; negative values print no rows.

    Returns:
        None.
    """
    # Slicing clamps to the available rows; max() keeps a negative num from
    # being interpreted as "all but the last -num rows".
    rows = ulist[:max(num, 0)]
    print(f'{"排名":^10}\t{"学校名称":10}\t{"总分":^10}')
    for row in rows:
        print(f'{row[0]:^10}\t{row[1]:10}\t{row[2]:^10}')
    print(f'suc\t{len(rows)}')
def main():
    """Download the ranking page, parse it, and print up to 20 rows.

    If the download yields no text, a short failure message is printed and
    nothing is parsed. The number of rows requested from the printer is
    capped at the number of rows that were scraped.

    Returns:
        None.
    """
    uinfo = []
    # NOTE(review): this URL embeds a second "https://" URL after port 443 of
    # another host, which looks like a proxy-rewritten link. Confirm whether
    # the intended target is the shanghairanking.cn address itself.
    url = 'https://www.shanghairanking.cn/rankings/bcur/2020'
    html = getHIMLText(url)
    if not html:
        # getHIMLText returns "" on any request failure.
        print(f'Failed to fetch {url}')
        return
    fillUnivList(uinfo, html)
    # Never ask for more rows than were scraped.
    printUnivList(uinfo, min(20, len(uinfo)))


# Run the scraper only when executed as a script, so importing this module
# does not trigger a network request.
if __name__ == "__main__":
    main()
# Python 爬取最好大学网排名实例代码
# 最新推荐文章于 2022-09-20 14:16:15 发布