爬取全部的校园新闻
1. 从新闻 url 获取新闻详情:以字典形式保存,函数名为 anews
代码如下:
import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup

def click(url):
    """Return the click count reported by the count API for a news URL.

    The news id is the last match of "one to five consecutive digits" in
    *url*. It is substituted into the count-API URL, the API is queried,
    and the integer is parsed from the tail of the response text.

    Args:
        url: URL of a news page; must contain at least one digit.

    Returns:
        The click count as an int.

    Raises:
        IndexError: if *url* contains no digits.
        ValueError: if the parsed tail of the response is not an integer.
    """
    # re.findall returns every match in order; the last one is used as the id.
    news_id = re.findall(r'(\d{1,5})', url)[-1]
    # NOTE(review): the host in this URL looks like a proxy/mirror prefix in
    # front of oa.gzcc.cn -- confirm the intended endpoint before relying on it.
    click_url = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'.format(news_id)
    response = requests.get(click_url)
    # Keep whatever follows the last ".html" in the response, then strip the
    # leading "(" / "'" characters and the trailing "'", ")" and ";" characters.
    count_text = response.text.split('.html')[-1].lstrip("('").rstrip("');")
    return int(count_text)

def newsdt(showinfo):
    """Parse the publication date and time out of a news info string.

    The string is split on whitespace. The first field must look like
    "<label>:<YYYY-MM-DD>" (the date is the part after the first colon) and
    the second field must be "<HH:MM:SS>". Any further fields are ignored.

    Args:
        showinfo: text of the news info line.

    Returns:
        A naive datetime built from the date and time fields.

    Raises:
        IndexError: if there are fewer than two whitespace-separated fields
            or the first field contains no colon.
        ValueError: if the date/time text does not match
            "%Y-%m-%d %H:%M:%S".
    """
    fields = showinfo.split()
    # First field is "<label>:<date>"; keep the piece right after the colon.
    news_date = fields[0].split(':')[1]
    news_time = fields[1]
    return datetime.strptime(news_date + ' ' + news_time, '%Y-%m-%d %H:%M:%S')

20 defanews(url):21 newsDetail ={}22 res =requests.get(url)23 res.encoding = 'utf-8'
24 soup = BeautifulSoup(res.text,'html.parser')25 newsDetail['newsTitle'] = soup.select('.show-title')[0].text#题目
26 showinfo = soup.select('.show-info')[0].text27 newsDetai