# Scrape a post page and collect the URLs of its comment pages.
#
# Install the required packages (notebook shell commands; run once if needed):
#   !pip install chardet
#   !pip list
#   !pip install bs4

# Import the required packages.
import re

import chardet
import requests
from bs4 import BeautifulSoup

# Build the request headers. The main site's headers are simple: only a
# browser User-Agent needs to be supplied.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
}

# Fetch the page content with requests.
# NOTE(review): this URL looks like it was rewritten by a proxy/mirror
# (host prefix followed by a second "https/"); confirm the intended address.
url = 'https://post.smzdm.com/p/aqn28vrv/'
html = requests.get(url, headers=headers)
# Use the encoding chardet detects from the raw bytes when decoding html.text.
html.encoding = chardet.detect(html.content)['encoding']
html_b = BeautifulSoup(html.text, 'lxml')
# Debug aid: print(html.text)

# Collect the URLs of all comment pages from the pagination list.
comment_html_list = []
# attrs must be a dict mapping attribute name to value; the original passed
# the set {'class', 'pagination'} here.
a = html_b.find_all('ul', attrs={'class': 'pagination'})
# A page without a pagination list yields no <li> items instead of IndexError.
b = a[0].find_all('li') if a else []
for i in b:
    link = i.a
    if link is None:
        # This <li> contains no anchor, so there is nothing to collect.
        continue
    # .get avoids a KeyError on anchors that carry no href attribute.
    href = link.get('href')
    if href and re.match(r'https(.*)', href) and href not in comment_html_list:
        # NOTE(review): SOURCE is cut off immediately after this name, so the
        # statement is kept as the bare expression that is visible. Presumably
        # it was meant to add the link to comment_html_list -- confirm against
        # the complete file.
        comment_html_list