import time
import os
from lxml import etree
import requests
import os
# HTTP headers attached to the script's requests. The User-Agent string
# identifies the client as desktop Chrome 63 on Windows 10.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
    ),
}

# NOTE(review): "__file__" is a quoted string literal here, so abspath()
# resolves it against the current working directory rather than against this
# script's own location -- confirm that is intended. `openfile` is not
# referenced by any other code visible in this file.
openfile = os.path.split(os.path.abspath("__file__"))[0] + "\\movies.dat"
# Poster downloader.
#
# Reads the input text file, splits its content on "::" and treats every
# third field (indices 0, 3, 6, ...) as a movie title. For each title it runs
# a Douban search, takes the first image URL matched on the result page and
# saves that image as "<sequence number><title>.jpg" in the output folder.
# A title whose search or download fails is reported and skipped so that one
# bad entry does not abort the remaining downloads.

# Seconds to wait for a single HTTP response. requests applies no timeout by
# default, so a stalled connection would otherwise block the script forever.
REQUEST_TIMEOUT = 10

# Characters Windows rejects (or interprets specially) in a file name; each
# one is replaced by "_" when a title becomes part of the output file name.
_UNSAFE_NAME_CHARS = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})

# Read the whole file once. read() returns "" for an empty file, so retrying
# it in a loop until it yields data would never terminate.
with open("C:\\Users\\10693\\Documents\\Tencent Files\\1069300778\\FileRecv\\111.txt", "r") as title_file:
    raw_text = title_file.read()

fields = raw_text.split('::')

# fields[::3] picks indices 0, 3, 6, ...; numbering starts at 1 and stays an
# integer (the former `i/3+1` is float division on Python 3: 1.0, 2.0, ...).
for number, raw_title in enumerate(fields[::3], start=1):
    # Drop surrounding whitespace such as a line break left over from the
    # split; it is not part of the title and is unusable in a file name.
    title = raw_title.strip()
    if not title:
        continue
    print(title)

    try:
        # NOTE(review): the host of this URL is 2.zoppoz.workers.dev, with the
        # Douban address carried in the path -- confirm that going through
        # this host is intended.
        # The title is passed via `params` so requests URL-encodes it; plain
        # concatenation breaks the query for titles containing "&" or "#".
        search_page = requests.get(
            url='https://www.douban.com/search?cat=1002',
            params={'q': title},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        search_page.raise_for_status()
    except requests.RequestException as err:
        print("search failed for {0!r}: {1}".format(title, err))
        continue

    image_urls = etree.HTML(search_page.content).xpath("//div/div/a/img/@src")
    if not image_urls:
        # No image matched on the result page for this title.
        continue

    # Fetch the image before opening the output file so that a failed
    # download does not leave an empty .jpg behind.
    try:
        poster = requests.get(url=image_urls[0], headers=headers, timeout=REQUEST_TIMEOUT)
        poster.raise_for_status()
    except requests.RequestException as err:
        print("download failed for {0!r}: {1}".format(title, err))
        continue

    target_path = "D:\\BaiduNetdiskDownload\\" + str(number) + title.translate(_UNSAFE_NAME_CHARS) + ".jpg"
    # The with-statement closes the file; no explicit close() is needed.
    with open(target_path, "wb") as image_file:
        print("保存第{0}张".format(number))
        image_file.write(poster.content)
# Downloads the poster image for each movie title listed in the input file.