1. 如何发送一个 GET 请求
在 Python 中有一个内置包 urllib。
发送请求所用的方法 urlopen() 位于 urllib.request 模块中。
from urllib.request import urlopen
from urllib.request import urlopen
# Minimal GET example: download one page and print its source.
# NOTE(review): the host below looks like a mirror/proxy prefix placed in
# front of www.baidu.com -- confirm this is the intended target.
url = 'http://www.baidu.com'

# urlopen() sends the GET request; using it as a context manager closes the
# connection as soon as the body has been read.
with urlopen(url=url) as response:
    # read() returns the raw body as bytes.
    res = response.read()

# decode() turns the bytes into text (UTF-8 by default).
print(res.decode())
import gzip
import os
from urllib.request import urlopen

from lxml import etree
# Crawl the novel index, then every book's chapter list, then every chapter,
# appending each chapter's text to one UTF-8 file per book under 小说/.

# Seconds to wait on each request before urlopen gives up.
REQUEST_TIMEOUT = 3
# First two bytes of every gzip stream.
GZIP_MAGIC = b'\x1f\x8b'


def _decode_body(raw):
    """Return the text of a response body that may be gzip-compressed.

    Args:
        raw: Body bytes exactly as read from the response.

    Returns:
        The body decoded as UTF-8, decompressed first when it starts with
        the gzip magic number.
    """
    # Checking the magic number replaces a bare ``except:`` around
    # gzip.decompress, so unrelated errors are no longer swallowed.
    if raw[:2] == GZIP_MAGIC:
        raw = gzip.decompress(raw)
    return raw.decode('utf-8')


def _fetch_html(page_url):
    """Download ``page_url`` and return its body as text.

    The response is closed once the body has been read. Errors raised by
    urlopen (including timeouts) propagate to the caller.
    """
    with urlopen(url=page_url, timeout=REQUEST_TIMEOUT) as response:
        raw = response.read()
    return _decode_body(raw)


# NOTE(review): the host in the two URL literals below looks like a
# mirror/proxy prefix placed in front of www.xbiquge.la -- confirm.
url = 'https://www.xbiquge.la/xiaoshuodaquan/'

# etree.HTML is given the page as an already-decoded str.
res = _fetch_html(url)
ele = etree.HTML(res)
book_name = ele.xpath("//div[@class='novellist']/ul/li/a/text()")
book_urls = ele.xpath("//div[@class='novellist']/ul/li/a/@href")

# The output files are opened in append mode below, which does not create
# missing directories, so make sure the folder exists first.
os.makedirs('小说', exist_ok=True)

# Titles and links come from the same anchors, so they are walked in pairs.
for book_title, book_url in zip(book_name, book_urls):
    book_page = etree.HTML(_fetch_html(book_url))
    chapter_name = book_page.xpath('//div[@id="list"]/dl/dd/a/text()')
    chapter_urls = book_page.xpath('//div[@id="list"]/dl/dd/a/@href')

    for chapter_title, chapter_url in zip(chapter_name, chapter_urls):
        # Chapter hrefs are site-relative (e.g. /0/951/827334.html), so the
        # host is prepended before fetching.
        chapter_page = etree.HTML(
            _fetch_html('https://www.xbiquge.la' + chapter_url)
        )
        paragraphs = chapter_page.xpath('//div[@id="content"]/text()')

        # Chapter title, a run of blank lines, then the joined body text.
        content = chapter_title + '\n\n\n\n\n\n\n' + ''.join(paragraphs) + '\n'
        print(content)

        # 'a+' appends, so successive chapters accumulate in the book's file.
        with open('小说/' + book_title + '.txt', 'a+', encoding='utf-8') as w:
            w.write(content)