今天我们用爬虫框架 feapder 对小说进行简单的爬取。
话不多说,
下面是代码:
import feapder
# Windows directory path (the folder name means "crawled files"). Where it is
# used is not visible in this section -- presumably the save location for the
# scraped output; TODO confirm against the rest of the file.
path = r'D:\爬取文件'
#轻量级爬虫
class TaobaoSpider(feapder.AirSpider):
def start_callback(self):
    """Print the "spider started" banner to stdout.

    NOTE(review): presumably invoked by feapder when the spider starts --
    confirm against the framework documentation.
    """
    started_message = "爬虫开始"
    print(started_message)
def end_callback(self):
    """Print the "spider finished" banner to stdout.

    NOTE(review): presumably invoked by feapder when the spider stops --
    confirm against the framework documentation.
    """
    finished_message = "爬虫结束"
    print(finished_message)
# Dispatch the initial task
def start_requests(self):
    """Yield a single seed request for the hard-coded zongheng.com page.

    NOTE(review): ``render=True`` presumably tells feapder to load the page
    through a rendering browser -- confirm in the feapder Request docs.
    """
    # Address of the web page to fetch
    seed_url = 'http://book.zongheng.com/showchapter/1141504.html'
    seed_request = feapder.Request(seed_url, render=True)
    yield seed_request
def parse(self, request, response):
'''
解析详情
:param request:
:param response:
:return:
'''
#不支持的字符忽略
response.encoding_errors = 'ignore'
#找到网页内容标签
content_list = response.xpath('//div[@class="volume-list"]/div[2]/ul')
# Create an empty list (the original comment said "dict", but this is a list)
lists = []
for content in content_list:
#遍历
# print(content)
#找章节标题
title = content.xpath('li/a//text()').extract()
#找章节链接