导入第三方库
import scrapy # 导入第三方库
from scrapy import Selector
创建类,并设置爬虫参数
class BiQuGe(scrapy.Spider):
    """Spider that starts from the quanben.la ``top/allvisit`` listing page."""

    # Spider name.
    name = 'biquge'
    # Site domain the spider is restricted to.
    allowed_domains = ['quanben.la']
    # First page to fetch. The previous literal routed through an unrelated
    # host ("2.zoppoz.workers.dev") that does not match allowed_domains; it now
    # points directly at the site named there.
    start_urls = ['https://www.quanben.la/top/allvisit/1.html']
创建方法
def _parse(self, response, **kwargs):
    """Yield one item per book row found on the listing page.

    Selects the ``li`` rows under the first ``div`` of the ``#main`` element
    and, for each row, fills the "type", "name" and "user" fields of a new
    ``Spider01Item`` with the first text node matched by the field's XPath.

    Args:
        response: Response object exposing ``xpath()``.
        **kwargs: Accepted for signature compatibility; not used here.

    Yields:
        A ``Spider01Item`` for every matched row. A field is ``None`` when
        its XPath matches nothing in that row.
    """
    # NOTE(review): Spider01Item is not provided by the imports visible at the
    # top of this file - confirm it is imported from the project's items module.
    # NOTE(review): Scrapy's documented callback name is ``parse``; confirm
    # that overriding the underscore-prefixed ``_parse`` is intentional.
    rows = response.xpath('//*[@id="main"]/div[1]/li')
    for row in rows:
        item = Spider01Item()  # container for the scraped fields
        # .get() returns the first match or None. Indexing with [0] raised
        # IndexError on a row without the node, which aborted the generator
        # and dropped every remaining row.
        item["type"] = row.xpath('./span/text()').get()
        item["name"] = row.xpath('./span/a/text()').get()
        item["user"] = row.xpath('./span[@class="s4"]/text()').get()
        # Hand the populated item back to the caller.
        yield item
页面跳转爬取(以前 10 页为例进行测试)
for page in ra