# Scrape an e-commerce site with Selenium
from selenium import webdriver
import random
import time
import csv
import re
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC#条件判断
from selenium.webdriver.support.wait import WebDriverWait#等待
class TestSpider(object):
def __init__(self, url='这里是目标网站链接'):
    """Initialize the spider.

    Args:
        url: Target site URL to open in ``get_page``. Defaults to the
            original hard-coded placeholder so existing callers that pass
            nothing keep working.
    """
    self.url = url
    # Presumably accumulates products/pages that failed to scrape so they
    # can be retried later — TODO confirm against the rest of the file.
    self.error = []
# Enter the first-level (listing) page
def get_page(self):
    """Start two headless Chrome instances, load the target URL in the
    first one, and click through to the product listing tab.

    Side effects: sets ``self.browser`` (main, 8 s implicit wait) and
    ``self.browser2`` (secondary, 3 s implicit wait).
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('--headless')  # run without a visible window

    # Main browser: opens the first-level page.
    self.browser = webdriver.Chrome(options=chrome_opts)
    self.browser.get(self.url)
    print('已启动浏览器1')
    self.browser.maximize_window()
    self.browser.implicitly_wait(8)

    # Secondary browser with a shorter implicit wait; its use is not
    # shown in this chunk.
    self.browser2 = webdriver.Chrome(options=chrome_opts)
    self.browser2.implicitly_wait(3)
    print('已启动浏览器2')

    # Wait up to 5 s for the listing tab to become clickable, then click.
    WebDriverWait(self.browser, 5).until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="pageTWrap"]/div[4]/ul/li[2]/a')
        )
    ).click()
# Iterate over each product node on the first-level page
def xunhuan_one_page(self):
i = 1
#如果出错了,重新爬取,可以在这里加入上次那个产品所在的页数,位置,代码有待补充
while True:
list = self.browser.find_elements_by_xpath('//div[@id="proList"]//div[@index]') # 产品节点
if i <= int(len(list)):
try:
num = list[i-1].find_element_by_xpath('.//div[@class="pro-sold"]//span[@class="num"]')
page_one = self.browser.find_element_by_xpath('//div[@class="filter-page"]/span').text
page_one = re.split('\s|\/', pag