本文基于 macOS 10.15.7 | selenium - Version: 4.21.0 | Python 3.11.5
import time
import os
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
# Output file that next_page() appends the collected link URLs to, one per line.
list_path = 'list.txt'
def next_page(driver):
    """Collect link URLs from every page of a paginated listing.

    Starting from the page currently loaded in ``driver``, append the
    ``href`` of each anchor matched by the list XPath to the file named by
    the module-level ``list_path`` (one URL per line), then click the last
    ``<a class="next">`` element and repeat on the following page.

    The walk stops when that element reports ``aria-disabled="true"`` or
    when the page contains no such element at all. Anchors without a usable
    ``href`` are skipped.

    Args:
        driver: Selenium WebDriver with the first listing page loaded.
    """
    # Iterate instead of recursing once per page, so a long listing cannot
    # exhaust Python's recursion limit.
    while True:
        # Example: collect the link data of the list on the page.
        a_nodes = driver.find_elements(by=By.XPATH, value='//div[@class="xxx"]//a')
        print(len(a_nodes))

        hrefs = []
        for anode in a_nodes:
            # get_attribute() yields None when the anchor has no href;
            # guard against that before calling str methods on it.
            href = (anode.get_attribute('href') or '').strip()
            if href:
                hrefs.append(href)

        if hrefs:
            # Open the output once per page rather than once per link.
            with open(list_path, 'a', encoding='utf-8') as fa:
                fa.writelines(href + '\n' for href in hrefs)

        next_btns = driver.find_elements(by=By.XPATH, value='//a[@class="next"]')
        if not next_btns:
            # No pagination control on this page: treat it as the last page
            # instead of failing with IndexError.
            return
        next_btn = next_btns[-1]
        print('-- next_btn : ', next_btn)

        status = next_btn.get_attribute('aria-disabled')
        print('-- next status : ', status)
        if status == 'true':
            return

        next_btn.click()
        # Fixed pause before reading the next page (presumably to let it
        # finish loading; an explicit wait would be more reliable).
        time.sleep(5)
def all_page(url='xxxx'):
    """Start Chrome, open ``url`` and harvest links from all listing pages.

    The browser session is always shut down when the crawl finishes or
    fails, so no Chrome/chromedriver process is left behind.

    Args:
        url: Address of the first listing page. The default is the
            placeholder used by the original example and must be replaced
            with a real URL for the crawl to work.
    """
    options = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # options.add_argument('headless')
    options.add_argument('window-size=1920x1080')
    options.add_argument('start-maximized')
    # Ask the rendering engine not to load images.
    options.add_argument('blink-settings=imagesEnabled=false')

    driver = webdriver.Chrome(options=options)
    try:
        driver.set_page_load_timeout(30)
        driver.get(url)
        # Fixed pause before scraping (presumably to let the first page
        # finish rendering).
        time.sleep(5)
        next_page(driver)
    finally:
        # Release the browser even if navigation or scraping raised.
        driver.quit()
# Start the crawl when this file runs. There is no __main__ guard, so
# importing this file triggers the crawl as well.
all_page()