# import os
# import datetime
# import openpyxl
# from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
#
# # 指定 Chrome WebDriver 的路径
# chromedriver_path = "/data/apps/chromedriver/chromedriver"
#
# # 创建 Chrome WebDriver 的 Service 对象
# service = Service(chromedriver_path)
#
# # 创建 Chrome WebDriver
# options = Options()
# options.headless = False # False shows the browser window; set to True for headless mode
# driver = webdriver.Chrome(service=service, options=options)
#
# # 访问搜狗微信搜索页面
# driver.get("https://weixin.sogou.com")
#
# # 定位搜索框并输入关键字
# search_box = driver.find_element(By.ID, "query")
# search_box.send_keys("AIGC")
# search_box.send_keys(Keys.RETURN)
#
# # 创建 Excel 文件
# excel_filename = f"AIGC_微信_{datetime.datetime.now().strftime('%Y%m%d')}.xlsx"
# wb = openpyxl.Workbook()
# ws = wb.active
# ws.append(["标题", "链接", "来源"])
#
# # 爬取前10页的内容
# for page in range(10):
#     articles = driver.find_elements(By.CSS_SELECTOR, ".news-box .txt-box")
#     for article in articles:
#         title = article.find_element(By.CSS_SELECTOR, "h3").text
#         link = article.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
#         source = article.find_element(By.CSS_SELECTOR, ".s-p").text
#         ws.append([title, link, source])
#
#     # 点击下一页
#     next_page = driver.find_element(By.PARTIAL_LINK_TEXT, "下一页")
#     next_page.click()
#
# # 保存 Excel 文件
# wb.save(excel_filename)
#
# # 关闭 WebDriver
# driver.quit()
#
# print(f"爬取完成,结果已保存到 {excel_filename}")
#
# import os
# import datetime
# import openpyxl
# from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
#
# # 指定 Chrome WebDriver 的路径
# chromedriver_path = "/data/apps/chromedriver/chromedriver"
#
# # 创建 Chrome WebDriver 的 Service 对象
# service = Service(chromedriver_path)
#
# # 创建 Chrome WebDriver
# options = Options()
# options.headless = False # False shows the browser window; set to True for headless mode
# driver = webdriver.Chrome(service=service, options=options)
#
# # 访问搜狗微信搜索页面
# driver.get("https://weixin.sogou.com")
#
# # 定位搜索框并输入关键字
# search_box = driver.find_element(By.ID, "query")
# search_box.send_keys("AIGC")
# search_box.send_keys(Keys.RETURN)
#
# # 创建 Excel 文件
# excel_filename = f"AIGC_微信_{datetime.datetime.now().strftime('%Y%m%d')}.xlsx"
# wb = openpyxl.Workbook()
# ws = wb.active
# ws.append(["标题", "链接", "来源"])
#
# # 爬取前10页的内容
# for page in range(10):
#     articles = driver.find_elements(By.CSS_SELECTOR, ".news-box .txt-box")
#     for article in articles:
#         title = article.find_element(By.CSS_SELECTOR, "h3").text
#         link = article.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
#         source = article.find_element(By.CSS_SELECTOR, ".s-p").text
#         ws.append([title, link, source])
#
#     # 尝试定位下一页按钮
#     next_page_button = driver.find_elements(By.CSS_SELECTOR, ".p-fy a")[-1]
#     if "下一页" in next_page_button.text:
#         next_page_button.click()
#     else:
#         break
#
# # 保存 Excel 文件
# wb.save(excel_filename)
#
# # 关闭 WebDriver
# driver.quit()
#
# print(f"爬取完成,结果已保存到 {excel_filename}")
#
# import os
# import datetime
# import openpyxl
# import re
# from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
#
# # 指定 Chrome WebDriver 的路径
# chromedriver_path = "/data/apps/chromedriver/chromedriver"
#
# # 创建 Chrome WebDriver 的 Service 对象
# service = Service(chromedriver_path)
#
# # 创建 Chrome WebDriver
# options = Options()
# options.headless = False # False shows the browser window; set to True for headless mode
# driver = webdriver.Chrome(service=service, options=options)
#
# # 访问搜狗微信搜索页面
# driver.get("https://weixin.sogou.com")
#
# # 定位搜索框并输入关键字
# search_box = driver.find_element(By.ID, "query")
# search_box.send_keys("AIGC")
# search_box.send_keys(Keys.RETURN)
#
# # 创建 Excel 文件
# excel_filename = f"AIGC_微信_{datetime.datetime.now().strftime('%Y%m%d')}.xlsx"
# wb = openpyxl.Workbook()
# ws = wb.active
# ws.append(["标题", "链接", "来源", "发布时间"])
#
# # 爬取前10页的内容
# for page in range(10):
#     articles = driver.find_elements(By.CSS_SELECTOR, ".news-box .txt-box")
#     for article in articles:
#         title = article.find_element(By.CSS_SELECTOR, "h3").text
#         link = article.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
#         source = article.find_element(By.CSS_SELECTOR, ".s-p").text
#         # 使用正则表达式提取发布时间
#         time_match = re.search(r"(\d{4}-\d{1,2}-\d{1,2})", source)
#         publish_time = time_match.group(1) if time_match else ""
#         ws.append([title, link, source, publish_time])
#
#     # 尝试定位下一页按钮
#     next_page_button = driver.find_elements(By.CSS_SELECTOR, ".p-fy a")[-1]
#     if "下一页" in next_page_button.text:
#         next_page_button.click()
#     else:
#         break
#
# # 保存 Excel 文件
# wb.save(excel_filename)
#
# # 关闭 WebDriver
# driver.quit()
#
# print(f"爬取完成,结果已保存到 {excel_filename}")
#
# import os
# import datetime
# import openpyxl
# import re
# from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
#
# # 指定 Chrome WebDriver 的路径
# chromedriver_path = "/data/apps/chromedriver/chromedriver"
#
# # 创建 Chrome WebDriver 的 Service 对象
# service = Service(chromedriver_path)
#
# # 创建 Chrome WebDriver
# options = Options()
# options.headless = False # False shows the browser window; set to True for headless mode
# driver = webdriver.Chrome(service=service, options=options)
#
# # 访问搜狗微信搜索页面
# driver.get("https://weixin.sogou.com")
#
# # 定位搜索框并输入关键字
# search_box = driver.find_element(By.ID, "query")
# search_box.send_keys("AIGC")
# search_box.send_keys(Keys.RETURN)
#
# # 创建 Excel 文件
# excel_filename = f"AIGC_微信_{datetime.datetime.now().strftime('%Y%m%d')}.xlsx"
# wb = openpyxl.Workbook()
# ws = wb.active
# ws.append(["标题", "链接", "来源", "发布时间"])
#
# # 爬取前10页的内容
# for page in range(10):
#     articles = driver.find_elements(By.CSS_SELECTOR, ".news-box .txt-box")
#     for article in articles:
#         title = article.find_element(By.CSS_SELECTOR, "h3").text
#         link = article.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
#         source = article.find_element(By.CSS_SELECTOR, ".s-p").text
#         time_element = article.find_element(By.CSS_SELECTOR, ".s2 span")
#         publish_time = time_element.get_attribute("t")
#         ws.append([title, link, source, publish_time])
#
#     # 尝试定位下一页按钮
#     next_page_button = driver.find_elements(By.CSS_SELECTOR, ".p-fy a")[-1]
#     if "下一页" in next_page_button.text:
#         next_page_button.click()
#     else:
#         break
#
# # 保存 Excel 文件
# wb.save(excel_filename)
#
# # 关闭 WebDriver
# driver.quit()
#
# print(f"爬取完成,结果已保存到 {excel_filename}")
#
# import os
# import

qbit2coding
- 粉丝: 257
最新资源
- ibm存储虚拟化和高可用解决专项方案.docx
- PLC专业课程设计方案报告段斯静.doc
- 论仓库管理系统软件测试计划样本.doc
- 基于单片机gps全球定位呼救系统本科学位论文(1).doc
- autoCAD数据库连接入门.ppt
- 勤智创新创业大数据平台解决专业方案.docx
- Excel表格的基本知识与基本操作PPT学习课件.ppt
- 第七章操作系统接口(1).ppt
- 初中数学信息化环境下的函数教学策略(1).docx
- PLC十字路口红绿灯程设计带梯形图.docx
- 对称密钥密码算法研究应用.doc
- 运输网络线路优化计算题(课堂PPT).ppt
- 最新国家开放大学电大《电子商务概论》机考第五套真题题库及答案(1).pdf
- 大数据视野下的市场分析方法研究(1).docx
- 暑假计算机实习报告.doc
- IBM刀片式服务器解决专项方案.doc
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈


