以下代码是ChatGPT和我共创的
ChatGPT问答记录分享
国家统计局2023全国行政区划
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urlparse, unquote, urljoin
def fetch_data(url, encoding='utf-8', timeout=10):
    """Download *url* with HTTP GET and return the body as text.

    Args:
        url: Address to request.
        encoding: Encoding assigned to the response before its body is
            decoded.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole crawl.

    Returns:
        The decoded response body when the status code is 200; ``None``
        when the status code is anything else or the request itself fails
        (a message is printed in both failure cases).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures follow the same "print and return None"
        # contract as a bad status code, so callers need only one check.
        print(f"Failed to fetch data. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return None

    # The encoding must be set before .text is read, because .text decodes
    # the body using response.encoding.
    response.encoding = encoding
    return response.text
def parse_html(html, url, target_province_code='37'):
    """Parse the province index page and collect data for one province.

    Every anchor matched by ``tr.provincetr a`` is treated as a province
    link; the province code is the part of its ``href`` before the first
    dot. For each province that passes the filter, the province page is
    downloaded with ``fetch_data`` and handed to ``parse_city_html``.

    Args:
        html: Markup of the province index page.
        url: URL the markup was fetched from; passed to
            ``get_parent_folder_path`` to build the province page URL.
        target_province_code: Only the province whose code equals this
            value is processed. Pass ``None`` to process every province.

    Returns:
        A list holding everything ``parse_city_html`` returned for the
        processed provinces (empty when nothing matched or the province
        page could not be fetched).
    """
    soup = BeautifulSoup(html, 'html.parser')
    data = []

    # Adjust the selector below to match the actual page structure.
    for province in soup.select('tr.provincetr a'):
        href = province.get('href')
        if not href:
            # An anchor without a link carries no province code; skip it
            # instead of raising KeyError.
            continue

        province_name = province.text.strip()
        province_code = href.split('.')[0]
        if (target_province_code is not None
                and province_code != target_province_code):
            continue

        # Report the province-level record.
        print(f"Province: {province_name} ({province_code})")

        # Fetch the city-level page for this province.
        city_url_base = get_parent_folder_path(url)
        city_url = f"{city_url_base}/{province_code}.html"
        city_html = fetch_data(city_url)
        if city_html:
            city_data = parse_city_html(city_html, province_code, city_url)
            data.extend(city_data)

    return data
def parse_city_html(html, province_code, url):
soup = BeautifulS