# python语言淘宝商品爬虫和分析程序代码ZXQMQZQ-2025-9-12-19-25

# pip install requests mysql-connector-python matplotlib pandas numpy

import requests
import json
import re
import time
import tkinter as tk
from tkinter import ttk, scrolledtext, messagebox
import mysql.connector
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import urllib.parse

# Make sure Chinese text is displayed correctly in matplotlib figures.
plt.rcParams.update({
    "font.family": ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"],
    "axes.unicode_minus": False,
})


class TaobaoLaptopSystem:
    def __init__(self, root):
        """Set up the main window, crawl state, database link and page layout.

        Args:
            root: The Tk root window that hosts the whole application.
        """
        self.root = root
        root.title("淘宝笔记本电脑数据爬取与分析系统")
        root.geometry("1200x800")
        root.minsize(1000, 700)

        # Crawl bookkeeping shared between the crawl loop and the progress UI.
        self.crawling = False
        self.total_pages = 0           # number of pages requested
        self.current_crawl_page = 0    # page currently being crawled
        self.total_items_estimate = 0  # estimated item total (48 per page)
        self.current_crawl_items = 0   # items crawled so far

        # Database handles are filled in by connect_db().
        self.conn = None
        self.cursor = None
        self.connect_db()

        # Outer container for the navigation bar and the content pane.
        self.main_frame = ttk.Frame(root)
        self.main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Left-hand navigation bar: one button per page.
        self.nav_frame = ttk.Frame(self.main_frame, width=200)
        self.nav_frame.pack(side=tk.LEFT, fill=tk.Y)

        self.nav_buttons = [
            ("数据爬取", self.show_crawl_frame),
            ("数据展示", self.show_display_frame),
            ("价格分布", self.show_price_distribution_frame),
            ("销量排行", self.show_sales_rank_frame),
            ("店铺地区分布", self.show_region_distribution_frame),
            ("价格区间分析", self.show_price_range_frame)
        ]
        for label, handler in self.nav_buttons:
            nav_button = ttk.Button(self.nav_frame, text=label, command=handler)
            nav_button.pack(fill=tk.X, padx=5, pady=5)

        # Right-hand pane whose children are replaced whenever a page is shown.
        self.content_frame = ttk.Frame(self.main_frame)
        self.content_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True)

        # Start on the crawl page.
        self.show_crawl_frame()

    def connect_db(self):
        """Connect to MySQL and make sure the database and ``laptops`` table exist.

        The connection is opened WITHOUT selecting a database, so that
        ``CREATE DATABASE IF NOT EXISTS`` can run on a fresh server; the
        database is then selected and the table created. On success
        ``self.conn`` / ``self.cursor`` are set and an info dialog is shown.
        If every credential fails, both attributes are reset to ``None`` and an
        error dialog reports the last MySQL error.

        Connection settings can be supplied through the environment variables
        ``TAOBAO_DB_HOST``, ``TAOBAO_DB_USER`` and ``TAOBAO_DB_PASSWORD``.
        """
        import os  # local import: only this method needs it

        host = os.environ.get("TAOBAO_DB_HOST", "localhost")
        user = os.environ.get("TAOBAO_DB_USER", "root")
        env_password = os.environ.get("TAOBAO_DB_PASSWORD")
        if env_password is not None:
            candidate_passwords = [env_password]
        else:
            # NOTE(review): these are the two passwords that were hard-coded in
            # the original source (primary attempt and fallback attempt). They
            # are kept only for backward compatibility; credentials should not
            # live in source control - prefer TAOBAO_DB_PASSWORD.
            candidate_passwords = ["ye17876586815", "123456"]

        create_table_sql = '''
            CREATE TABLE IF NOT EXISTS laptops (
                id INT AUTO_INCREMENT PRIMARY KEY,
                title VARCHAR(255) NOT NULL,
                price DECIMAL(10,2) NOT NULL,
                price_str VARCHAR(50) NOT NULL,
                sales INT NOT NULL,
                sales_str VARCHAR(50) NOT NULL,
                shop_name VARCHAR(255) NOT NULL,
                address VARCHAR(100),
                crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
            '''

        last_error = None
        for password in candidate_passwords:
            conn = None
            try:
                conn = mysql.connector.connect(host=host, user=user, password=password)
                cursor = conn.cursor()
                cursor.execute("CREATE DATABASE IF NOT EXISTS taobao_laptop_db")
                cursor.execute("USE taobao_laptop_db")
                cursor.execute(create_table_sql)
                conn.commit()
            except mysql.connector.Error as err:
                last_error = err
                # Do not leak a half-initialised connection before retrying.
                if conn is not None:
                    try:
                        conn.close()
                    except mysql.connector.Error:
                        pass
                continue

            self.conn = conn
            self.cursor = cursor
            messagebox.showinfo("数据库连接", "数据库连接成功")
            return

        # Every attempt failed: leave the handles unset so callers can tell.
        self.conn = None
        self.cursor = None
        messagebox.showerror("数据库错误", f"无法初始化数据库: {last_error}")

    def clear_content_frame(self):
        """清空内容框架"""
        for widget in self.content_frame.winfo_children():
            widget.destroy()

    def show_crawl_frame(self):
        """Show the crawl page: cookie box, search settings, controls, progress and log.

        Replaces the content pane with a grid holding (top to bottom) the
        cookie text box, the keyword and page-count entries, the
        start/stop/clear buttons, a status line, the overall and per-page
        progress bars, and the operation log. A cookie previously saved to
        ``cookie.txt`` is pre-loaded into the text box when the file can be read.
        """
        self.clear_content_frame()

        crawl_frame = ttk.Frame(self.content_frame)
        crawl_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Both columns stretch horizontally; the cookie box (row 1), the
        # progress rows (12, 14) and the log (row 17) absorb spare height.
        crawl_frame.columnconfigure(0, weight=1)
        crawl_frame.columnconfigure(1, weight=1)
        for row in (1, 12, 14, 17):
            crawl_frame.rowconfigure(row, weight=1)

        # Cookie input.
        ttk.Label(crawl_frame, text="请输入淘宝Cookie字符串:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.cookie_entry = scrolledtext.ScrolledText(crawl_frame, height=5)
        self.cookie_entry.grid(row=1, column=0, columnspan=2, sticky=tk.W + tk.E + tk.N + tk.S, pady=5)

        # Best effort: a missing or unreadable cookie.txt just leaves the box empty.
        try:
            with open("cookie.txt", "r", encoding="utf-8") as f:
                self.cookie_entry.insert(tk.END, f.read())
        except (OSError, UnicodeDecodeError):
            pass

        ttk.Button(crawl_frame, text="测试Cookie", command=self.test_cookie).grid(row=2, column=0, pady=5)
        ttk.Button(crawl_frame, text="保存Cookie", command=self.save_cookie).grid(row=2, column=1, pady=5)

        # Search settings.
        ttk.Label(crawl_frame, text="搜索关键词:").grid(row=3, column=0, sticky=tk.W, pady=5)
        self.keyword_entry = ttk.Entry(crawl_frame)
        self.keyword_entry.insert(0, "笔记本电脑")
        self.keyword_entry.grid(row=4, column=0, columnspan=2, sticky=tk.W + tk.E, pady=5)

        ttk.Label(crawl_frame, text="爬取页数:").grid(row=5, column=0, sticky=tk.W, pady=5)
        self.page_entry = ttk.Entry(crawl_frame)
        self.page_entry.insert(0, "3")
        self.page_entry.grid(row=6, column=0, sticky=tk.W, pady=5)

        # Crawl control buttons.
        control_frame = ttk.Frame(crawl_frame)
        control_frame.grid(row=7, column=0, columnspan=2, pady=10)

        ttk.Button(control_frame, text="开始爬取", command=self.start_crawl).pack(side=tk.LEFT, padx=10)
        ttk.Button(control_frame, text="停止爬取", command=self.stop_crawl).pack(side=tk.LEFT, padx=10)
        ttk.Button(control_frame, text="清空数据", command=self.clear_data).pack(side=tk.LEFT, padx=10)

        # Textual status line.
        ttk.Label(crawl_frame, text="实时进度:").grid(row=8, column=0, sticky=tk.W, pady=5)
        self.progress_var = tk.StringVar()
        self.progress_var.set("准备就绪")
        ttk.Label(crawl_frame, textvariable=self.progress_var).grid(row=9, column=0, sticky=tk.W, pady=5)

        # Overall progress bar, expressed as a percentage (maximum=100).
        ttk.Label(crawl_frame, text="总爬取进度:").grid(row=10, column=0, sticky=tk.W, pady=5)
        self.total_progress_bar = ttk.Progressbar(
            crawl_frame,
            orient=tk.HORIZONTAL,
            length=100,
            mode='determinate',
            maximum=100
        )
        self.total_progress_bar.grid(row=11, column=0, columnspan=2, sticky=tk.W + tk.E, pady=5)
        self.total_progress_label = ttk.Label(crawl_frame, text="0% (0/0 商品)")
        self.total_progress_label.grid(row=12, column=0, sticky=tk.W, pady=2)

        # Per-page progress bar; 48 is the page size the crawler requests.
        ttk.Label(crawl_frame, text="当前页进度:").grid(row=13, column=0, sticky=tk.W, pady=5)
        self.page_progress_bar = ttk.Progressbar(
            crawl_frame,
            orient=tk.HORIZONTAL,
            length=100,
            mode='determinate',
            maximum=48
        )
        self.page_progress_bar.grid(row=14, column=0, columnspan=2, sticky=tk.W + tk.E, pady=5)
        self.page_progress_label = ttk.Label(crawl_frame, text="0/48 商品")
        self.page_progress_label.grid(row=15, column=0, sticky=tk.W, pady=2)

        # Operation log.
        ttk.Label(crawl_frame, text="操作日志:").grid(row=16, column=0, sticky=tk.W, pady=5)
        self.log_text = scrolledtext.ScrolledText(crawl_frame, height=10)
        self.log_text.grid(row=17, column=0, columnspan=2, sticky=tk.W + tk.E + tk.N + tk.S, pady=5)

    def test_cookie(self):
        """Send one request to the Taobao search API with the entered cookie.

        Reads the cookie from the text box, issues a GET to the mtop search
        endpoint and reports the HTTP status in the log and in a dialog.
        Network failures are reported the same way instead of propagating.

        NOTE(review): only the HTTP status code is checked; whether a 200
        response really proves the cookie is logged in is not verified here.
        """
        cookie = self.cookie_entry.get("1.0", tk.END).strip()
        if not cookie:
            messagebox.showwarning("警告", "请输入Cookie")
            return

        # Talk to Taobao directly. The previous URL/referer carried a
        # "" proxy prefix, which would have sent the
        # user's login cookie to a third-party host.
        test_url = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.68',
            'cookie': cookie,
            'referer': 'https://s.taobao.com/'
        }

        try:
            res = requests.get(test_url, headers=headers, timeout=10)
        except Exception as e:
            # Kept broad on purpose: besides network errors, requests can
            # reject a cookie that is not encodable as an HTTP header.
            self.log_text.insert(tk.END, f"Cookie测试出错: {str(e)}\n")
            messagebox.showerror("测试错误", f"Cookie测试出错: {str(e)}")
            self.log_text.see(tk.END)
            return

        if res.status_code == 200:
            self.log_text.insert(tk.END, "Cookie测试成功\n")
            messagebox.showinfo("测试结果", "Cookie测试成功")
        else:
            self.log_text.insert(tk.END, f"Cookie测试失败,状态码: {res.status_code}\n")
            messagebox.showerror("测试结果", f"Cookie测试失败,状态码: {res.status_code}")
        self.log_text.see(tk.END)

    def save_cookie(self):
        """Write the cookie from the text box to ``cookie.txt``.

        Warns and returns when the box is empty. A failure to write the file
        is logged and shown in an error dialog instead of escaping from the
        button callback.
        """
        cookie = self.cookie_entry.get("1.0", tk.END).strip()
        if not cookie:
            messagebox.showwarning("警告", "请输入Cookie")
            return

        try:
            with open("cookie.txt", "w", encoding="utf-8") as f:
                f.write(cookie)
        except OSError as e:
            self.log_text.insert(tk.END, f"Cookie保存失败: {str(e)}\n")
            messagebox.showerror("保存失败", f"Cookie保存失败: {str(e)}")
            self.log_text.see(tk.END)
            return

        self.log_text.insert(tk.END, "Cookie已保存到cookie.txt\n")
        messagebox.showinfo("保存成功", "Cookie已保存到cookie.txt")
        self.log_text.see(tk.END)

    def start_crawl(self):
        """Crawl the requested result pages and store every item in ``laptops``.

        Reads the keyword, the page count (1-20) and the cookie from the crawl
        page, requests one freshly signed search URL per page, inserts each
        returned item into the database and keeps the log and both progress
        bars current. Everything runs on the Tk thread; pending UI events are
        processed between steps so that ``stop_crawl`` can clear
        ``self.crawling`` while the loop is running.
        """
        if self.crawling:
            messagebox.showinfo("提示", "正在爬取中,请不要重复点击")
            return

        self.crawling = True
        self.progress_var.set("爬取中...")
        self._crawl_log("开始爬取数据...")

        try:
            keyword = self.keyword_entry.get().strip() or "笔记本电脑"

            # Page count must be an integer in 1..20.
            try:
                self.total_pages = int(self.page_entry.get().strip())
            except ValueError:
                self.total_pages = 0
            if not 1 <= self.total_pages <= 20:
                messagebox.showwarning("警告", "请输入1-20之间的数字作为爬取页数")
                self.progress_var.set("准备就绪")
                return

            cookie = self._crawl_cookie()
            if not cookie:
                messagebox.showwarning("警告", "请输入或保存Cookie")
                self.progress_var.set("准备就绪")
                return

            # The request signature is derived from the _m_h5_tk cookie value
            # (the part before the underscore); without it no URL can be built.
            token_match = re.search(r'(?:^|;\s*)_m_h5_tk=([^_;]+)', cookie)
            if token_match is None:
                messagebox.showwarning("警告", "Cookie中缺少_m_h5_tk,无法生成请求签名")
                self.progress_var.set("准备就绪")
                return
            token = token_match.group(1)

            # Without a database connection every insert would fail one by one.
            if self.conn is None or self.cursor is None:
                messagebox.showerror("数据库错误", "数据库未连接,无法保存爬取结果")
                self.progress_var.set("准备就绪")
                return

            # Reset progress state; 48 is the page size requested from the API.
            self.current_crawl_page = 0
            self.current_crawl_items = 0
            self.total_items_estimate = self.total_pages * 48
            self._show_total_progress()
            self._show_page_progress(0, 48)

            headers = {
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.68',
                'cookie': cookie,
                'referer': 'https://s.taobao.com/',
                'accept': 'application/json, text/javascript, */*; q=0.01',
                'x-requested-with': 'XMLHttpRequest'
            }

            last_page_total = 0
            for page in range(1, self.total_pages + 1):
                if not self.crawling:  # stop was requested
                    break

                self.current_crawl_page = page
                self._crawl_log(f"开始爬取第{page}页...")
                self.progress_var.set(f"正在爬取第{page}/{self.total_pages}页")
                self._show_page_progress(0, 48)
                self._crawl_wait()

                try:
                    _, page_total = self._crawl_page(keyword, page, headers, token)
                except Exception as e:
                    self._crawl_log(f"第{page}页爬取出错: {str(e)}")
                    continue
                if page_total:
                    last_page_total = page_total

                # Pause between page requests (not needed after the last one).
                if page < self.total_pages:
                    self._crawl_wait(2)

            item_count = self.current_crawl_items
            if self.crawling:
                # Ran to completion: show both bars as full.
                self._show_total_progress(finished=True)
                self._show_page_progress(last_page_total, last_page_total)
                self._crawl_log(f"爬取完成,共获取{item_count}个商品数据")
                self.progress_var.set(f"爬取完成,共{item_count}个商品")
                messagebox.showinfo("爬取完成", f"成功爬取{item_count}个商品数据")
            else:
                # Stopped early: leave the bars at the progress actually made.
                self._show_total_progress()
                self._crawl_log(f"爬取已停止,已获取{item_count}个商品数据")
                self.progress_var.set(f"已停止,共{item_count}个商品")

        except Exception as e:
            self._crawl_log(f"爬取过程出错: {str(e)}")
            messagebox.showerror("爬取错误", f"爬取过程出错: {str(e)}")
        finally:
            self.crawling = False
            self._crawl_wait()  # final UI refresh

    def _crawl_log(self, message):
        """Append one line to the crawl log; ignore a log widget that no longer exists."""
        try:
            self.log_text.insert(tk.END, f"{message}\n")
            self.log_text.see(tk.END)
        except tk.TclError:
            pass

    def _crawl_wait(self, seconds=0):
        """Process pending Tk events for about ``seconds``, returning early once a stop is requested."""
        deadline = time.monotonic() + seconds
        while True:
            try:
                # update() (unlike update_idletasks()) also dispatches button
                # clicks, which is what makes the stop button effective.
                self.root.update()
            except tk.TclError:
                self.crawling = False  # the window is gone; stop crawling
                return
            remaining = deadline - time.monotonic()
            if remaining <= 0 or not self.crawling:
                return
            time.sleep(min(0.05, remaining))

    def _crawl_cookie(self):
        """Return the cookie to crawl with: the text box first, then ``cookie.txt``."""
        cookie = self.cookie_entry.get("1.0", tk.END).strip()
        if cookie:
            return cookie
        try:
            with open("cookie.txt", "r", encoding="utf-8") as f:
                return f.read().strip()
        except (OSError, UnicodeDecodeError):
            return ""

    def _show_total_progress(self, finished=False):
        """Refresh the overall progress bar and label from the crawl counters."""
        done = self.current_crawl_items
        if finished:
            value, text = 100, f"100% ({done}/{done} 商品)"
        else:
            total = self.total_items_estimate
            percent = min(100.0, done / total * 100) if total > 0 else 0
            value, text = percent, f"{percent:.1f}% ({done}/{total} 商品)"
        try:
            self.total_progress_bar["value"] = value
            self.total_progress_label["text"] = text
        except tk.TclError:
            pass  # progress widgets were destroyed (another page is showing)

    def _show_page_progress(self, done, total):
        """Refresh the per-page progress bar and label to ``done`` of ``total`` items."""
        try:
            self.page_progress_bar["maximum"] = max(total, 1)
            self.page_progress_bar["value"] = done
            self.page_progress_label["text"] = f"{done}/{total} 商品"
        except tk.TclError:
            pass  # progress widgets were destroyed (another page is showing)

    def _crawl_page(self, keyword, page, headers, token):
        """Fetch one result page and store its items; return ``(stored, items_on_page)``."""
        url = self._build_search_url(keyword, page, token)
        res = requests.get(url=url, headers=headers, timeout=15)
        self._crawl_log(f"第{page}页请求状态码: {res.status_code}")
        if res.status_code != 200:
            self._crawl_log(f"第{page}页请求失败,状态码: {res.status_code}")
            return 0, 0

        try:
            payload = self._parse_jsonp(res.text)
        except json.JSONDecodeError as e:
            self._crawl_log(f"JSON解析失败: {str(e)}")
            return 0, 0

        data = payload.get("data") if isinstance(payload, dict) else None
        page_items = data.get("itemsArray") if isinstance(data, dict) else None
        if not isinstance(page_items, list):
            self._crawl_log(f"第{page}页未找到商品数据")
            # Surface the API's own status (if any) to help diagnose the cause.
            ret = payload.get("ret") if isinstance(payload, dict) else None
            if ret:
                self._crawl_log(f"接口返回: {ret}")
            return 0, 0

        total_page_items = len(page_items)
        stored = 0
        self._show_page_progress(0, total_page_items)

        for item in page_items:
            if not self.crawling:  # stop was requested
                break
            try:
                record = self._item_to_record(item)
                self.cursor.execute(
                    '''
                    INSERT INTO laptops (title, price, price_str, sales, sales_str, shop_name, address)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ''',
                    record,
                )
                # Commit per item so that a stopped crawl keeps what it stored.
                self.conn.commit()
            except Exception as e:
                self._crawl_log(f"处理商品出错: {str(e)}")
                continue

            stored += 1
            self.current_crawl_items += 1
            self._crawl_log(f"已爬取: {record[0][:30]}...")
            self._show_page_progress(stored, total_page_items)
            self._show_total_progress()
            # Short pause per item; it only paces the progress display, since
            # no network request is made per item.
            self._crawl_wait(0.5)

        self._crawl_log(f"第{page}页爬取完成,共{stored}个商品")
        return stored, total_page_items

    @staticmethod
    def _parse_jsonp(text):
        """Decode a JSON or JSONP (``callback({...})``) response body into Python data."""
        text = text.strip()
        if not text.startswith(('{', '[')):
            start = text.find('(')
            end = text.rfind(')')
            if start != -1 and end > start:
                text = text[start + 1:end].strip()
        # Control characters are removed before parsing, as the original did.
        text = re.sub(r'[\x00-\x1F\x7F]', '', text)
        return json.loads(text)

    @staticmethod
    def _item_to_record(item):
        """Turn one API item dict into the tuple of columns inserted into ``laptops``."""
        title = re.sub(r'<.*?>', '', str(item.get('title') or '未知标题'))

        price_show = item.get('priceShow') or {}
        price_str = f"{price_show.get('price', '0')}元"
        try:
            price = float(re.sub(r'[^\d.]', '', price_str))
        except ValueError:
            price = 0

        sales_str = str(item.get('realSales') or '0')
        try:
            sales_num = float(re.sub(r'[^\d.]', '', sales_str))
            if '万' in sales_str:  # "万" means the number is in units of 10,000
                sales_num *= 10000
            sales = int(sales_num)
        except ValueError:
            sales = 0

        shop_info = item.get('shopInfo') or {}
        shop_name = shop_info.get('title', '未知店铺')
        address = item.get('procity', '未知地区')
        return (title, price, price_str, sales, sales_str, shop_name, address)

    @staticmethod
    def _build_search_url(keyword, page, token, page_size=48):
        """Build a signed search URL for ``keyword`` and 1-based ``page``.

        The query mirrors the browser request captured in the original code,
        with the page number, keyword, timestamp and signature computed per
        call instead of being frozen at capture time.

        NOTE(review): the signature is computed as
        md5("<token>&<t>&<appKey>&<data>"), the scheme commonly described for
        the mtop H5 gateway - confirm against a live request. Paging fields
        other than ``page`` (sourceS, bcoffset, ntoffset) are left at their
        captured first-page values - confirm the API accepts that.
        """
        import hashlib  # local import: not provided by the file's import block

        app_key = "12574478"
        t = str(int(time.time() * 1000))
        params = {
            "device": "HMA-AL00", "isBeta": "false", "grayHair": "false",
            "from": "nt_history", "brand": "HUAWEI", "info": "wifi",
            "index": "4", "rainbow": "", "schemaType": "auction",
            "elderHome": "false", "isEnterSrpSearch": "true",
            "newSearch": "false", "network": "wifi", "subtype": "",
            "hasPreposeFilter": "false", "prepositionVersion": "v2",
            "client_os": "Android", "gpsEnabled": "false",
            "searchDoorFrom": "srp", "debug_rerankNewOpenCard": "false",
            "homePageVersion": "v7", "searchElderHomeOpen": "false",
            "search_action": "initiative", "sugg": "_4_1", "sversion": "13.6",
            "style": "list", "ttid": "600000@taobao_pc_10.7.0",
            "needTabs": "true", "areaCode": "CN", "vm": "nw",
            "countryNum": "156", "m": "pc",
            "page": page, "n": page_size,
            # The keyword travels percent-encoded inside the JSON payload.
            "q": urllib.parse.quote(keyword, safe=''),
            "qSource": "url", "pageSource": "", "channelSrp": "", "tab": "all",
            "pageSize": page_size, "totalPage": 100, "totalResults": 4800,
            "sourceS": "0", "sort": "_coefp", "bcoffset": "", "ntoffset": "",
            "filterTag": "", "service": "", "prop": "", "loc": "",
            "start_price": None, "end_price": None,
            "startPrice": None, "endPrice": None,
            "itemIds": None, "p4pIds": None, "p4pS": None,
            "categoryp": "", "ha3Kvpairs": None,
            "myCNA": "gJ5KIcEknxUCAd9KApH0VX4v",
            "screenResolution": "1536x864",
            "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0",
            "couponUnikey": "", "subTabId": "", "np": "",
            "clientType": "h5", "isNewDomainAb": "false",
            "forceOldDomain": "false",
        }
        compact = {"separators": (',', ':'), "ensure_ascii": False}
        data = json.dumps(
            {"appId": "34385", "params": json.dumps(params, **compact)},
            **compact,
        )
        sign = hashlib.md5(f"{token}&{t}&{app_key}&{data}".encode("utf-8")).hexdigest()

        bx_default = "defaultFY2_load_failed with timeout@@https://s.taobao.com/search@@1757669923752"
        query = {
            "jsv": "2.7.4",
            "appKey": app_key,
            "t": t,
            "sign": sign,
            "api": "mtop.relationrecommend.wirelessrecommend.recommend",
            "v": "2.0",
            "timeout": "10000",
            "type": "jsonp",
            "dataType": "jsonp",
            "callback": "mtopjsonp13",
            "data": data,
            "bx-ua": bx_default,
            "bx-umidtoken": bx_default,
            "bx_et": "default_not_fun",
        }
        base = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"
        return base + "?" + urllib.parse.urlencode(query, quote_via=urllib.parse.quote)

    def stop_crawl(self):
        """停止爬取数据(原功能不变)"""
        if self.crawling:
            self.crawling = False
            self.progress_var.set("正在停止...")
            self.log_text.insert(tk.END, "正在停止爬取...\n")
            self.log_text.see(tk.END)
        else:
            messagebox.showinfo("提示", "当前没有正在进行的爬取任务")

    def clear_data(self):
        """Empty the ``laptops`` table after the user confirms."""
        confirmed = messagebox.askyesno("确认", "确定要清空所有爬取的数据吗?此操作不可恢复!")
        if not confirmed:
            return

        try:
            self.cursor.execute("TRUNCATE TABLE laptops")
            self.conn.commit()
            self.log_text.insert(tk.END, "数据库数据已清空\n")
            messagebox.showinfo("成功", "数据库数据已清空")
        except Exception as exc:
            failure = f"清空数据出错: {str(exc)}"
            self.log_text.insert(tk.END, failure + "\n")
            messagebox.showerror("错误", failure)
        # Keep the newest log line visible whichever branch ran.
        self.log_text.see(tk.END)

    def show_display_frame(self):
        """Show the stored laptop rows in a paged table (20 rows per page).

        Replaces the content pane with a row count, a table of
        id/title/price/sales/shop/region and previous/next paging buttons.
        Rows are read in ``id`` order so that each page has stable contents.
        If the row count cannot be queried, an error label is shown instead.
        """
        self.clear_content_frame()

        display_frame = ttk.Frame(self.content_frame)
        display_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Total row count; it also drives the page count below.
        try:
            self.cursor.execute("SELECT COUNT(*) FROM laptops")
            total = self.cursor.fetchone()[0]
        except Exception as e:
            ttk.Label(display_frame, text=f"数据查询错误: {str(e)}").pack(pady=10)
            return
        ttk.Label(display_frame, text=f"共收录 {total} 条笔记本电脑数据", font=("SimHei", 12)).pack(pady=10)

        # Table: (column name, width, anchor).
        column_specs = (
            ("id", 50, "center"),
            ("标题", 300, "w"),
            ("价格", 80, "center"),
            ("销量", 80, "center"),
            ("店铺名称", 150, "w"),
            ("地区", 100, "center"),
        )
        columns = tuple(name for name, _, _ in column_specs)
        tree = ttk.Treeview(display_frame, columns=columns, show="headings")
        for name, width, anchor in column_specs:
            tree.column(name, width=width, anchor=anchor)
            tree.heading(name, text=name)

        # Scrollbars.
        scrollbar_y = ttk.Scrollbar(display_frame, orient=tk.VERTICAL, command=tree.yview)
        scrollbar_x = ttk.Scrollbar(display_frame, orient=tk.HORIZONTAL, command=tree.xview)
        tree.configure(yscrollcommand=scrollbar_y.set, xscrollcommand=scrollbar_x.set)

        # Layout.
        scrollbar_y.pack(side=tk.RIGHT, fill=tk.Y)
        scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
        tree.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

        # Paging controls.
        pagination_frame = ttk.Frame(display_frame)
        pagination_frame.pack(side=tk.BOTTOM, pady=10)

        self.current_page = 1
        self.page_size = 20
        # Ceiling division; an empty table still counts as one (empty) page.
        total_pages = max(1, (total + self.page_size - 1) // self.page_size)

        def load_page(page_num):
            """Replace the table contents with page ``page_num`` (1-based)."""
            for item in tree.get_children():
                tree.delete(item)

            offset = (page_num - 1) * self.page_size
            try:
                # ORDER BY makes LIMIT/OFFSET paging deterministic.
                self.cursor.execute(
                    "SELECT id, title, price_str, sales_str, shop_name, address "
                    "FROM laptops ORDER BY id LIMIT %s OFFSET %s",
                    (self.page_size, offset),
                )
                rows = self.cursor.fetchall()
            except Exception as e:
                messagebox.showerror("错误", f"加载数据出错: {str(e)}")
                return

            for row in rows:
                tree.insert("", tk.END, values=row)

            page_info_var.set(f"第 {page_num} 页,共 {total_pages} 页")
            self.current_page = page_num

            # Disable the buttons at either end of the page range.
            prev_btn.config(state=tk.NORMAL if page_num > 1 else tk.DISABLED)
            next_btn.config(state=tk.NORMAL if page_num < total_pages else tk.DISABLED)

        def prev_page():
            if self.current_page > 1:
                load_page(self.current_page - 1)

        def next_page():
            if self.current_page < total_pages:
                load_page(self.current_page + 1)

        prev_btn = ttk.Button(pagination_frame, text="上一页", command=prev_page)
        prev_btn.pack(side=tk.LEFT, padx=10)

        page_info_var = tk.StringVar()
        ttk.Label(pagination_frame, textvariable=page_info_var).pack(side=tk.LEFT, padx=10)

        next_btn = ttk.Button(pagination_frame, text="下一页", command=next_page)
        next_btn.pack(side=tk.LEFT, padx=10)

        # Start on the first page.
        load_page(1)

    def show_price_distribution_frame(self):
        """Show a histogram of stored prices plus mean/median/min/max figures.

        Reads every ``price > 0`` from ``laptops``, draws a histogram embedded
        in the content pane and lists summary statistics below it. When there
        is no price data, or anything fails, a message label is shown instead.
        """
        from matplotlib.figure import Figure  # local import: not in the file's import block

        self.clear_content_frame()

        distribution_frame = ttk.Frame(self.content_frame)
        distribution_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        ttk.Label(distribution_frame, text="笔记本电脑价格分布", font=("SimHei", 14)).pack(pady=10)

        try:
            self.cursor.execute("SELECT price FROM laptops WHERE price > 0")
            # Convert the DECIMAL column values to plain floats before handing
            # them to numpy/matplotlib.
            prices = [float(row[0]) for row in self.cursor.fetchall()]

            if not prices:
                ttk.Label(distribution_frame, text="没有找到价格数据,请先爬取数据").pack(pady=20)
                return

            min_price = min(prices)
            max_price = max(prices)

            # Bin width grows with the price range (same thresholds as before).
            if max_price > 20000:
                step, upper = 2500, 25000
            elif max_price > 10000:
                step, upper = 1500, 15000
            else:
                step, upper = 1000, 10000
            # Extend the last edge so that the most expensive items are not
            # silently left out of the histogram.
            while upper < max_price:
                upper += step
            bins = range(0, upper + 1, step)

            # A standalone Figure (not pyplot-managed) is released together
            # with its canvas widget, so revisiting the page does not pile up
            # open figures.
            fig = Figure(figsize=(8, 5))
            ax = fig.add_subplot(111)
            ax.hist(prices, bins=bins, alpha=0.7, color='#4CAF50')

            ax.set_title('笔记本电脑价格分布直方图', fontsize=12)
            ax.set_xlabel('价格 (元)', fontsize=10)
            ax.set_ylabel('商品数量', fontsize=10)
            ax.grid(axis='y', linestyle='--', alpha=0.7)
            fig.tight_layout()

            # Embed the chart in the Tk frame.
            canvas = FigureCanvasTkAgg(fig, master=distribution_frame)
            canvas.draw()
            canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

            # Summary statistics under the chart.
            stats_frame = ttk.Frame(distribution_frame)
            stats_frame.pack(pady=20)

            avg_price = sum(prices) / len(prices)
            median_price = np.median(prices)
            stat_texts = (
                f"平均价格: ¥{avg_price:.2f}",
                f"中位数价格: ¥{median_price:.2f}",
                f"最低价格: ¥{min_price:.2f}",
                f"最高价格: ¥{max_price:.2f}",
            )
            for column, text in enumerate(stat_texts):
                ttk.Label(stats_frame, text=text, font=("SimHei", 10)).grid(row=0, column=column, padx=20)

        except Exception as e:
            ttk.Label(distribution_frame, text=f"生成价格分布图表出错: {str(e)}").pack(pady=20)

    def show_sales_rank_frame(self):
        """Show the sales-ranking page: top-N sales bars with a price line overlay.

        Clears the content area, builds a "top N" selector (5/10/20/30,
        default 10) with a refresh button, and renders the chart once
        immediately. Rows are read from the ``laptops`` table through
        ``self.cursor``. Any error raised while querying or plotting is shown
        as a label inside the chart area instead of propagating.
        """
        # Local import keeps this method self-contained. A plain Figure is used
        # instead of plt.subplots(): pyplot keeps every figure it creates in a
        # global registry, so each refresh would otherwise leak one figure.
        from matplotlib.figure import Figure

        self.clear_content_frame()

        sales_frame = ttk.Frame(self.content_frame)
        sales_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        ttk.Label(sales_frame, text="笔记本电脑销量排行榜", font=("SimHei", 14)).pack(pady=10)

        # Row holding the "top N" selector and the refresh button.
        rank_frame = ttk.Frame(sales_frame)
        rank_frame.pack(pady=10)

        ttk.Label(rank_frame, text="显示前:").pack(side=tk.LEFT, padx=5)

        self.rank_count = tk.StringVar(value="10")
        ttk.Combobox(rank_frame, textvariable=self.rank_count, values=["5", "10", "20", "30"], width=5).pack(
            side=tk.LEFT, padx=5)

        # Created before update_rank is defined so the closure never refers to
        # a name that does not exist yet; it is packed below the selector row.
        chart_frame = ttk.Frame(sales_frame)

        def update_rank():
            """Re-query the top-N rows and redraw the chart inside chart_frame."""
            # Drop whatever the previous refresh rendered.
            for widget in chart_frame.winfo_children():
                widget.destroy()

            try:
                count = int(self.rank_count.get())
                if count <= 0:
                    # The combobox is editable, so reject values LIMIT cannot accept.
                    raise ValueError("显示数量必须为正整数")

                # Bind count as a query parameter rather than formatting it into the SQL text.
                self.cursor.execute(
                    "SELECT title, sales, price FROM laptops "
                    "WHERE sales > 0 ORDER BY sales DESC LIMIT %s",
                    (count,))
                data = self.cursor.fetchall()

                if not data:
                    ttk.Label(chart_frame, text="没有找到销量数据,请先爬取数据").pack(pady=20)
                    return

                # Shorten long titles for the axis; only add the ellipsis when
                # something was actually cut off.
                titles = []
                for row in data:
                    title = str(row[0] or "")
                    titles.append(f"{title[:10]}..." if len(title) > 10 else title)
                sales = [row[1] for row in data]
                # The query does not filter on price, so tolerate NULL prices.
                prices = [float(row[2]) if row[2] is not None else float("nan") for row in data]

                # Plot against numeric positions: using the shortened titles as
                # categorical x values would merge products whose titles share
                # the same first 10 characters into a single bar.
                positions = list(range(len(data)))

                fig = Figure(figsize=(10, 6))
                ax1 = fig.add_subplot(111)

                # Sales as bars on the left y axis.
                color = 'tab:blue'
                ax1.set_xlabel('商品', fontsize=10)
                ax1.set_ylabel('销量', color=color, fontsize=10)
                ax1.bar(positions, sales, color=color, alpha=0.7, label='销量')
                ax1.set_xticks(positions)
                ax1.set_xticklabels(titles)
                ax1.tick_params(axis='y', labelcolor=color)
                ax1.tick_params(axis='x', rotation=45)

                # Price as a line on a second y axis sharing the same x axis.
                ax2 = ax1.twinx()
                color = 'tab:red'
                ax2.set_ylabel('价格 (元)', color=color, fontsize=10)
                ax2.plot(positions, prices, color=color, marker='o', label='价格')
                ax2.tick_params(axis='y', labelcolor=color)

                # One combined legend for both axes.
                fig.suptitle(f'销量前{count}的笔记本电脑', fontsize=12)
                lines1, labels1 = ax1.get_legend_handles_labels()
                lines2, labels2 = ax2.get_legend_handles_labels()
                ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper right')

                # Embed the figure in Tkinter.
                canvas = FigureCanvasTkAgg(fig, master=chart_frame)
                fig.tight_layout()
                canvas.draw()
                canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

            except Exception as e:
                ttk.Label(chart_frame, text=f"生成销量排行榜出错: {str(e)}").pack(pady=20)

        ttk.Button(rank_frame, text="更新排行", command=update_rank).pack(side=tk.LEFT, padx=5)

        # Chart area sits below the selector row.
        chart_frame.pack(fill=tk.BOTH, expand=True, pady=10)

        # Initial render.
        update_rank()

    def show_region_distribution_frame(self):
        """Show the shop-region page: a pie chart and a bar chart of the top 10 regions.

        Reads every ``address`` from the ``laptops`` table except the
        placeholder '未知地区', maps each one to a province-level name with
        ``_extract_province`` and counts the occurrences. Any error raised
        while querying or plotting is shown as a label on the page instead of
        propagating.
        """
        # Local imports keep this method self-contained. A plain Figure is used
        # instead of plt.subplots(): pyplot keeps every figure it creates in a
        # global registry, so each visit to this page would leak one figure.
        from collections import Counter
        from matplotlib.figure import Figure

        self.clear_content_frame()

        region_frame = ttk.Frame(self.content_frame)
        region_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        ttk.Label(region_frame, text="笔记本电脑销售店铺地区分布", font=("SimHei", 14)).pack(pady=10)

        try:
            self.cursor.execute("SELECT address FROM laptops WHERE address != '未知地区'")
            addresses = [row[0] for row in self.cursor.fetchall()]

            # Blank addresses yield None and are left out of the statistics.
            province_counts = Counter(
                province
                for province in map(self._extract_province, addresses)
                if province
            )

            if not province_counts:
                ttk.Label(region_frame, text="没有找到地区数据,请先爬取数据").pack(pady=20)
                return

            # Ten most frequent regions, most frequent first.
            top_regions = province_counts.most_common(10)
            provinces = [name for name, _ in top_regions]
            counts = [total for _, total in top_regions]

            fig = Figure(figsize=(12, 6))
            ax1, ax2 = fig.subplots(1, 2)

            # Pie chart: share of each region among the displayed top 10.
            ax1.pie(counts, labels=provinces, autopct='%1.1f%%', startangle=90)
            ax1.axis('equal')
            ax1.set_title('地区分布占比', fontsize=12)

            # Bar chart: absolute count per region.
            ax2.bar(provinces, counts, color='skyblue')
            ax2.set_title('地区店铺数量', fontsize=12)
            ax2.set_xlabel('地区', fontsize=10)
            ax2.set_ylabel('店铺数量', fontsize=10)
            ax2.tick_params(axis='x', rotation=45)

            # Embed the figure in Tkinter.
            canvas = FigureCanvasTkAgg(fig, master=region_frame)
            fig.tight_layout()
            canvas.draw()
            canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

        except Exception as e:
            ttk.Label(region_frame, text=f"生成地区分布图表出错: {str(e)}").pack(pady=20)

    @staticmethod
    def _extract_province(addr):
        """Map a raw address string to a province-level region name.

        Returns the bare name for municipalities, autonomous regions and
        special administrative regions (e.g. '北京', '内蒙古'), the name plus
        '省' for provinces (e.g. '广东省'), the first whitespace-separated
        token unchanged for anything unrecognized, and None for a blank or
        missing address.

        NOTE(review): assumes the address starts with the province name
        (e.g. '广东 深圳'); the stored format is not visible here -- confirm
        against the crawler.
        """
        text = str(addr or "").strip()
        if not text:
            return None

        # Regions that are not provinces and are shown without the '省' suffix.
        unsuffixed = ('北京', '上海', '天津', '重庆', '内蒙古', '新疆', '宁夏',
                      '广西', '西藏', '香港', '澳门')
        # Full province names. Matching the whole name (not just the first
        # character) keeps pairs such as 河南/河北 or 湖南/湖北 distinct.
        provinces = ('黑龙江', '河北', '山西', '辽宁', '吉林', '江苏', '浙江',
                     '安徽', '福建', '江西', '山东', '河南', '湖北', '湖南',
                     '广东', '海南', '四川', '贵州', '云南', '陕西', '甘肃',
                     '青海', '台湾')

        for name in unsuffixed:
            if text.startswith(name):
                return name
        for name in provinces:
            if text.startswith(name):
                return name + '省'

        # Unknown prefix (e.g. an overseas location): keep the leading token as-is.
        return text.split()[0]

    def show_price_range_frame(self):
        """Show the price-range page: item count, total sales and mean price per range.

        Reads ``price`` and ``sales`` for rows where both are positive from the
        ``laptops`` table, assigns each row to one of five fixed price ranges
        (lower bound inclusive, upper bound exclusive) and draws three bar
        charts side by side. Any error raised while querying or plotting is
        shown as a label on the page instead of propagating.
        """
        # Local import keeps this method self-contained. A plain Figure is used
        # instead of plt.subplots(): pyplot keeps every figure it creates in a
        # global registry, so each visit to this page would leak one figure.
        from matplotlib.figure import Figure

        self.clear_content_frame()

        price_range_frame = ttk.Frame(self.content_frame)
        price_range_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        ttk.Label(price_range_frame, text="笔记本电脑价格区间分析", font=("SimHei", 14)).pack(pady=10)

        try:
            self.cursor.execute("SELECT price, sales FROM laptops WHERE price > 0 AND sales > 0")
            data = self.cursor.fetchall()

            if not data:
                ttk.Label(price_range_frame, text="没有找到足够的数据,请先爬取数据").pack(pady=20)
                return

            # (inclusive lower bound, exclusive upper bound, axis label).
            # Bounds and labels are kept together so they cannot drift apart.
            buckets = [
                (0, 3000, "3000元以下"),
                (3000, 5000, "3000-5000元"),
                (5000, 7000, "5000-7000元"),
                (7000, 10000, "7000-10000元"),
                (10000, float('inf'), "10000元以上"),
            ]
            range_labels = [label for _, _, label in buckets]

            range_counts = [0] * len(buckets)
            range_sales = [0] * len(buckets)
            price_totals = [0.0] * len(buckets)

            # Single pass over the rows; each row lands in at most one bucket.
            for row in data:
                # presumably price may arrive as Decimal from the driver;
                # float() normalizes it for the arithmetic and plotting below.
                price = float(row[0])
                sold = row[1]
                for i, (low, high, _) in enumerate(buckets):
                    if low <= price < high:
                        range_counts[i] += 1
                        range_sales[i] += sold
                        price_totals[i] += price
                        break

            # Empty buckets report a mean of 0.0 rather than dividing by zero.
            range_avg_price = [
                total / n if n else 0.0
                for total, n in zip(price_totals, range_counts)
            ]

            fig = Figure(figsize=(15, 5))
            ax1, ax2, ax3 = fig.subplots(1, 3)

            # Number of items per range.
            ax1.bar(range_labels, range_counts, color='lightgreen')
            ax1.set_title('各价格区间商品数量', fontsize=12)
            ax1.set_xlabel('价格区间', fontsize=10)
            ax1.set_ylabel('商品数量', fontsize=10)
            ax1.tick_params(axis='x', rotation=45)

            # Total sales per range.
            ax2.bar(range_labels, range_sales, color='orange')
            ax2.set_title('各价格区间总销量', fontsize=12)
            ax2.set_xlabel('价格区间', fontsize=10)
            ax2.set_ylabel('总销量', fontsize=10)
            ax2.tick_params(axis='x', rotation=45)

            # Mean price per range.
            ax3.bar(range_labels, range_avg_price, color='purple')
            ax3.set_title('各价格区间平均价格', fontsize=12)
            ax3.set_xlabel('价格区间', fontsize=10)
            ax3.set_ylabel('平均价格 (元)', fontsize=10)
            ax3.tick_params(axis='x', rotation=45)

            # Embed the figure in Tkinter.
            canvas = FigureCanvasTkAgg(fig, master=price_range_frame)
            fig.tight_layout()
            canvas.draw()
            canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

        except Exception as e:
            ttk.Label(price_range_frame, text=f"生成价格区间分析图表出错: {str(e)}").pack(pady=20)

    def __del__(self):
        """程序退出时关闭数据库连接(原功能不变)"""
        if self.conn:
            self.conn.close()


if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, then block in the Tk event loop until the window is closed.
    main_window = tk.Tk()
    application = TaobaoLaptopSystem(main_window)
    main_window.mainloop()

数据库下载地址:
mysql-9.4.0-winx64.msi

https://download.csdn.net/download/qq_32257509/91927995

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

EYYLTV

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值