# 将以下代码做成PDF教案,详述每段代码的作用,包括整个程序的架构。
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import ttk
import pandas as pd
import json
import os
import sys
class DataAnalysisApp:
    """Tkinter desktop tool that loads an Excel sheet and runs analyses on it.

    Architecture overview:

    * ``__init__`` builds the window in stages: window geometry, menu bar,
      a horizontal paned layout, the right-hand analysis panel (algorithm
      picker, run button, result text box), the left-hand ``Treeview`` data
      grid and finally the status bar.
    * ``open_file`` reads an Excel file with pandas into ``current_data`` and
      mirrors it into the ``Treeview``.
    * ``run_analysis`` dispatches the algorithm chosen in the combobox to one
      of the ``_run_*`` methods; each of them writes formatted text into
      ``result_text``.
    * ``load_config`` / ``save_config`` persist window size and sash position
      as JSON under ``~/.data_analysis_app``.
    * ``open_formula_window`` lets the user type, test and save a custom
      ``calculate(data)`` function.
    """

    # Text shown in the combobox until the user picks an analysis.
    PLACEHOLDER = "请选择分析方法"

    # (label, key in DataFrame.describe(), format spec) for numeric columns.
    _DESCRIBE_ROWS = (
        ("数量", "count", ".0f"),
        ("均值", "mean", ".2f"),
        ("标准差", "std", ".2f"),
        ("最小值", "min", ".2f"),
        ("25%分位", "25%", ".2f"),
        ("中位数", "50%", ".2f"),
        ("75%分位", "75%", ".2f"),
        ("最大值", "max", ".2f"),
    )

    # (label, aggregation name, format spec) for the grouped statistics.
    _GROUP_ROWS = (
        ("数量", "count", ".0f"),
        ("均值", "mean", ".2f"),
        ("标准差", "std", ".2f"),
        ("最小值", "min", ".2f"),
        ("最大值", "max", ".2f"),
    )

    # (quantile, label) pairs printed by the distribution analysis.
    _QUANTILE_ROWS = (
        (0.1, "10%"),
        (0.25, "25%"),
        (0.5, "50%"),
        (0.75, "75%"),
        (0.9, "90%"),
    )

    # Starter text for the formula editor. The body of ``calculate`` must be
    # indented, otherwise the example itself cannot be executed.
    FORMULA_TEMPLATE = "\n".join((
        "# 示例公式:",
        "# 可以使用 Python 语法编写公式",
        "# 数据可通过 data 变量访问",
        "def calculate(data):",
        "    # 示例:计算某列的平均值",
        "    result = data['列名'].mean()",
        "    return result",
        "# 更多示例:",
        "# 1. 计算多列的平均值",
        "# result = data[['列1', '列2', '列3']].mean()",
        "# 2. 条件筛选",
        "# result = data[data['列名'] > 100].mean()",
        "# 3. 自定义计算",
        "# result = (data['列1'] + data['列2']) / 2",
        "# 4. 分组统计",
        "# result = data.groupby('分组列')['值列'].mean()",
        "# 5. 数据转换",
        "# result = data['列名'].apply(lambda x: x * 2)",
        "",
    ))

    def __init__(self, root):
        """Build the whole user interface inside ``root``.

        Args:
            root: The ``tk.Tk`` main window that hosts the application.
        """
        self.root = root
        self.root.title("数据分析助手")

        # load_config() also sets self.config_file, the path that
        # save_config() later writes to.
        self.config = self.load_config()

        self._setup_window()
        self._build_menu()
        self._build_layout()
        self._build_analysis_panel()
        self._build_data_view()

        # The status bar is packed after the main frame, as before, so the
        # pack order (and therefore the layout) is unchanged.
        self.status_bar = ttk.Label(self.root, text="就绪", anchor=tk.W)
        self.status_bar.pack(side=tk.BOTTOM, fill=tk.X, padx=5, pady=3)

        # DataFrame of the currently opened file; None until a file is loaded.
        self.current_data = None

        # Save the configuration when the window is closed via the title bar.
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

    # ------------------------------------------------------------------
    # UI construction helpers (called once from __init__)
    # ------------------------------------------------------------------
    def _config_int(self, key, default):
        """Return ``self.config[key]`` if it is a plain int, else ``default``."""
        value = self.config.get(key, default)
        if isinstance(value, bool) or not isinstance(value, int):
            return default
        return value

    def _setup_window(self):
        """Size the main window from the config and centre it on the screen."""
        window_width = self._config_int("window_width", 1000)
        window_height = self._config_int("window_height", 600)
        screen_width = self.root.winfo_screenwidth()
        screen_height = self.root.winfo_screenheight()
        center_x = int(screen_width / 2 - window_width / 2)
        center_y = int(screen_height / 2 - window_height / 2)
        self.root.geometry(f'{window_width}x{window_height}+{center_x}+{center_y}')
        self.root.minsize(800, 400)

    def _build_menu(self):
        """Create the menu bar with the File and Formula menus."""
        self.menu_bar = tk.Menu(self.root)

        self.file_menu = tk.Menu(self.menu_bar, tearoff=0)
        self.file_menu.add_command(label="打开", command=self.open_file)
        self.file_menu.add_separator()
        # Exit goes through on_closing so the configuration is saved as well.
        self.file_menu.add_command(label="退出", command=self.on_closing)
        self.menu_bar.add_cascade(label="文件", menu=self.file_menu)

        self.formula_menu = tk.Menu(self.menu_bar, tearoff=0)
        self.formula_menu.add_command(label="自定义公式", command=self.open_formula_window)
        self.menu_bar.add_cascade(label="公式", menu=self.formula_menu)

        self.root.config(menu=self.menu_bar)

    def _build_layout(self):
        """Create the main frame and the draggable left/right split."""
        self.main_frame = ttk.Frame(self.root)
        self.main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        self.paned_window = ttk.PanedWindow(self.main_frame, orient=tk.HORIZONTAL)
        self.paned_window.pack(fill=tk.BOTH, expand=True)

        # Left pane shows the data grid, right pane holds the controls.
        self.left_frame = ttk.Frame(self.paned_window)
        self.right_frame = ttk.Frame(self.paned_window)
        self.paned_window.add(self.left_frame, weight=1)
        self.paned_window.add(self.right_frame, weight=0)

        # Restore the saved sash position shortly after start-up.
        sash_position = self._config_int("paned_position", None)
        if sash_position is not None:
            self.paned_window.after(
                100, lambda: self.paned_window.sashpos(0, sash_position)
            )

        # Persist the layout whenever the user releases the mouse on the sash.
        self.paned_window.bind("<ButtonRelease-1>", self.on_sash_moved)

    def _build_analysis_panel(self):
        """Create the algorithm picker, the run button and the result box."""
        self.algorithm_frame = ttk.LabelFrame(self.right_frame, text="算法选择", padding=10)
        self.algorithm_frame.pack(fill=tk.X, pady=(0, 10))

        self.algorithm_var = tk.StringVar()
        self.algorithms = [
            "描述性统计",
            "相关性分析",
            "数据分布分析",
            "时间序列分析",
            "分组统计分析",
            "缺失值分析",
            "CPK分析",
        ]
        self.algorithm_combo = ttk.Combobox(
            self.algorithm_frame,
            textvariable=self.algorithm_var,
            values=self.algorithms,
            state="readonly",
        )
        self.algorithm_combo.pack(fill=tk.X, pady=(5, 0))
        self.algorithm_combo.set(self.PLACEHOLDER)

        self.run_button = ttk.Button(
            self.algorithm_frame,
            text="运行分析",
            command=self.run_analysis,
        )
        self.run_button.pack(fill=tk.X, pady=(10, 0))

        self.result_frame = ttk.LabelFrame(self.right_frame, text="分析结果", padding=10)
        self.result_frame.pack(fill=tk.BOTH, expand=True)

        self.result_text = tk.Text(
            self.result_frame,
            wrap=tk.WORD,
            width=30,
            height=20,
            font=('Arial', 10),
        )
        self.result_scrollbar = ttk.Scrollbar(
            self.result_frame,
            orient="vertical",
            command=self.result_text.yview,
        )
        self.result_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.result_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self.result_text.configure(yscrollcommand=self.result_scrollbar.set)

        # Tags used by the analysis methods to style their output.
        self.result_text.tag_configure('header', font=('Arial', 11, 'bold'))
        self.result_text.tag_configure('subtitle', font=('Arial', 10, 'bold'))
        self.result_text.tag_configure('warning', foreground='orange')
        self.result_text.tag_configure('error', foreground='red')

        # The box is read-only; writers enable it temporarily.
        self.result_text.config(state='disabled')

    def _build_data_view(self):
        """Create the styled Treeview grid and its two scrollbars."""
        self.tree_frame = ttk.Frame(self.left_frame)
        self.tree_frame.pack(fill=tk.BOTH, expand=True)

        style = ttk.Style()
        # One configure call carrying the final value of every option.
        style.configure(
            "Treeview",
            rowheight=22,
            font=('Arial', 9),
            background="white",
            fieldbackground="white",
            foreground="black",
            borderwidth=1,
            relief='solid',
            bordercolor="#DDD",
            lightcolor="#DDD",
            darkcolor="#DDD",
        )
        style.configure(
            "Treeview.Heading",
            font=('Arial', 9, 'bold'),
            relief='flat',
            borderwidth=1,
            background='#F0F0F0',
            foreground='#000000',
        )
        # Light-blue selection that keeps the text black.
        style.map(
            'Treeview',
            background=[('selected', '#E1E9F5')],
            foreground=[('selected', '#000000')],
        )

        self.tree = ttk.Treeview(self.tree_frame)

        self.vsb = ttk.Scrollbar(self.tree_frame, orient="vertical", command=self.tree.yview)
        self.vsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.hsb = ttk.Scrollbar(self.tree_frame, orient="horizontal", command=self.tree.xview)
        self.hsb.pack(side=tk.BOTTOM, fill=tk.X)

        self.tree.configure(yscrollcommand=self.vsb.set, xscrollcommand=self.hsb.set)
        self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        # Show only the column headings, not the implicit tree column.
        self.tree["show"] = "headings"

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    def _write(self, text, *tags):
        """Append ``text`` (optionally styled with ``tags``) to the result box."""
        self.result_text.insert(tk.END, text, *tags)

    @staticmethod
    def _numeric_columns(data):
        """Return the labels of the int64/float64 columns of ``data``."""
        return data.select_dtypes(include=['int64', 'float64']).columns

    @staticmethod
    def _correlation_strength(corr):
        """Classify a correlation coefficient by its absolute value."""
        magnitude = abs(corr)
        if magnitude > 0.7:
            return "强"
        if magnitude > 0.4:
            return "中等"
        return "弱"

    @staticmethod
    def _compute_cpk(mean, std, usl, lsl):
        """Return ``(cpu, cpl, cpk)`` for the given statistics and spec limits."""
        cpu = (usl - mean) / (3 * std)
        cpl = (mean - lsl) / (3 * std)
        return cpu, cpl, min(cpu, cpl)

    @staticmethod
    def _cpk_rating(cpk):
        """Map a CPK value to the capability rating shown to the user."""
        if cpk >= 1.67:
            return "极佳"
        if cpk >= 1.33:
            return "良好"
        if cpk >= 1.0:
            return "合格"
        return "不合格"

    # ------------------------------------------------------------------
    # Analysis dispatch
    # ------------------------------------------------------------------
    def run_analysis(self):
        """Run the analysis selected in the combobox on the loaded data.

        The first two columns of the sheet are excluded from every analysis.
        Any exception raised by an analysis is shown in the result box.
        """
        if self.current_data is None:
            messagebox.showwarning("警告", "请先加载数据")
            return
        selected_algorithm = self.algorithm_var.get()
        if selected_algorithm == self.PLACEHOLDER:
            messagebox.showwarning("警告", "请选择分析方法")
            return

        handlers = {
            "CPK分析": self._run_cpk_analysis,
            "描述性统计": self._run_descriptive_analysis,
            "相关性分析": self._run_correlation_analysis,
            "数据分布分析": self._run_distribution_analysis,
            "时间序列分析": self._run_time_series_analysis,
            "分组统计分析": self._run_group_analysis,
            "缺失值分析": self._run_missing_value_analysis,
        }
        try:
            # Work on a copy that leaves out the first two columns.
            analysis_data = self.current_data.iloc[:, 2:].copy()
            if analysis_data.empty:
                messagebox.showwarning("警告", "没有可分析的数据列")
                return
            self.result_text.config(state='normal')
            self.result_text.delete(1.0, tk.END)
            handler = handlers.get(selected_algorithm)
            if handler is not None:
                handler(analysis_data)
        except Exception as e:
            # Enable the box first: a disabled Text widget silently ignores
            # delete/insert, which would hide the error message.
            self.result_text.config(state='normal')
            self.result_text.delete(1.0, tk.END)
            self._write(f"⚠ 分析过程出错:\n{str(e)}", 'error')
        finally:
            self.result_text.config(state='disabled')

    # ------------------------------------------------------------------
    # Individual analyses (each writes into the result box)
    # ------------------------------------------------------------------
    def _run_descriptive_analysis(self, data):
        """Write summary statistics for numeric and non-numeric columns."""
        numeric_cols = self._numeric_columns(data)
        non_numeric_cols = data.select_dtypes(exclude=['int64', 'float64']).columns

        if not numeric_cols.empty:
            numeric_stats = data[numeric_cols].describe()
            self._write("═══ 数值型数据统计 ═══\n\n", 'header')
            for col in numeric_cols:
                stats = numeric_stats[col]
                self._write(f"▶ {col}\n", 'subtitle')
                for label, key, spec in self._DESCRIBE_ROWS:
                    self._write(f" • {label}: {stats[key]:{spec}}\n")
                self._write("\n")

        if not non_numeric_cols.empty:
            self._write("═══ 非数值型数据统计 ═══\n\n", 'header')
            for col in non_numeric_cols:
                value_counts = data[col].value_counts()
                unique_count = data[col].nunique()
                total_count = len(data[col])
                self._write(f"▶ {col}\n", 'subtitle')
                self._write(f" • 总数据量: {total_count}\n")
                self._write(f" • 唯一值数量: {unique_count}\n")
                self._write(" • 前5项频率分布:\n")
                # head() keeps the five most frequent values.
                for val, count in value_counts.head().items():
                    percentage = (count / total_count) * 100
                    self._write(f" - {val}: {count} ({percentage:.1f}%)\n")
                self._write("\n")

    def _run_correlation_analysis(self, data):
        """Write the pairwise correlation of all numeric columns."""
        numeric_data = data.select_dtypes(include=['int64', 'float64'])
        if numeric_data.empty:
            self._write("⚠ 没有找到可以进行相关性分析的数值型数据", 'warning')
            return

        result = numeric_data.corr()
        self._write("═══ 相关性分析结果 ═══\n\n", 'header')
        for col1 in result.columns:
            self._write(f"▶ {col1} 的相关性:\n", 'subtitle')
            for col2 in result.columns:
                if col1 == col2:
                    # A column's correlation with itself is not shown.
                    continue
                corr = result.loc[col1, col2]
                strength = self._correlation_strength(corr)
                self._write(f" • 与 {col2}: {corr:.3f} ({strength}相关)\n")
            self._write("\n")

    def _run_distribution_analysis(self, data):
        """Write skewness, kurtosis and selected quantiles per numeric column."""
        numeric_cols = self._numeric_columns(data)
        if numeric_cols.empty:
            self._write("⚠ 没有找到可以分析的数值型数据", 'warning')
            return

        self._write("═══ 数据分布分析 ═══\n\n", 'header')
        for col in numeric_cols:
            col_data = data[col].dropna()
            skewness = col_data.skew()
            kurtosis = col_data.kurtosis()
            quantiles = col_data.quantile([q for q, _ in self._QUANTILE_ROWS])

            self._write(f"▶ {col}\n", 'subtitle')
            self._write(f" • 偏度: {skewness:.3f}\n")
            self._write(f" • 峰度: {kurtosis:.3f}\n")
            self._write(" • 分位数分布:\n")
            for q, label in self._QUANTILE_ROWS:
                self._write(f" - {label}: {quantiles[q]:.2f}\n")
            self._write("\n")

    def _run_time_series_analysis(self, data):
        """Write the time span and per-month record counts of date columns."""
        date_cols = data.select_dtypes(include=['datetime64']).columns
        if date_cols.empty:
            self._write("⚠ 没有找到日期类型的列\n", 'warning')
            return

        self._write("═══ 时间序列分析 ═══\n\n", 'header')
        for date_col in date_cols:
            self._write(f"▶ {date_col} 时间分布\n", 'subtitle')

            # Drop missing dates first: with NaT present, dt.month becomes a
            # float column and month labels would read "3.0月".
            dates = data[date_col].dropna()
            if dates.empty:
                self._write(" • 该列没有有效的日期数据\n\n", 'warning')
                continue

            time_min = dates.min()
            time_max = dates.max()
            time_range = time_max - time_min
            self._write(f" • 时间范围: {time_range.days} 天\n")
            self._write(f" • 起始时间: {time_min:%Y-%m-%d}\n")
            self._write(f" • 结束时间: {time_max:%Y-%m-%d}\n\n")

            monthly_counts = dates.dt.month.value_counts().sort_index()
            self._write(" • 月份分布:\n")
            for month, count in monthly_counts.items():
                self._write(f" - {month}月: {count}条记录\n")
            self._write("\n")

    def _run_group_analysis(self, data):
        """Write per-group statistics of numeric columns by category column."""
        category_cols = data.select_dtypes(include=['object', 'category']).columns
        numeric_cols = self._numeric_columns(data)
        if category_cols.empty or numeric_cols.empty:
            self._write("⚠ 需要同时包含分类数据和数值数据\n", 'warning')
            return

        aggregations = [key for _, key, _ in self._GROUP_ROWS]
        self._write("═══ 分组统计分析 ═══\n\n", 'header')
        for cat_col in category_cols:
            self._write(f"▶ 按 {cat_col} 分组统计\n", 'subtitle')
            for num_col in numeric_cols:
                group_stats = data.groupby(cat_col)[num_col].agg(aggregations)
                self._write(f" • {num_col} 统计:\n")
                for group_name, stats in group_stats.iterrows():
                    self._write(f" - {group_name}:\n")
                    for label, key, spec in self._GROUP_ROWS:
                        self._write(f" {label}: {stats[key]:{spec}}\n")
                self._write("\n")

    def _run_missing_value_analysis(self, data):
        """Write per-column and overall missing-value counts and percentages."""
        self._write("═══ 缺失值分析 ═══\n\n", 'header')

        missing_stats = data.isnull().sum()
        total_rows = len(data)
        # Only columns that actually have missing values are listed.
        missing_cols = missing_stats[missing_stats > 0]
        if missing_cols.empty:
            self._write("✓ 数据中没有发现缺失值\n", 'subtitle')
            return

        self._write("▶ 缺失值统计\n", 'subtitle')
        for col, missing_count in missing_cols.items():
            missing_percentage = (missing_count / total_rows) * 100
            self._write(f" • {col}:\n")
            self._write(f" - 缺失数量: {missing_count}\n")
            self._write(f" - 缺失比例: {missing_percentage:.2f}%\n")

        self._write("\n▶ 缺失值模式\n", 'subtitle')
        total_missing = missing_stats.sum()
        total_cells = total_rows * len(data.columns)
        # Multiply by 100 so the value matches the "%" sign that follows it.
        total_rate = (total_missing / total_cells) * 100
        self._write(f" • 总缺失值数量: {total_missing}\n")
        self._write(f" • 总缺失率: {total_rate:.2f}%\n")

    def _write_cpk_result(self, col, values, usl_text, lsl_text):
        """Compute and write the CPK report for one column.

        Args:
            col: Column label used in the report.
            values: The column's non-missing values (a pandas Series).
            usl_text: Upper specification limit as typed by the user.
            lsl_text: Lower specification limit as typed by the user.
        """
        try:
            usl = float(usl_text)
            lsl = float(lsl_text)
            # Also rejects NaN limits, for which every comparison is False.
            if not lsl < usl:
                raise ValueError("USL must be greater than LSL")

            mean = values.mean()
            std = values.std()
            if pd.isna(std) or std == 0:
                # Fewer than two values or constant data: CPK is undefined.
                self._write(f"⚠ {col}: 标准差为0或数据不足,无法计算CPK\n", 'warning')
                return

            cpu, cpl, cpk = self._compute_cpk(mean, std, usl, lsl)
            rating = self._cpk_rating(cpk)

            self._write(f"▶ {col}\n", 'subtitle')
            self._write(f" • 均值: {mean:.3f}\n")
            self._write(f" • 标准差: {std:.3f}\n")
            self._write(f" • USL: {usl:.3f}\n")
            self._write(f" • LSL: {lsl:.3f}\n")
            self._write(f" • CPU: {cpu:.3f}\n")
            self._write(f" • CPL: {cpl:.3f}\n")
            self._write(f" • CPK: {cpk:.3f}\n")
            self._write(f" • 过程能力评级: {rating}\n\n")
        except ValueError:
            self._write(f"⚠ {col}: 输入数值无效\n", 'warning')
        except Exception as e:
            self._write(f"⚠ {col}: 计算出错 - {str(e)}\n", 'error')

    def _run_cpk_analysis(self, data):
        """Ask for spec limits in a modal dialog, then write the CPK report.

        The dialog is not waited for: this method returns as soon as the
        dialog is built, and the report is written when the user presses the
        calculate button.
        """
        numeric_cols = self._numeric_columns(data)
        if numeric_cols.empty:
            self._write("⚠ 没有找到可以进行CPK分析的数值型数据", 'warning')
            return

        spec_dialog = tk.Toplevel(self.root)
        spec_dialog.title("输入规格限")
        spec_dialog.geometry("400x500")
        # Keep the dialog on top of the main window and grab all input.
        spec_dialog.transient(self.root)
        spec_dialog.grab_set()

        main_frame = ttk.Frame(spec_dialog)
        main_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        # A frame embedded in a canvas makes the input rows scrollable.
        canvas = tk.Canvas(main_frame)
        scrollbar = ttk.Scrollbar(main_frame, orient="vertical", command=canvas.yview)
        content_frame = ttk.Frame(canvas)
        canvas.configure(yscrollcommand=scrollbar.set)

        # Column label -> {'usl': StringVar, 'lsl': StringVar}
        specs = {}

        def close_dialog():
            """Remove the global wheel binding, then destroy the dialog."""
            # Without this the application-wide binding would keep calling
            # a handler that scrolls a canvas which no longer exists.
            canvas.unbind_all("<MouseWheel>")
            spec_dialog.destroy()

        def calculate_cpk():
            """Write one CPK report per numeric column and close the dialog."""
            try:
                self.result_text.config(state='normal')
                self.result_text.delete(1.0, tk.END)
                for col in numeric_cols:
                    self._write_cpk_result(
                        col,
                        data[col].dropna(),
                        specs[col]['usl'].get(),
                        specs[col]['lsl'].get(),
                    )
                close_dialog()
            except Exception as e:
                messagebox.showerror("错误", f"计算过程出错:{str(e)}")
            finally:
                # The result box is read-only again whatever happened.
                self.result_text.config(state='disabled')

        row = 0
        title_label = ttk.Label(
            content_frame, text="请输入各列的规格上下限:", font=('Arial', 10, 'bold')
        )
        title_label.grid(row=row, column=0, columnspan=3, pady=10, padx=5, sticky='w')
        row += 1

        for col in numeric_cols:
            col_label = ttk.Label(content_frame, text=f"{col}:", font=('Arial', 9))
            col_label.grid(row=row, column=0, pady=5, padx=5, sticky='w')

            # Upper limit entry shares the row with the column label.
            usl_frame = ttk.Frame(content_frame)
            usl_frame.grid(row=row, column=1, padx=5, sticky='w')
            usl_var = tk.StringVar()
            ttk.Entry(usl_frame, textvariable=usl_var, width=12).pack(side=tk.LEFT, padx=2)
            ttk.Label(usl_frame, text="USL").pack(side=tk.LEFT, padx=2)
            row += 1

            # Lower limit entry goes on the next row.
            lsl_frame = ttk.Frame(content_frame)
            lsl_frame.grid(row=row, column=1, padx=5, sticky='w')
            lsl_var = tk.StringVar()
            ttk.Entry(lsl_frame, textvariable=lsl_var, width=12).pack(side=tk.LEFT, padx=2)
            ttk.Label(lsl_frame, text="LSL").pack(side=tk.LEFT, padx=2)
            specs[col] = {'usl': usl_var, 'lsl': lsl_var}
            row += 1

            ttk.Separator(content_frame, orient='horizontal').grid(
                row=row, column=0, columnspan=3, sticky='ew', pady=5
            )
            row += 1

        button_frame = ttk.Frame(content_frame)
        button_frame.grid(row=row, column=0, columnspan=3, pady=10)
        ttk.Button(button_frame, text="计算CPK", command=calculate_cpk).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="取消", command=close_dialog).pack(side=tk.LEFT, padx=5)

        canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        canvas_window = canvas.create_window((0, 0), window=content_frame, anchor='nw')

        def configure_scroll_region(event):
            """Keep the scrollable area in sync with the content size."""
            canvas.configure(scrollregion=canvas.bbox('all'))

        def configure_canvas_width(event):
            """Stretch the embedded frame to the canvas width."""
            canvas.itemconfig(canvas_window, width=event.width)

        def on_mousewheel(event):
            """Scroll the canvas by mouse-wheel notches."""
            canvas.yview_scroll(int(-1 * (event.delta / 120)), "units")

        content_frame.bind('<Configure>', configure_scroll_region)
        canvas.bind('<Configure>', configure_canvas_width)
        canvas.bind_all("<MouseWheel>", on_mousewheel)
        # Closing via the title bar must release the wheel binding too.
        spec_dialog.protocol("WM_DELETE_WINDOW", close_dialog)

    # ------------------------------------------------------------------
    # File loading
    # ------------------------------------------------------------------
    def open_file(self):
        """Let the user pick an Excel file and show it in the data grid."""
        file_path = filedialog.askopenfilename(
            title="选择文件",
            # Tk expects several patterns of one type to be space-separated.
            filetypes=(("Excel files", "*.xlsx *.xls"), ("All files", "*.*")),
        )
        if not file_path:
            return

        try:
            self.current_data = pd.read_excel(file_path)
            data = self.current_data

            # Replace whatever the grid showed before.
            self.tree.delete(*self.tree.get_children())
            self.tree["columns"] = list(data.columns)

            for col in data.columns:
                longest_cell = data[col].astype(str).str.len().max()
                # max() is NaN for a column without rows.
                cell_chars = 0 if pd.isna(longest_cell) else int(longest_cell)
                # Roughly 7 px per character, clamped to 50..150 px.
                max_width = max(len(str(col)), cell_chars) * 7
                width = min(max(max_width, 50), 150)
                self.tree.column(col, anchor=tk.W, width=width, minwidth=40, stretch=True)
                self.tree.heading(col, text=col, anchor=tk.W)

            # Subtle alternating row colours.
            self.tree.tag_configure('oddrow', background='#FAFAFA')
            self.tree.tag_configure('evenrow', background='#FFFFFF')

            # itertuples keeps each column's own type; iterrows would turn
            # integer cells into floats in all-numeric sheets.
            for i, row in enumerate(data.itertuples(index=False, name=None)):
                tags = ('evenrow',) if i % 2 == 0 else ('oddrow',)
                self.tree.insert("", "end", values=list(row), tags=tags)

            self.status_bar.config(
                text=f"已加载 {len(data)} 行数据,{len(data.columns)} 列 | {file_path}"
            )

            # Results of the previous file no longer apply.
            self.result_text.config(state='normal')
            self.result_text.delete(1.0, tk.END)
            self.result_text.config(state='disabled')
            self.algorithm_var.set(self.PLACEHOLDER)
        except Exception as e:
            messagebox.showerror("错误", f"无法读取文件: {e}")
            self.status_bar.config(text="读取文件失败")

    # ------------------------------------------------------------------
    # Configuration persistence
    # ------------------------------------------------------------------
    def load_config(self):
        """Load the saved settings and remember the config file path.

        Sets ``self.config_file`` to ``~/.data_analysis_app/config.json``.

        Returns:
            The settings as a dict; an empty dict when the file is missing,
            unreadable, not valid JSON or not a JSON object.
        """
        config_dir = os.path.expanduser("~/.data_analysis_app")
        self.config_file = os.path.join(config_dir, "config.json")

        try:
            os.makedirs(config_dir, exist_ok=True)
            with open(self.config_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or malformed JSON: start with defaults.
            # (json.JSONDecodeError is a subclass of ValueError.)
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def save_config(self):
        """Write window size and sash position to the config file.

        Failures are reported on stdout and otherwise ignored.
        """
        try:
            config = {
                "window_width": self.root.winfo_width(),
                "window_height": self.root.winfo_height(),
                "paned_position": self.paned_window.sashpos(0),
            }
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=4)
        except (OSError, tk.TclError, TypeError, ValueError) as e:
            print(f"保存配置失败: {e}")

    def on_sash_moved(self, event):
        """Persist the layout after the user has dragged the sash."""
        self.save_config()

    def on_closing(self):
        """Save the configuration and destroy the main window."""
        try:
            self.save_config()
        finally:
            # The window must close even if saving fails unexpectedly.
            self.root.destroy()

    # ------------------------------------------------------------------
    # Custom formulas
    # ------------------------------------------------------------------
    def open_formula_window(self):
        """Open the editor where a custom formula can be tested and saved."""
        formula_window = tk.Toplevel(self.root)
        formula_window.title("自定义公式")

        # Centre the 600x400 window on the screen.
        window_width = 600
        window_height = 400
        screen_width = formula_window.winfo_screenwidth()
        screen_height = formula_window.winfo_screenheight()
        x = int((screen_width - window_width) / 2)
        y = int((screen_height - window_height) / 2)
        formula_window.geometry(f"{window_width}x{window_height}+{x}+{y}")

        main_frame = ttk.Frame(formula_window, padding="10")
        main_frame.pack(fill=tk.BOTH, expand=True)

        ttk.Label(main_frame, text="在这里输入您的自定义公式:", font=('Arial', 10)).pack(anchor=tk.W)

        name_frame = ttk.Frame(main_frame)
        name_frame.pack(fill=tk.X, pady=(10, 5))
        ttk.Label(name_frame, text="公式名称:").pack(side=tk.LEFT)
        formula_name = ttk.Entry(name_frame)
        formula_name.pack(side=tk.LEFT, fill=tk.X, expand=True)

        formula_frame = ttk.LabelFrame(main_frame, text="公式内容", padding="5")
        formula_frame.pack(fill=tk.BOTH, expand=True, pady=(5, 10))

        text_container = ttk.Frame(formula_frame)
        text_container.pack(fill=tk.BOTH, expand=True)

        formula_text = tk.Text(text_container, wrap=tk.WORD, font=('Consolas', 11))
        v_scrollbar = ttk.Scrollbar(text_container, orient=tk.VERTICAL, command=formula_text.yview)
        v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        h_scrollbar = ttk.Scrollbar(formula_frame, orient=tk.HORIZONTAL, command=formula_text.xview)
        h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X)
        formula_text.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
        formula_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        formula_text.insert('1.0', self.FORMULA_TEMPLATE)

        button_frame = ttk.Frame(main_frame)
        button_frame.pack(fill=tk.X, pady=(0, 5))

        def save_formula():
            """Store the formula under its name in custom_formulas.json."""
            name = formula_name.get().strip()
            formula = formula_text.get('1.0', tk.END).strip()
            if not name:
                messagebox.showwarning("警告", "请输入公式名称")
                return
            if not formula:
                messagebox.showwarning("警告", "请输入公式内容")
                return

            try:
                # NOTE: the path is relative, so the file is created in the
                # process's current working directory.
                formulas_file = "custom_formulas.json"
                formulas = {}
                if os.path.exists(formulas_file):
                    with open(formulas_file, 'r', encoding='utf-8') as f:
                        formulas = json.load(f)
                # Adds a new formula or overwrites one with the same name.
                formulas[name] = formula
                with open(formulas_file, 'w', encoding='utf-8') as f:
                    json.dump(formulas, f, indent=4, ensure_ascii=False)
                messagebox.showinfo("成功", "公式保存成功!")
                formula_window.destroy()
            except Exception as e:
                messagebox.showerror("错误", f"保存公式失败:{str(e)}")

        def test_formula():
            """Execute the formula against the loaded data and show the result."""
            if self.current_data is None:
                messagebox.showwarning("警告", "请先加载数据")
                return
            formula = formula_text.get('1.0', tk.END).strip()
            if not formula:
                messagebox.showwarning("警告", "请输入公式内容")
                return

            try:
                # SECURITY: exec() runs the typed text as Python with the
                # full privileges of this process. Only run formulas you
                # wrote yourself or trust.
                #
                # A single namespace is used on purpose: with separate
                # globals/locals dicts, imports and helpers defined at the
                # top of the formula would be invisible inside calculate().
                # It starts as a copy of this module's globals so names such
                # as ``pd`` stay available to formulas.
                namespace = dict(globals())
                exec(formula, namespace)
                calculate = namespace.get('calculate')
                if not callable(calculate):
                    raise ValueError("未找到 calculate 函数")
                # Pass a copy so a formula under test cannot alter the
                # loaded data.
                result = calculate(self.current_data.copy())
                messagebox.showinfo("测试结果", f"计算结果:{result}")
            except Exception as e:
                messagebox.showerror("错误", f"公式测试失败:{str(e)}")

        ttk.Button(button_frame, text="测试公式", command=test_formula).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="保存公式", command=save_formula).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="取消", command=formula_window.destroy).pack(side=tk.RIGHT, padx=5)

    def get_resource_path(self, relative_path):
        """Return the absolute path of a bundled resource file.

        Args:
            relative_path: Path of the resource relative to the base folder.

        Returns:
            ``relative_path`` joined onto ``sys._MEIPASS`` when that attribute
            exists (PyInstaller sets it to its temporary folder), otherwise
            onto the current working directory.
        """
        base_path = getattr(sys, "_MEIPASS", os.path.abspath("."))
        return os.path.join(base_path, relative_path)
if __name__ == "__main__":
    # Script entry point: create the Tk main window, build the application
    # inside it, then run the Tk event loop until the window is closed.
    root = tk.Tk()
    app = DataAnalysisApp(root)
    root.mainloop()