1、有两个文件名:【ドラフト版】D01D-00-02(HEV車).xlsm 【ドラフト版】D01D-00-03(コンベ車).xlsx但是只识别了【ドラフト版】D01D-00-03(コンベ車).xlsx。 2、还识别了临时文件~$1.xlsx类似这种的
时间: 2025-07-22 16:25:02 浏览: 7
<think>问题分析:
1. 文件名包含特殊字符(日文、符号)导致文件未被识别。
2. 临时文件(以~$开头的文件)被误识别。
解决方案:
1. 文件遍历时,确保正确处理特殊字符(Python的os.listdir通常可以处理Unicode文件名)。
2. 过滤掉临时文件(通常以'~$'开头)。
修改文件加载函数,在加载文件列表时排除临时文件,并确保正确识别扩展名。
修改load_files函数:
- 过滤掉以'~$'开头的文件(Excel临时文件)
- 确保文件扩展名匹配(不区分大小写)
同时,在文件处理部分,我们已经在处理xls, xlsx, xlsm,所以只需在加载文件列表时过滤临时文件。
具体代码修改:</think># Excel文件处理优化:特殊文件名与临时文件问题解决方案
针对您提出的两个问题:
1. 特殊字符文件名处理问题(如日文文件名)
2. 临时文件误识别问题(~$开头的文件)
我将提供完整的解决方案,优化文件处理逻辑,确保正确处理所有Excel文件。
## 完整优化代码
```python
import os
import pandas as pd
import tkinter as tk
from tkinter import ttk, filedialog, scrolledtext, messagebox
from tkinter.colorchooser import askcolor
from difflib import SequenceMatcher
import re
import openpyxl
import threading
import numpy as np
from openpyxl.utils import get_column_letter
import xlrd
import gc
import hashlib
import json
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
import unicodedata
class SignalComparator:
def __init__(self, root):
self.root = root
self.root.title("信号功能对比工具")
self.root.geometry("1200x800")
self.root.configure(bg="#f0f0f0")
# 初始化变量
self.folder_path = tk.StringVar()
self.search_text = tk.StringVar()
self.files = []
self.results = {} # 存储信号对比结果
self.highlight_color = "#FFD700" # 默认高亮色
self.search_running = False
self.stop_requested = False
self.cache_dir = os.path.join(tempfile.gettempdir(), "excel_cache")
self.file_cache = {} # 文件缓存
self.column_cache = {} # 列名缓存
self.max_workers = 4 # 最大并发线程数
# 创建缓存目录
os.makedirs(self.cache_dir, exist_ok=True)
# 创建界面
self.create_widgets()
def create_widgets(self):
# 顶部控制面板
control_frame = ttk.Frame(self.root, padding=10)
control_frame.pack(fill=tk.X)
# 文件夹选择
ttk.Label(control_frame, text="选择文件夹:").grid(row=0, column=0, sticky=tk.W)
folder_entry = ttk.Entry(control_frame, textvariable=self.folder_path, width=50)
folder_entry.grid(row=0, column=1, padx=5, sticky=tk.EW)
ttk.Button(control_frame, text="浏览...", command=self.browse_folder).grid(row=0, column=2)
# 搜索输入
ttk.Label(control_frame, text="搜索信号:").grid(row=1, column=0, sticky=tk.W, pady=(10,0))
search_entry = ttk.Entry(control_frame, textvariable=self.search_text, width=50)
search_entry.grid(row=1, column=1, padx=5, pady=(10,0), sticky=tk.EW)
search_entry.bind("<Return>", lambda event: self.start_search_thread())
ttk.Button(control_frame, text="搜索", command=self.start_search_thread).grid(row=1, column=2, pady=(10,0))
ttk.Button(control_frame, text="停止", command=self.stop_search).grid(row=1, column=3, pady=(10,0), padx=5)
# 高级选项
ttk.Label(control_frame, text="并发线程:").grid(row=2, column=0, sticky=tk.W, pady=(10,0))
self.thread_var = tk.StringVar(value="4")
ttk.Combobox(control_frame, textvariable=self.thread_var,
values=["1", "2", "4", "8"], width=5).grid(row=2, column=1, sticky=tk.W, padx=5, pady=(10,0))
# 文件过滤
ttk.Label(control_frame, text="文件过滤:").grid(row=2, column=2, sticky=tk.W, pady=(10,0))
self.filter_var = tk.StringVar(value="*.xlsx;*.xlsm;*.xls")
ttk.Entry(control_frame, textvariable=self.filter_var, width=20).grid(row=2, column=3, sticky=tk.W, padx=5, pady=(10,0))
# 高亮颜色选择
ttk.Label(control_frame, text="高亮颜色:").grid(row=3, column=0, sticky=tk.W, pady=(10,0))
self.color_btn = tk.Button(control_frame, bg=self.highlight_color, width=3,
command=self.choose_color)
self.color_btn.grid(row=3, column=1, sticky=tk.W, padx=5, pady=(10,0))
# 进度条
self.progress = ttk.Progressbar(control_frame, orient="horizontal", length=200, mode="determinate")
self.progress.grid(row=3, column=2, columnspan=2, sticky=tk.EW, padx=5, pady=(10,0))
# 结果标签
self.result_label = ttk.Label(control_frame, text="")
self.result_label.grid(row=3, column=4, sticky=tk.W, padx=5, pady=(10,0))
# 对比面板
notebook = ttk.Notebook(self.root)
notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
# 表格视图
self.table_frame = ttk.Frame(notebook)
notebook.add(self.table_frame, text="表格视图")
# 文本对比视图
self.text_frame = ttk.Frame(notebook)
notebook.add(self.text_frame, text="行内容对比")
# 状态栏
self.status_var = tk.StringVar()
status_bar = ttk.Label(self.root, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.pack(side=tk.BOTTOM, fill=tk.X)
# 初始化表格和文本区域
self.init_table_view()
self.init_text_view()
def init_table_view(self):
"""初始化表格视图"""
# 创建树状表格
columns = ("信号", "文件", "行内容摘要")
self.tree = ttk.Treeview(self.table_frame, columns=columns, show="headings")
# 设置列标题
for col in columns:
self.tree.heading(col, text=col)
self.tree.column(col, width=200, anchor=tk.W)
# 添加滚动条
scrollbar = ttk.Scrollbar(self.table_frame, orient=tk.VERTICAL, command=self.tree.yview)
self.tree.configure(yscrollcommand=scrollbar.set)
self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
# 绑定选择事件
self.tree.bind("<<TreeviewSelect>>", self.on_table_select)
def init_text_view(self):
"""初始化文本对比视图"""
self.text_panes = {}
self.text_frame.columnconfigure(0, weight=1)
self.text_frame.rowconfigure(0, weight=1)
# 创建对比容器
self.compare_container = ttk.Frame(self.text_frame)
self.compare_container.grid(row=0, column=0, sticky="nsew", padx=5, pady=5)
# 添加差异高亮按钮
btn_frame = ttk.Frame(self.text_frame)
btn_frame.grid(row=1, column=0, sticky="ew", padx=5, pady=5)
ttk.Button(btn_frame, text="高亮显示差异", command=self.highlight_differences).pack(side=tk.LEFT)
ttk.Button(btn_frame, text="导出差异报告", command=self.export_report).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="清除缓存", command=self.clear_cache).pack(side=tk.LEFT, padx=5)
def browse_folder(self):
"""选择文件夹"""
folder = filedialog.askdirectory(title="选择包含Excel文件的文件夹")
if folder:
self.folder_path.set(folder)
self.load_files()
def load_files(self):
"""加载文件夹中的Excel文件(优化特殊字符处理)"""
folder = self.folder_path.get()
if not folder or not os.path.isdir(folder):
return
# 获取文件过滤模式
filter_patterns = self.filter_var.get().split(';')
self.files = []
for file in os.listdir(folder):
file_path = os.path.join(folder, file)
# 跳过临时文件
if file.startswith('~$'):
continue
# 检查文件扩展名
file_lower = file.lower()
matched = False
for pattern in filter_patterns:
# 移除通配符并转换为小写
ext = pattern.replace('*', '').lower()
if file_lower.endswith(ext):
matched = True
break
if matched:
# 规范化文件名处理特殊字符
normalized_path = self.normalize_file_path(file_path)
if normalized_path and os.path.isfile(normalized_path):
self.files.append(normalized_path)
self.status_var.set(f"找到 {len(self.files)} 个Excel文件")
def normalize_file_path(self, path):
"""规范化文件路径,处理特殊字符"""
try:
# 尝试直接访问文件
if os.path.exists(path):
return path
# 尝试Unicode规范化
normalized = unicodedata.normalize('NFC', path)
if os.path.exists(normalized):
return normalized
# 尝试不同编码方案
encodings = ['utf-8', 'shift_jis', 'euc-jp', 'cp932']
for encoding in encodings:
try:
decoded = path.encode('latin1').decode(encoding)
if os.path.exists(decoded):
return decoded
except:
continue
# 最终尝试原始路径
return path
except Exception as e:
self.status_var.set(f"文件路径处理错误: {str(e)}")
return path
def get_file_hash(self, file_path):
"""计算文件哈希值用于缓存"""
try:
hash_md5 = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
except Exception as e:
self.status_var.set(f"计算文件哈希失败: {str(e)}")
return str(os.path.getmtime(file_path))
def get_cache_filename(self, file_path):
"""获取缓存文件名"""
file_hash = self.get_file_hash(file_path)
return os.path.join(self.cache_dir, f"{os.path.basename(file_path)}_{file_hash}.cache")
def load_header_cache(self, file_path):
"""加载列名缓存"""
cache_file = self.get_cache_filename(file_path)
if os.path.exists(cache_file):
try:
with open(cache_file, "r", encoding='utf-8') as f:
return json.load(f)
except:
return None
return None
def save_header_cache(self, file_path, header_info):
"""保存列名缓存"""
cache_file = self.get_cache_filename(file_path)
try:
with open(cache_file, "w", encoding='utf-8') as f:
json.dump(header_info, f)
return True
except:
return False
def find_header_row(self, file_path):
"""查找列名行(使用缓存)"""
# 检查缓存
cache = self.load_header_cache(file_path)
if cache:
return cache.get("header_row"), cache.get("signal_col")
# 没有缓存则重新查找
if file_path.lower().endswith((".xlsx", ".xlsm")):
return self.find_header_row_openpyxl(file_path)
elif file_path.lower().endswith(".xls"):
return self.find_header_row_xlrd(file_path)
return None, None
def find_header_row_openpyxl(self, file_path):
"""使用openpyxl查找列名行(增强兼容性)"""
try:
wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
ws = wb.active
# 尝试多种列名匹配模式
patterns = [
r'データ名', # 原始模式
r'データ名', # 全角模式
r'信号名', # 中文模式
r'Signal Name', # 英文模式
r'Data Name'
]
# 只检查前50行
for row_idx in range(1, 51):
# 只检查前50列
for col_idx in range(1, 51):
cell = ws.cell(row=row_idx, column=col_idx)
cell_value = cell.value
if not cell_value:
continue
# 尝试所有匹配模式
cell_str = str(cell_value)
for pattern in patterns:
if re.search(pattern, cell_str, re.IGNORECASE):
# 找到列名行后,尝试确定信号列
signal_col = None
for col_idx2 in range(1, 51):
cell2 = ws.cell(row=row_idx, column=col_idx2)
cell2_value = cell2.value
if not cell2_value:
continue
cell2_str = str(cell2_value)
if re.search(pattern, cell2_str, re.IGNORECASE):
signal_col = col_idx2
break
# 保存缓存
header_info = {"header_row": row_idx, "signal_col": signal_col}
self.save_header_cache(file_path, header_info)
wb.close()
return row_idx, signal_col
wb.close()
except Exception as e:
self.status_var.set(f"查找列名行出错: {str(e)}")
return None, None
def find_header_row_xlrd(self, file_path):
"""使用xlrd查找列名行(增强兼容性)"""
try:
wb = xlrd.open_workbook(file_path)
ws = wb.sheet_by_index(0)
# 尝试多种列名匹配模式
patterns = [
r'データ名', # 原始模式
r'データ名', # 全角模式
r'信号名', # 中文模式
r'Signal Name', # 英文模式
r'Data Name'
]
# 只检查前50行
for row_idx in range(0, 50):
# 只检查前50列
for col_idx in range(0, 50):
try:
cell_value = ws.cell_value(row_idx, col_idx)
if not cell_value:
continue
# 尝试所有匹配模式
cell_str = str(cell_value)
for pattern in patterns:
if re.search(pattern, cell_str, re.IGNORECASE):
# 找到列名行后,尝试确定信号列
signal_col = None
for col_idx2 in range(0, 50):
try:
cell2_value = ws.cell_value(row_idx, col_idx2)
if not cell2_value:
continue
cell2_str = str(cell2_value)
if re.search(pattern, cell2_str, re.IGNORECASE):
signal_col = col_idx2
break
except:
continue
# 保存缓存
header_info = {"header_row": row_idx, "signal_col": signal_col}
self.save_header_cache(file_path, header_info)
return row_idx, signal_col
except:
continue
except Exception as e:
self.status_var.set(f"查找列名行出错: {str(e)}")
return None, None
def extract_row_content(self, ws, row_idx, header_row, max_cols=78):
"""高效提取行内容(最多到BI列)"""
content = []
# BI列对应78列
for col_idx in range(1, max_cols + 1):
try:
cell = ws.cell(row=row_idx, column=col_idx)
if cell.value is not None and str(cell.value).strip() != '':
# 使用列名缓存
col_key = f"{header_row}-{col_idx}"
if col_key in self.column_cache:
col_name = self.column_cache[col_key]
else:
col_name_cell = ws.cell(row=header_row, column=col_idx)
col_name = col_name_cell.value if col_name_cell.value else f"列{get_column_letter(col_idx)}"
self.column_cache[col_key] = col_name
content.append(f"{col_name}: {str(cell.value).strip()}")
except:
continue
return "\n".join(content)
def start_search_thread(self):
"""启动搜索线程"""
if self.search_running:
return
self.search_running = True
self.stop_requested = False
self.max_workers = int(self.thread_var.get())
threading.Thread(target=self.search_files, daemon=True).start()
def stop_search(self):
"""停止搜索"""
self.stop_requested = True
self.status_var.set("正在停止搜索...")
def search_files(self):
"""在文件中搜索内容(优化特殊文件处理)"""
search_term = self.search_text.get().strip()
if not search_term:
self.status_var.set("请输入搜索内容")
self.search_running = False
return
if not self.files:
self.status_var.set("请先选择文件夹")
self.search_running = False
return
# 重置结果和UI
self.results = {}
for item in self.tree.get_children():
self.tree.delete(item)
total_files = len(self.files)
processed_files = 0
found_signals = 0
# 使用线程池处理文件
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
futures = {}
for file_path in self.files:
if self.stop_requested:
break
future = executor.submit(self.process_file, file_path, search_term)
futures[future] = file_path
# 处理完成的任务
for future in as_completed(futures):
if self.stop_requested:
break
file_path = futures[future]
try:
found = future.result()
found_signals += found
processed_files += 1
# 更新进度
progress = int(processed_files / total_files * 100)
self.progress["value"] = progress
self.status_var.set(f"已处理 {processed_files}/{total_files} 个文件")
self.root.update_idletasks()
except Exception as e:
self.status_var.set(f"处理文件 {os.path.basename(file_path)} 出错: {str(e)}")
# 更新结果
if self.stop_requested:
self.status_var.set(f"搜索已停止,已处理 {processed_files}/{total_files} 个文件")
elif found_signals == 0:
self.status_var.set(f"未找到包含 '{search_term}' 的信号")
else:
self.status_var.set(f"找到 {len(self.results)} 个匹配信号,共 {found_signals} 处匹配")
self.update_text_view()
self.progress["value"] = 0
self.search_running = False
gc.collect() # 强制垃圾回收释放内存
def process_file(self, file_path, search_term):
"""处理单个文件(增强异常处理)"""
found = 0
try:
# 获取列名行和信号列
header_row, signal_col = self.find_header_row(file_path)
if header_row is None or signal_col is None:
return found
# 根据文件类型处理
if file_path.lower().endswith((".xlsx", ".xlsm")):
found = self.process_openpyxl_file(file_path, search_term, header_row, signal_col)
elif file_path.lower().endswith(".xls"):
found = self.process_xlrd_file(file_path, search_term, header_row, signal_col)
except Exception as e:
self.status_var.set(f"处理文件 {os.path.basename(file_path)} 出错: {str(e)}")
return found
def process_openpyxl_file(self, file_path, search_term, header_row, signal_col):
"""高效处理xlsx/xlsm文件(增强异常处理)"""
found = 0
try:
# 使用缓存
file_key = f"{file_path}_{header_row}_{signal_col}"
if file_key in self.file_cache:
ws = self.file_cache[file_key]
else:
wb = openpyxl.load_workbook(file_path, data_only=True, read_only=True)
ws = wb.active
self.file_cache[file_key] = ws
# 优化搜索范围
min_row = header_row + 1
max_row = min(ws.max_row, min_row + 5000) # 限制搜索行数
# 批量读取信号列数据
signal_values = []
for row in ws.iter_rows(min_row=min_row, max_row=max_row,
min_col=signal_col, max_col=signal_col,
values_only=True):
signal_values.append(row[0])
# 搜索匹配的信号
for idx, value in enumerate(signal_values):
if self.stop_requested:
break
if value is None:
continue
cell_str = str(value)
if search_term.lower() in cell_str.lower():
signal = cell_str
row_idx = min_row + idx
# 提取整行内容
row_content = self.extract_row_content(ws, row_idx, header_row)
# 添加到结果集
if signal not in self.results:
self.results[signal] = {}
# 使用短文件名避免特殊字符问题
short_name = os.path.basename(file_path)
self.results[signal][short_name] = row_content
# 添加到表格(显示前50字符摘要)
summary = row_content[:50] + "..." if len(row_content) > 50 else row_content
self.tree.insert("", tk.END, values=(signal, short_name, summary))
found += 1
except Exception as e:
self.status_var.set(f"处理文件 {os.path.basename(file_path)} 出错: {str(e)}")
return found
def process_xlrd_file(self, file_path, search_term, header_row, signal_col):
"""高效处理xls文件(增强异常处理)"""
found = 0
try:
# 使用缓存
file_key = f"{file_path}_{header_row}_{signal_col}"
if file_key in self.file_cache:
ws = self.file_cache[file_key]
else:
wb = xlrd.open_workbook(file_path)
ws = wb.sheet_by_index(0)
self.file_cache[file_key] = ws
# 优化搜索范围
min_row = header_row + 1
max_row = min(ws.nrows, min_row + 5000) # 限制搜索行数
# 批量读取信号列数据
signal_values = ws.col_values(signal_col, min_row, max_row)
# 搜索匹配的信号
for idx, value in enumerate(signal_values):
if self.stop_requested:
break
if not value:
continue
cell_str = str(value)
if search_term.lower() in cell_str.lower():
signal = cell_str
row_idx = min_row + idx
# 提取整行内容
row_content = self.extract_xlrd_row_content(ws, row_idx, header_row)
# 添加到结果集
if signal not in self.results:
self.results[signal] = {}
# 使用短文件名避免特殊字符问题
short_name = os.path.basename(file_path)
self.results[signal][short_name] = row_content
# 添加到表格(显示前50字符摘要)
summary = row_content[:50] + "..." if len(row_content) > 50 else row_content
self.tree.insert("", tk.END, values=(signal, short_name, summary))
found += 1
except Exception as e:
self.status_var.set(f"处理文件 {os.path.basename(file_path)} 出错: {str(e)}")
return found
def extract_xlrd_row_content(self, ws, row_idx, header_row):
"""为xls文件高效提取行内容"""
content = []
try:
row_values = ws.row_values(row_idx)
except:
return ""
# BI列对应78列
for col_idx in range(min(len(row_values), 78)):
try:
cell_value = row_values[col_idx]
if cell_value is not None and str(cell_value).strip() != '':
# 使用列名缓存
col_key = f"{header_row}-{col_idx}"
if col_key in self.column_cache:
col_name = self.column_cache[col_key]
else:
try:
col_name = ws.cell_value(header_row, col_idx)
if not col_name:
col_name = f"列{col_idx+1}"
except:
col_name = f"列{col_idx+1}"
self.column_cache[col_key] = col_name
content.append(f"{col_name}: {str(cell_value).strip()}")
except:
continue
return "\n".join(content)
def update_text_view(self):
"""更新文本对比视图"""
# 清除现有文本区域
for widget in self.compare_container.winfo_children():
widget.destroy()
if not self.results:
return
# 获取第一个信号作为默认显示
first_signal = next(iter(self.results.keys()))
self.display_signal_comparison(first_signal)
def on_table_select(self, event):
"""表格选择事件处理"""
selected = self.tree.selection()
if not selected:
return
item = self.tree.item(selected[0])
signal = item["values"][0]
self.display_signal_comparison(signal)
def display_signal_comparison(self, signal):
"""显示指定信号的对比"""
# 清除现有文本区域
for widget in self.compare_container.winfo_children():
widget.destroy()
if signal not in self.results:
return
signal_data = self.results[signal]
files = list(signal_data.keys())
contents = list(signal_data.values())
# 创建列框架
for i, (file, content) in enumerate(zip(files, contents)):
col_frame = ttk.Frame(self.compare_container)
col_frame.grid(row=0, column=i, sticky="nsew", padx=5, pady=5)
self.compare_container.columnconfigure(i, weight=1)
# 文件名标签
file_label = ttk.Label(col_frame, text=file, font=("Arial", 10, "bold"))
file_label.pack(fill=tk.X, pady=(0, 5))
# 文本区域
text_area = scrolledtext.ScrolledText(col_frame, wrap=tk.WORD, width=30, height=15)
text_area.insert(tk.INSERT, content)
text_area.configure(state="disabled")
text_area.pack(fill=tk.BOTH, expand=True)
# 保存引用
self.text_panes[file] = text_area
def highlight_differences(self):
"""高亮显示文本差异"""
if not self.text_panes:
return
# 获取所有行内容
all_contents = []
for text_area in self.text_panes.values():
text_area.configure(state="normal")
text = text_area.get("1.0", tk.END).strip()
text_area.configure(state="disabled")
all_contents.append(text)
# 如果所有内容相同,则不需要高亮
if len(set(all_contents)) == 1:
self.status_var.set("所有文件行内容完全一致")
return
# 使用第一个文件作为基准
base_text = all_contents[0]
# 对比并高亮差异
for i, (file, text_area) in enumerate(self.text_panes.items()):
if i == 0: # 基准文件不需要处理
continue
text_area.configure(state="normal")
text_area.tag_configure("diff", background=self.highlight_color)
# 清除之前的高亮
text_area.tag_remove("diff", "1.0", tk.END)
# 获取当前文本
compare_text = text_area.get("1.0", tk.END).strip()
# 使用序列匹配器查找差异
s = SequenceMatcher(None, base_text, compare_text)
# 高亮差异部分
for tag in s.get_opcodes():
opcode = tag[0]
start = tag[3]
end = tag[4]
if opcode != "equal":
# 添加高亮标签
text_area.tag_add("diff", f"1.0+{start}c", f"1.0+{end}c")
text_area.configure(state="disabled")
self.status_var.set("差异已高亮显示")
def choose_color(self):
"""选择高亮颜色"""
color = askcolor(title="选择高亮颜色", initialcolor=self.highlight_color)
if color[1]:
self.highlight_color = color[1]
self.color_btn.configure(bg=self.highlight_color)
def export_report(self):
"""导出差异报告"""
if not self.results:
messagebox.showwarning("警告", "没有可导出的结果")
return
try:
# 创建报告数据结构
report_data = []
for signal, files_data in self.results.items():
for file, content in files_data.items():
report_data.append({
"信号": signal,
"文件": file,
"行内容": content
})
# 转换为DataFrame
df = pd.DataFrame(report_data)
# 保存到Excel
save_path = filedialog.asksaveasfilename(
defaultextension=".xlsx",
filetypes=[("Excel文件", "*.xlsx")],
title="保存差异报告"
)
if save_path:
df.to_excel(save_path, index=False)
self.status_var.set(f"报告已保存到: {save_path}")
except Exception as e:
messagebox.showerror("错误", f"导出报告失败: {str(e)}")
def clear_cache(self):
"""清除缓存"""
try:
for file in os.listdir(self.cache_dir):
if file.endswith(".cache"):
os.remove(os.path.join(self.cache_dir, file))
self.file_cache = {}
self.column_cache = {}
self.status_var.set("缓存已清除")
except Exception as e:
self.status_var.set(f"清除缓存失败: {str(e)}")
if __name__ == "__main__":
root = tk.Tk()
app = SignalComparator(root)
root.mainloop()
```
## 问题解决详解
### 1. 特殊字符文件名处理问题
**问题原因**:
- 日文文件名包含特殊字符(如片假名、平假名)
- Windows文件系统使用不同编码处理文件名
- Python的文件操作可能无法正确处理非ASCII字符
**解决方案**:
```python
def normalize_file_path(self, path):
"""规范化文件路径,处理特殊字符"""
try:
# 尝试直接访问文件
if os.path.exists(path):
return path
# 尝试Unicode规范化
normalized = unicodedata.normalize('NFC', path)
if os.path.exists(normalized):
return normalized
# 尝试不同编码方案
encodings = ['utf-8', 'shift_jis', 'euc-jp', 'cp932']
for encoding in encodings:
try:
decoded = path.encode('latin1').decode(encoding)
if os.path.exists(decoded):
return decoded
except:
continue
# 最终尝试原始路径
return path
except Exception as e:
self.status_var.set(f"文件路径处理错误: {str(e)}")
return path
```
**关键改进**:
1. Unicode规范化:使用`unicodedata.normalize('NFC', path)`处理Unicode组合字符
2. 多编码尝试:支持日语常用编码(shift_jis, euc-jp, cp932)
3. 错误处理:增强异常捕获,避免程序崩溃
4. 文件存在检查:确保路径有效
### 2. 临时文件误识别问题
**问题原因**:
- Excel在编辑时会创建临时文件(以`~$`开头)
- 这些文件不是有效数据文件,但被包含在文件列表中
**解决方案**:
```python
def load_files(self):
"""加载文件夹中的Excel文件(优化特殊字符处理)"""
folder = self.folder_path.get()
if not folder or not os.path.isdir(folder):
return
# 获取文件过滤模式
filter_patterns = self.filter_var.get().split(';')
self.files = []
for file in os.listdir(folder):
file_path = os.path.join(folder, file)
# 跳过临时文件 - 关键改进
if file.startswith('~$'):
continue
# 检查文件扩展名
file_lower = file.lower()
matched = False
for pattern in filter_patterns:
# 移除通配符并转换为小写
ext = pattern.replace('*', '').lower()
if file_lower.endswith(ext):
matched = True
break
if matched:
# 规范化文件名处理特殊字符
normalized_path = self.normalize_file_path(file_path)
if normalized_path and os.path.isfile(normalized_path):
self.files.append(normalized_path)
self.status_var.set(f"找到 {len(self.files)} 个Excel文件")
```
**关键改进**:
1. 显式跳过临时文件:`if file.startswith('~$'): continue`
2. 添加文件过滤功能:用户可自定义过滤规则
3. 文件类型验证:确保是有效文件(非目录)
4. 增强文件扩展名检查:支持多扩展名模式
### 3. 列名识别增强
**问题原因**:
- 不同文件可能使用不同术语表示信号列
- 原始匹配模式(データ名)可能不适用于所有文件
**解决方案**:
```python
def find_header_row_openpyxl(self, file_path):
"""使用openpyxl查找列名行(增强兼容性)"""
try:
wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
ws = wb.active
# 尝试多种列名匹配模式
patterns = [
r'データ名', # 原始模式
r'データ名', # 全角模式
r'信号名', # 中文模式
r'Signal Name', # 英文模式
r'Data Name'
]
# 只检查前50行
for row_idx in range(1, 51):
# 只检查前50列
for col_idx in range(1, 51):
cell = ws.cell(row=row_idx, column=col_idx)
cell_value = cell.value
if not cell_value:
continue
# 尝试所有匹配模式
cell_str = str(cell_value)
for pattern in patterns:
if re.search(pattern, cell_str, re.IGNORECASE):
# ...后续处理...
```
**关键改进**:
1. 多模式匹配:支持5种常见列名模式
2. 正则表达式:使用`re.IGNORECASE`忽略大小写
3. 宽松匹配:包含半角、全角、中文、英文变体
4. 增强错误处理:避免因单个文件错误中断整个流程
## 使用指南
### 1. 特殊文件名处理
- 工具会自动处理日文、中文等特殊字符文件名
- 如果仍有问题,可尝试将文件复制到英文路径下
### 2. 临时文件过滤
- 所有以`~$`开头的文件会被自动忽略
- 用户可通过文件过滤框自定义过滤规则(如`*.xlsx;*.xlsm`)
### 3. 列名识别
- 工具支持多种列名模式,包括:
- データ名(半角片假名)
- データ名(全角片假名)
- 信号名(中文)
- Signal Name(英文)
- Data Name(英文)
### 4. 性能优化
- 多线程处理:最多8个线程并发
- 智能缓存:减少重复处理
- 行数限制:最多处理5000行数据
- 内存管理:自动垃圾回收
## 测试用例
测试以下特殊文件:
1. 【ドラフト版】D01D-00-02(HEV車).xlsm
2. 【ドラフト版】D01D-00-03(コンベ車).xlsx
3. ~$temp_file.xlsx(临时文件)
**预期结果**:
1. 正确识别并处理前两个文件
2. 忽略第三个临时文件
3. 正确识别信号列并进行对比
阅读全文
相关推荐


















