Enhanced Local File Search Tool: Find Any File

Find Any File is an enhanced file search app for the Mac. It can search by file name, date, size, and type, and it finds files that Spotlight misses, including hidden ones. Instead of using a database, it searches the disk directly and presents results in a hierarchical view, which makes it especially good for locating large files, recently changed files, or files whose names you only partially remember. Aside from plain text, it does not search file contents, but it is fairly fast and makes an effective complement to Spotlight.

Find Any File is an enhanced local file search tool for the Mac that lets you search your local disks for any file by name, creation or modification date, size, type, creator code, and more. We are providing the latest cracked version of Find Any File for Mac here; anyone who needs it is welcome to download it.

Find Any File searches for files on your local disks by name, creation or modification date, size, type, and creator code (but not by content).

Unhappy with Spotlight because it doesn't find files you know are there? Use Find Any File to locate every file on your disks, including those that are normally hidden, by file name, date, or size (not by content, though!).

Features

• Need to find a file whose name you only partially remember?

• Want to see which files changed in the last five minutes?

• Looking for all the large files on your disk?

• Uninstalling software that left its files in hidden locations that Spotlight won't show?

Find Any File is the ideal tool for all of these tasks, as the sketch below illustrates.
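As a rough illustration only: the following minimal Python sketch shows what such an attribute-based search looks like under the hood, walking a directory tree directly rather than consulting an index. The root path and both thresholds are hypothetical placeholders for this example, not values Find Any File itself uses.

import os
import time

ROOT = "/Users/me"              # hypothetical starting folder
RECENT_SECONDS = 5 * 60         # "changed in the last five minutes"
LARGE_BYTES = 500 * 1024 ** 2   # "large" means over 500 MB here (arbitrary)

now = time.time()
for dirpath, dirnames, filenames in os.walk(ROOT):
    for name in filenames:
        path = os.path.join(dirpath, name)
        try:
            st = os.stat(path)  # file attributes: size, modification time, etc.
        except OSError:
            continue            # skip entries we cannot read
        if now - st.st_mtime <= RECENT_SECONDS:
            print(f"recently changed: {path}")
        if st.st_size >= LARGE_BYTES:
            print(f"large file ({st.st_size >> 20} MB): {path}")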

You can even search disks that are not indexed by Spotlight, including server volumes.

Find Any File also turns up files that Spotlight misses, such as files inside bundles and packages, and files in system folders that are normally excluded from Spotlight searches.

Unlike Spotlight, it does not use a database; instead it searches the data on the disk directly. This lets you search on file attributes such as name, creation and modification dates, and file size, and even on plain text inside files.

Another useful feature is its hierarchical results view, which shows the found items inside their respective folders and often makes browsing a hundred or more results much easier.
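To see what that grouping amounts to, here is a small sketch that folds a flat list of hits into per-folder groups; the paths are made up for the example:

import os
from collections import defaultdict

# hypothetical flat search results
hits = [
    "/Users/me/Documents/report.pdf",
    "/Users/me/Documents/drafts/report-v2.pdf",
    "/Users/me/Downloads/report-final.pdf",
]

by_folder = defaultdict(list)
for path in hits:
    by_folder[os.path.dirname(path)].append(os.path.basename(path))

for folder in sorted(by_folder):
    print(folder)                 # the containing folder...
    for name in by_folder[folder]:
        print("    " + name)      # ...with its found items nested beneath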

Finally, it is fast. Not as fast as Spotlight, but it usually takes only a few seconds to find every matching item on a disk.

• Note •

Find Any File cannot search the contents of anything other than plain (unformatted) text files, and even that is slow, so don't expect it to be a good replacement for Spotlight's content search.
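The slowness is easy to understand: without an index, a content search must open and scan every candidate file. A naive sketch of such a plain-text scan follows, with a hypothetical root folder and search term:

import os

ROOT = "/Users/me/Documents"  # hypothetical folder to scan
NEEDLE = b"invoice 2023"      # plain-text search term, as raw bytes

for dirpath, _, filenames in os.walk(ROOT):
    for name in filenames:
        path = os.path.join(dirpath, name)
        try:
            with open(path, "rb") as f:
                if NEEDLE in f.read():  # reads each whole file: slow at disk scale
                    print(path)
        except OSError:
            continue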
