WINDOWS C盘变大，你们怎么办的？

最新推荐文章于 2025-09-17 10:41:30 发布
YONYON-R&D
最新推荐文章于 2025-09-17 10:41:30 发布
阅读量234
点赞数 2
CC 4.0 BY-SA版权
分类专栏：编程分享文章标签： python
本文链接：https://blog.csdn.net/eydj2008/article/details/150843592
编程分享专栏收录该内容
35 篇文章
订阅专栏
我主要监控 C:\Users 下面的用户数据，C盘变大多半是这里的文件在增长。怎么办？给文件建立快照吧。
脚本会自动扫描不小于 30MB 的文件并生成快照；下次运行时会再生成一份新快照并自动对比，列出新增、删除、修改了哪些文件。
filesnapshot.py 文件如下：
import os
import json
import hashlib
import pandas as pd
from datetime import datetime
from pathlib import Path

class FileSnapshot:
    """Record the files under a directory as JSON snapshots and diff them.

    A snapshot maps each file's path (relative to ``base_path``) to its size,
    timestamps and MD5 hash.  Two snapshots can be compared to list the files
    that were added, deleted, modified or left unchanged.
    """

    # Placeholder stored instead of a digest when a file could not be read.
    _UNREADABLE_HASH = "access_denied"
    # Bytes read per call while hashing; snapshots typically target large
    # files, so a bigger buffer avoids many tiny reads.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, base_path):
        """Remember the directory to scan.

        Args:
            base_path: Root directory whose files are snapshotted.
        """
        self.base_path = Path(base_path)
        self.snapshot_file = "initial_snapshot.json"

    def load_snapshot(self, snapshot_file):
        """Load a snapshot JSON file and return its parsed content.

        Args:
            snapshot_file: Path of the snapshot file, as ``str`` or any
                ``os.PathLike`` object (for example ``pathlib.Path``).

        Returns:
            The decoded JSON document (a dict as written by
            ``create_snapshot``).

        Raises:
            TypeError: If ``snapshot_file`` is not a path-like value.
            FileNotFoundError: If the file does not exist.
        """
        if not isinstance(snapshot_file, (str, os.PathLike)):
            raise TypeError(f"快照文件参数必须是字符串路径，但收到 {type(snapshot_file)}")

        if not os.path.exists(snapshot_file):
            raise FileNotFoundError(f"快照文件不存在: {snapshot_file}")

        with open(snapshot_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def calculate_file_hash(self, file_path):
        """Return the MD5 hex digest of a file's content.

        The digest is only used to detect content changes.

        Args:
            file_path: Path of the file to hash.

        Returns:
            The hex digest, or the string ``"access_denied"`` when the file
            cannot be opened or read.
        """
        digest = hashlib.md5()
        try:
            with open(file_path, "rb") as f:
                for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b""):
                    digest.update(chunk)
        except OSError:
            # PermissionError and IOError are both OSError.
            return self._UNREADABLE_HASH
        return digest.hexdigest()

    def create_snapshot(self, snapshot_name=None, min_size_mb=0):
        """Scan ``base_path`` recursively and write a snapshot JSON file.

        Args:
            snapshot_name (str): Snapshot name; the output file is
                ``<snapshot_name>.json`` relative to the current working
                directory.  Defaults to a timestamp-based name.
            min_size_mb (float): Size threshold in MB.  Files whose size is
                greater than or equal to this value are recorded.

        Returns:
            The name of the snapshot file that was written.
        """
        if snapshot_name is None:
            snapshot_name = datetime.now().strftime("snapshot_%Y%m%d_%H%M%S")

        # Convert the MB threshold to bytes.
        min_size_bytes = min_size_mb * 1024 * 1024

        snapshot_data = {
            "timestamp": datetime.now().isoformat(),
            "base_path": str(self.base_path),
            "min_size_mb": min_size_mb,
            "files": {}
        }

        print(f"正在创建快照: {snapshot_name} (仅包含大于 {min_size_mb}MB 的文件)")
        file_count = 0
        included_count = 0
        total_size_bytes = 0

        for file_path in self.base_path.rglob('*'):
            try:
                # is_file() can itself raise (e.g. on permission errors), so
                # it is guarded together with stat(); one stat() call serves
                # size, mtime and ctime.
                if not file_path.is_file():
                    continue
                file_stat = file_path.stat()
            except OSError as e:
                # Record the failing entry and keep scanning.
                rel_path = str(file_path.relative_to(self.base_path))
                snapshot_data["files"][rel_path] = {"error": str(e)}
                continue

            file_count += 1
            file_size = file_stat.st_size

            if file_size >= min_size_bytes:
                rel_path = str(file_path.relative_to(self.base_path))
                snapshot_data["files"][rel_path] = {
                    "size": file_size,
                    "size_mb": round(file_size / (1024 * 1024), 2),
                    "mtime": file_stat.st_mtime,
                    "ctime": file_stat.st_ctime,
                    "hash": self.calculate_file_hash(file_path)
                }
                included_count += 1
                total_size_bytes += file_size

                if included_count % 100 == 0:
                    print(f"已扫描 {file_count} 个文件，已包含 {included_count} 个符合要求的文件...")
            elif file_count % 1000 == 0:
                # Small files are counted for progress output but not stored.
                print(f"已扫描 {file_count} 个文件，已包含 {included_count} 个符合要求的文件...")

        # Persist the snapshot.
        snapshot_filename = f"{snapshot_name}.json"
        with open(snapshot_filename, 'w', encoding='utf-8') as f:
            json.dump(snapshot_data, f, indent=2, ensure_ascii=False)

        # Summary output.
        total_size_mb = round(total_size_bytes / (1024 * 1024), 2)
        print(f"快照完成！扫描 {file_count} 个文件，包含 {included_count} 个大于 {min_size_mb}MB 的文件")
        print(f"总大小: {total_size_mb}MB，保存为: {snapshot_filename}")
        return snapshot_filename

    def _has_changed(self, old_file, new_file):
        """Tell whether two snapshot entries describe different content."""
        old_hash = old_file.get("hash")
        new_hash = new_file.get("hash")
        if old_hash != new_hash:
            return True
        if old_hash not in (None, self._UNREADABLE_HASH):
            # Identical real digests: content is the same.
            return False
        # No usable digest on either side (unreadable file or error entry):
        # equal placeholders prove nothing, so compare size and mtime instead.
        return (
            old_file.get("size") != new_file.get("size")
            or old_file.get("mtime") != new_file.get("mtime")
        )

    def compare_snapshots(self, old_snapshot_file, new_snapshot_file, output_format="excel"):
        """Compare two snapshot files and write a difference report.

        Args:
            old_snapshot_file: Path of the older snapshot.
            new_snapshot_file: Path of the newer snapshot.
            output_format: ``"excel"`` for an .xlsx report; any other value
                produces a text report.

        Returns:
            A dict with the keys ``added_files``, ``deleted_files``,
            ``modified_files`` and ``unchanged_files``; each value is a list
            of per-file dicts, ordered by file path.
        """
        print(f"正在比较快照: {old_snapshot_file} 和 {new_snapshot_file}")

        old_files = self.load_snapshot(old_snapshot_file)["files"]
        new_files = self.load_snapshot(new_snapshot_file)["files"]

        comparison_results = {
            "added_files": [],
            "deleted_files": [],
            "modified_files": [],
            "unchanged_files": []
        }

        # Sorted so the result lists and reports have a stable order.
        for file_path in sorted(set(old_files) | set(new_files)):
            old_file = old_files.get(file_path)
            new_file = new_files.get(file_path)

            if old_file is None:
                # Present only in the new snapshot.
                comparison_results["added_files"].append({
                    "file_path": file_path,
                    "size": new_file.get("size", 0),
                    "type": "新增"
                })
            elif new_file is None:
                # Present only in the old snapshot.
                comparison_results["deleted_files"].append({
                    "file_path": file_path,
                    "size": old_file.get("size", 0),
                    "type": "删除"
                })
            elif self._has_changed(old_file, new_file):
                old_size = old_file.get("size", 0)
                new_size = new_file.get("size", 0)
                comparison_results["modified_files"].append({
                    "file_path": file_path,
                    "old_size": old_size,
                    "new_size": new_size,
                    "size_change": new_size - old_size,
                    "type": "修改"
                })
            else:
                comparison_results["unchanged_files"].append({
                    "file_path": file_path,
                    "size": new_file.get("size", 0),
                    "type": "未变化"
                })

        self.generate_report(comparison_results, old_snapshot_file, new_snapshot_file, output_format)

        return comparison_results

    def generate_report(self, results, old_snapshot, new_snapshot, format_type="excel"):
        """Write the comparison results to a report file in the working directory.

        Args:
            results: Dict of change type -> list of per-file dicts, as
                returned by ``compare_snapshots``.
            old_snapshot: Name of the older snapshot (text report header).
            new_snapshot: Name of the newer snapshot (text report header).
            format_type: ``"excel"`` writes ``comparison_report_<ts>.xlsx``
                with one sheet per non-empty change type; any other value
                writes ``comparison_report_<ts>.txt``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if format_type == "excel":
            report_file = f"comparison_report_{timestamp}.xlsx"
            with pd.ExcelWriter(report_file) as writer:
                wrote_sheet = False
                for change_type, files in results.items():
                    if files:  # only change types that have rows get a sheet
                        pd.DataFrame(files).to_excel(writer, sheet_name=change_type, index=False)
                        wrote_sheet = True
                if not wrote_sheet:
                    # A workbook needs at least one sheet; without this the
                    # writer fails when every result list is empty.
                    pd.DataFrame({"info": ["无文件记录"]}).to_excel(
                        writer, sheet_name="summary", index=False
                    )

            print(f"Excel报告已生成: {report_file}")

        else:
            report_file = f"comparison_report_{timestamp}.txt"
            with open(report_file, 'w', encoding='utf-8') as f:
                f.write(f"文件系统差异分析报告\n")
                f.write(f"对比时间: {datetime.now()}\n")
                f.write(f"旧快照: {old_snapshot}\n")
                f.write(f"新快照: {new_snapshot}\n")
                f.write("="*50 + "\n\n")

                for change_type, files in results.items():
                    if files:
                        f.write(f"\n【{change_type}】 - 共 {len(files)} 个文件\n")
                        for file_info in files:
                            f.write(f"  - {file_info['file_path']}\n")
                            if 'size' in file_info:
                                f.write(f"    大小: {file_info['size']} bytes\n")
                            if 'size_change' in file_info:
                                f.write(f"    大小变化: {file_info['size_change']} bytes\n")

            print(f"文本报告已生成: {report_file}")


# Pretty-print a comparison result to the console
def print_comparison_results(results):
    """Print the added, deleted and modified files plus a count summary.

    Args:
        results: Dict with the keys ``added_files``, ``deleted_files``,
            ``modified_files`` and ``unchanged_files``, each holding a list
            of per-file dicts.
    """
    banner = "=" * 60
    print(banner)
    print("快照比较结果")
    print(banner)

    added = results["added_files"]
    print(f"\n新增文件 ({len(added)}):")
    for entry in added:
        print(f"  {entry['file_path']} - 大小: {entry['size']} 字节")

    deleted = results["deleted_files"]
    print(f"\n删除文件 ({len(deleted)}):")
    for entry in deleted:
        print(f"  {entry['file_path']} - 大小: {entry['size']} 字节")

    modified = results["modified_files"]
    print(f"\n修改文件 ({len(modified)}):")
    for entry in modified:
        print(f"  {entry['file_path']} - 大小变化: {entry['size_change']} 字节 "
              f"(旧: {entry['old_size']}, 新: {entry['new_size']})")

    # Unchanged files are not listed one by one; they only appear in the
    # summary counts below.
    print("\n统计摘要:")
    total = sum(len(group) for group in results.values())
    print(f"  总计文件: {total}")
    print(f"  新增: {len(added)}")
    print(f"  删除: {len(deleted)}")
    print(f"  修改: {len(modified)}")
    print(f"  未变化: {len(results['unchanged_files'])}")

# 使用示例
import argparse

def main(monitor_path, min_size_mb=30):
    """Create the baseline snapshot, or compare against an existing one.

    On the first run (no ``initial_snapshot.json`` in the current working
    directory) a baseline snapshot is created.  On later runs a new snapshot
    named ``updated_snapshot`` is created, compared with the baseline, and
    the differences are printed.

    Args:
        monitor_path: Directory to scan.
        min_size_mb: Size threshold in MB passed to
            ``FileSnapshot.create_snapshot``.
    """
    print(f"开始扫描目录: {monitor_path}")
    snapshot = FileSnapshot(monitor_path)

    # The baseline is looked up relative to the current working directory.
    initial_snapshot_file = "initial_snapshot.json"

    if not os.path.exists(initial_snapshot_file):
        # First run: only create the baseline.
        print("首次运行，创建初始快照...")
        snapshot_file = snapshot.create_snapshot("initial_snapshot", min_size_mb=min_size_mb)
        print(f"请妥善保存此文件: {snapshot_file}")
        return

    # Later runs: create a fresh snapshot and diff it against the baseline.
    print("检测到已有快照，开始对比模式...")

    # A baseline taken of a different directory would make every file show
    # up as added or deleted, so warn before comparing.
    baseline_root = snapshot.load_snapshot(initial_snapshot_file).get("base_path")
    if baseline_root is not None and Path(baseline_root) != Path(monitor_path):
        print(f"警告: 已有快照记录的目录是 {baseline_root}，与当前目录 {monitor_path} 不一致，对比结果可能没有意义。")

    new_file = snapshot.create_snapshot("updated_snapshot", min_size_mb=min_size_mb)

    results = snapshot.compare_snapshots(initial_snapshot_file, new_file)

    print_comparison_results(results)
    


if __name__ == "__main__":
    # Command-line entry point: one required positional argument, the
    # directory to monitor, which is handed straight to main().
    cli = argparse.ArgumentParser(description='文件监控程序')
    cli.add_argument('path', type=str, help='要监控的目录路径')
    main(cli.parse_args().path)
运行前需要先安装 pip 和 pandas 库；生成 Excel 报告还需要 openpyxl（例如执行 pip install pandas openpyxl）。具体安装方法可以问 AI。
再建一个BAT/CMD文件扫描你想要对比的目录：
@echo off
rem Work from this script's own folder: the Python script reads and writes
rem initial_snapshot.json and the reports relative to the current directory,
rem which is not this folder when the script is started elevated
rem ("Run as administrator" typically starts in the Windows System32 folder).
cd /d "%~dp0"
echo 正在创建初始快照...
rem Alternative target: scan only the Roaming profile data.
rem filesnapshot.py "C:\Users\Administrator\AppData\Roaming"
filesnapshot.py "C:\Users\Administrator"
pause