Windows 的 C 盘占用越来越大,你们是怎么处理的?

我主要监控 C:\Users 下面的用户数据,C 盘变大多半是这里的文件增长造成的。怎么办?给文件建立快照吧。

脚本会自动扫描不小于 30MB 的文件并生成快照;下次运行时会自动与初始快照对比,列出新增、删除、修改了哪些文件。

filesnapshot.py  文件如下:

import os
import json
import hashlib
import pandas as pd
from datetime import datetime
from pathlib import Path

class FileSnapshot:
    """Capture and compare JSON snapshots of the files below a directory.

    A snapshot records size, timestamps and an MD5 digest for every file whose
    size is at least a chosen threshold. Two snapshots can then be compared to
    list added, deleted, modified and unchanged files.
    """

    # Placeholder stored instead of a digest when a file cannot be read.
    _UNREADABLE_HASH = "access_denied"
    # Bytes read per call while hashing; snapshots usually target large files,
    # so a 1 MiB buffer avoids many tiny reads.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, base_path):
        """Remember the directory to scan.

        Args:
            base_path: Root directory whose files are snapshotted.
        """
        self.base_path = Path(base_path)
        self.snapshot_file = "initial_snapshot.json"

    def load_snapshot(self, snapshot_file):
        """Load a snapshot JSON file and return its content as a dict.

        Args:
            snapshot_file: Path of the snapshot, as ``str`` or ``os.PathLike``.

        Raises:
            TypeError: If ``snapshot_file`` is not a path-like value.
            FileNotFoundError: If the file does not exist.
        """
        if not isinstance(snapshot_file, (str, os.PathLike)):
            raise TypeError(f"快照文件参数必须是字符串路径,但收到 {type(snapshot_file)}")

        if not os.path.exists(snapshot_file):
            raise FileNotFoundError(f"快照文件不存在: {snapshot_file}")

        with open(snapshot_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def calculate_file_hash(self, file_path):
        """Return the MD5 hex digest of a file, used to detect content changes.

        Returns the string ``"access_denied"`` when the file cannot be opened
        or read.
        """
        digest = hashlib.md5()
        try:
            with open(file_path, "rb") as f:
                for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b""):
                    digest.update(chunk)
        except OSError:
            return self._UNREADABLE_HASH
        return digest.hexdigest()

    def create_snapshot(self, snapshot_name=None, min_size_mb=0):
        """Scan ``base_path`` recursively and write a snapshot JSON file.

        Args:
            snapshot_name (str): Snapshot name without the ``.json`` suffix;
                defaults to a timestamp of the form ``snapshot_YYYYmmdd_HHMMSS``.
            min_size_mb (float): Size threshold in MB. Files whose size is
                greater than or equal to it are included.

        Returns:
            str: Name of the written file (``<snapshot_name>.json``).
        """
        if snapshot_name is None:
            snapshot_name = datetime.now().strftime("snapshot_%Y%m%d_%H%M%S")

        # Convert MB to bytes.
        min_size_bytes = min_size_mb * 1024 * 1024

        snapshot_data = {
            "timestamp": datetime.now().isoformat(),
            "base_path": str(self.base_path),
            "min_size_mb": min_size_mb,
            "files": {}
        }

        print(f"正在创建快照: {snapshot_name} (仅包含大于 {min_size_mb}MB 的文件)")
        file_count = 0
        included_count = 0
        total_size_bytes = 0

        for file_path in self.base_path.rglob('*'):
            try:
                # is_file() can itself raise (e.g. PermissionError), so it is
                # guarded too; one stat() call serves size and timestamps.
                if not file_path.is_file():
                    continue
                stat_result = file_path.stat()
            except OSError as e:
                # Record the failing path but keep scanning.
                rel_path = str(file_path.relative_to(self.base_path))
                snapshot_data["files"][rel_path] = {"error": str(e)}
                continue

            file_size = stat_result.st_size
            file_count += 1

            if file_size < min_size_bytes:
                # Small files are counted but not stored in the snapshot.
                if file_count % 1000 == 0:
                    print(f"已扫描 {file_count} 个文件,已包含 {included_count} 个符合要求的文件...")
                continue

            rel_path = str(file_path.relative_to(self.base_path))
            snapshot_data["files"][rel_path] = {
                "size": file_size,
                "size_mb": round(file_size / (1024 * 1024), 2),
                "mtime": stat_result.st_mtime,
                "ctime": stat_result.st_ctime,
                "hash": self.calculate_file_hash(file_path)
            }
            included_count += 1
            total_size_bytes += file_size

            if included_count % 100 == 0:
                print(f"已扫描 {file_count} 个文件,已包含 {included_count} 个符合要求的文件...")

        # Save the snapshot to disk.
        snapshot_filename = f"{snapshot_name}.json"
        with open(snapshot_filename, 'w', encoding='utf-8') as f:
            json.dump(snapshot_data, f, indent=2, ensure_ascii=False)

        # Summary statistics.
        total_size_mb = round(total_size_bytes / (1024 * 1024), 2)
        print(f"快照完成!扫描 {file_count} 个文件,包含 {included_count} 个大于 {min_size_mb}MB 的文件")
        print(f"总大小: {total_size_mb}MB,保存为: {snapshot_filename}")
        return snapshot_filename

    @classmethod
    def _entries_differ(cls, old_file, new_file):
        """Return True when two snapshot entries describe different content."""
        old_hash = old_file.get("hash")
        if old_hash != new_file.get("hash"):
            return True
        if old_hash is None or old_hash == cls._UNREADABLE_HASH:
            # No usable digest on either side (unreadable file or error
            # entry): fall back to size and modification time so that a
            # locked file that grew is still reported.
            old_meta = (old_file.get("size"), old_file.get("mtime"))
            new_meta = (new_file.get("size"), new_file.get("mtime"))
            return old_meta != new_meta
        return False

    def compare_snapshots(self, old_snapshot_file, new_snapshot_file, output_format="excel"):
        """Compare two snapshot files and write a difference report.

        Args:
            old_snapshot_file: Path of the earlier snapshot.
            new_snapshot_file: Path of the later snapshot.
            output_format: ``"excel"`` for an ``.xlsx`` report; any other value
                produces a text report.

        Returns:
            dict: Lists under the keys ``added_files``, ``deleted_files``,
            ``modified_files`` and ``unchanged_files``, each ordered by path.
        """
        print(f"正在比较快照: {old_snapshot_file} 和 {new_snapshot_file}")

        old_files = self.load_snapshot(old_snapshot_file)["files"]
        new_files = self.load_snapshot(new_snapshot_file)["files"]

        comparison_results = {
            "added_files": [],
            "deleted_files": [],
            "modified_files": [],
            "unchanged_files": []
        }

        # Sorted so that reports are reproducible between runs.
        for file_path in sorted(set(old_files) | set(new_files)):
            if file_path not in old_files:
                comparison_results["added_files"].append({
                    "file_path": file_path,
                    "size": new_files[file_path].get("size", 0),
                    "type": "新增"
                })
                continue

            if file_path not in new_files:
                comparison_results["deleted_files"].append({
                    "file_path": file_path,
                    "size": old_files[file_path].get("size", 0),
                    "type": "删除"
                })
                continue

            old_file = old_files[file_path]
            new_file = new_files[file_path]
            if self._entries_differ(old_file, new_file):
                comparison_results["modified_files"].append({
                    "file_path": file_path,
                    "old_size": old_file.get("size", 0),
                    "new_size": new_file.get("size", 0),
                    "size_change": new_file.get("size", 0) - old_file.get("size", 0),
                    "type": "修改"
                })
            else:
                comparison_results["unchanged_files"].append({
                    "file_path": file_path,
                    "size": new_file.get("size", 0),
                    "type": "未变化"
                })

        self.generate_report(comparison_results, old_snapshot_file, new_snapshot_file, output_format)

        return comparison_results

    def generate_report(self, results, old_snapshot, new_snapshot, format_type="excel"):
        """Write the comparison result to a timestamped report file.

        Args:
            results: Mapping of change type to a list of file-info dicts, as
                built by ``compare_snapshots``.
            old_snapshot: Name of the earlier snapshot (text report header).
            new_snapshot: Name of the later snapshot (text report header).
            format_type: ``"excel"`` writes ``comparison_report_<ts>.xlsx`` with
                one sheet per non-empty change type; anything else writes
                ``comparison_report_<ts>.txt``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if format_type == "excel":
            report_file = f"comparison_report_{timestamp}.xlsx"
            with pd.ExcelWriter(report_file) as writer:
                wrote_sheet = False
                for change_type, files in results.items():
                    if files:  # only emit sheets that have rows
                        pd.DataFrame(files).to_excel(writer, sheet_name=change_type, index=False)
                        wrote_sheet = True
                if not wrote_sheet:
                    # A workbook without any sheet cannot be saved, so emit a
                    # header-only placeholder when there is nothing to report.
                    placeholder = pd.DataFrame(columns=["file_path", "type"])
                    placeholder.to_excel(writer, sheet_name="no_files", index=False)

            print(f"Excel报告已生成: {report_file}")

        else:
            report_file = f"comparison_report_{timestamp}.txt"
            with open(report_file, 'w', encoding='utf-8') as f:
                f.write("文件系统差异分析报告\n")
                f.write(f"对比时间: {datetime.now()}\n")
                f.write(f"旧快照: {old_snapshot}\n")
                f.write(f"新快照: {new_snapshot}\n")
                f.write("="*50 + "\n\n")

                for change_type, files in results.items():
                    if files:
                        f.write(f"\n【{change_type}】 - 共 {len(files)} 个文件\n")
                        for file_info in files:
                            f.write(f"  - {file_info['file_path']}\n")
                            if 'size' in file_info:
                                f.write(f"    大小: {file_info['size']} bytes\n")
                            if 'size_change' in file_info:
                                f.write(f"    大小变化: {file_info['size_change']} bytes\n")

            print(f"文本报告已生成: {report_file}")


# 格式化打印结果
def print_comparison_results(results):
    """Print a human-readable summary of a snapshot comparison to stdout.

    Args:
        results: Mapping with the keys ``added_files``, ``deleted_files``,
            ``modified_files`` and ``unchanged_files``; each value is a list
            of file-info dicts. Unchanged files are only counted, not listed.
    """
    rule = "=" * 60
    print(rule)
    print("快照比较结果")
    print(rule)

    added = results["added_files"]
    print(f"\n新增文件 ({len(added)}):")
    for entry in added:
        print(f"  {entry['file_path']} - 大小: {entry['size']} 字节")

    deleted = results["deleted_files"]
    print(f"\n删除文件 ({len(deleted)}):")
    for entry in deleted:
        print(f"  {entry['file_path']} - 大小: {entry['size']} 字节")

    modified = results["modified_files"]
    print(f"\n修改文件 ({len(modified)}):")
    for entry in modified:
        change_part = f"  {entry['file_path']} - 大小变化: {entry['size_change']} 字节 "
        sizes_part = f"(旧: {entry['old_size']}, 新: {entry['new_size']})"
        print(change_part + sizes_part)

    # Totals across every category, including the unlisted unchanged files.
    total = sum(map(len, results.values()))
    print("\n统计摘要:")
    print(f"  总计文件: {total}")
    print(f"  新增: {len(results['added_files'])}")
    print(f"  删除: {len(results['deleted_files'])}")
    print(f"  修改: {len(results['modified_files'])}")
    print(f"  未变化: {len(results['unchanged_files'])}")

# 使用示例
import argparse

def main(monitor_path, min_size_mb=30):
    """Create the initial snapshot, or compare against it on later runs.

    On the first run (no ``initial_snapshot.json`` in the current working
    directory) an initial snapshot is written. On later runs a new snapshot
    named ``updated_snapshot`` is created, compared with the initial one, and
    the differences are printed.

    Args:
        monitor_path: Directory to scan, e.g. ``C:\\Users\\Administrator``.
        min_size_mb: Size threshold in MB passed to ``create_snapshot``. Use
            the same value on every run, otherwise files crossing the
            threshold show up as added or deleted.

    Raises:
        NotADirectoryError: If ``monitor_path`` is not an existing directory.
    """
    # Scanning a missing path yields no files, which would silently store an
    # empty initial snapshot and make every later comparison meaningless.
    if not os.path.isdir(monitor_path):
        raise NotADirectoryError(f"监控路径不存在或不是目录: {monitor_path}")

    print(f"开始扫描目录: {monitor_path}")
    snapshot = FileSnapshot(monitor_path)

    initial_snapshot_file = "initial_snapshot.json"

    if os.path.exists(initial_snapshot_file):
        # Later run: create a new snapshot and compare it with the initial one.
        print("检测到已有快照,开始对比模式...")

        new_file = snapshot.create_snapshot("updated_snapshot", min_size_mb=min_size_mb)

        results = snapshot.compare_snapshots(initial_snapshot_file, new_file)

        print_comparison_results(results)

    else:
        # First run: create the initial snapshot.
        print("首次运行,创建初始快照...")
        snapshot_file = snapshot.create_snapshot("initial_snapshot", min_size_mb=min_size_mb)
        print(f"请妥善保存此文件: {snapshot_file}")
    


if __name__ == "__main__":
    # Command-line interface: a single required positional argument naming
    # the directory to monitor.
    cli = argparse.ArgumentParser(description='文件监控程序')
    cli.add_argument('path', type=str, help='要监控的目录路径')

    # Parse the command line and hand the directory to the entry point.
    cli_args = cli.parse_args()
    main(cli_args.path)

如果还没有安装 pip 和 pandas,需要先安装(生成 Excel 报告还需要 openpyxl),例如执行:pip install pandas openpyxl。具体安装方法可以问 AI。

再建一个 BAT/CMD 文件,用来扫描你想要对比的目录:

@echo off
rem Switch to the folder that contains this script so that filesnapshot.py and
rem the snapshot JSON files (which the Python script reads and writes relative
rem to the current directory) are always found in the same place. Without this,
rem starting the script via "Run as administrator" begins in the Windows
rem system directory instead.
cd /d "%~dp0"
echo 正在创建初始快照...
rem Alternative target: filesnapshot.py "C:\Users\Administrator\AppData\Roaming"
filesnapshot.py "C:\Users\Administrator"

pause

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值