word无法创建工作文件，请检查临时环境变量。

word无法创建工作文件请检查临时环境变量

我是一名移动运营商的工作人员，负责以电话外呼的方式开展客户满意度修复工作。现在需要利用python技术写一个关于外呼电话录音包批量质检分析的系统，需要封装成可执行的.exe程序。请给出详细的方案及切实可执行的详细完整代码（附注解），不要假设可行的伪代码场景，不要简化。以下为分析维度及要求：一、要求： 1、输入框需包含：音频选择框（支持多选或选择文件夹）、关键词选择框（选择本地关键词.xlsx文件地址）。 2、程序界面需包含开始分析、停止分析、清空等按钮命令。 3、输出框需包含已完成分析、分析错误、分析报告储存路径等提示（分析报告生成一个汇总的结构化excel清单，一行代表一个文件，包含多个分析维度结果。同时生成一个汇总的可视化分析word版报告）。 4、音频文件可能有多种格式，如MP3、WAV、AMR等，需要兼容多种格式。 5、模型使用组合：语音识别：campp_enterprise_v2；说话人分离：campp_enterprise_v2；中文文本情感分析：campp_enterprise_v2。 6、所有模型均下载至本地离线加载使用。 7、录音包主要为客服与客户的对话，进行说话人分离时客服人员说话根据全部片段的前三个片段是否存在开场白关键字来确定身份。 8、模型优化内存资源，确保最后封装为程序时可在GPU专享内存为512M，共享内存为3.7G的个人笔记本中运行。二、分析维度： 1、通话基本信息：音频时长、文件名称。 2、服务规范检查：开场白检查（根据关键字）、结束语检查（根据关键字）、服务禁语检查（根据关键字）。 3、客服服务态度情感分析：结合客服的文本情感分析，分析是否有不耐烦、生气等负面情绪。 4、客户情感分析：结合客户的文本情感分析客户整体情感极性（积极/消极/中性），特定情绪识别（如愤怒、不耐烦）。 5、沟通技巧检查：客服语速与清晰度分析、平均音量水平及稳定性。 6、问题解决率分析：客户问题是否被有效回答。

self.error_signal.emit("关键词加载错误", f"无法加载关键词文件: {str(e)}") return keywords def run(self): """执行分析任务""" total_files = len(self.audio_files) results = [] asr_pipeline = ...

检查代码是否可运行，是否高效，是否可CPUimport sys import os import json import time import wave import numpy as np import pandas as pd import matplotlib.pyplot as plt import soundfile as sf from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog, QProgressBar, QGroupBox, QComboBox, QCheckBox, QMessageBox) from PyQt5.QtCore import QThread, pyqtSignal from pydub import AudioSegment from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification import whisper from pyannote.audio import Pipeline from docx import Document from docx.shared import Inches import librosa import tempfile from collections import defaultdict import re from concurrent.futures import ThreadPoolExecutor, as_completed import torch from torch.cuda import is_available as cuda_available import logging import gc # 配置日志 logging.basicConfig(level=logging.INFO) logger = logging.getLogger(name) # 全局模型缓存 MODEL_CACHE = {} class AnalysisThread(QThread): progress = pyqtSignal(int) message = pyqtSignal(str) analysis_complete = pyqtSignal(dict) error = pyqtSignal(str) def init(self, audio_files, keyword_file, whisper_model_path, pyannote_model_path, emotion_model_path): super().init() self.audio_files = audio_files self.keyword_file = keyword_file self.whisper_model_path = whisper_model_path self.pyannote_model_path = pyannote_model_path self.emotion_model_path = emotion_model_path self.running = True self.cached_models = {} self.temp_files = [] # 用于管理临时文件 self.lock = torch.multiprocessing.Lock() # 用于模型加载的锁 def run(self): try: # 加载关键词 self.message.emit("正在加载关键词...") keywords = self.load_keywords() # 预加载模型 self.message.emit("正在预加载模型...") self.preload_models() results = [] total_files = len(self.audio_files) for idx, audio_file in enumerate(self.audio_files): if not self.running: self.message.emit("分析已停止") return self.message.emit(f"正在处理文件: {os.path.basename(audio_file)} ({idx + 1}/{total_files})") file_result = self.analyze_file(audio_file, keywords) if file_result: results.append(file_result) # 定期清理内存 if idx % 5 == 0: gc.collect() torch.cuda.empty_cache() if cuda_available() else None self.progress.emit(int((idx + 1) / total_files * 100)) self.analysis_complete.emit({"results": results, "keywords": keywords}) self.message.emit("分析完成！") except Exception as e: import traceback error_msg = f"分析过程中发生错误: {str(e)}\n{traceback.format_exc()}" self.error.emit(error_msg) logger.error(error_msg) finally: # 清理临时文件 self.cleanup_temp_files() def cleanup_temp_files(self): """清理所有临时文件""" for temp_file in self.temp_files: if os.path.exists(temp_file): try: os.unlink(temp_file) except Exception as e: logger.warning(f"删除临时文件失败: {temp_file}, 原因: {str(e)}") def preload_models(self): """预加载所有模型到缓存（添加线程安全）""" global MODEL_CACHE # 使用锁确保线程安全 with self.lock: # 检查全局缓存是否已加载模型 if 'whisper' in MODEL_CACHE and 'pyannote' in MODEL_CACHE and 'emotion_classifier' in MODEL_CACHE: self.cached_models = MODEL_CACHE self.message.emit("使用缓存的模型") return self.cached_models = {} try: # 加载语音识别模型 if 'whisper' not in MODEL_CACHE: self.message.emit("正在加载语音识别模型...") MODEL_CACHE['whisper'] = whisper.load_model( self.whisper_model_path, device="cuda" if cuda_available() else "cpu" ) self.cached_models['whisper'] = MODEL_CACHE['whisper'] # 加载说话人分离模型 if 'pyannote' not in MODEL_CACHE: self.message.emit("正在加载说话人分离模型...") MODEL_CACHE['pyannote'] = Pipeline.from_pretrained( self.pyannote_model_path, use_auth_token=True ) self.cached_models['pyannote'] = MODEL_CACHE['pyannote'] # 加载情感分析模型 if 'emotion_classifier' not in MODEL_CACHE: self.message.emit("正在加载情感分析模型...") device = 0 if cuda_available() else -1 tokenizer = AutoTokenizer.from_pretrained(self.emotion_model_path) model = AutoModelForSequenceClassification.from_pretrained(self.emotion_model_path) # 尝试使用半精度浮点数减少内存占用 try: if device != -1: model = model.half() except Exception: pass # 如果失败则继续使用全精度 MODEL_CACHE['emotion_classifier'] = pipeline( "text-classification", model=model, tokenizer=tokenizer, device=device ) self.cached_models['emotion_classifier'] = MODEL_CACHE['emotion_classifier'] except Exception as e: raise Exception(f"模型加载失败: {str(e)}") def analyze_file(self, audio_file, keywords): """分析单个音频文件（优化内存使用）""" try: # 确保音频为WAV格式 wav_file, is_temp = self.convert_to_wav(audio_file) if is_temp: self.temp_files.append(wav_file) # 获取音频信息 duration, sample_rate, channels = self.get_audio_info(wav_file) # 说话人分离 - 使用较小的音频片段处理大文件 diarization = self.process_diarization(wav_file, duration) # 识别客服和客户 agent_segments, customer_segments = self.identify_speakers(wav_file, diarization, keywords['opening']) # 并行处理客服和客户音频 agent_result, customer_result = {}, {} with ThreadPoolExecutor(max_workers=2) as executor: agent_future = executor.submit( self.process_speaker_audio, wav_file, agent_segments, "客服" ) customer_future = executor.submit( self.process_speaker_audio, wav_file, customer_segments, "客户" ) agent_result = agent_future.result() customer_result = customer_future.result() # 情感分析 - 批处理提高效率 agent_emotion, customer_emotion = self.analyze_emotions( [agent_result.get('text', ''), customer_result.get('text', '')] ) # 服务规范检查 opening_check = self.check_opening(agent_result.get('text', ''), keywords['opening']) closing_check = self.check_closing(agent_result.get('text', ''), keywords['closing']) forbidden_check = self.check_forbidden(agent_result.get('text', ''), keywords['forbidden']) # 沟通技巧分析 speech_rate = self.analyze_speech_rate(agent_result.get('segments', [])) volume_analysis = self.analyze_volume(wav_file, agent_segments, sample_rate) # 问题解决率分析 resolution_rate = self.analyze_resolution( agent_result.get('text', ''), customer_result.get('text', ''), keywords['resolution'] ) return { "file_name": os.path.basename(audio_file), "duration": duration, "agent_text": agent_result.get('text', ''), "customer_text": customer_result.get('text', ''), "opening_check": opening_check, "closing_check": closing_check, "forbidden_check": forbidden_check, "agent_emotion": agent_emotion, "customer_emotion": customer_emotion, "speech_rate": speech_rate, "volume_mean": volume_analysis.get('mean', -60), "volume_std": volume_analysis.get('std', 0), "resolution_rate": resolution_rate } except Exception as e: error_msg = f"处理文件 {os.path.basename(audio_file)} 时出错: {str(e)}" self.error.emit(error_msg) logger.error(error_msg, exc_info=True) return None finally: # 清理临时文件 if is_temp and os.path.exists(wav_file): try: os.unlink(wav_file) except Exception: pass def process_diarization(self, wav_file, duration): """分块处理说话人分离，避免大文件内存溢出""" # 对于短音频直接处理 if duration <= 600: # 10分钟以下 return self.cached_models['pyannote'](wav_file) # 对于长音频分块处理 self.message.emit(f"音频较长({duration:.1f}秒)，将分块处理...") diarization_result = [] chunk_size = 300 # 5分钟块 for start in range(0, int(duration), chunk_size): if not self.running: return [] end = min(start + chunk_size, duration) self.message.emit(f"处理片段: {start}-{end}秒") # 提取音频片段 with tempfile.NamedTemporaryFile(suffix='.wav') as tmpfile: self.extract_audio_segment(wav_file, start, end, tmpfile.name) segment_diarization = self.cached_models['pyannote'](tmpfile.name) # 调整时间偏移 for segment, _, speaker in segment_diarization.itertracks(yield_label=True): diarization_result.append(( segment.start + start, segment.end + start, speaker )) return diarization_result def extract_audio_segment(self, input_file, start_sec, end_sec, output_file): """提取音频片段""" audio = AudioSegment.from_wav(input_file) start_ms = int(start_sec * 1000) end_ms = int(end_sec * 1000) segment = audio[start_ms:end_ms] segment.export(output_file, format="wav") def process_speaker_audio(self, wav_file, segments, speaker_type): """处理说话人音频（优化内存使用）""" if not segments: return {'text': "", 'segments': []} text = "" segment_details = [] whisper_model = self.cached_models['whisper'] # 处理每个片段 for idx, (start, end) in enumerate(segments): if not self.running: break # 每处理5个片段报告一次进度 if idx % 5 == 0: self.message.emit(f"{speaker_type}: 处理片段 {idx+1}/{len(segments)}") duration = end - start segment_text = self.transcribe_audio_segment(wav_file, start, end, whisper_model) segment_details.append({ 'start': start, 'end': end, 'duration': duration, 'text': segment_text }) text += segment_text + " " return { 'text': text.strip(), 'segments': segment_details } def identify_speakers(self, wav_file, diarization, opening_keywords): """ 改进的客服识别方法 1. 检查前三个片段是否有开场白关键词 2. 如果片段不足三个，则检查所有存在的片段 3. 如果无法确定客服，则默认第二个说话人是客服 """ if not diarization: return [], [] speaker_segments = defaultdict(list) speaker_first_occurrence = {} # 记录每个说话人的首次出现时间 # 收集所有说话人片段并记录首次出现时间 for item in diarization: if len(item) == 3: # 来自分块处理的结果 start, end, speaker = item else: # 来自pyannote的直接结果 segment, _, speaker = item start, end = segment.start, segment.end speaker_segments[speaker].append((start, end)) if speaker not in speaker_first_occurrence or start < speaker_first_occurrence[speaker]: speaker_first_occurrence[speaker] = start # 如果没有说话人 if not speaker_segments: return [], [] # 如果只有一个说话人 if len(speaker_segments) == 1: speaker = list(speaker_segments.keys())[0] return speaker_segments[speaker], [] # 计算每个说话人的开场白得分 speaker_scores = {} whisper_model = self.cached_models['whisper'] for speaker, segments in speaker_segments.items(): score = 0 # 检查前三个片段（如果存在） check_segments = segments[:3] # 最多取前三个片段 for start, end in check_segments: # 转录片段 text = self.transcribe_audio_segment(wav_file, start, end, whisper_model) # 检查开场白关键词 for keyword in opening_keywords: if keyword and keyword in text: score += 1 break # 找到一个关键词就加分并跳出循环 speaker_scores[speaker] = score # 尝试找出得分最高的说话人 max_score = max(speaker_scores.values()) max_speakers = [spk for spk, score in speaker_scores.items() if score == max_score] # 如果有唯一最高分说话人，作为客服 if len(max_speakers) == 1: agent_speaker = max_speakers[0] else: # 无法通过开场白确定客服时，默认第二个说话人是客服 # 按首次出现时间排序 sorted_speakers = sorted(speaker_first_occurrence.items(), key=lambda x: x[1]) # 确保至少有两个说话人 if len(sorted_speakers) >= 2: # 取时间上第二个出现的说话人 agent_speaker = sorted_speakers[1][0] else: # 如果只有一个说话人（理论上不会进入此分支，但安全处理） agent_speaker = sorted_speakers[0][0] # 分离客服和客户片段 agent_segments = speaker_segments[agent_speaker] customer_segments = [] for speaker, segments in speaker_segments.items(): if speaker != agent_speaker: customer_segments.extend(segments) return agent_segments, customer_segments def load_keywords(self): """从Excel文件加载关键词（增强健壮性）""" try: df = pd.read_excel(self.keyword_file) # 确保列存在 columns = ['opening', 'closing', 'forbidden', 'resolution'] for col in columns: if col not in df.columns: raise ValueError(f"关键词文件缺少必要列: {col}") keywords = { "opening": [str(k).strip() for k in df['opening'].dropna().tolist() if str(k).strip()], "closing": [str(k).strip() for k in df['closing'].dropna().tolist() if str(k).strip()], "forbidden": [str(k).strip() for k in df['forbidden'].dropna().tolist() if str(k).strip()], "resolution": [str(k).strip() for k in df['resolution'].dropna().tolist() if str(k).strip()] } # 检查是否有足够的关键词 if not any(keywords.values()): raise ValueError("关键词文件中没有找到有效关键词") return keywords except Exception as e: raise Exception(f"加载关键词文件失败: {str(e)}") def convert_to_wav(self, audio_file): """将音频文件转换为WAV格式（增强健壮性）""" try: if not os.path.exists(audio_file): raise FileNotFoundError(f"音频文件不存在: {audio_file}") if audio_file.lower().endswith('.wav'): return audio_file, False # 使用临时文件避免磁盘IO with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmpfile: output_file = tmpfile.name audio = AudioSegment.from_file(audio_file) audio.export(output_file, format='wav') return output_file, True except Exception as e: raise Exception(f"音频转换失败: {str(e)}") def get_audio_info(self, wav_file): """获取音频文件信息（增强健壮性）""" try: if not os.path.exists(wav_file): raise FileNotFoundError(f"音频文件不存在: {wav_file}") # 使用soundfile获取更可靠的信息 with sf.SoundFile(wav_file) as f: duration = len(f) / f.samplerate sample_rate = f.samplerate channels = f.channels return duration, sample_rate, channels except Exception as e: raise Exception(f"获取音频信息失败: {str(e)}") def transcribe_audio_segment(self, wav_file, start, end, model): """转录单个音频片段 - 优化内存使用""" # 使用pydub加载音频 audio = AudioSegment.from_wav(wav_file) # 转换为毫秒 start_ms = int(start * 1000) end_ms = int(end * 1000) segment_audio = audio[start_ms:end_ms] # 使用临时文件 with tempfile.NamedTemporaryFile(suffix='.wav') as tmpfile: segment_audio.export(tmpfile.name, format="wav") try: result = model.transcribe( tmpfile.name, fp16=cuda_available() # 使用FP16加速（如果可用） ) return result['text'] except RuntimeError as e: if "out of memory" in str(e).lower(): # 尝试释放内存后重试 torch.cuda.empty_cache() gc.collect() result = model.transcribe( tmpfile.name, fp16=cuda_available() ) return result['text'] raise def analyze_emotions(self, texts): """批量分析文本情感（提高效率）""" if not any(t.strip() for t in texts): return [{"label": "中性", "score": 0.0} for _ in texts] # 截断长文本以提高性能 processed_texts = [t[:500] if len(t) > 500 else t for t in texts] # 批量处理 classifier = self.cached_models['emotion_classifier'] results = classifier(processed_texts, truncation=True, max_length=512, batch_size=4) # 确保返回格式一致 emotions = [] for result in results: if isinstance(result, list) and result: emotions.append({ "label": result[0]['label'], "score": result[0]['score'] }) else: emotions.append({ "label": "中性", "score": 0.0 }) return emotions def check_opening(self, text, opening_keywords): """检查开场白（使用正则表达式提高准确性）""" if not text or not opening_keywords: return False pattern = "|".join(re.escape(k) for k in opening_keywords) return bool(re.search(pattern, text)) def check_closing(self, text, closing_keywords): """检查结束语（使用正则表达式提高准确性）""" if not text or not closing_keywords: return False pattern = "|".join(re.escape(k) for k in closing_keywords) return bool(re.search(pattern, text)) def check_forbidden(self, text, forbidden_keywords): """检查服务禁语（使用正则表达式提高准确性）""" if not text or not forbidden_keywords: return False pattern = "|".join(re.escape(k) for k in forbidden_keywords) return bool(re.search(pattern, text)) def analyze_speech_rate(self, segments): """改进的语速分析 - 基于实际识别文本""" if not segments: return 0 total_chars = 0 total_duration = 0 for segment in segments: # 计算片段时长（秒） duration = segment['duration'] total_duration += duration # 计算中文字符数（去除标点和空格） chinese_chars = sum(1 for char in segment['text'] if '\u4e00' <= char <= '\u9fff') total_chars += chinese_chars if total_duration == 0: return 0 # 语速 = 总字数 / 总时长(分钟) return total_chars / (total_duration / 60) def analyze_volume(self, wav_file, segments, sample_rate): """改进的音量分析 - 使用librosa计算RMS分贝值""" if not segments: return {"mean": -60, "std": 0} # 使用soundfile加载音频（更高效） try: y, sr = sf.read(wav_file, dtype='float32') if sr != sample_rate: y = librosa.resample(y, orig_sr=sr, target_sr=sample_rate) sr = sample_rate except Exception: # 回退到librosa y, sr = librosa.load(wav_file, sr=sample_rate, mono=True) all_dB = [] for start, end in segments: start_sample = int(start * sr) end_sample = int(end * sr) # 确保片段在有效范围内 if start_sample < len(y) and end_sample <= len(y): segment_audio = y[start_sample:end_sample] # 计算RMS并转换为dB rms = librosa.feature.rms(y=segment_audio)[0] dB = librosa.amplitude_to_db(rms, ref=1.0) # 使用标准参考值 all_dB.extend(dB) if not all_dB: return {"mean": -60, "std": 0} return { "mean": float(np.mean(all_dB)), "std": float(np.std(all_dB)) } def analyze_resolution(self, agent_text, customer_text, resolution_keywords): """分析问题解决率（使用更智能的匹配）""" # 检查客户是否提到问题 problem_patterns = [ "问题", "故障", "解决", "怎么办", "如何", "为什么", "不行", "不能", "无法", "错误", "bug", "issue", "疑问", "咨询" ] problem_regex = re.compile("|".join(problem_patterns)) has_problem = bool(problem_regex.search(customer_text)) # 检查客服是否提供解决方案 solution_regex = re.compile("|".join(re.escape(k) for k in resolution_keywords)) solution_found = bool(solution_regex.search(agent_text)) # 如果没有检测到问题，则认为已解决 if not has_problem: return True return solution_found def stop(self): """停止分析""" self.running = False self.message.emit("正在停止分析...") class MainWindow(QMainWindow): def init(self): super().init() self.setWindowTitle("外呼电话录音包质检分析系统") self.setGeometry(100, 100, 1000, 700) self.setStyleSheet(""" QMainWindow { background-color: #f0f0f0; } QGroupBox { font-weight: bold; border: 1px solid gray; border-radius: 5px; margin-top: 1ex; } QGroupBox::title { subcontrol-origin: margin; left: 10px; padding: 0 5px; } QPushButton { background-color: #4CAF50; color: white; border: none; padding: 5px 10px; border-radius: 3px; } QPushButton:hover { background-color: #45a049; } QPushButton:disabled { background-color: #cccccc; } QProgressBar { border: 1px solid grey; border-radius: 3px; text-align: center; } QProgressBar::chunk { background-color: #4CAF50; width: 10px; } QTextEdit { font-family: Consolas, Monaco, monospace; } """) # 初始化变量 self.audio_files = [] self.keyword_file = "" self.whisper_model_path = "./models/whisper-small" self.pyannote_model_path = "./models/pyannote-speaker-diarization" self.emotion_model_path = "./models/Erlangshen-Roberta-110M-Sentiment" self.output_dir = os.path.expanduser("~/质检报告") # 创建主控件 central_widget = QWidget() self.setCentralWidget(central_widget) main_layout = QVBoxLayout(central_widget) main_layout.setSpacing(10) main_layout.setContentsMargins(15, 15, 15, 15) # 文件选择区域 file_group = QGroupBox("文件选择") file_layout = QVBoxLayout(file_group) file_layout.setSpacing(8) # 音频文件选择 audio_layout = QHBoxLayout() self.audio_label = QLabel("音频文件/文件夹:") audio_layout.addWidget(self.audio_label) self.audio_path_edit = QLineEdit() self.audio_path_edit.setPlaceholderText("请选择音频文件或文件夹") audio_layout.addWidget(self.audio_path_edit, 3) self.audio_browse_btn = QPushButton("浏览...") self.audio_browse_btn.clicked.connect(self.browse_audio) audio_layout.addWidget(self.audio_browse_btn) file_layout.addLayout(audio_layout) # 关键词文件选择 keyword_layout = QHBoxLayout() self.keyword_label = QLabel("关键词文件:") keyword_layout.addWidget(self.keyword_label) self.keyword_path_edit = QLineEdit() self.keyword_path_edit.setPlaceholderText("请选择Excel格式的关键词文件") keyword_layout.addWidget(self.keyword_path_edit, 3) self.keyword_browse_btn = QPushButton("浏览...") self.keyword_browse_btn.clicked.connect(self.browse_keyword) keyword_layout.addWidget(self.keyword_browse_btn) file_layout.addLayout(keyword_layout) main_layout.addWidget(file_group) # 模型设置区域 model_group = QGroupBox("模型设置") model_layout = QVBoxLayout(model_group) model_layout.setSpacing(8) # Whisper模型路径 whisper_layout = QHBoxLayout() whisper_layout.addWidget(QLabel("Whisper模型路径:")) self.whisper_edit = QLineEdit(self.whisper_model_path) whisper_layout.addWidget(self.whisper_edit, 3) model_layout.addLayout(whisper_layout) # Pyannote模型路径 pyannote_layout = QHBoxLayout() pyannote_layout.addWidget(QLabel("Pyannote模型路径:")) self.pyannote_edit = QLineEdit(self.pyannote_model_path) pyannote_layout.addWidget(self.pyannote_edit, 3) model_layout.addLayout(pyannote_layout) # 情感分析模型路径 emotion_layout = QHBoxLayout() emotion_layout.addWidget(QLabel("情感分析模型路径:")) self.emotion_edit = QLineEdit(self.emotion_model_path) emotion_layout.addWidget(self.emotion_edit, 3) model_layout.addLayout(emotion_layout) # 输出目录 output_layout = QHBoxLayout() output_layout.addWidget(QLabel("输出目录:")) self.output_edit = QLineEdit(self.output_dir) self.output_edit.setPlaceholderText("请选择报告输出目录") output_layout.addWidget(self.output_edit, 3) self.output_browse_btn = QPushButton("浏览...") self.output_browse_btn.clicked.connect(self.browse_output) output_layout.addWidget(self.output_browse_btn) model_layout.addLayout(output_layout) main_layout.addWidget(model_group) # 控制按钮区域 control_layout = QHBoxLayout() control_layout.setSpacing(10) self.start_btn = QPushButton("开始分析") self.start_btn.setStyleSheet("background-color: #2196F3;") self.start_btn.clicked.connect(self.start_analysis) control_layout.addWidget(self.start_btn) self.stop_btn = QPushButton("停止分析") self.stop_btn.setStyleSheet("background-color: #f44336;") self.stop_btn.clicked.connect(self.stop_analysis) self.stop_btn.setEnabled(False) control_layout.addWidget(self.stop_btn) self.clear_btn = QPushButton("清空") self.clear_btn.clicked.connect(self.clear_all) control_layout.addWidget(self.clear_btn) main_layout.addLayout(control_layout) # 进度条 self.progress_bar = QProgressBar() self.progress_bar.setValue(0) self.progress_bar.setFormat("就绪") self.progress_bar.setMinimumHeight(25) main_layout.addWidget(self.progress_bar) # 日志输出区域 log_group = QGroupBox("分析日志") log_layout = QVBoxLayout(log_group) self.log_text = QTextEdit() self.log_text.setReadOnly(True) log_layout.addWidget(self.log_text) main_layout.addWidget(log_group, 1) # 给日志区域更多空间 # 状态区域 status_layout = QHBoxLayout() self.status_label = QLabel("状态: 就绪") status_layout.addWidget(self.status_label, 1) self.file_count_label = QLabel("已选择0个音频文件") status_layout.addWidget(self.file_count_label) main_layout.addLayout(status_layout) # 初始化分析线程 self.analysis_thread = None def browse_audio(self): """浏览音频文件或文件夹""" options = QFileDialog.Options() files, _ = QFileDialog.getOpenFileNames( self, "选择音频文件", "", "音频文件 (.mp3 .wav .amr .ogg .flac .m4a);;所有文件 ()", options=options ) if files: self.audio_files = files self.audio_path_edit.setText("; ".join(files)) self.file_count_label.setText(f"已选择{len(files)}个音频文件") self.log_text.append(f"已选择{len(files)}个音频文件") def browse_keyword(self): """浏览关键词文件""" options = QFileDialog.Options() file, _ = QFileDialog.getOpenFileName( self, "选择关键词文件", "", "Excel文件 (.xlsx .xls);;所有文件 ()", options=options ) if file: self.keyword_file = file self.keyword_path_edit.setText(file) self.log_text.append(f"已选择关键词文件: {file}") def browse_output(self): """浏览输出目录""" options = QFileDialog.Options() directory = QFileDialog.getExistingDirectory( self, "选择输出目录", self.output_dir, options=options ) if directory: self.output_dir = directory self.output_edit.setText(directory) self.log_text.append(f"输出目录设置为: {directory}") def start_analysis(self): """开始分析""" if not self.audio_files: self.show_warning("请先选择音频文件") return if not self.keyword_file: self.show_warning("请先选择关键词文件") return if not os.path.exists(self.keyword_file): self.show_warning("关键词文件不存在，请重新选择") return # 检查模型路径 model_paths = [ self.whisper_edit.text(), self.pyannote_edit.text(), self.emotion_edit.text() ] for path in model_paths: if not os.path.exists(path): self.show_warning(f"模型路径不存在: {path}") return # 更新模型路径 self.whisper_model_path = self.whisper_edit.text() self.pyannote_model_path = self.pyannote_edit.text() self.emotion_model_path = self.emotion_edit.text() self.output_dir = self.output_edit.text() # 创建输出目录 os.makedirs(self.output_dir, exist_ok=True) self.log_text.append("开始分析...") self.start_btn.setEnabled(False) self.stop_btn.setEnabled(True) self.status_label.setText("状态: 分析中...") self.progress_bar.setFormat("分析中... 0%") self.progress_bar.setValue(0) # 创建并启动分析线程 self.analysis_thread = AnalysisThread( self.audio_files, self.keyword_file, self.whisper_model_path, self.pyannote_model_path, self.emotion_model_path ) self.analysis_thread.progress.connect(self.update_progress) self.analysis_thread.message.connect(self.log_text.append) self.analysis_thread.analysis_complete.connect(self.on_analysis_complete) self.analysis_thread.error.connect(self.on_analysis_error) self.analysis_thread.finished.connect(self.on_analysis_finished) self.analysis_thread.start() def update_progress(self, value): """更新进度条""" self.progress_bar.setValue(value) self.progress_bar.setFormat(f"分析中... {value}%") def stop_analysis(self): """停止分析""" if self.analysis_thread and self.analysis_thread.isRunning(): self.analysis_thread.stop() self.log_text.append("正在停止分析...") self.stop_btn.setEnabled(False) def clear_all(self): """清空所有内容""" self.audio_files = [] self.keyword_file = "" self.audio_path_edit.clear() self.keyword_path_edit.clear() self.log_text.clear() self.progress_bar.setValue(0) self.progress_bar.setFormat("就绪") self.status_label.setText("状态: 就绪") self.file_count_label.setText("已选择0个音频文件") self.log_text.append("已清空所有内容") def show_warning(self, message): """显示警告消息""" QMessageBox.warning(self, "警告", message) self.log_text.append(f"警告: {message}") def on_analysis_complete(self, result): """分析完成处理""" try: self.log_text.append("正在生成报告...") if not result.get("results"): self.log_text.append("警告: 没有生成任何分析结果") return # 生成Excel报告 excel_path = os.path.join(self.output_dir, "质检分析报告.xlsx") self.generate_excel_report(result, excel_path) # 生成Word报告 word_path = os.path.join(self.output_dir, "质检分析报告.docx") self.generate_word_report(result, word_path) self.log_text.append(f"分析报告已保存至: {excel_path}") self.log_text.append(f"可视化报告已保存至: {word_path}") self.log_text.append("分析完成！") self.status_label.setText(f"状态: 分析完成！报告保存至: {self.output_dir}") self.progress_bar.setFormat("分析完成！") # 显示完成消息 QMessageBox.information( self, "分析完成", f"分析完成！报告已保存至:\n{excel_path}\n{word_path}" ) except Exception as e: import traceback error_msg = f"生成报告时出错: {str(e)}\n{traceback.format_exc()}" self.log_text.append(error_msg) logger.error(error_msg) def on_analysis_error(self, message): """分析错误处理""" self.log_text.append(f"错误: {message}") self.status_label.setText("状态: 发生错误") self.progress_bar.setFormat("发生错误") QMessageBox.critical(self, "分析错误", message) def on_analysis_finished(self): """分析线程结束处理""" self.start_btn.setEnabled(True) self.stop_btn.setEnabled(False) def generate_excel_report(self, result, output_path): """生成Excel报告（增强健壮性）""" try: # 从结果中提取数据 data = [] for res in result['results']: data.append({ "文件名": res['file_name'], "音频时长(秒)": res['duration'], "开场白检查": "通过" if res['opening_check'] else "未通过", "结束语检查": "通过" if res['closing_check'] else "未通过", "服务禁语检查": "通过" if not res['forbidden_check'] else "未通过", "客服情感": res['agent_emotion']['label'], "客服情感得分": res['agent_emotion']['score'], "客户情感": res['customer_emotion']['label'], "客户情感得分": res['customer_emotion']['score'], "语速(字/分)": res['speech_rate'], "平均音量(dB)": res['volume_mean'], "音量标准差": res['volume_std'], "问题解决率": "是" if res['resolution_rate'] else "否" }) # 创建DataFrame并保存 df = pd.DataFrame(data) # 尝试使用openpyxl引擎（更稳定） try: df.to_excel(output_path, index=False, engine='openpyxl') except ImportError: df.to_excel(output_path, index=False) # 添加汇总统计 try: with pd.ExcelWriter(output_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer: summary_data = { "统计项": ["总文件数", "开场白通过率", "结束语通过率", "服务禁语通过率", "问题解决率"], "数值": [ len(result['results']), df['开场白检查'].value_counts().get('通过', 0) / len(df), df['结束语检查'].value_counts().get('通过', 0) / len(df), df['服务禁语检查'].value_counts().get('通过', 0) / len(df), df['问题解决率'].value_counts().get('是', 0) / len(df) ] } summary_df = pd.DataFrame(summary_data) summary_df.to_excel(writer, sheet_name='汇总统计', index=False) except Exception as e: self.log_text.append(f"添加汇总统计时出错: {str(e)}") except Exception as e: raise Exception(f"生成Excel报告失败: {str(e)}") def generate_word_report(self, result, output_path): """生成Word报告（增强健壮性）""" try: doc = Document() # 添加标题 doc.add_heading('外呼电话录音质检分析报告', 0) # 添加基本信息 doc.add_heading('分析概况', level=1) doc.add_paragraph(f"分析时间: {time.strftime('%Y-%m-%d %H:%M:%S')}") doc.add_paragraph(f"分析文件数量: {len(result['results'])}") doc.add_paragraph(f"关键词文件: {os.path.basename(self.keyword_file)}") # 添加汇总统计 doc.add_heading('汇总统计', level=1) # 创建汇总表格 table = doc.add_table(rows=5, cols=2) table.style = 'Table Grid' # 表头 hdr_cells = table.rows[0].cells hdr_cells[0].text = '统计项' hdr_cells[1].text = '数值' # 计算统计数据 df = pd.DataFrame(result['results']) pass_rates = { "开场白通过率": df['opening_check'].mean() if not df.empty else 0, "结束语通过率": df['closing_check'].mean() if not df.empty else 0, "服务禁语通过率": (1 - df['forbidden_check']).mean() if not df.empty else 0, "问题解决率": df['resolution_rate'].mean() if not df.empty else 0 } # 填充表格 rows = [ ("总文件数", len(result['results'])), ("开场白通过率", f"{pass_rates['开场白通过率']:.2%}"), ("结束语通过率", f"{pass_rates['结束语通过率']:.2%}"), ("服务禁语通过率", f"{pass_rates['服务禁语通过率']:.2%}"), ("问题解决率", f"{pass_rates['问题解决率']:.2%}") ] for i, row_data in enumerate(rows): if i < len(table.rows): row_cells = table.rows[i].cells row_cells[0].text = row_data[0] row_cells[1].text = str(row_data[1]) # 添加情感分析图表 if result['results']: doc.add_heading('情感分析', level=1) # 客服情感分布 agent_emotions = [res['agent_emotion']['label'] for res in result['results']] agent_emotion_counts = pd.Series(agent_emotions).value_counts() if not agent_emotion_counts.empty: fig, ax = plt.subplots(figsize=(6, 4)) agent_emotion_counts.plot.pie(autopct='%1.1f%%', ax=ax) ax.set_title('客服情感分布') ax.set_ylabel('') # 移除默认的ylabel plt.tight_layout() # 保存图表到临时文件 chart_path = os.path.join(self.output_dir, "agent_emotion_chart.png") plt.savefig(chart_path, dpi=100, bbox_inches='tight') plt.close() doc.add_picture(chart_path, width=Inches(4)) doc.add_paragraph('图1: 客服情感分布') # 客户情感分布 customer_emotions = [res['customer_emotion']['label'] for res in result['results']] customer_emotion_counts = pd.Series(customer_emotions).value_counts() if not customer_emotion_counts.empty: fig, ax = plt.subplots(figsize=(6, 4)) customer_emotion_counts.plot.pie(autopct='%1.1f%%', ax=ax) ax.set_title('客户情感分布') ax.set_ylabel('') # 移除默认的ylabel plt.tight_layout() chart_path = os.path.join(self.output_dir, "customer_emotion_chart.png") plt.savefig(chart_path, dpi=100, bbox_inches='tight') plt.close() doc.add_picture(chart_path, width=Inches(4)) doc.add_paragraph('图2: 客户情感分布') # 添加详细分析结果 doc.add_heading('详细分析结果', level=1) # 创建详细表格 table = doc.add_table(rows=1, cols=6) table.style = 'Table Grid' # 表头 hdr_cells = table.rows[0].cells headers = ['文件名', '开场白', '结束语', '禁语', '客服情感', '问题解决'] for i, header in enumerate(headers): hdr_cells[i].text = header # 填充数据 for res in result['results']: row_cells = table.add_row().cells row_cells[0].text = res['file_name'] row_cells[1].text = "✓" if res['opening_check'] else "✗" row_cells[2].text = "✓" if res['closing_check'] else "✗" row_cells[3].text = "✗" if res['forbidden_check'] else "✓" row_cells[4].text = res['agent_emotion']['label'] row_cells[5].text = "✓" if res['resolution_rate'] else "✗" # 保存文档 doc.save(output_path) except Exception as e: raise Exception(f"生成Word报告失败: {str(e)}") if name == "main": # 检查是否安装了torch try: import torch except ImportError: print("警告: PyTorch 未安装，情感分析可能无法使用GPU加速") app = QApplication(sys.argv) # 设置应用样式 app.setStyle("Fusion") window = MainWindow() window.show() sys.exit(app.exec_())

- 使用临时文件处理音频片段，避免大文件占用内存 - 定期垃圾回收和CUDA缓存清理（如果可用） - 分块处理长音频文件（超过10分钟的音频） 2. **CPU支持**： - 所有模型都支持在CPU上运行（通过设置device参数...

检查代码并优化： import sys import os import json import time import wave import numpy as np import pandas as pd import matplotlib.pyplot as plt import soundfile as sf # 新增依赖，用于音频片段保存 from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog, QProgressBar, QGroupBox, QComboBox, QCheckBox) from PyQt5.QtCore import QThread, pyqtSignal from pydub import AudioSegment from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification import whisper from pyannote.audio import Pipeline from docx import Document from docx.shared import Inches import librosa import tempfile from collections import defaultdict import re # 新增用于中文字符计数 class AnalysisThread(QThread): progress = pyqtSignal(int) message = pyqtSignal(str) analysis_complete = pyqtSignal(dict) error = pyqtSignal(str) def init(self, audio_files, keyword_file, whisper_model_path, pyannote_model_path, emotion_model_path): super().init() self.audio_files = audio_files self.keyword_file = keyword_file self.whisper_model_path = whisper_model_path self.pyannote_model_path = pyannote_model_path self.emotion_model_path = emotion_model_path self.running = True self.cached_models = {} def run(self): try: # 加载关键词 self.message.emit("正在加载关键词...") keywords = self.load_keywords() # 预加载模型 self.message.emit("正在预加载模型...") self.preload_models() results = [] total_files = len(self.audio_files) for idx, audio_file in enumerate(self.audio_files): if not self.running: self.message.emit("分析已停止") return self.message.emit(f"正在处理文件: {os.path.basename(audio_file)} ({idx+1}/{total_files})") file_result = self.analyze_file(audio_file, keywords) if file_result: results.append(file_result) self.progress.emit(int((idx + 1) / total_files * 100)) self.analysis_complete.emit({"results": results, "keywords": keywords}) self.message.emit("分析完成！") except Exception as e: import traceback self.error.emit(f"分析过程中发生错误: {str(e)}\n{traceback.format_exc()}") def preload_models(self): """预加载所有模型到缓存""" # 检查是否已加载模型 if hasattr(self, 'cached_models') and self.cached_models: return self.cached_models = {} # 加载语音识别模型 if 'whisper' not in self.cached_models: self.message.emit("正在加载语音识别模型...") self.cached_models['whisper'] = whisper.load_model(self.whisper_model_path) # 加载说话人分离模型 if 'pyannote' not in self.cached_models: self.message.emit("正在加载说话人分离模型...") self.cached_models['pyannote'] = Pipeline.from_pretrained(self.pyannote_model_path) # 加载情感分析模型 if 'emotion_classifier' not in self.cached_models: self.message.emit("正在加载情感分析模型...") tokenizer = AutoTokenizer.from_pretrained(self.emotion_model_path) model = AutoModelForSequenceClassification.from_pretrained(self.emotion_model_path) self.cached_models['emotion_classifier'] = pipeline( "text-classification", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1 # 使用GPU如果可用 ) def analyze_file(self, audio_file, keywords): """分析单个音频文件""" try: # 确保音频为WAV格式 wav_file = self.convert_to_wav(audio_file) # 获取音频信息 duration, sample_rate, channels = self.get_audio_info(wav_file) # 说话人分离 diarization = self.cached_models['pyannote'](wav_file) # 识别客服和客户（使用改进的方法） agent_segments, customer_segments = self.identify_speakers(wav_file, diarization, keywords['opening']) # 语音识别（使用优化后的方法） whisper_model = self.cached_models['whisper'] agent_text = self.transcribe_audio(wav_file, agent_segments, whisper_model) customer_text = self.transcribe_audio(wav_file, customer_segments, whisper_model) # 情感分析 emotion_classifier = self.cached_models['emotion_classifier'] agent_emotion = self.analyze_emotion(agent_text, emotion_classifier) customer_emotion = self.analyze_emotion(customer_text, emotion_classifier) # 服务规范检查 opening_check = self.check_opening(agent_text, keywords['opening']) closing_check = self.check_closing(agent_text, keywords['closing']) forbidden_check = self.check_forbidden(agent_text, keywords['forbidden']) # 沟通技巧分析（使用改进的方法） speech_rate = self.analyze_speech_rate(wav_file, agent_segments) volume_analysis = self.analyze_volume(wav_file, agent_segments) # 问题解决率分析 resolution_rate = self.analyze_resolution(agent_text, customer_text, keywords['resolution']) # 构建结果 return { "file_name": os.path.basename(audio_file), "duration": duration, "agent_text": agent_text, "customer_text": customer_text, "opening_check": opening_check, "closing_check": closing_check, "forbidden_check": forbidden_check, "agent_emotion": agent_emotion, "customer_emotion": customer_emotion, "speech_rate": speech_rate, "volume_mean": volume_analysis['mean'], "volume_std": volume_analysis['std'], "resolution_rate": resolution_rate } except Exception as e: self.error.emit(f"处理文件 {os.path.basename(audio_file)} 时出错: {str(e)}") return None def load_keywords(self): """从Excel文件加载关键词""" try: df = pd.read_excel(self.keyword_file) keywords = { "opening": [str(k).strip() for k in df['opening'].dropna().tolist()], "closing": [str(k).strip() for k in df['closing'].dropna().tolist()], "forbidden": [str(k).strip() for k in df['forbidden'].dropna().tolist()], "resolution": [str(k).strip() for k in df['resolution'].dropna().tolist()] } return keywords except Exception as e: raise Exception(f"加载关键词文件失败: {str(e)}") def convert_to_wav(self, audio_file): """将音频文件转换为WAV格式（如果需要）""" try: if not audio_file.lower().endswith('.wav'): # 使用临时文件避免磁盘IO with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmpfile: output_file = tmpfile.name audio = AudioSegment.from_file(audio_file) audio.export(output_file, format='wav') return output_file return audio_file except Exception as e: raise Exception(f"音频转换失败: {str(e)}") def get_audio_info(self, wav_file): """获取音频文件信息""" try: with wave.open(wav_file, 'rb') as wf: frames = wf.getnframes() rate = wf.getframerate() channels = wf.getnchannels() duration = frames / float(rate) return duration, rate, channels except Exception as e: raise Exception(f"获取音频信息失败: {str(e)}") def identify_speakers(self, wav_file, diarization, opening_keywords): """改进的客服识别方法 - 检查前三个片段是否有开场白关键词""" speaker_segments = defaultdict(list) for segment, _, speaker in diarization.itertracks(yield_label=True): speaker_segments[speaker].append((segment.start, segment.end)) # 如果没有说话人 if not speaker_segments: return [], [] # 如果只有一个说话人 if len(speaker_segments) == 1: speaker = list(speaker_segments.keys())[0] return speaker_segments[speaker], [] # 检查每个说话人的前三个片段是否有开场白 speaker_scores = {} whisper_model = self.cached_models['whisper'] for speaker, segments in speaker_segments.items(): score = 0 # 取前三个片段（或所有片段如果少于3个） check_segments = segments[:3] for start, end in check_segments: # 转录片段 text = self.transcribe_audio_segment(wav_file, [(start, end)], whisper_model) # 检查开场白关键词 for keyword in opening_keywords: if keyword and keyword in text: score += 1 break speaker_scores[speaker] = score # 找到得分最高的说话人作为客服 agent_speaker = max(speaker_scores, key=speaker_scores.get) agent_segments = [] customer_segments = [] for speaker, segments in speaker_segments.items(): if speaker == agent_speaker: agent_segments = segments else: customer_segments.extend(segments) return agent_segments, customer_segments def transcribe_audio_segment(self, wav_file, segments, model): """转录单个音频片段 - 用于客服识别""" if not segments: return "" # 使用pydub加载音频 audio = AudioSegment.from_wav(wav_file) start, end = segments[0] # 转换为毫秒 start_ms = int(start * 1000) end_ms = int(end * 1000) segment_audio = audio[start_ms:end_ms] # 使用临时文件 with tempfile.NamedTemporaryFile(suffix='.wav') as tmpfile: segment_audio.export(tmpfile.name, format="wav") result = model.transcribe(tmpfile.name) return result['text'] def transcribe_audio(self, wav_file, segments, model): """优化后的转录方法 - 按片段转录""" if not segments: return "" # 使用pydub加载音频 audio = AudioSegment.from_wav(wav_file) full_text = "" # 只处理指定片段 for start, end in segments: # 转换为毫秒 start_ms = int(start * 1000) end_ms = int(end * 1000) segment_audio = audio[start_ms:end_ms] # 使用临时文件避免内存占用 with tempfile.NamedTemporaryFile(suffix='.wav') as tmpfile: segment_audio.export(tmpfile.name, format="wav") result = model.transcribe(tmpfile.name) full_text += result['text'] + " " return full_text.strip() def analyze_emotion(self, text, classifier): """分析文本情感""" if not text.strip(): return {"label": "中性", "score": 0.0} # 截断长文本以提高性能 if len(text) > 500: text = text[:500] result = classifier(text, truncation=True, max_length=512) return { "label": result[0]['label'], "score": result[0]['score'] } def check_opening(self, text, opening_keywords): """检查开场白""" return any(keyword in text for keyword in opening_keywords if keyword) def check_closing(self, text, closing_keywords): """检查结束语""" return any(keyword in text for keyword in closing_keywords if keyword) def check_forbidden(self, text, forbidden_keywords): """检查服务禁语""" return any(keyword in text for keyword in forbidden_keywords if keyword) def analyze_speech_rate(self, wav_file, segments): """改进的语速分析 - 基于实际识别文本""" if not segments: return 0 # 加载音频 y, sr = librosa.load(wav_file, sr=None) total_chars = 0 total_duration = 0 whisper_model = self.cached_models['whisper'] for start, end in segments: # 计算片段时长（秒） duration = end - start total_duration += duration # 转录片段 text = self.transcribe_audio_segment(wav_file, [(start, end)], whisper_model) # 计算中文字符数（去除标点和空格） chinese_chars = sum(1 for char in text if '\u4e00' <= char <= '\u9fff') total_chars += chinese_chars if total_duration == 0: return 0 # 语速 = 总字数 / 总时长(分钟) return total_chars / (total_duration / 60) def analyze_volume(self, wav_file, segments): """改进的音量分析 - 使用librosa计算RMS分贝值""" if not segments: return {"mean": -60, "std": 0} # 加载音频 y, sr = librosa.load(wav_file, sr=None) all_dB = [] for start, end in segments: start_sample = int(start * sr) end_sample = int(end * sr) segment_audio = y[start_sample:end_sample] # 计算RMS并转换为dB rms = librosa.feature.rms(y=segment_audio)[0] dB = librosa.amplitude_to_db(rms, ref=np.max) all_dB.extend(dB) if not all_dB: return {"mean": -60, "std": 0} return { "mean": float(np.mean(all_dB)), "std": float(np.std(all_dB)) } def analyze_resolution(self, agent_text, customer_text, resolution_keywords): """分析问题解决率""" return any(keyword in agent_text for keyword in resolution_keywords if keyword) def stop(self): """停止分析""" self.running = False class MainWindow(QMainWindow): def init(self): super().init() self.setWindowTitle("外呼电话录音包质检分析系统") self.setGeometry(100, 100, 1000, 700) # 初始化变量 self.audio_files = [] self.keyword_file = "" self.whisper_model_path = "./models/whisper-small" self.pyannote_model_path = "./models/pyannote-speaker-diarization" self.emotion_model_path = "./models/Erlangshen-Roberta-110M-Sentiment" self.output_dir = "./reports" # 创建主控件 central_widget = QWidget() self.setCentralWidget(central_widget) main_layout = QVBoxLayout(central_widget) # 文件选择区域 file_group = QGroupBox("文件选择") file_layout = QVBoxLayout(file_group) # 音频文件选择 audio_layout = QHBoxLayout() self.audio_label = QLabel("音频文件/文件夹:") audio_layout.addWidget(self.audio_label) self.audio_path_edit = QLineEdit() audio_layout.addWidget(self.audio_path_edit) self.audio_browse_btn = QPushButton("浏览...") self.audio_browse_btn.clicked.connect(self.browse_audio) audio_layout.addWidget(self.audio_browse_btn) file_layout.addLayout(audio_layout) # 关键词文件选择 keyword_layout = QHBoxLayout() self.keyword_label = QLabel("关键词文件:") keyword_layout.addWidget(self.keyword_label) self.keyword_path_edit = QLineEdit() keyword_layout.addWidget(self.keyword_path_edit) self.keyword_browse_btn = QPushButton("浏览...") self.keyword_browse_btn.clicked.connect(self.browse_keyword) keyword_layout.addWidget(self.keyword_browse_btn) file_layout.addLayout(keyword_layout) main_layout.addWidget(file_group) # 模型设置区域 model_group = QGroupBox("模型设置") model_layout = QVBoxLayout(model_group) # Whisper模型路径 whisper_layout = QHBoxLayout() whisper_layout.addWidget(QLabel("Whisper模型路径:")) self.whisper_edit = QLineEdit(self.whisper_model_path) whisper_layout.addWidget(self.whisper_edit) model_layout.addLayout(whisper_layout) # Pyannote模型路径 pyannote_layout = QHBoxLayout() pyannote_layout.addWidget(QLabel("Pyannote模型路径:")) self.pyannote_edit = QLineEdit(self.pyannote_model_path) pyannote_layout.addWidget(self.pyannote_edit) model_layout.addLayout(pyannote_layout) # 情感分析模型路径 emotion_layout = QHBoxLayout() emotion_layout.addWidget(QLabel("情感分析模型路径:")) self.emotion_edit = QLineEdit(self.emotion_model_path) emotion_layout.addWidget(self.emotion_edit) model_layout.addLayout(emotion_layout) # 输出目录 output_layout = QHBoxLayout() output_layout.addWidget(QLabel("输出目录:")) self.output_edit = QLineEdit(self.output_dir) output_layout.addWidget(self.output_edit) self.output_browse_btn = QPushButton("浏览...") self.output_browse_btn.clicked.connect(self.browse_output) output_layout.addWidget(self.output_browse_btn) model_layout.addLayout(output_layout) main_layout.addWidget(model_group) # 控制按钮区域 control_layout = QHBoxLayout() self.start_btn = QPushButton("开始分析") self.start_btn.clicked.connect(self.start_analysis) control_layout.addWidget(self.start_btn) self.stop_btn = QPushButton("停止分析") self.stop_btn.clicked.connect(self.stop_analysis) self.stop_btn.setEnabled(False) control_layout.addWidget(self.stop_btn) self.clear_btn = QPushButton("清空") self.clear_btn.clicked.connect(self.clear_all) control_layout.addWidget(self.clear_btn) main_layout.addLayout(control_layout) # 进度条 self.progress_bar = QProgressBar() self.progress_bar.setValue(0) main_layout.addWidget(self.progress_bar) # 日志输出区域 log_group = QGroupBox("分析日志") log_layout = QVBoxLayout(log_group) self.log_text = QTextEdit() self.log_text.setReadOnly(True) log_layout.addWidget(self.log_text) main_layout.addWidget(log_group) # 状态区域 status_layout = QHBoxLayout() self.status_label = QLabel("就绪") status_layout.addWidget(self.status_label) self.file_count_label = QLabel("已选择0个音频文件") status_layout.addWidget(self.file_count_label) main_layout.addLayout(status_layout) # 初始化分析线程 self.analysis_thread = None def browse_audio(self): """浏览音频文件或文件夹""" options = QFileDialog.Options() files, _ = QFileDialog.getOpenFileNames( self, "选择音频文件", "", "音频文件 (.mp3 .wav .amr .ogg .flac);;所有文件 ()", options=options ) if files: self.audio_files = files self.audio_path_edit.setText("; ".join(files)) self.file_count_label.setText(f"已选择{len(files)}个音频文件") self.log_text.append(f"已选择{len(files)}个音频文件") def browse_keyword(self): """浏览关键词文件""" options = QFileDialog.Options() file, _ = QFileDialog.getOpenFileName( self, "选择关键词文件", "", "Excel文件 (.xlsx .xls);;所有文件 (*)", options=options ) if file: self.keyword_file = file self.keyword_path_edit.setText(file) self.log_text.append(f"已选择关键词文件: {file}") def browse_output(self): """浏览输出目录""" options = QFileDialog.Options() directory = QFileDialog.getExistingDirectory( self, "选择输出目录", "", options=options ) if directory: self.output_dir = directory self.output_edit.setText(directory) self.log_text.append(f"输出目录设置为: {directory}") def start_analysis(self): """开始分析""" if not self.audio_files: self.log_text.append("错误: 请先选择音频文件") return if not self.keyword_file: self.log_text.append("错误: 请先选择关键词文件") return # 更新模型路径 self.whisper_model_path = self.whisper_edit.text() self.pyannote_model_path = self.pyannote_edit.text() self.emotion_model_path = self.emotion_edit.text() self.output_dir = self.output_edit.text() # 创建输出目录 os.makedirs(self.output_dir, exist_ok=True) self.log_text.append("开始分析...") self.start_btn.setEnabled(False) self.stop_btn.setEnabled(True) self.status_label.setText("分析中...") self.progress_bar.setValue(0) # 创建并启动分析线程 self.analysis_thread = AnalysisThread( self.audio_files, self.keyword_file, self.whisper_model_path, self.pyannote_model_path, self.emotion_model_path ) self.analysis_thread.progress.connect(self.progress_bar.setValue) self.analysis_thread.message.connect(self.log_text.append) self.analysis_thread.analysis_complete.connect(self.on_analysis_complete) self.analysis_thread.error.connect(self.on_analysis_error) self.analysis_thread.finished.connect(self.on_analysis_finished) self.analysis_thread.start() def stop_analysis(self): """停止分析""" if self.analysis_thread and self.analysis_thread.isRunning(): self.analysis_thread.stop() self.log_text.append("正在停止分析...") self.stop_btn.setEnabled(False) def clear_all(self): """清空所有内容""" self.audio_files = [] self.keyword_file = "" self.audio_path_edit.clear() self.keyword_path_edit.clear() self.log_text.clear() self.progress_bar.setValue(0) self.status_label.setText("就绪") self.file_count_label.setText("已选择0个音频文件") self.log_text.append("已清空所有内容") def on_analysis_complete(self, result): """分析完成处理""" try: self.log_text.append("正在生成报告...") if not result.get("results"): self.log_text.append("警告: 没有生成任何分析结果") return # 生成Excel报告 excel_path = os.path.join(self.output_dir, "质检分析报告.xlsx") self.generate_excel_report(result, excel_path) # 生成Word报告 word_path = os.path.join(self.output_dir, "质检分析报告.docx") self.generate_word_report(result, word_path) self.log_text.append(f"分析报告已保存至: {excel_path}") self.log_text.append(f"可视化报告已保存至: {word_path}") self.log_text.append("分析完成！") self.status_label.setText(f"分析完成！报告保存至: {self.output_dir}") except Exception as e: import traceback self.log_text.append(f"生成报告时出错: {str(e)}\n{traceback.format_exc()}") def on_analysis_error(self, message): """分析错误处理""" self.log_text.append(f"错误: {message}") self.status_label.setText("发生错误") def on_analysis_finished(self): """分析线程结束处理""" self.start_btn.setEnabled(True) self.stop_btn.setEnabled(False) def generate_excel_report(self, result, output_path): """生成Excel报告""" # 从结果中提取数据 data = [] for res in result['results']: data.append({ "文件名": res['file_name'], "音频时长(秒)": res['duration'], "开场白检查": "通过" if res['opening_check'] else "未通过", "结束语检查": "通过" if res['closing_check'] else "未通过", "服务禁语检查": "通过" if not res['forbidden_check'] else "未通过", "客服情感": res['agent_emotion']['label'], "客服情感得分": res['agent_emotion']['score'], "客户情感": res['customer_emotion']['label'], "客户情感得分": res['customer_emotion']['score'], "语速(字/分)": res['speech_rate'], "平均音量(dB)": res['volume_mean'], "音量标准差": res['volume_std'], "问题解决率": "是" if res['resolution_rate'] else "否" }) # 创建DataFrame并保存 df = pd.DataFrame(data) df.to_excel(output_path, index=False) # 添加汇总统计 try: with pd.ExcelWriter(output_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer: summary_data = { "统计项": ["总文件数", "开场白通过率", "结束语通过率", "服务禁语通过率", "问题解决率"], "数值": [ len(result['results']), df['开场白检查'].value_counts().get('通过', 0) / len(df), df['结束语检查'].value_counts().get('通过', 0) / len(df), df['服务禁语检查'].value_counts().get('通过', 0) / len(df), df['问题解决率'].value_counts().get('是', 0) / len(df) ] } summary_df = pd.DataFrame(summary_data) summary_df.to_excel(writer, sheet_name='汇总统计', index=False) except Exception as e: self.log_text.append(f"添加汇总统计时出错: {str(e)}") def generate_word_report(self, result, output_path): """生成Word报告""" doc = Document() # 添加标题 doc.add_heading('外呼电话录音质检分析报告', 0) # 添加基本信息 doc.add_heading('分析概况', level=1) doc.add_paragraph(f"分析时间: {time.strftime('%Y-%m-%d %H:%M:%S')}") doc.add_paragraph(f"分析文件数量: {len(result['results'])}") doc.add_paragraph(f"关键词文件: {os.path.basename(self.keyword_file)}") # 添加汇总统计 doc.add_heading('汇总统计', level=1) # 创建汇总表格 table = doc.add_table(rows=5, cols=2) table.style = 'Table Grid' # 表头 hdr_cells = table.rows[0].cells hdr_cells[0].text = '统计项' hdr_cells[1].text = '数值' # 计算统计数据 df = pd.DataFrame(result['results']) pass_rates = { "开场白通过率": df['opening_check'].mean() if not df.empty else 0, "结束语通过率": df['closing_check'].mean() if not df.empty else 0, "服务禁语通过率": (1 - df['forbidden_check']).mean() if not df.empty else 0, "问题解决率": df['resolution_rate'].mean() if not df.empty else 0 } # 填充表格 rows = [ ("总文件数", len(result['results'])), ("开场白通过率", f"{pass_rates['开场白通过率']:.2%}"), ("结束语通过率", f"{pass_rates['结束语通过率']:.2%}"), ("服务禁语通过率", f"{pass_rates['服务禁语通过率']:.2%}"), ("问题解决率", f"{pass_rates['问题解决率']:.2%}") ] for i, row_data in enumerate(rows): if i < len(table.rows): row_cells = table.rows[i].cells row_cells[0].text = row_data[0] row_cells[1].text = str(row_data[1]) # 添加情感分析图表 if result['results']: doc.add_heading('情感分析', level=1) # 客服情感分布 agent_emotions = [res['agent_emotion']['label'] for res in result['results']] agent_emotion_counts = pd.Series(agent_emotions).value_counts() if not agent_emotion_counts.empty: fig, ax = plt.subplots(figsize=(6, 4)) agent_emotion_counts.plot.pie(autopct='%1.1f%%', ax=ax) ax.set_title('客服情感分布') plt.tight_layout() # 保存图表到临时文件 chart_path = os.path.join(self.output_dir, "agent_emotion_chart.png") plt.savefig(chart_path, dpi=100) plt.close() doc.add_picture(chart_path, width=Inches(4)) doc.add_paragraph('图1: 客服情感分布') # 客户情感分布 customer_emotions = [res['customer_emotion']['label'] for res in result['results']] customer_emotion_counts = pd.Series(customer_emotions).value_counts() if not customer_emotion_counts.empty: fig, ax = plt.subplots(figsize=(6, 4)) customer_emotion_counts.plot.pie(autopct='%1.1f%%', ax=ax) ax.set_title('客户情感分布') plt.tight_layout() chart_path = os.path.join(self.output_dir, "customer_emotion_chart.png") plt.savefig(chart_path, dpi=100) plt.close() doc.add_picture(chart_path, width=Inches(4)) doc.add_paragraph('图2: 客户情感分布') # 添加详细分析结果 doc.add_heading('详细分析结果', level=1) # 创建详细表格 table = doc.add_table(rows=1, cols=6) table.style = 'Table Grid' # 表头 hdr_cells = table.rows[0].cells headers = ['文件名', '开场白', '结束语', '禁语', '客服情感', '问题解决'] for i, header in enumerate(headers): hdr_cells[i].text = header # 填充数据 for res in result['results']: row_cells = table.add_row().cells row_cells[0].text = res['file_name'] row_cells[1].text = "✓" if res['opening_check'] else "✗" row_cells[2].text = "✓" if res['closing_check'] else "✗" row_cells[3].text = "✗" if res['forbidden_check'] else "✓" row_cells[4].text = res['agent_emotion']['label'] row_cells[5].text = "✓" if res['resolution_rate'] else "✗" # 保存文档 doc.save(output_path) if name == "main": # 检查是否安装了torch try: import torch except ImportError: print("警告: PyTorch 未安装，情感分析可能无法使用GPU加速") app = QApplication(sys.argv) window = MainWindow()

在convert_to_wav方法中，我们创建了一个临时文件，但是在转换完成后，我们返回了临时文件的路径。然而，这个临时文件在函数返回后会被删除（因为设置了delete=True，这是默认值）。这会导致后续处理时文件不...

非常好！我们将在前文的基础上，重新整理一套完整、清晰、适合地质行业电脑小白用户的 AI 工作流部署方案。该方案将基于： - Windows 11 + WSL2 - Docker Desktop（Linux 容器模式） - Ollama（本地大模型） - RAGFlow（文档知识库 + 向量检索） - Dify（可视化 AI 工作流） - Python 脚本（自动生成 Word 报告） --- ## 📁 部署目标结构 D:\AI_Studio\ ├── apps\ # 所有应用程序安装目录 │ ├── docker\ # Docker Desktop 安装路径 │ ├── python\ # Python 安装路径 │ └── ollama\ # Ollama 原生可执行文件（备选） │ ├── data\ # 数据存储目录 │ ├── models\ # 模型缓存（如 Ollama 模型） │ ├── ragflow_data\ # RAGFlow 数据目录 │ ├── dify_data\ # Dify 数据目录 │ ├── docker\ # Docker 数据卷 │ └── reports\ # Word 报告模板与输出 │ ├── templates\ # Word 模板 │ └── output\ # 自动生成的报告 │ ├── installer\ # 安装包统一存放目录 │ ├── Docker Desktop Installer.exe │ ├── ollama-windows-amd64.exe │ └── python-3.11.9-amd64.exe │ └── scripts\ # 自定义脚本目录 └── generate_report.py # 自动生成 Word 报告的 Python 脚本 --- ## 🔧 第一步：准备系统环境 ### ✅ 1. 启用 WSL2（无需手动安装 Ubuntu）以管理员身份打开 PowerShell： powershell # 启用 WSL 功能并安装默认发行版（Ubuntu） wsl --install # 设置默认版本为 WSL2 wsl --set-default-version 2 # 查看已安装的发行版 wsl --list --verbose > ⚠️ 如果未自动安装 Ubuntu，运行： powershell wsl --install -d Ubuntu 安装完成后设置用户名和密码。 --- ## 🐳 第二步：安装 Docker Desktop（集成 WSL2） ### ✅ 1. 下载安装包放入： D:\AI_Studio\installer\Docker%20Desktop%20Installer.exe ### ✅ 2. 安装 Docker Desktop 双击运行安装程序，并选择安装路径为： D:\AI_Studio\apps\docker > 💡 如果安装程序不支持更改路径，可以先安装到默认位置，再移动文件夹并创建软链接。 ### ✅ 3. 设置 Docker 数据卷路径编辑或新建配置文件： C:\ProgramData\Docker\config\daemon.json 写入内容： json { "data-root": "D:/AI_Studio/data/docker" } 重启 Docker 服务生效。 ### ✅ 4. 切换容器类型为 Linux 模式右键任务栏 Docker Desktop 图标 → Switch to Linux containers --- ## 🧠 第三步：安装 Ollama（推荐使用 Docker 模式） ### ✅ 方法一：使用 Docker 运行 Ollama（推荐） powershell docker run -d -p 11434:11434 -v D:/AI_Studio/data/models:/root/.ollama infiniflow/ollama:latest 拉取模型： powershell ollama pull llama3 ### ✅ 方法二：使用原生 Windows 版本（备选）将下载好的 ollama-windows-amd64.exe 放入： D:\AI_Studio\apps\ollama 重命名为 ollama.exe，并在 PowerShell 中运行： powershell cd D:\AI_Studio\apps\ollama .\ollama.exe serve --- ## 📚 第四步：部署 RAGFlow（基于 Docker） powershell mkdir D:\AI_Studio\data\ragflow_data docker run -d --name ragflow -p 8080:8080 -v D:/AI_Studio/data/ragflow_data:/data -e LOCAL_MODE=1 -e MODEL_PROVIDER=ollama -e OLLAMA_BASE_URL=http://host.docker.internal:11434 -e DEFAULT_VICUNA_MODEL=llama3 infiniflow/ragflow:latest 访问： http://localhost:8080 --- ## 🔧 第五步：部署 Dify（基于 Docker） powershell mkdir D:\AI_Studio\data\dify_data docker run -d --name dify -p 3000:3000 -v D:/AI_Studio/data/dify_data:/app/data -e DATABASE_URL=file:///app/data/dify.db -e OLLAMA_API_BASE=http://host.docker.internal:11434 -e MODEL_PROVIDER=ollama chatchat/oneclick:latest 访问： http://localhost:3000 --- ## 🐍 第六步：安装 Python（用于自动化脚本） ### ✅ 安装建议版本：Python 3.11.x 下载地址： 🔗 [https://www.python.org/downloads/windows/](https://www.python.org/downloads/windows/) 安装时请勾选： - ✅ Add Python to PATH（添加到系统环境变量）修改安装路径为： D:\AI_Studio\apps\python ### ✅ 安装常用库 powershell pip install docxtpl requests python-docx pandas flask langchain --- ## 📝 第七步：编写自动化生成 Word 报告脚本保存路径：D:\AI_Studio\scripts\generate_report.py python from docxtpl import DocxTemplate import os def generate_report(template_name, context): template_path = os.path.join("D:\\AI_Studio\\data\\reports\\templates", template_name) doc = DocxTemplate(template_path) doc.render(context) output_path = os.path.join("D:\\AI_Studio\\data\\reports\\output", f"report_{context['project']}.docx") doc.save(output_path) print(f"Report saved to {output_path}") # 示例调用 generate_report( "geology_template.docx", { "project": "某矿区勘探项目", "location": "新疆某地", "conclusion": "发现高品位铜矿带" } ) --- ## 🔁 第八步：整合工作流（RAGFlow + Dify + Python 报告生成）你可以通过 Dify 构建如下 AI 工作流： 1. 用户上传 PDF 地质资料 2. Dify 调用 RAGFlow 提取关键信息（如构造、岩性、矿体等） 3. 输出结构化 JSON 数据 4. 传入 Python 脚本填充 Word 模板 5. 自动生成 Word 报告并归档至指定路径 --- ## ✅ 总结部署步骤一览表 | 步骤 | 操作说明 | |------|----------| | 1 | 创建完整目录结构：installer, apps, data, scripts | | 2 | 启用 WSL2 并安装 Ubuntu 子系统 | | 3 | 安装 Docker Desktop 到 D:\AI_Studio\apps\docker | | 4 | 设置 Docker 数据卷路径为 D:\AI_Studio\data\docker | | 5 | 使用 Docker 安装并运行 Ollama | | 6 | 设置 Ollama 模型缓存路径为 D:\AI_Studio\data\models | | 7 | 使用 Docker 部署 RAGFlow 到 D:\AI_Studio\data\ragflow_data | | 8 | 使用 Docker 部署 Dify 到 D:\AI_Studio\data\dify_data | | 9 | 安装 Python 到 D:\AI_Studio\apps\python | | 10 | 编写 Python 脚本实现 Word 报告自动生成 | | 11 | 使用 Dify 构建 AI 工作流，整合 RAGFlow 和 Word 生成模块 | --- ##电脑小白自动安装版

7. 安装Python并配置环境 8. 部署报告生成脚本注意：由于涉及系统级操作，该脚本需要以管理员权限运行。我们将编写一个PowerShell脚本，因为它在Windows上功能强大且易于执行。 ### 自动安装脚本：D:\AI_Studio\...

C#类库封装：简化SDK调用实现多功能集成，构建地磅无人值守系统

内容概要：本文介绍了利用C#类库封装多个硬件设备的SDK接口，实现一系列复杂功能的一键式调用。具体功能包括身份证信息读取、人证识别、车牌识别（支持臻识和海康摄像头）、LED显示屏文字输出、称重数据读取、二维码扫描以及语音播报。所有功能均被封装为简单的API，极大降低了开发者的工作量和技术门槛。文中详细展示了各个功能的具体实现方式及其应用场景，如身份证读取、人证核验、车牌识别等，并最终将这些功能整合到一起，形成了一套完整的地磅称重无人值守系统解决方案。适合人群：具有一定C#编程经验的技术人员，尤其是需要快速集成多种硬件设备SDK的应用开发者。使用场景及目标：适用于需要高效集成多种硬件设备SDK的项目，特别是那些涉及身份验证、车辆管理、物流仓储等领域的企业级应用。通过使用这些封装好的API，可以大大缩短开发周期，降低维护成本，提高系统的稳定性和易用性。其他说明：虽然封装后的API极大地简化了开发流程，但对于一些特殊的业务需求，仍然可能需要深入研究底层SDK。此外，在实际部署过程中，还需考虑网络环境、硬件兼容性等因素的影响。

基于STM32F1的BLDC无刷直流电机与PMSM永磁同步电机源码解析：传感器与无传感器驱动详解

基于STM32F1的BLDC无刷直流电机和PMSM永磁同步电机的驱动实现方法，涵盖了有传感器和无传感两种驱动方式。对于BLDC电机，有传感器部分采用霍尔传感器进行六步换相，无传感部分则利用反电动势过零点检测实现换相。对于PMSM电机，有传感器部分包括霍尔传感器和编码器的方式，无传感部分则采用了滑模观测器进行矢量控制（FOC）。文中不仅提供了详细的代码片段，还分享了许多调试经验和技巧。适合人群：具有一定嵌入式系统和电机控制基础知识的研发人员和技术爱好者。使用场景及目标：适用于需要深入了解和实现BLDC和PMSM电机驱动的开发者，帮助他们掌握不同传感器条件下的电机控制技术和优化方法。其他说明：文章强调了实际调试过程中可能遇到的问题及其解决方案，如霍尔传感器的中断触发换相、反电动势过零点检测的采样时机、滑模观测器的参数调整以及编码器的ABZ解码等。

基于Java的跨平台图像处理软件ImageJ：多功能图像编辑与分析工具

内容概要：本文介绍了基于Java的图像处理软件ImageJ，详细阐述了它的跨平台特性、多线程处理能力及其丰富的图像处理功能。ImageJ由美国国立卫生研究院开发，能够在多种操作系统上运行，包括Windows、Mac OS、Linux等。它支持多种图像格式，如TIFF、PNG、GIF、JPEG、BMP、DICOM、FITS等，并提供图像栈功能，允许多个图像在同一窗口中进行并行处理。此外，ImageJ还提供了诸如缩放、旋转、扭曲、平滑处理等基本操作，以及区域和像素统计、间距、角度计算等高级功能。这些特性使ImageJ成为科研、医学、生物等多个领域的理想选择。适合人群：需要进行图像处理的专业人士，如科研人员、医生、生物学家，以及对图像处理感兴趣的普通用户。使用场景及目标：适用于需要高效处理大量图像数据的场合，特别是在科研、医学、生物学等领域。用户可以通过ImageJ进行图像的编辑、分析、处理和保存，提高工作效率。其他说明：ImageJ不仅功能强大，而且操作简单，用户无需安装额外的运行环境即可直接使用。其基于Java的开发方式确保了不同操作系统之间的兼容性和一致性。

MATLAB语音识别系统：基于GUI的数字0-9识别及深度学习模型应用 · GUI v1.2

内容概要：本文介绍了一款基于MATLAB的语音识别系统，主要功能是识别数字0到9。该系统采用图形用户界面（GUI），方便用户操作，并配有详尽的代码注释和开发报告。文中详细描述了系统的各个组成部分，包括音频采集、信号处理、特征提取、模型训练和预测等关键环节。此外，还讨论了MATLAB在此项目中的优势及其面临的挑战，如提高识别率和处理背景噪音等问题。最后，通过对各模块的工作原理和技术细节的总结，为未来的研究和发展提供了宝贵的参考资料。适合人群：对语音识别技术和MATLAB感兴趣的初学者、学生或研究人员。使用场景及目标：适用于希望深入了解语音识别技术原理的人群，特别是希望通过实际案例掌握MATLAB编程技巧的学习者。目标是在实践中学习如何构建简单的语音识别应用程序。其他说明：该程序需要MATLAB 2019b及以上版本才能正常运行，建议使用者确保软件环境符合要求。

word无法创建工作文件，请检查临时环境变量。

相关推荐

word无法创建工作文件 请检查临时环境变量

无法创建工作文件，请检查临时环境变量

word无法创建工作文件,请检查临时环境变量

Word无法创建工作文件，请检查临时环境变量

word无法创建工作文件

快速解决Word安全模式的批处理文件教程

创建互动文档：Excel与Word通过VBA连接

【POI高手进阶】：Word变量替换技巧与性能提升大揭秘

性能杀手警示：Java循环中创建临时字符串的规避策略

【高级技巧】：Win32com Shell库创建和管理文件快捷方式的秘籍

STM32H7XX-CubeMX个性化启动文件创建：项目定制化一步到位

dify生成word工作流

C#类库封装：简化SDK调用实现多功能集成，构建地磅无人值守系统

基于STM32F1的BLDC无刷直流电机与PMSM永磁同步电机源码解析：传感器与无传感器驱动详解

基于Java的跨平台图像处理软件ImageJ：多功能图像编辑与分析工具

MATLAB语音识别系统：基于GUI的数字0-9识别及深度学习模型应用 · GUI v1.2

大家在看

基于 ADS9110的隔离式数据采集 (DAQ) 系统方案（待编辑）-电路方案

自动化图书管理系统 v7.0

真正的VB6.0免安装，可以装U盘启动了

详细说明 VC++的MFC开发串口调试助手源代码,包括数据发送,接收,显示制式等29782183com

文档编码批量转换UTF16toUTF8.rar

最新推荐

C#类库封装：简化SDK调用实现多功能集成，构建地磅无人值守系统

基于STM32F1的BLDC无刷直流电机与PMSM永磁同步电机源码解析：传感器与无传感器驱动详解

基于Java的跨平台图像处理软件ImageJ：多功能图像编辑与分析工具

MATLAB语音识别系统：基于GUI的数字0-9识别及深度学习模型应用 · GUI v1.2

Teleport Pro教程：轻松复制网站内容

【跨平台开发者的必读】：解决Qt5Widgetsd.lib目标计算机类型冲突终极指南

普通RNN结构和特点

探讨通用数据连接池的核心机制与应用

【LabVIEW网络通讯终极指南】：7个技巧提升UDP性能和安全性

简要介绍cnn卷积神经网络

word无法创建工作文件请检查临时环境变量