Running the following script:

import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from datetime import datetime
import math
from scipy.spatial import distance as dist
import librosa
import noisereduce as nr
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from pydub import AudioSegment
import wave
import contextlib
import joblib
# Initialize the MediaPipe pose model
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
    model_complexity=2
)

# Posture type definitions
POSTURE_TYPES = [
    "Arms down", "One-hand pointing", "Two-hand pointing", "Palms together",
    "Side-on left", "Side-on right", "Back turned", "Arms raised", "No person"
]
# ====================== Posture analysis module (with auto-learning) ======================
class PostureAnalyzer:
    def __init__(self, auto_learn=True):
        self.posture_history = []
        self.current_posture = "Unknown"
        self.posture_timers = {p: 0 for p in POSTURE_TYPES}
        self.last_posture_time = 0
        self.transition_count = 0
        self.shoulder_instability = 0
        self.leg_instability = 0
        self.auto_learn = auto_learn
        # Posture definition thresholds (initial values)
        self.POSTURE_THRESHOLDS = {
            "Arms down": {"shoulder_angle": (160, 200), "elbow_angle": (160, 200)},
            "One-hand pointing": {"elbow_angle": (60, 120), "wrist_height": 0.7},
            "Two-hand pointing": {"elbow_angle": (60, 120), "wrist_distance": 0.2},
            "Palms together": {"wrist_distance": (0.05, 0.15), "hand_height": (0.3, 0.7)},
            "Side-on left": {"shoulder_hip_angle": (70, 110), "hip_knee_angle": (160, 200)},
            "Side-on right": {"shoulder_hip_angle": (70, 110), "hip_knee_angle": (160, 200)},
            "Back turned": {"visibility": (0.5, 1.0)},
            "Arms raised": {"elbow_angle": (30, 90), "wrist_height": 0.8}
        }
        # Data structures for auto-learning
        self.posture_features = {p: [] for p in POSTURE_TYPES}
        self.posture_classifier = None
        self.scaler = StandardScaler()
        # Load a previously learned model, if one exists
        if os.path.exists("posture_model.joblib"):
            self.load_learning_model()
    def save_learning_model(self):
        """Save the learned model."""
        model_data = {
            'classifier': self.posture_classifier,
            'scaler': self.scaler,
            'thresholds': self.POSTURE_THRESHOLDS
        }
        joblib.dump(model_data, "posture_model.joblib")
        print("Posture model saved")

    def load_learning_model(self):
        """Load a previously learned model."""
        model_data = joblib.load("posture_model.joblib")
        self.posture_classifier = model_data['classifier']
        self.scaler = model_data['scaler']
        self.POSTURE_THRESHOLDS = model_data['thresholds']
        print("Posture model loaded")
    def train_classifier(self):
        """Train the posture classifier."""
        # Assemble the training data
        X = []
        y = []
        for posture, features_list in self.posture_features.items():
            if len(features_list) > 10:  # require enough samples per class
                for features in features_list:
                    X.append(features)
                    y.append(POSTURE_TYPES.index(posture))
        if len(X) < 100:  # not enough samples overall
            print("Not enough samples to train the classifier")
            return
        # Standardize the features
        X = self.scaler.fit_transform(X)
        # Train/test split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        # Create and train the classifier. The samples carry posture labels,
        # so a supervised model is needed (clustering ids would not map to
        # POSTURE_TYPES indices); k-NN is a simple choice here.
        self.posture_classifier = KNeighborsClassifier(n_neighbors=5)
        self.posture_classifier.fit(X_train, y_train)
        # Evaluate the model (accuracy)
        train_score = self.posture_classifier.score(X_train, y_train)
        test_score = self.posture_classifier.score(X_test, y_test)
        print(f"Posture classifier trained - train accuracy: {train_score:.2f}, test accuracy: {test_score:.2f}")
        # Persist the model
        self.save_learning_model()
    def extract_features(self, keypoints):
        """Build a feature vector from the pose keypoints."""
        # Fetch the required keypoints
        left_shoulder = keypoints[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
        right_shoulder = keypoints[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
        left_elbow = keypoints[mp_pose.PoseLandmark.LEFT_ELBOW.value]
        right_elbow = keypoints[mp_pose.PoseLandmark.RIGHT_ELBOW.value]
        left_wrist = keypoints[mp_pose.PoseLandmark.LEFT_WRIST.value]
        right_wrist = keypoints[mp_pose.PoseLandmark.RIGHT_WRIST.value]
        left_hip = keypoints[mp_pose.PoseLandmark.LEFT_HIP.value]
        right_hip = keypoints[mp_pose.PoseLandmark.RIGHT_HIP.value]
        left_knee = keypoints[mp_pose.PoseLandmark.LEFT_KNEE.value]
        right_knee = keypoints[mp_pose.PoseLandmark.RIGHT_KNEE.value]
        # Compute the features
        features = [
            # Shoulder features
            abs(left_shoulder[1] - right_shoulder[1]),  # shoulder tilt
            dist.euclidean((left_shoulder[0], left_shoulder[1]),
                           (right_shoulder[0], right_shoulder[1])),  # shoulder width
            # Arm features
            self.calculate_angle(left_shoulder, left_elbow, left_wrist),  # left elbow angle
            self.calculate_angle(right_shoulder, right_elbow, right_wrist),  # right elbow angle
            left_wrist[1],  # left wrist height
            right_wrist[1],  # right wrist height
            dist.euclidean((left_wrist[0], left_wrist[1]),
                           (right_wrist[0], right_wrist[1])),  # distance between wrists
            # Body features
            self.calculate_angle(left_shoulder, left_hip, left_knee),  # left hip angle
            self.calculate_angle(right_shoulder, right_hip, right_knee),  # right hip angle
            abs(left_hip[1] - right_hip[1]),  # hip tilt
            abs(left_knee[1] - right_knee[1]),  # knee tilt
        ]
        return features
    def calculate_angle(self, a, b, c):
        """Angle (in degrees) at vertex b formed by points a-b-c."""
        radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
        angle = np.abs(radians * 180.0 / np.pi)
        return angle if angle < 180 else 360 - angle
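    # Sanity check (hypothetical values, not from the original script): for
    # a=(0, 1), b=(0, 0), c=(1, 0) the rays b->a and b->c are perpendicular,
    # so calculate_angle(a, b, c) returns 90.0. MediaPipe landmark coordinates
    # are normalized to [0, 1], so these are 2D image-space angles, not 3D.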
    def analyze_frame(self, frame, timestamp):
        """Analyze the posture in one video frame."""
        # Convert the color space and run pose estimation
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(image)
        if not results.pose_landmarks:
            self.current_posture = "No person"
            self.posture_timers["No person"] += 1
            return None
        # Extract keypoint coordinates
        landmarks = results.pose_landmarks.landmark
        keypoints = {}
        for idx, landmark in enumerate(landmarks):
            keypoints[idx] = (landmark.x, landmark.y, landmark.visibility)
        # Posture recognition (extract features once, reuse for both paths)
        features = self.extract_features(keypoints)
        if self.posture_classifier and self.auto_learn:
            # Use the learned classifier
            scaled_features = self.scaler.transform([features])
            posture_idx = self.posture_classifier.predict(scaled_features)[0]
            posture = POSTURE_TYPES[posture_idx]
        else:
            # Fall back to rule-based recognition
            posture = self.detect_posture(keypoints)
        # Collect samples for learning
        if self.auto_learn:
            self.posture_features[posture].append(features)
            # Retrain the classifier periodically
            if len(self.posture_features[posture]) % 50 == 0:
                self.train_classifier()
        # Posture transition detection
        if posture != self.current_posture:
            self.transition_count += 1
            self.current_posture = posture
            self.last_posture_time = timestamp
        # Update the posture duration counters
        self.posture_timers[posture] += 1
        # Shoulder and leg stability analysis
        self.analyze_stability(keypoints, timestamp)
        return results
    def detect_posture(self, keypoints):
        """Rule-based posture detection from keypoints."""
        # Fetch the required keypoints
        left_shoulder = keypoints[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
        right_shoulder = keypoints[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
        left_elbow = keypoints[mp_pose.PoseLandmark.LEFT_ELBOW.value]
        right_elbow = keypoints[mp_pose.PoseLandmark.RIGHT_ELBOW.value]
        left_wrist = keypoints[mp_pose.PoseLandmark.LEFT_WRIST.value]
        right_wrist = keypoints[mp_pose.PoseLandmark.RIGHT_WRIST.value]
        # 1. Raised arms (y is normalized, 0 = top of frame)
        if left_wrist[1] < 0.3 and right_wrist[1] < 0.3:
            return "Arms raised"
        # 2. One-hand / two-hand pointing (helpers sketched below)
        if self.is_pointing_gesture(left_elbow, left_wrist):
            return "One-hand pointing" if not self.is_pointing_gesture(right_elbow, right_wrist) else "Two-hand pointing"
        # 3. Palms together
        if dist.euclidean((left_wrist[0], left_wrist[1]), (right_wrist[0], right_wrist[1])) < 0.1:
            return "Palms together"
        # 4. Body orientation
        body_orientation = self.detect_body_orientation(keypoints)
        if body_orientation != "Front":
            return body_orientation
        # Default posture
        return "Arms down"
    def analyze_stability(self, keypoints, timestamp):
        """Analyze shoulder and leg stability."""
        # Shoulder tilt
        left_shoulder = keypoints[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
        right_shoulder = keypoints[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
        shoulder_diff = abs(left_shoulder[1] - right_shoulder[1])
        if shoulder_diff > 0.08:  # threshold
            self.shoulder_instability += 1
        # Leg sway detection
        left_hip = keypoints[mp_pose.PoseLandmark.LEFT_HIP.value]
        right_hip = keypoints[mp_pose.PoseLandmark.RIGHT_HIP.value]
        left_knee = keypoints[mp_pose.PoseLandmark.LEFT_KNEE.value]
        right_knee = keypoints[mp_pose.PoseLandmark.RIGHT_KNEE.value]
        hip_diff = abs(left_hip[1] - right_hip[1])
        knee_diff = abs(left_knee[1] - right_knee[1])
        if hip_diff > 0.1 or knee_diff > 0.15:
            self.leg_instability += 1
# ====================== Speech analysis module (local processing) ======================
class SpeechAnalyzer:
    def __init__(self, reference_text, auto_adapt=True):
        self.reference_text = reference_text
        self.phoneme_accuracy = []
        self.pronunciation_model = None
        self.auto_adapt = auto_adapt
        self.adaptive_model_path = "pronunciation_model.h5"
        # Load a previously trained adaptive model, if one exists
        if os.path.exists(self.adaptive_model_path):
            self.pronunciation_model = tf.keras.models.load_model(self.adaptive_model_path)
            print("Adaptive pronunciation model loaded")

    def save_adaptive_model(self):
        """Save the adaptive pronunciation model."""
        if self.pronunciation_model:
            self.pronunciation_model.save(self.adaptive_model_path)
            print("Adaptive pronunciation model saved")
    def extract_mfcc(self, audio_path, n_mfcc=13):
        """Extract MFCC features from an audio file."""
        try:
            # Load the audio
            y, sr = librosa.load(audio_path, sr=None)
            # Noise reduction
            y = nr.reduce_noise(y=y, sr=sr)
            # MFCC features
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            # Per-coefficient mean and standard deviation
            mfcc_mean = np.mean(mfcc, axis=1)
            mfcc_std = np.std(mfcc, axis=1)
            return np.concatenate([mfcc_mean, mfcc_std])
        except Exception as e:
            print(f"Audio processing error: {str(e)}")
            return None
    def build_pronunciation_model(self, input_dim):
        """Build the pronunciation scoring model."""
        model = models.Sequential([
            layers.Dense(64, activation='relu', input_shape=(input_dim,)),
            layers.Dropout(0.3),
            layers.Dense(32, activation='relu'),
            layers.Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model
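    # Design note: the model scores one character segment at a time as
    # correct/incorrect, so a single sigmoid output with binary cross-entropy
    # is the natural fit. The input dimension is 2 * n_mfcc (mean plus std
    # per coefficient), i.e. 26 with the default n_mfcc=13 above.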
    def train_adaptive_model(self, new_features, new_labels):
        """Incrementally train the adaptive pronunciation model."""
        # Create a new model if none exists yet
        if not self.pronunciation_model:
            input_dim = len(new_features[0])
            self.pronunciation_model = self.build_pronunciation_model(input_dim)
        # Convert to numpy arrays
        new_features = np.array(new_features)
        new_labels = np.array(new_labels)
        # Train the model; skip the validation split when the batch is too
        # small for Keras to carve out a non-empty validation set
        self.pronunciation_model.fit(
            new_features,
            new_labels,
            epochs=10,
            batch_size=16,
            validation_split=0.2 if len(new_features) >= 5 else 0.0,
            verbose=0
        )
        # Persist the model
        self.save_adaptive_model()
    def analyze_audio(self, audio_path):
        """Analyze the audio file and score pronunciation accuracy."""
        # Split the audio into per-character segments (simplified: fixed-length slices)
        char_audio_segments = self.split_audio_by_chars(audio_path, self.reference_text)
        # Analyze each character's pronunciation
        for i, (segment_path, char) in enumerate(char_audio_segments):
            # Feature extraction
            features = self.extract_mfcc(segment_path)
            if features is None:
                continue
            # Score the pronunciation
            if self.pronunciation_model and self.auto_adapt:
                # Use the adaptive model
                prediction = self.pronunciation_model.predict(np.array([features]))[0][0]
                is_correct = 1 if prediction > 0.7 else 0
            else:
                # Rule-based fallback (simplified here to a random guess)
                is_correct = 1 if np.random.random() > 0.3 else 0
            # Record the result
            status = "correct" if is_correct == 1 else "incorrect"
            self.phoneme_accuracy.append((status, char))
            # Collect samples for adaptive learning
            if self.auto_adapt:
                # A real system needs expert annotation here; simplified by
                # assuming the first 20% of characters are pronounced correctly
                label = 1 if i < len(self.reference_text) * 0.2 else 0
                # Update the model periodically
                if i % 10 == 0:
                    self.train_adaptive_model([features], [label])
        return self.reference_text
    def split_audio_by_chars(self, audio_path, text):
        """Split the audio into one segment per character (simplified)."""
        # A real system would use a speech-alignment algorithm here;
        # this version simply divides the audio evenly across characters.
        # Get the audio duration
        with contextlib.closing(wave.open(audio_path, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
            duration = frames / float(rate)
        # Duration allotted to each character
        char_duration = duration / len(text)
        # Slice the audio
        segments = []
        audio = AudioSegment.from_wav(audio_path)
        for i, char in enumerate(text):
            start = int(i * char_duration * 1000)  # milliseconds
            end = int((i + 1) * char_duration * 1000)
            segment = audio[start:end]
            # Write a temporary file
            segment_path = f"char_{i}.wav"
            segment.export(segment_path, format="wav")
            segments.append((segment_path, char))
        return segments
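    # Note: the equal-slice split above leaves one char_<i>.wav per character
    # on disk. A hypothetical cleanup pass (not in the original script) could
    # remove them once analyze_audio has extracted its features:
    #
    #     for path, _ in segments:
    #         os.remove(path)
    #
    # For real per-character boundaries, a forced aligner (e.g. one driven by
    # an ASR model) would replace this method entirely.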
# ====================== Report generation module ======================
class ReportGenerator:
    def __init__(self, posture_analyzer, speech_analyzer, video_duration):
        self.posture = posture_analyzer
        self.speech = speech_analyzer
        self.video_duration = video_duration
        # Posture timers count analyzed frames: every 5th frame of an assumed
        # 30 fps video, i.e. about 6 samples per second
        self.samples_per_sec = 6.0

    def generate_report(self):
        """Generate the full teaching analysis report."""
        report = {
            "Basic statistics": self.basic_statistics(),
            "Posture analysis": self.posture_analysis(),
            "Speech analysis": self.speech_analysis(),
            "Teaching behavior analysis": self.teaching_behavior_analysis(),
            "Suggestions for improvement": self.suggestions()
        }
        # Generate the visualization charts
        self.generate_visualizations()
        return report
    def basic_statistics(self):
        """Generate basic statistics."""
        minutes = max(self.video_duration / 60, 1e-6)  # guard against zero duration
        return {
            "Analysis date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "Video duration (s)": self.video_duration,
            "Total posture transitions": self.posture.transition_count,
            "Average posture transitions (per minute)": self.posture.transition_count / minutes,
            "Total shoulder-tilt time (s)": self.posture.shoulder_instability / self.samples_per_sec,
            "Total leg-instability time (s)": self.posture.leg_instability / self.samples_per_sec
        }
    def posture_analysis(self):
        """Generate the posture distribution."""
        posture_percentage = {}
        total_frames = sum(self.posture.posture_timers.values())
        if total_frames == 0:
            return posture_percentage
        for posture, count in self.posture.posture_timers.items():
            percentage = (count / total_frames) * 100
            posture_percentage[posture] = f"{percentage:.2f}%"
        return posture_percentage
    def speech_analysis(self):
        """Generate the speech analysis data."""
        total_chars = len(self.speech.phoneme_accuracy)
        correct_chars = sum(1 for status, _ in self.speech.phoneme_accuracy if status == "correct")
        accuracy = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
        return {
            "Total characters": total_chars,
            "Correct characters": correct_chars,
            "Mandarin pronunciation accuracy": f"{accuracy:.2f}%",
            "Detailed pronunciation analysis": self.speech.phoneme_accuracy[:50]  # first 50 characters only
        }
    def teaching_behavior_analysis(self):
        """Infer teaching behaviors from the posture data."""
        sps = self.samples_per_sec
        pointing_time = (self.posture.posture_timers["One-hand pointing"] +
                         self.posture.posture_timers["Two-hand pointing"]) / sps
        writing_estimate = self.posture.posture_timers["Back turned"] / sps * 0.7  # assume 70% of back-turned time is board writing
        return {
            "Board writing (s)": writing_estimate,
            "Lecturing (s)": (self.posture.posture_timers["Arms down"] +
                              self.posture.posture_timers["One-hand pointing"]) / sps,
            "Q&A exchanges (count)": self.posture.posture_timers["Palms together"] / sps * 2,  # rough estimate
            "Encouragement behavior": "High" if self.posture.posture_timers["Arms raised"] > 100 else "Medium",
            "Classroom organization efficiency": self.calculate_classroom_organization(),
            "Teaching activity diversity": self.calculate_activity_diversity(),
            "Teaching skill score": self.calculate_teaching_skill(),
            "Time allocation balance": self.calculate_time_distribution(),
            "Teaching safety index": self.calculate_safety_index()
        }
    def calculate_classroom_organization(self):
        """Estimate classroom organization efficiency."""
        # Based on posture-change frequency relative to the amount of speech
        posture_changes = self.posture.transition_count
        speech_char_count = len(self.speech.phoneme_accuracy)
        if speech_char_count == 0:
            return 0
        # Simplified efficiency formula
        efficiency = min(100, 80 + (posture_changes / speech_char_count) * 20)
        return f"{efficiency:.1f}/100"
    def generate_visualizations(self):
        """Generate the visualization charts."""
        # Posture distribution pie chart
        plt.figure(figsize=(10, 7))
        postures = list(self.posture.posture_timers.keys())
        counts = [self.posture.posture_timers[p] for p in postures]
        plt.pie(counts, labels=postures, autopct='%1.1f%%')
        plt.title('Teacher posture distribution')
        plt.savefig('posture_distribution.png')
        # Pronunciation accuracy bar chart
        plt.figure(figsize=(12, 6))
        status_counts = {
            "correct": sum(1 for s, _ in self.speech.phoneme_accuracy if s == "correct"),
            "incorrect": sum(1 for s, _ in self.speech.phoneme_accuracy if s == "incorrect")
        }
        plt.bar(status_counts.keys(), status_counts.values())
        plt.title('Pronunciation accuracy')
        plt.savefig('pronunciation_accuracy.png')
        # Teaching behavior time allocation chart
        plt.figure(figsize=(12, 6))
        sps = self.samples_per_sec
        behaviors = {
            "Board writing": self.posture.posture_timers["Back turned"] / sps * 0.7,
            "Lecturing": (self.posture.posture_timers["Arms down"] +
                          self.posture.posture_timers["One-hand pointing"]) / sps,
            "Interaction": self.posture.posture_timers["Palms together"] / sps * 2,
            "Encouragement": self.posture.posture_timers["Arms raised"] / sps
        }
        plt.bar(behaviors.keys(), behaviors.values())
        plt.title('Teaching behavior time allocation')
        plt.ylabel('Time (s)')
        plt.savefig('teaching_behaviors.png')
    def suggestions(self):
        """Generate improvement suggestions."""
        suggestions = []
        sps = self.samples_per_sec
        # Posture-related suggestions
        if self.posture.shoulder_instability / sps > 60:  # more than 60 s
            suggestions.append("Shoulders were tilted for a long time; try to keep them level")
        if self.posture.posture_timers["Back turned"] / sps > 120:  # more than 2 min
            suggestions.append("Too much time facing away from the class; spend more time facing the students")
        if self.posture.posture_timers["Arms raised"] / sps < 30:  # less than 30 s
            suggestions.append("Few encouraging gestures; consider more gesture-based interaction")
        # Speech-related suggestions
        correct_count = sum(1 for s, _ in self.speech.phoneme_accuracy if s == "correct")
        accuracy = correct_count / len(self.speech.phoneme_accuracy) * 100 if len(
            self.speech.phoneme_accuracy) > 0 else 0
        if accuracy < 90:
            suggestions.append(f"Mandarin pronunciation accuracy ({accuracy:.1f}%) could be improved; practice pronunciation")
        # Teaching behavior suggestions
        writing_time = self.posture.posture_timers["Back turned"] / sps * 0.7
        teaching_time = (self.posture.posture_timers["Arms down"] +
                         self.posture.posture_timers["One-hand pointing"]) / sps
        if writing_time > teaching_time * 0.5:
            suggestions.append("Board-writing time is high; balance writing with explanation")
        return suggestions
# ====================== Main pipeline ======================
def main(video_path, audio_path, reference_text):
    # Initialize the analyzers
    posture_analyzer = PostureAnalyzer(auto_learn=True)
    speech_analyzer = SpeechAnalyzer(reference_text, auto_adapt=True)
    print("Starting video analysis...")
    # Process the video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Could not open the video file")
        return
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        fps = 30  # fall back if the container reports no frame rate
    frame_count = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        # Report progress
        if frame_count % 100 == 0 and total_frames > 0:
            print(f"Video analysis progress: {frame_count}/{total_frames} ({frame_count / total_frames * 100:.1f}%)")
        # Analyze every 5th frame (for performance)
        if frame_count % 5 == 0:
            timestamp = frame_count / fps
            posture_analyzer.analyze_frame(frame, timestamp)
        frame_count += 1
    video_duration = frame_count / fps
    cap.release()
    print("Video analysis finished")
    print("Starting audio analysis...")
    # Process the audio
    recognized_text = speech_analyzer.analyze_audio(audio_path)
    print("Audio analysis finished")
    # Generate the report
    report_generator = ReportGenerator(posture_analyzer, speech_analyzer, video_duration)
    report = report_generator.generate_report()
    # Save the report
    with open("teaching_analysis_report.txt", "w", encoding="utf-8") as f:
        f.write("=============== Teaching Analysis Report ===============\n\n")
        for section, content in report.items():
            f.write(f"=== {section} ===\n")
            if isinstance(content, dict):
                for k, v in content.items():
                    f.write(f"{k}: {v}\n")
            elif isinstance(content, list):
                for item in content:
                    f.write(f"- {item}\n")
            else:
                f.write(f"{content}\n")
            f.write("\n")
    print("Report written to teaching_analysis_report.txt")
    print("Charts saved: posture_distribution.png, pronunciation_accuracy.png, teaching_behaviors.png")
if __name__ == "__main__":
    # Configuration
    # Video
    VIDEO_PATH = "D:/java/桌面资源/666/11.mp4"
    # Audio
    AUDIO_PATH = "D:/java/桌面资源/666/11.wav"
    # Reference script
    REFERENCE_TEXT = "Standard teacher lecture script goes here, used for pronunciation comparison..."
    main(VIDEO_PATH, AUDIO_PATH, REFERENCE_TEXT)

produces the following output:

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
W0000 00:00:1752563404.663794 28056 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
Process finished with exit code -1073741819 (0xC0000005)