用Flask实现的简单语音识别应用示例-CSDN博客

本文链接：https://blog.csdn.net/umut9/article/details/149472967

以下是一个使用Flask实现的简单语音识别应用示例，结合了讯飞语音识别API。运行前请确保已安装所需的Python库（flask 和 requests，安装命令见下文“使用说明”）。后端代码（app.py）如下：

import hashlib
import os
import re
import time
import uuid
import wave
from collections import Counter

import requests
from flask import Flask, render_template, request, jsonify

# Flask application object, configured with the folder that receives uploads
# and the set of file extensions the upload handler accepts.
app = Flask(__name__)
app.config.update(
    UPLOAD_FOLDER='uploads',
    ALLOWED_EXTENSIONS={'wav'},
)

# iFlytek (Xunfei) API parameters -- replace the placeholders with your real credentials.
XUNFEI_APPID = 'your_app_id'
XUNFEI_API_KEY = 'your_api_key'
XUNFEI_API_SECRET = 'your_api_secret'
# The endpoint must point straight at api.xfyun.cn; a proxy-prefixed form of
# this URL would route the audio and the app id through a third-party host.
# NOTE(review): confirm the endpoint path against the current iFlytek docs.
XUNFEI_URL = 'https://api.xfyun.cn/v1/service/v1/speech/transcribe'

def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in app.config['ALLOWED_EXTENSIONS']

@app.route('/')
def index():
    """Serve the recording page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/upload', methods=['POST'])
def upload():
    """Store an uploaded recording and report where it was saved.

    Expects a multipart form with the recording under the ``file`` field.

    Returns:
        JSON ``{'success': True, 'file_path': <saved path>}`` on success, or
        ``{'error': <message>}`` with HTTP 400 when the ``file`` part is
        missing, has an empty filename, or has a disallowed extension.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if not file or not allowed_file(file.filename):
        return jsonify({'error': 'Invalid file type'}), 400

    upload_dir = app.config['UPLOAD_FOLDER']
    # The folder is not created anywhere else, so the first save would fail
    # with FileNotFoundError on a fresh checkout.
    os.makedirs(upload_dir, exist_ok=True)
    # A second-resolution timestamp alone lets two uploads in the same second
    # overwrite each other; the random suffix keeps every name unique.
    filename = f"record_{int(time.time())}_{uuid.uuid4().hex}.wav"
    filepath = os.path.join(upload_dir, filename)
    file.save(filepath)
    return jsonify({'success': True, 'file_path': filepath})

def _resolve_upload_path(file_path):
    """Return the real path of *file_path* if it lies inside the upload folder, else None."""
    try:
        upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
        candidate = os.path.realpath(file_path)
        inside = os.path.commonpath([upload_root, candidate]) == upload_root
    except ValueError:
        # Raised for paths that cannot be compared (e.g. different drives)
        # or that contain characters the OS rejects.
        return None
    return candidate if inside else None


@app.route('/recognize', methods=['POST'])
def recognize():
    """Send a previously uploaded recording to the speech API and score it.

    Expects a JSON body with ``file_path`` (as returned by ``/upload``) and
    ``text_input`` (the reference text to compare against).

    Returns:
        JSON ``{'recognized_text': ..., 'accuracy': ...}`` on success;
        ``{'error': ...}`` with HTTP 400 for missing/invalid parameters or a
        path outside the upload folder, 404 when the file cannot be read, and
        500 when the speech service call fails or returns no ``data`` field.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so bad requests get the same 400 as missing parameters.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Missing parameters'}), 400

    file_path = data.get('file_path')
    text_input = data.get('text_input')

    if not file_path or not text_input:
        return jsonify({'error': 'Missing parameters'}), 400
    if not isinstance(file_path, str) or not isinstance(text_input, str):
        return jsonify({'error': 'Invalid parameters'}), 400

    # file_path comes from the client: only read files that really live in
    # the upload folder, otherwise any server file could be opened and
    # forwarded to the external service.
    safe_path = _resolve_upload_path(file_path)
    if safe_path is None:
        return jsonify({'error': 'Invalid file path'}), 400

    try:
        with open(safe_path, 'rb') as f:
            audio_content = f.read()
    except OSError:
        return jsonify({'error': 'File not found'}), 404

    # NOTE(review): the header scheme is reproduced as-is; XUNFEI_API_KEY and
    # XUNFEI_API_SECRET are never used to sign the request -- confirm the
    # required authentication against the iFlytek API documentation.
    headers = {
        'X-Appid': XUNFEI_APPID,
        'X-CurTime': str(int(time.time())),
        'X-Param': f'src_type=wav&file_len={len(audio_content)}&sample_rate=16000',
        'X-CheckSum': hashlib.md5(audio_content).hexdigest(),
        'Content-Type': 'application/octet-stream'
    }

    try:
        # Without a timeout a stalled upstream would block this worker forever.
        response = requests.post(
            XUNFEI_URL, headers=headers, data=audio_content, timeout=30
        )
        result = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON reply from the service.
        return jsonify({'error': 'Speech recognition failed'}), 500

    recognized_text = result.get('data') if isinstance(result, dict) else None
    if not isinstance(recognized_text, str):
        return jsonify({'error': 'Speech recognition failed'}), 500

    accuracy = calculate_accuracy(text_input, recognized_text)
    return jsonify({'recognized_text': recognized_text, 'accuracy': accuracy})

# Chinese text has no spaces between words, so each CJK ideograph is its own
# token; any other run of non-space characters stays one whitespace-delimited
# word, exactly as str.split() would produce.
_TOKEN_PATTERN = re.compile(r'[\u4e00-\u9fff]|[^\s\u4e00-\u9fff]+')


def _tokenize(text):
    """Split *text* into comparison tokens (CJK characters and words)."""
    return _TOKEN_PATTERN.findall(text)


def calculate_accuracy(text_input, recognized_text):
    """Return the percentage of reference tokens found in the recognised text.

    Args:
        text_input: The reference text the speaker was supposed to say.
        recognized_text: The text returned by the speech recogniser.

    Returns:
        A float in ``[0, 100]``; ``0.0`` when the reference has no tokens.

    Matching is a multiset intersection: a token repeated N times in the
    reference only scores N hits if the recognised text also contains it N
    times, so a single recognised word cannot satisfy several reference words.
    """
    reference = Counter(_tokenize(text_input))
    total = sum(reference.values())
    if not total:
        return 0.0
    matched = sum((reference & Counter(_tokenize(recognized_text))).values())
    return (matched / total) * 100

# Script entry point: run the Flask server on port 5000, bound to 0.0.0.0 so
# the page can also be opened from other machines on the local network.
if __name__ == '__main__':
    app.run(
        host='0.0.0.0',
        port=5000,
    )

HTML前端（templates/index.html）：

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>语音识别准确率测试</title>
    <style>
        body { font-family: Arial, sans-serif; padding: 20px; }
        .container { max-width: 600px; margin: auto; }
        input[type="text"] { width: 100%; padding: 10px; margin-bottom: 15px; }
        button { padding: 10px 20px; background-color: #4CAF50; color: white; border: none; cursor: pointer; }
        #result { margin-top: 20px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>语音识别准确率测试</h1>
        <input type="text" id="textInput" placeholder="请输入参考文本">
        <button id="recordButton">开始录制</button>
        <div id="result"></div>
    </div>

    <script>
        // Page flow: record a few seconds from the microphone, convert the
        // recording to 16 kHz mono WAV, upload it, then ask the server to
        // recognise it and score it against the reference text.
        const RECORD_MS = 5000;
        // Matches the sample_rate=16000 that the backend declares to the speech API.
        const TARGET_SAMPLE_RATE = 16000;

        const recordButton = document.getElementById('recordButton');
        const textInput = document.getElementById('textInput');
        const resultDiv = document.getElementById('result');

        // Show each message in its own <p>, written through textContent so
        // text coming from the server or from exceptions is never parsed as HTML.
        function showMessages(...lines) {
            resultDiv.innerHTML = '';
            for (const line of lines) {
                const paragraph = document.createElement('p');
                paragraph.textContent = line;
                resultDiv.appendChild(paragraph);
            }
        }

        // MediaRecorder produces a compressed container (webm/ogg/mp4), not WAV.
        // Decode it, resample to 16 kHz mono, and write a 16-bit PCM WAV file so
        // the bytes really are what the ".wav" upload claims to be.
        async function toWavBlob(recordedBlob) {
            const AudioCtx = window.AudioContext || window.webkitAudioContext;
            const audioCtx = new AudioCtx();
            let decoded;
            try {
                decoded = await audioCtx.decodeAudioData(await recordedBlob.arrayBuffer());
            } finally {
                audioCtx.close();
            }

            const frameCount = Math.max(1, Math.ceil(decoded.duration * TARGET_SAMPLE_RATE));
            const offline = new OfflineAudioContext(1, frameCount, TARGET_SAMPLE_RATE);
            const source = offline.createBufferSource();
            source.buffer = decoded;
            source.connect(offline.destination);
            source.start();
            const rendered = await offline.startRendering();
            const samples = rendered.getChannelData(0);

            const dataBytes = samples.length * 2;
            const view = new DataView(new ArrayBuffer(44 + dataBytes));
            const writeAscii = (offset, text) => {
                for (let i = 0; i < text.length; i++) {
                    view.setUint8(offset + i, text.charCodeAt(i));
                }
            };
            // 44-byte RIFF/WAVE header for mono 16-bit PCM.
            writeAscii(0, 'RIFF');
            view.setUint32(4, 36 + dataBytes, true);
            writeAscii(8, 'WAVE');
            writeAscii(12, 'fmt ');
            view.setUint32(16, 16, true);                     // fmt chunk size
            view.setUint16(20, 1, true);                      // format: PCM
            view.setUint16(22, 1, true);                      // channels: mono
            view.setUint32(24, TARGET_SAMPLE_RATE, true);     // sample rate
            view.setUint32(28, TARGET_SAMPLE_RATE * 2, true); // byte rate
            view.setUint16(32, 2, true);                      // block align
            view.setUint16(34, 16, true);                     // bits per sample
            writeAscii(36, 'data');
            view.setUint32(40, dataBytes, true);

            let offset = 44;
            for (const sample of samples) {
                const clamped = Math.max(-1, Math.min(1, sample));
                view.setInt16(offset, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
                offset += 2;
            }
            return new Blob([view], { type: 'audio/wav' });
        }

        // Upload the WAV file, then request recognition and display the result.
        async function uploadAndRecognize(wavBlob) {
            const formData = new FormData();
            formData.append('file', wavBlob, 'record.wav');

            const uploadResponse = await fetch('/upload', {
                method: 'POST',
                body: formData
            });
            const uploaded = await uploadResponse.json();
            if (!uploaded.success) {
                showMessages('文件上传失败');
                return;
            }

            const recognizeResponse = await fetch('/recognize', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    file_path: uploaded.file_path,
                    text_input: textInput.value
                })
            });
            const res = await recognizeResponse.json();
            if (res.recognized_text) {
                const accuracy = Math.round(res.accuracy);
                showMessages(`识别结果: ${res.recognized_text}`, `准确率: ${accuracy}%`);
            } else {
                showMessages('语音识别失败');
            }
        }

        recordButton.addEventListener('click', async () => {
            if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
                showMessages('浏览器不支持录音功能');
                return;
            }

            // The server rejects an empty reference text, so ask for it before
            // spending five seconds on a recording that cannot be scored.
            if (!textInput.value.trim()) {
                showMessages('请输入参考文本');
                return;
            }

            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                const chunks = [];
                const mediaRecorder = new MediaRecorder(stream);

                mediaRecorder.ondataavailable = (e) => chunks.push(e.data);
                mediaRecorder.onstop = async () => {
                    // Errors raised here are outside the surrounding try/catch
                    // (the callback runs later), so they are handled locally.
                    try {
                        const recorded = new Blob(chunks, { type: mediaRecorder.mimeType });
                        const wavBlob = await toWavBlob(recorded);
                        await uploadAndRecognize(wavBlob);
                    } catch (error) {
                        showMessages(`错误: ${error.message}`);
                    }
                };

                mediaRecorder.start();
                recordButton.disabled = true;
                recordButton.textContent = '正在录制...';

                setTimeout(() => {
                    mediaRecorder.stop();
                    stream.getTracks().forEach(track => track.stop());
                    recordButton.disabled = false;
                    recordButton.textContent = '开始录制';
                }, RECORD_MS); // record for 5 seconds
            } catch (error) {
                showMessages(`错误: ${error.message}`);
            }
        });
    </script>
</body>
</html>

使用说明

配置讯飞API：在代码中替换XUNFEI_APPID、XUNFEI_API_KEY、XUNFEI_API_SECRET为您的实际密钥。

运行应用：

pip install flask requests
python app.py

访问页面：打开浏览器访问http://127.0.0.1:5000（或局域网IP地址）。
操作流程：
- 在文本框中输入参考文本。
- 点击“开始录制”按钮（需浏览器允许麦克风权限）。
- 录制完成后自动上传音频并显示识别结果和准确率。