# coding=utf-8
# WELCOME TO MING BRO's WORLD
# THE MEETING TIME 2024/12/17 16:26
import pyttsx3
import whisper
import speech_recognition as sr
import time
import numpy as np
import pyaudio
import wave
from tqdm import tqdm
def speak(audioString):
    """Echo ``audioString`` to stdout, then read it aloud with ``pyttsx3.speak``."""
    print(audioString)
    pyttsx3.speak(audioString)
# Text-to-speech smoke test executed at module level:
# first queue a fixed phrase on an explicitly created pyttsx3 engine and
# let the engine process it...
engine = pyttsx3.init()
engine.say("你是谁")
engine.runAndWait()
# ...then speak a second phrase through the speak() helper defined above.
speak("I will speak this text")
# Load the Whisper "base" model once at module level; transcribe_audio() reads
# this global.
model = whisper.load_model("base")
# Offline speech recognition (transcribe an existing audio file)
# result = model.transcribe("zh.wav")
# print(result['text'])
def record_audio(wave_out_path, record_second):
    """Record mono 16-bit 44.1 kHz audio from a PyAudio input stream to a file.

    Args:
        wave_out_path: Path of the output file. The data is written by the
            ``wave`` module, so the content is WAV regardless of the file
            extension used.
        record_second: Recording length in seconds. The recording is made of
            whole 1024-frame chunks, so the length is rounded down to a
            multiple of the chunk duration.

    Any error raised while opening the device, opening the file or reading
    audio propagates to the caller, but only after the stream, the PyAudio
    instance and the wave file have been released.
    """
    CHUNK = 1024  # frames fetched from the device per read
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            # The context manager closes the wave file (finalising its
            # header) even when a read fails part-way through.
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                print("* recording")
                for _ in tqdm(range(0, int(RATE / CHUNK * record_second))):
                    wf.writeframes(stream.read(CHUNK))
                print("* done recording")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Always release PortAudio, including when p.open() itself failed.
        p.terminate()
# record_audio("output.mp3",record_second=4)
# text = model.transcribe("M500000yLI8y2RJRq3.mp3")
# print(text)
def process_audio_data(audio_chunk):
    """Convert raw 16-bit PCM bytes into float32 samples for the model.

    The buffer is interpreted as native-endian ``int16`` values, converted to
    ``float32`` and divided by 32768 so that samples fall in ``[-1.0, 1.0)``.
    """
    pcm_samples = np.frombuffer(audio_chunk, dtype=np.int16)
    as_float = pcm_samples.astype(np.float32)
    normalized = as_float / 32768.0
    return normalized
def transcribe_audio(audio_data):
    """Transcribe ``audio_data`` with the module-level Whisper model.

    Runs ``model.transcribe`` with ``fp16=False`` and returns the ``'text'``
    entry of its result.
    """
    return model.transcribe(audio_data, fp16=False)['text']
def main(window_seconds=5):
    """Continuously capture microphone audio and print Whisper transcriptions.

    Audio is captured as mono 16-bit PCM at 16 kHz, the sample rate Whisper
    assumes for array input, and buffered into windows of ``window_seconds``
    before each transcription; a single 1024-frame chunk is only a few
    milliseconds of sound, far too short to recognise anything.

    Args:
        window_seconds: Approximate length, in seconds, of the audio window
            handed to the model per transcription. At least one chunk is
            always captured.

    The loop runs until interrupted with Ctrl+C (KeyboardInterrupt).
    """
    sample_rate = 16000  # Whisper treats raw arrays as 16 kHz audio
    chunk_frames = 1024
    chunks_per_window = max(1, int(sample_rate / chunk_frames * window_seconds))

    # Configure PyAudio
    p = pyaudio.PyAudio()
    stream = None
    try:
        print("开始实时语音识别...")
        # Start-up delay kept from the original script. The stream is opened
        # only afterwards so the input buffer cannot overflow while sleeping.
        time.sleep(10)
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                        input=True, frames_per_buffer=chunk_frames)
        while True:
            # Read one window of audio. Transcription is slow compared with
            # capture, so overflowed input is dropped rather than raised.
            frames = [
                stream.read(chunk_frames, exception_on_overflow=False)
                for _ in range(chunks_per_window)
            ]
            # Convert the raw PCM bytes to normalised float32 samples
            audio_data = process_audio_data(b"".join(frames))
            # Transcribe the window
            text = transcribe_audio(audio_data)
            # Print the transcription
            print(text)
    except KeyboardInterrupt:
        print("\n停止实时语音识别.")
    finally:
        # Close the audio stream (if it was opened) and release PortAudio
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
# Start the real-time recognition loop only when run as a script.
if __name__ == "__main__":
    main()
#
# # Record what you say
# def recordAudio():
# # 用麦克风记录下你的话
# print("开始麦克风记录下你的话")
# r = sr.Recognizer()
# with sr.Microphone() as source:
# audio = r.listen(source)
# data = ""
# try:
# data = r.recognize_google(audio)
# print("You said: " + data)
# except sr.UnknownValueError:
# print("Google Speech Recognition could not understand audio")
# except sr.RequestError as e:
# print("Could not request results from Google Speech Recognition service; {0}".format(e))
# return data
#
#
# if __name__ == '__main__':
# time.sleep(2)
# while True:
# data = recordAudio()
# print(data)