import websockets
import asyncio
import json
import base64
import uuid
import requests
import logging
import struct
import os
import sys
from typing import Dict, Optional, Union
# Make sure this file's directory is on the import path so that a config.py
# sitting next to it can be imported below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

try:
    from config import VOLCANO_TTS_CONFIG
except ImportError:
    # Placeholder configuration (demonstration only). The credential values
    # are dummies and must be replaced through a real config.py.
    VOLCANO_TTS_CONFIG = {
        "appid": "YOUR_APP_ID",
        "token": "YOUR_API_TOKEN",
        "secret_key": "YOUR_SECRET_KEY",
        "http_voice": "zh_female_wanwanxiaohe_moon_bigtts",
        "websocket_voice": "BV701_streaming",
        "websocket_url": "wss://openspeech.bytedance.com/api/v2/tts",
        "http_url": "https://openspeech.bytedance.com/api/v2/tts",
        "log_level": "INFO",
    }
    print("警告: 未找到config.py,使用默认配置")

# Configure logging. The level name comes from the configuration; an unknown
# or non-string value falls back to INFO instead of failing at import time.
log_level = getattr(
    logging,
    str(VOLCANO_TTS_CONFIG.get("log_level", "INFO")).upper(),
    None,
)
if not isinstance(log_level, int):
    # getattr may miss entirely or resolve to a non-level attribute of the
    # logging module (e.g. a function); neither is a usable level.
    log_level = logging.INFO

logging.basicConfig(
    level=log_level,
    format="%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger("VolcanoTTS")
class VolcanoTTS:
    """Text-to-speech client offering an HTTP call and a WebSocket call.

    Endpoints, credentials and default voices are read from the module-level
    ``VOLCANO_TTS_CONFIG`` mapping. Both synthesis methods report the outcome
    as a boolean rather than raising on service errors.
    """

    def __init__(self, uid: str = "default_uid"):
        """Initialise the client from ``VOLCANO_TTS_CONFIG``.

        Args:
            uid: User identifier placed in the ``user.uid`` field of requests.

        Raises:
            KeyError: If a required key is missing from the configuration.
        """
        self.config = VOLCANO_TTS_CONFIG
        self.appid = self.config["appid"]
        self.token = self.config["token"]
        self.secret_key = self.config["secret_key"]
        self.uid = uid
        self.websocket_url = self.config["websocket_url"]
        self.http_url = self.config["http_url"]
        self.http_voice = self.config["http_voice"]
        self.websocket_voice = self.config["websocket_voice"]
        # Only the first three characters of the app id are logged.
        logger.info(f"TTS客户端初始化完成 | 应用ID: {self.appid[:3]}*** | HTTP音色: {self.http_voice} | WebSocket音色: {self.websocket_voice}")

    def _build_base_request(self, text: str, voice_type: str, operation: str, **kwargs) -> Dict:
        """Build the request payload shared by both transports.

        Args:
            text: Text to synthesise.
            voice_type: Voice identifier stored in ``audio.voice_type``.
            operation: Value stored in ``request.operation``.
            **kwargs: Optional audio settings: ``encoding`` (default "mp3"),
                ``speed_ratio`` (1.0), ``rate`` (24000), ``volume`` (1.0),
                ``pitch_ratio`` (1.0), plus ``emotion`` and ``extra_param``,
                which are included only when supplied.

        Returns:
            A dict with ``app``, ``user``, ``audio`` and ``request`` sections;
            ``request.reqid`` is a fresh UUID4 string on every call.
        """
        request = {
            "app": {
                "appid": self.appid,
                "token": self.token,
                "cluster": "volcano_tts",
            },
            "user": {
                "uid": self.uid,
            },
            "audio": {
                "voice_type": voice_type,
                "encoding": kwargs.get("encoding", "mp3"),
                "speed_ratio": kwargs.get("speed_ratio", 1.0),
                "rate": kwargs.get("rate", 24000),
                "volume": kwargs.get("volume", 1.0),
                "pitch_ratio": kwargs.get("pitch_ratio", 1.0),
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "operation": operation,
            },
        }
        # Optional fields are only emitted when the caller passed them.
        if "emotion" in kwargs:
            request["audio"]["emotion"] = kwargs["emotion"]
        if "extra_param" in kwargs:
            request["request"]["extra_param"] = kwargs["extra_param"]
        return request

    async def websocket_tts(self, text: str, output_file: str, **kwargs) -> bool:
        """Synthesise ``text`` over a WebSocket connection and save the audio.

        The request is sent as a JSON text frame with operation "submit".
        Binary frames received afterwards are concatenated as audio; text
        frames are parsed as JSON control messages.

        Args:
            text: Text to synthesise.
            output_file: Destination path; its extension may be rewritten to
                match the encoding when the file is saved.
            **kwargs: ``voice_type`` overrides the configured WebSocket voice;
                everything else is forwarded to ``_build_base_request``.

        Returns:
            True if at least some audio was received and written, otherwise
            False (connection failure, error code from the server, or no
            audio at all).

        Raises:
            OSError: If the audio file cannot be written (the save happens
                outside the connection's error handling).
        """
        voice_type = kwargs.pop("voice_type", self.websocket_voice)
        logger.info(f"开始WebSocket TTS合成 | 文本长度: {len(text)} | 音色: {voice_type}")

        request_data = self._build_base_request(text, voice_type, "submit", **kwargs)
        headers = {
            "Authorization": f"Bearer; {self.token}",
            "X-Secret-Key": self.secret_key,
        }
        request_message = json.dumps(request_data)

        # A bytearray grows in place; repeated ``bytes +=`` can re-copy the
        # whole buffer for every chunk.
        audio_data = bytearray()
        try:
            # NOTE(review): newer websockets releases appear to name the
            # header keyword ``additional_headers`` instead of
            # ``extra_headers`` - confirm against the installed version.
            async with websockets.connect(
                self.websocket_url,
                extra_headers=headers,
                ping_interval=20,
                ping_timeout=20,
                close_timeout=10,
                max_size=10 * 1024 * 1024,
            ) as websocket:
                logger.debug(f"发送请求: {request_message[:200]}...")
                await websocket.send(request_message)

                # The first frame is treated as an acknowledgement. Only a
                # text frame is validated; a binary first frame is ignored
                # and is not added to the audio.
                init_response = await websocket.recv()
                logger.debug(f"初始响应: {init_response}")
                if isinstance(init_response, str):
                    try:
                        init_data = json.loads(init_response)
                        if init_data.get("code") != 3000:
                            logger.error(f"初始响应错误: {init_data}")
                            return False
                    except json.JSONDecodeError:
                        # An unparsable acknowledgement is tolerated.
                        logger.warning(f"无法解析初始响应: {init_response}")

                # Collect audio until the end marker, an error or a timeout.
                while True:
                    try:
                        response = await asyncio.wait_for(websocket.recv(), timeout=30.0)
                        if isinstance(response, bytes):
                            audio_data.extend(response)
                            logger.debug(f"收到音频数据块: {len(response)}字节")
                        elif isinstance(response, str):
                            control_msg = json.loads(response)
                            logger.debug(f"控制消息: {control_msg}")
                            # The end marker is checked before the error
                            # code, so a final message ends the loop cleanly.
                            if control_msg.get("status") == 2:
                                logger.info("收到合成结束消息")
                                break
                            if control_msg.get("code") != 3000:
                                logger.error(f"服务端错误: {control_msg.get('message')}")
                                return False
                    except asyncio.TimeoutError:
                        # Stop waiting but keep whatever audio has arrived;
                        # it is still saved below.
                        logger.error("接收响应超时")
                        break
                    except websockets.exceptions.ConnectionClosed as e:
                        logger.error(f"连接异常关闭: 代码={e.code}, 原因={e.reason}")
                        if e.code == 1006:
                            logger.error("1006错误可能原因: 1.音色不支持流式 2.认证失败 3.协议错误")
                        return False
                    except Exception:
                        logger.exception("处理响应时发生意外错误")
                        return False
        except Exception:
            logger.exception("WebSocket连接失败")
            return False

        if not audio_data:
            logger.error("未收到有效音频数据")
            return False

        encoding = kwargs.get("encoding", "mp3")
        # Log the path actually written: _save_audio may change the extension.
        saved_path = self._save_audio(bytes(audio_data), output_file, encoding)
        logger.info(f"音频已保存至: {saved_path} | 大小: {len(audio_data)}字节")
        return True

    def http_tts(self, text: str, output_file: str, **kwargs) -> bool:
        """Synthesise ``text`` with a single HTTP POST and save the audio.

        The request uses operation "query"; the audio is expected as a
        base64 string under the ``data`` key of the JSON response.

        Args:
            text: Text to synthesise.
            output_file: Destination path; its extension may be rewritten to
                match the encoding when the file is saved.
            **kwargs: ``voice_type`` overrides the configured HTTP voice;
                everything else is forwarded to ``_build_base_request``.

        Returns:
            True if the audio was decoded and written, otherwise False. All
            request, parsing and file errors are logged and reported as False.
        """
        voice_type = kwargs.pop("voice_type", self.http_voice)
        logger.info(f"开始HTTP TTS合成 | 文本长度: {len(text)} | 音色: {voice_type}")

        request_data = self._build_base_request(text, voice_type, "query", **kwargs)
        headers = {
            "Authorization": f"Bearer; {self.token}",
            "X-Secret-Key": self.secret_key,
            "Content-Type": "application/json",
        }
        try:
            response = requests.post(
                self.http_url,
                json=request_data,
                headers=headers,
                timeout=30.0,
            )
            response.raise_for_status()

            result = response.json()
            if result.get("code") != 3000:
                error_msg = result.get("message", "未知错误")
                logger.error(f"合成失败: {error_msg} (代码: {result.get('code')})")
                return False

            audio_data = base64.b64decode(result["data"])
            encoding = kwargs.get("encoding", "mp3")
            saved_path = self._save_audio(audio_data, output_file, encoding)

            # The duration is informational only; its absence must not turn
            # an already-saved file into a reported failure.
            addition = result.get("addition")
            duration = addition.get("duration", "?") if isinstance(addition, dict) else "?"
            logger.info(f"合成成功 | 时长: {duration}ms | 保存至: {saved_path}")
            return True
        except requests.exceptions.RequestException as e:
            logger.error(f"HTTP请求失败: {str(e)}")
        except (KeyError, ValueError) as e:
            logger.error(f"响应解析失败: {str(e)}")
        except Exception:
            logger.exception("处理HTTP响应时发生意外错误")
        return False

    def _save_audio(self, data: bytes, file_path: str, encoding: str) -> str:
        """Write ``data`` to ``file_path`` with an extension matching ``encoding``.

        Args:
            data: Raw audio bytes.
            file_path: Requested destination path.
            encoding: "mp3", "wav" or "pcm" (case-insensitive); any other
                value is saved with a ".mp3" extension.

        Returns:
            The path actually written, which differs from ``file_path`` when
            the extension had to be added or replaced.

        Raises:
            OSError: If the file cannot be opened or written.
        """
        ext_mapping = {
            "mp3": ".mp3",
            "wav": ".wav",
            "pcm": ".pcm",
        }
        ext = ext_mapping.get(encoding.lower(), ".mp3")

        if not file_path.endswith(ext):
            # splitext returns the path unchanged when there is no extension,
            # so this both replaces a wrong extension and appends a missing one.
            file_path = os.path.splitext(file_path)[0] + ext

        with open(file_path, "wb") as f:
            f.write(data)
        logger.debug(f"音频保存成功 | 路径: {file_path} | 格式: {encoding} | 大小: {len(data)}字节")
        return file_path
# Usage example
if __name__ == "__main__":
    # Create the client; settings come from the module-level configuration.
    tts = VolcanoTTS(uid="app_user_001")

    # Example 1: HTTP synthesis (return value intentionally not inspected).
    http_text = "欢迎使用火山引擎语音合成服务,这是HTTP合成示例"
    http_output = "http_tts_sample"
    tts.http_tts(text=http_text, output_file=http_output, encoding="mp3", speed_ratio=1.2)

    # Example 2: WebSocket streaming synthesis.
    async def run_websocket_example():
        """Run one WebSocket synthesis and log whether it succeeded."""
        ws_output = "websocket_tts_sample"
        succeeded = await tts.websocket_tts(
            text="这是WebSocket流式合成示例,已解决1006连接问题",
            output_file=ws_output,
            encoding="wav",
            emotion="happy",  # example of the optional emotion parameter
        )
        if not succeeded:
            logger.error("WebSocket合成失败")
            return
        logger.info(f"WebSocket合成成功: {ws_output}")

    asyncio.run(run_websocket_example())
# Stray text from the source web page, not part of the program: "最新发布" ("Latest release").