AVAudioEngine录音崩溃,reason: 'format.sampleRate == hwFormat.sampleRate'

在使用AVAudioEngine进行录音时,应用程序出现崩溃,错误信息显示'format.sampleRate == hwFormat.sampleRate'。经过排查,发现是inputNode和mainMixerNode的采样率不一致导致的问题。原始代码中,inputFormat为48000 Hz,而outputFormat为44100 Hz。为了解决这个问题,将两个format的采样率都设置为48000 Hz,参考了 https://gist.github.com/tad-iizuka/1ca07ca2045a8c6d11e22d15812f7e15 的解决方案,修改后的代码解决了崩溃问题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

AVAudioEngine录制音频时偶发崩溃
报错信息大致如下:
2021-12-15 20:12:38.429028+0800 *[1659:708511] NSURLConnection finished with error - code -1002
"AudioRecorder 创建Audio缓存文件夹成功 /var/mobile/Containers/Data/Application/
/Library/Caches/Audio"
2021-12-15 20:13:30.762736+0800 ***[1659:708411] [avae] AVAEInternal.h:76 required condition is false: [AVAudioIONodeImpl.mm:1158:SetOutputFormat: (format.sampleRate == hwFormat.sampleRate)]
2021-12-15 20:13:30.764037+0800 ***[1659:708411] *** Terminating app due to uncaught exception ‘com.apple.coreaudio.avfaudio’, reason: ‘required condition is false: format.sampleRate == hwFormat.sampleRate’
*** First throw call stack:
(0x1989799d8 0x1accffb54 0x19888850c 0x1a88e2984 0x1a898c998 0x1a8923038 0x1a892945c 0x1a89a4f4c 0x1a899f414 0x10626bad4 0x10626b130 0x10627f8bc 0x10480ecc0 0x10480e8f4 0x10621e100 0x10621b720 0x10621dcf8 0x10480e8a0 0x10480ed20 0x19b1eaf38 0x19ab80184 0x19ab804c8 0x19ab7ee00 0x19ad65350 0x19ad634e0 0x19b2274cc 0x19b202b0c 0x19b285078 0x19b289818 0x19b280afc 0x1988f9bf0 0x1988f9af0 0x1988f8e38 0x1988f33e0 0x1988f2ba0 0x1af65b598 0x19b1e42f4 0x19b1e9874 0x1ac073b54 0x104a54194 0x104a5410c 0x104a5425c 0x1985d1568)
libc++abi.dylib: terminating with uncaught exception of type NSException
*** Terminating app due to uncaught exception ‘com.apple.coreaudio.avfaudio’, reason: ‘required condition is false: format.sampleRate == hwFormat.sampleRate’
terminating with uncaught exception of type NSException
*** Terminating app due to uncaught exception ‘com.apple.coreaudio.avfaudio’, reason: ‘required condition is false: format.sampleRate == hwFormat.sampleRate’
terminating with uncaught exception of type NSException

排查发现是inputNode和mainMixerNode的format不一致
(lldb) po inputFormat
<AVAudioFormat ***: 1 ch, 48000 Hz, Float32>

(lldb) po outputFormat
<AVAudioFormat ***: 44100 Hz, Float32>(原文此处输出被截断)

# Volcano Engine (火山引擎) TTS demo client: HTTP (non-streaming) and
# WebSocket (streaming) speech synthesis. Configuration is loaded from a
# sibling config.py, with an inline fallback for demo purposes.
import websockets
import asyncio
import json
import base64
import uuid
import requests
import logging
import struct
import os
import sys
from typing import Dict, Optional, Union

# Ensure the script's own directory is importable so `config` resolves.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

try:
    from config import VOLCANO_TTS_CONFIG
except ImportError:
    # Default configuration (demo only — replace with real credentials).
    VOLCANO_TTS_CONFIG = {
        "appid": "YOUR_APP_ID",
        "token": "YOUR_API_TOKEN",
        "secret_key": "YOUR_SECRET_KEY",
        "http_voice": "zh_female_wanwanxiaohe_moon_bigtts",
        "websocket_voice": "BV701_streaming",
        "websocket_url": "wss://openspeech.bytedance.com/api/v2/tts",
        "http_url": "https://openspeech.bytedance.com/api/v2/tts",
        "log_level": "INFO",
    }
    print("警告: 未找到config.py,使用默认配置")

# Logging level is driven by the config; defaults to INFO.
log_level = getattr(logging, VOLCANO_TTS_CONFIG.get("log_level", "INFO").upper())
logging.basicConfig(
    level=log_level,
    format="%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger("VolcanoTTS")


class VolcanoTTS:
    """Volcano Engine TTS client configured from VOLCANO_TTS_CONFIG.

    Provides a non-streaming HTTP API (`http_tts`) and a streaming
    WebSocket API (`websocket_tts`); both save the synthesized audio to a
    file whose extension is normalized to the requested encoding.
    """

    def __init__(self, uid: str = "default_uid"):
        """Initialize the client from config.py.

        :param uid: arbitrary user identifier sent with every request.
        """
        self.config = VOLCANO_TTS_CONFIG
        self.appid = self.config["appid"]
        self.token = self.config["token"]
        self.secret_key = self.config["secret_key"]
        self.uid = uid
        self.websocket_url = self.config["websocket_url"]
        self.http_url = self.config["http_url"]
        self.http_voice = self.config["http_voice"]
        self.websocket_voice = self.config["websocket_voice"]
        logger.info(f"TTS客户端初始化完成 | 应用ID: {self.appid[:3]}*** | HTTP音色: {self.http_voice} | WebSocket音色: {self.websocket_voice}")

    def _build_base_request(self, text: str, voice_type: str, operation: str, **kwargs) -> Dict:
        """Build the common request payload shared by HTTP and WebSocket calls.

        :param text: text to synthesize.
        :param voice_type: voice/timbre identifier.
        :param operation: "query" (HTTP) or "submit" (WebSocket streaming).
        :param kwargs: optional audio parameters (encoding, speed_ratio,
            rate, volume, pitch_ratio, emotion, extra_param).
        """
        request = {
            "app": {
                "appid": self.appid,
                "token": self.token,
                "cluster": "volcano_tts",
            },
            "user": {
                "uid": self.uid,
            },
            "audio": {
                "voice_type": voice_type,
                "encoding": kwargs.get("encoding", "mp3"),
                "speed_ratio": kwargs.get("speed_ratio", 1.0),
                "rate": kwargs.get("rate", 24000),
                "volume": kwargs.get("volume", 1.0),
                "pitch_ratio": kwargs.get("pitch_ratio", 1.0),
            },
            "request": {
                # reqid must be unique per request.
                "reqid": str(uuid.uuid4()),
                "text": text,
                "operation": operation,
            },
        }
        # Optional parameters are only included when explicitly provided.
        if "emotion" in kwargs:
            request["audio"]["emotion"] = kwargs["emotion"]
        if "extra_param" in kwargs:
            request["request"]["extra_param"] = kwargs["extra_param"]
        return request

    async def websocket_tts(self, text: str, output_file: str, **kwargs):
        """Streaming synthesis over WebSocket (uses the configured streaming voice).

        Collects binary audio chunks until the server reports status == 2,
        then writes the assembled audio to *output_file*.

        :returns: True on success, False on any error.
        """
        voice_type = kwargs.pop("voice_type", self.websocket_voice)
        logger.info(f"开始WebSocket TTS合成 | 文本长度: {len(text)} | 音色: {voice_type}")

        request_data = self._build_base_request(text, voice_type, "submit", **kwargs)

        # NOTE(review): the "Bearer; <token>" separator is intentional for
        # this API — do not "fix" it to a plain Bearer header.
        headers = {
            "Authorization": f"Bearer; {self.token}",
            "X-Secret-Key": self.secret_key,
        }

        # The API accepts the request as a JSON text frame.
        request_message = json.dumps(request_data)
        audio_data = b""
        try:
            async with websockets.connect(
                self.websocket_url,
                extra_headers=headers,
                ping_interval=20,
                ping_timeout=20,
                close_timeout=10,
                max_size=10 * 1024 * 1024,
            ) as websocket:
                logger.debug(f"发送请求: {request_message[:200]}...")
                await websocket.send(request_message)

                # First frame is the server's acknowledgement; code 3000 == OK.
                init_response = await websocket.recv()
                logger.debug(f"初始响应: {init_response}")
                if isinstance(init_response, str):
                    try:
                        init_data = json.loads(init_response)
                        if init_data.get("code") != 3000:
                            logger.error(f"初始响应错误: {init_data}")
                            return False
                    except json.JSONDecodeError:
                        logger.warning(f"无法解析初始响应: {init_response}")

                # Receive loop: binary frames carry audio, text frames carry
                # control/status messages.
                while True:
                    try:
                        response = await asyncio.wait_for(websocket.recv(), timeout=30.0)
                        if isinstance(response, bytes):
                            audio_data += response
                            logger.debug(f"收到音频数据块: {len(response)}字节")
                        elif isinstance(response, str):
                            control_msg = json.loads(response)
                            logger.debug(f"控制消息: {control_msg}")
                            # status == 2 marks the end of synthesis.
                            if control_msg.get("status") == 2:
                                logger.info("收到合成结束消息")
                                break
                            if control_msg.get("code") != 3000:
                                logger.error(f"服务端错误: {control_msg.get('message')}")
                                return False
                    except asyncio.TimeoutError:
                        logger.error("接收响应超时")
                        break
                    except websockets.exceptions.ConnectionClosed as e:
                        logger.error(f"连接异常关闭: 代码={e.code}, 原因={e.reason}")
                        if e.code == 1006:
                            logger.error("1006错误可能原因: 1.音色不支持流式 2.认证失败 3.协议错误")
                        return False
                    except Exception:
                        logger.exception("处理响应时发生意外错误")
                        return False
        except Exception:
            logger.exception("WebSocket连接失败")
            return False

        if audio_data:
            encoding = kwargs.get("encoding", "mp3")
            # FIX: _save_audio may normalize the extension — log the path it
            # actually wrote, not the caller-supplied one.
            saved_path = self._save_audio(audio_data, output_file, encoding)
            logger.info(f"音频已保存至: {saved_path} | 大小: {len(audio_data)}字节")
            return True
        else:
            logger.error("未收到有效音频数据")
            return False

    def http_tts(self, text: str, output_file: str, **kwargs):
        """Non-streaming synthesis over HTTP (uses the configured HTTP voice).

        :returns: True on success, False on any error.
        """
        voice_type = kwargs.pop("voice_type", self.http_voice)
        logger.info(f"开始HTTP TTS合成 | 文本长度: {len(text)} | 音色: {voice_type}")

        request_data = self._build_base_request(text, voice_type, "query", **kwargs)
        headers = {
            "Authorization": f"Bearer; {self.token}",
            "X-Secret-Key": self.secret_key,
            "Content-Type": "application/json",
        }
        try:
            response = requests.post(
                self.http_url,
                json=request_data,
                headers=headers,
                timeout=30.0,
            )
            response.raise_for_status()

            result = response.json()
            if result.get("code") != 3000:
                error_msg = result.get("message", "未知错误")
                logger.error(f"合成失败: {error_msg} (代码: {result.get('code')})")
                return False

            # Audio arrives base64-encoded in the "data" field.
            audio_data = base64.b64decode(result["data"])
            encoding = kwargs.get("encoding", "mp3")
            # FIX: log the path _save_audio actually wrote (extension may differ).
            saved_path = self._save_audio(audio_data, output_file, encoding)
            logger.info(f"合成成功 | 时长: {result['addition']['duration']}ms | 保存至: {saved_path}")
            return True
        except requests.exceptions.RequestException as e:
            logger.error(f"HTTP请求失败: {str(e)}")
        except (KeyError, ValueError) as e:
            logger.error(f"响应解析失败: {str(e)}")
        except Exception:
            logger.exception("处理HTTP响应时发生意外错误")
        return False

    def _save_audio(self, data: bytes, file_path: str, encoding: str) -> str:
        """Write audio bytes to disk, normalizing the file extension.

        :returns: the path actually written (may differ from *file_path*
            when the extension was added or replaced).
        """
        # Map encodings to canonical extensions; unknown encodings get .mp3.
        ext_mapping = {
            "mp3": ".mp3",
            "wav": ".wav",
            "pcm": ".pcm",
        }
        ext = ext_mapping.get(encoding.lower(), ".mp3")
        # Replace any existing extension, or append one if none present.
        if not file_path.endswith(ext):
            if "." in file_path:
                base_path = os.path.splitext(file_path)[0]
                file_path = base_path + ext
            else:
                file_path += ext
        with open(file_path, "wb") as f:
            f.write(data)
        logger.debug(f"音频保存成功 | 路径: {file_path} | 格式: {encoding} | 大小: {len(data)}字节")
        return file_path


# Usage example
if __name__ == "__main__":
    tts = VolcanoTTS(uid="app_user_001")

    # Example 1: HTTP synthesis
    http_text = "欢迎使用火山引擎语音合成服务,这是HTTP合成示例"
    http_output = "http_tts_sample"
    tts.http_tts(
        text=http_text,
        output_file=http_output,
        encoding="mp3",
        speed_ratio=1.2,
    )

    # Example 2: WebSocket streaming synthesis
    async def run_websocket_example():
        ws_text = "这是WebSocket流式合成示例,已解决1006连接问题"
        ws_output = "websocket_tts_sample"
        success = await tts.websocket_tts(
            text=ws_text,
            output_file=ws_output,
            encoding="wav",
            emotion="happy",  # emotion parameter example
        )
        if success:
            logger.info(f"WebSocket合成成功: {ws_output}")
        else:
            logger.error("WebSocket合成失败")

    asyncio.run(run_websocket_example())
最新发布
07-16
# Volcano Engine TTS client (variant using the binary WebSocket framing
# protocol and local pyaudio playback). Config comes from api.config.
import websockets
import asyncio
import json
import base64
import uuid
import requests
import io
import pyaudio
from typing import Dict, Optional


class VolcanoTTS:
    """Volcano Engine speech-synthesis client.

    `websocket_tts` streams audio using the 4-byte binary frame protocol;
    `http_tts` performs a single blocking request. Both optionally play the
    result through pyaudio and save it to a file.
    """

    def __init__(self, uid: str = "default_uid"):
        """
        初始化火山引擎语音合成客户端
        :param uid: 用户标识(非空字符串即可)
        """
        from api.config import API_CONFIGS
        config = API_CONFIGS['volcano_tts']
        self.appid = config['appid']
        self.token = config['token']
        self.secret_key = config['Key']  # Secret Key used for the WebSocket handshake
        self.uid = uid
        self.websocket_url = config['websocket_url']
        self.http_url = config['http_url']

    def _build_base_request(self, text: str, voice_type: str, operation: str, **kwargs) -> Dict:
        """Build the request payload shared by HTTP and WebSocket calls."""
        request = {
            "app": {
                "appid": self.appid,
                "token": self.token,
                "cluster": "volcano_tts",
            },
            "user": {
                "uid": self.uid,
            },
            "audio": {
                "voice_type": voice_type,
                "encoding": kwargs.get("encoding", "mp3"),
                "speed_ratio": kwargs.get("speed_ratio", 1.0),
                "rate": kwargs.get("rate", 24000),
                "loudness_ratio": kwargs.get("loudness_ratio", 1.0),
            },
            "request": {
                "reqid": str(uuid.uuid4()),  # unique request ID
                "text": text,
                "operation": operation,
            },
        }
        # Emotion requires the enable flag alongside the value.
        if "emotion" in kwargs:
            request["audio"]["emotion"] = kwargs["emotion"]
            request["audio"]["enable_emotion"] = True
        if "extra_param" in kwargs:
            request["request"]["extra_param"] = kwargs["extra_param"]
        return request

    async def websocket_tts(self, text: str, voice_type: str, output_file: str, **kwargs):
        """
        WebSocket流式合成(边合成边返回音频)
        :param text: 合成文本
        :param voice_type: 音色类型
        :param output_file: 输出音频文件路径
        """
        import logging
        import struct
        logger = logging.getLogger(__name__)
        logger.info(f"开始WebSocket TTS转换,文本长度: {len(text)},音色类型: {voice_type}")

        request_data = self._build_base_request(text, voice_type, "submit", **kwargs)
        headers = {
            "Authorization": f"Bearer; {self.token}",
            "X-Secret-Key": self.secret_key,
        }

        # Build the 4-byte binary request header:
        # byte 1: protocol version (4 bits) = 0b0001, header size (4 bits) = 0b0001 (x4 bytes)
        header_byte1 = 0b0001 << 4 | 0b0001
        # byte 2: message type (4 bits) = 0b0001 (full client request), flags = 0b0000
        header_byte2 = 0b0001 << 4 | 0b0000
        # byte 3: serialization (4 bits) = 0b0001 (JSON), compression (4 bits) = 0b0000 (none)
        header_byte3 = 0b0001 << 4 | 0b0000
        # byte 4: reserved = 0x00
        header_byte4 = 0x00
        request_header = struct.pack('>BBBB', header_byte1, header_byte2, header_byte3, header_byte4)

        # Header + JSON payload form one binary frame.
        request_payload = json.dumps(request_data).encode('utf-8')
        request_message = request_header + request_payload

        async with websockets.connect(self.websocket_url, extra_headers=headers) as websocket:
            await websocket.send(request_message)

            audio_data = b""
            # FIX: track the last received message so the failure diagnostics
            # below can actually report it (previously never assigned).
            last_message = None
            while True:
                try:
                    response = await websocket.recv()
                    last_message = response
                    if isinstance(response, bytes):
                        # Parse the 4-byte response header.
                        if len(response) < 4:
                            logger.error("响应报文过短,无法解析报头")
                            continue
                        header_byte1, header_byte2, header_byte3, header_byte4 = struct.unpack('>BBBB', response[:4])
                        protocol_version = (header_byte1 >> 4) & 0x0F
                        header_size = (header_byte1 & 0x0F) * 4  # size field is in 4-byte units
                        message_type = (header_byte2 >> 4) & 0x0F
                        flags = header_byte2 & 0x0F
                        logger.debug(f"协议版本: {protocol_version}, 报头大小: {header_size}, 消息类型: {message_type}, 标志: {flags}")

                        if message_type == 0b1011:
                            # Audio-only server response.
                            payload = response[header_size:]
                            audio_data += payload
                            logger.debug(f"接收到音频数据块,长度: {len(payload)} bytes")
                            # Flag bit marks the final chunk (sequence < 0).
                            if flags & 0b0010:
                                logger.info("接收到最后一块音频数据")
                                break
                        elif message_type == 0b1111:
                            # Error message from the server.
                            error_payload = response[header_size:].decode('utf-8')
                            logger.error(f"服务器错误响应: {error_payload}")
                            break
                    elif isinstance(response, str):
                        logger.warning(f"接收到文本消息: {response}")
                        if '"code":' in response:
                            error_info = json.loads(response)
                            logger.error(f"API错误: {error_info.get('message', '未知错误')} (错误码: {error_info.get('code')})")
                            break
                except websockets.exceptions.ConnectionClosed as e:
                    logger.error(f"WebSocket连接异常关闭: {e}")
                    logger.error(f"连接关闭时状态码: {e.code},原因: {e.reason}")
                    break
                except Exception as e:
                    logger.error(f"处理响应时发生异常: {str(e)}")
                    break

            if audio_data:
                self._play_audio(audio_data)
                logger.info("音频播放完成")
                if output_file:
                    with open(output_file, "wb") as f:
                        f.write(audio_data)
                    logger.info(f"音频已保存至: {output_file}")
            else:
                logger.error("未接收到有效的音频数据")
                if last_message is not None:
                    logger.error(f"最后接收到的消息类型: {'str' if isinstance(last_message, str) else 'bytes'}")
                    if isinstance(last_message, str):
                        logger.error(f"最后接收到的文本消息: {last_message}")

    def _play_audio(self, audio_data: bytes):
        """
        播放音频数据
        :param audio_data: 音频二进制数据

        NOTE(review): this plays the bytes as raw 16-bit mono PCM at 24 kHz,
        but the demos request encoding="mp3"/"wav" — compressed data would
        play as noise. Confirm the encoding matches before relying on playback.
        """
        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
        # Stream the buffer through the output device.
        audio_stream = io.BytesIO(audio_data)
        stream.write(audio_stream.read())
        stream.stop_stream()
        stream.close()
        p.terminate()

    def http_tts(self, text: str, voice_type: str, output_file: str = None, **kwargs):
        """
        HTTP非流式合成(全部合成后返回)
        :param text: 合成文本
        :param voice_type: 音色类型
        :param output_file: 输出音频文件路径
        """
        request_data = self._build_base_request(text, voice_type, "query", **kwargs)
        headers = {
            "Authorization": f"Bearer; {self.token}",
            "Content-Type": "application/json",
        }
        try:
            # FIX: added a timeout — without one a stalled server hangs the caller forever.
            response = requests.post(self.http_url, json=request_data, headers=headers, timeout=30)
            response.raise_for_status()

            result = response.json()
            if result.get("code") == 3000:
                # Decode and play the base64-encoded audio.
                audio_data = base64.b64decode(result["data"])
                self._play_audio(audio_data)
                if output_file:
                    with open(output_file, "wb") as f:
                        f.write(audio_data)
                print(f"音频合成成功,时长:{result['addition']['duration']}ms")
                return True
            else:
                error_msg = result.get("message", "未知错误")
                error_code = result.get("code", "未知错误码")
                print(f"合成失败:{error_msg}(错误码:{error_code})")
                return False
        except requests.exceptions.RequestException as e:
            print(f"HTTP请求失败: {str(e)}")
            return False
        except (KeyError, ValueError) as e:
            print(f"响应解析失败: {str(e)}")
            return False


# Usage example
if __name__ == "__main__":
    # Default voice type comes from the project config.
    from api.config import API_CONFIGS
    VOICE_TYPE = API_CONFIGS['volcano_tts']['default_voice_type']

    tts = VolcanoTTS()

    # Example 1: HTTP synthesis
    tts.http_tts(
        text="欢迎使用火山引擎大模型语音合成API",
        voice_type=VOICE_TYPE,
        output_file="http_tts_result.mp3",
        encoding="mp3",
        speed_ratio=1.2,
    )

    # Example 2: WebSocket streaming synthesis (must run in an event loop)
    async def run_websocket_demo():
        await tts.websocket_tts(
            text="这是一个WebSocket流式合成的示例",
            voice_type=VOICE_TYPE,
            output_file="websocket_tts_result.mp3",
            encoding="wav",
        )

    asyncio.run(run_websocket_demo())
07-16
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值