目录
1.部署环境
(1)操作系统:Ubuntu server 22.04.2 LTS
(2)已安装docker
(3)部署版本:funasr-runtime-sdk-online-cpu-0.1.12
(4)官方github地址:https://github.com/modelscope/FunASR
(5)云服务器:2核4G
2.下载项目文件的ZIP压缩包
(1)123云盘(无需登录即可下载):
链接: https://www.123684.com/s/dUCojv-ybYO3
(2)压缩包里面包含了5个文件:原项目的压缩包、修改过的html文件,以及3个录音处理需要的js文件(recorder-core.js、wav.js、pcm.js),这3个js文件来自这个项目:https://github.com/xiangyuecn/Recorder
(3)如图所示:
3.输入以下命令拉取docker镜像
sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.12
4.创建本地目录
(1)这是要创建的本地目录路径,这个目录将用于挂载到容器中,存储模型文件。如图:
mkdir -p ./funasr-runtime-resources/models
5.启动镜像
(1)镜像拉取成功后启动镜像。官方文档是前台启动,我这里改为后台启动(加了 -d 参数),这样关闭终端窗口后服务也不会中断;同时把宿主机的10096端口映射到容器内的10095端口,命令如下:
sudo docker run -d -p 10096:10095 -it --privileged=true -v $PWD/funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.12
(2)运行后可以输入以下命令查看,运行是否成功,成功如下图:
docker ps
6.进入容器并启动服务
(1)上面已经查出容器的CONTAINER ID ,输入下面命令进入,记得替换成你的ID,如图
docker exec -it fbb28c23b131 /bin/bash
(2)进到启动服务的脚本目录
cd FunASR/runtime
(3)启动服务,我这里在官方原有的命令上加了“--certfile 0”,意思是关闭ssl,命令如下:
nohup bash run_server_2pass.sh --download-model-dir /workspace/models --certfile 0 --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst --itn-dir thuduj12/fst_itn_zh --hotword /workspace/models/hotwords.txt > log.txt 2>&1 &
7.打开服务器防火墙对应的端口
端口:10096(TCP),这个端口要与第5步 docker run 命令中 -p 参数映射的宿主机端口保持一致
8.测试
(1)使用浏览器打开上面下载的html文件,记得把页面中的服务器地址(默认是 ws://127.0.0.1:10096/)换成你自己服务器的IP地址
(2)点击连接,显示如下即为成功,如果显示连接不上就等等,有时候下载模型会比较慢
(3)点击“开始”,你就可以开始讲话了,如下图就证明你成功啦
9.最后附上html文件的源码
<!DOCTYPE html>
<html>
<head>
<!-- Single-page speech-recognition client: page styles plus the static markup
     that the inline script at the bottom of the body looks up by element id. -->
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>语音识别</title>
<style>
/* Centered page column. */
.container {
width: 90%;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
/* Bordered box used for both the server-config and the result sections. */
.panel {
border: 1px solid #ccc;
padding: 15px;
margin: 10px 0;
border-radius: 5px;
}
/* Shared sizing for the address input and the result textarea. */
.input-full,
.result-area {
width: 100%;
padding: 8px;
margin: 5px 0;
}
.result-area {
min-height: 150px;
}
/* Connect / start / stop buttons. */
.button-group button {
padding: 8px 20px;
margin-right: 10px;
cursor: pointer;
}
</style>
</head>
<body>
<!-- Recording library scripts; the inline script below calls Recorder(...),
     Recorder.SampleData and Recorder.pcm2wav, so these must load first. -->
<script src="recorder-core.js" charset="UTF-8"></script>
<script src="wav.js" charset="UTF-8"></script>
<script src="pcm.js" charset="UTF-8"></script>
<div class="container">
<h1>语音识别系统</h1>
<!-- Server configuration panel: the WebSocket address read by wsStart(). -->
<div class="panel">
<h3>服务器配置</h3>
<input id="wssip" type="text" class="input-full" value="ws://127.0.0.1:10096/" />
</div>
<!-- Recognition result panel. -->
<div class="panel">
<h3>识别结果</h3>
<textarea id="varArea" class="result-area" readonly="true"></textarea>
<!-- Status line; the script overwrites its content as the connection and recording state change. -->
<div id="info_div">请点击开始按钮开始识别</div>
<!-- Control buttons; the script disables start/stop until a connection is established. -->
<div class="button-group">
<button id="btnConnect">连接</button>
<button id="btnStart">开始</button>
<button id="btnStop">停止</button>
</div>
<!-- Audio player; its src is set to the recorded WAV after stop(). -->
<audio id="audio_record" controls style="width: 100%"></audio>
</div>
</div>
<script>
// Page-level state and one-time setup.

// Look up every element the script touches once and keep the references,
// keyed by element id.
// NOTE(review): the markup in this file has no element with id "varHot", so
// that entry is expected to be null — confirm before relying on it.
const elements = Object.fromEntries(
  [
    'varArea',
    'info_div',
    'btnStart',
    'btnStop',
    'btnConnect',
    'wssip',
    'varHot',
    'audio_record',
  ].map((id) => [id, document.getElementById(id)]),
);

// Samples that were resampled but not yet sent (less than one full chunk).
let sampleBuf = new Int16Array();
// Text currently shown in the result area.
let rec_text = "";
// Accumulated text from "offline" / "2pass-offline" results only.
let offline_text = "";

// Recording cannot start or stop until a connection has been established.
for (const button of [elements.btnStart, elements.btnStop]) {
  button.disabled = true;
}

// Recorder instance producing 16-bit PCM at 16 kHz; recProcess receives the
// captured buffers while recording.
const rec = Recorder({
  type: "pcm",
  bitRate: 16,
  sampleRate: 16000,
  onProcess: recProcess,
});
/**
 * Reports the recognition mode sent to the server in the handshake and in the
 * end-of-speech message.
 *
 * @returns {string} Always the literal "2pass"; the mode is not configurable
 *   from the page.
 */
function getAsrMode() {
  const mode = "2pass";
  return mode;
}
/**
 * Reports the value sent as the "itn" field of the handshake request.
 *
 * @returns {boolean} Always false; the setting is not configurable from the page.
 */
function getUseITN() {
  const useItn = false;
  return useItn;
}
/**
 * Reports the hotwords to attach to the handshake request.
 *
 * @returns {null} Always null, which makes the handshake omit the "hotwords" field.
 */
function getHotwords() {
  const hotwords = null;
  return hotwords;
}
// WebSocket wrapper shared by the whole page: incoming messages go to
// getJsonMessage, connection state changes go to getConnState.
const wsconnecter = new WebSocketConnectMethod({
  msgHandle: getJsonMessage,
  stateHandle: getConnState,
});

// Wire the UI controls to their handlers.
elements.btnConnect.onclick = start;
elements.btnStart.onclick = record;
elements.btnStop.onclick = stop;
elements.wssip.onchange = function () {
  console.log("地址变更:", elements.wssip.value);
};
/**
 * Click handler for the start button: opens the microphone and begins
 * recording.
 *
 * On success the start and connect buttons are disabled and the stop button is
 * enabled. On failure (for example the user refused microphone access) the
 * reason is logged and shown in the status line, and the buttons are left
 * untouched so the user can try again. Previously no failure callback was
 * passed, so a failed open produced no feedback at all.
 */
function record() {
  rec.open(
    () => {
      rec.start();
      console.log("开始录音");
      elements.btnStart.disabled = true;
      elements.btnStop.disabled = false;
      elements.btnConnect.disabled = true;
    },
    (errMsg, isUserNotAllow) => {
      const reason = isUserNotAllow ? "用户拒绝了录音权限" : "无法打开麦克风";
      console.log("录音错误: " + errMsg);
      // textContent, not innerHTML: errMsg comes from outside this script.
      elements.info_div.textContent = `${reason}: ${errMsg}`;
    },
  );
}
/**
 * Click handler for the connect button: clears previous results and asks the
 * WebSocket wrapper to open a connection.
 *
 * Start and stop stay disabled either way; the connect button is disabled only
 * while a connection attempt is in progress.
 *
 * @returns {number} 1 when wsStart() reported that a connection attempt was
 *   started, otherwise 0.
 */
function start() {
  clear();
  const connecting = wsconnecter.wsStart() === 1;

  elements.info_div.innerHTML = connecting
    ? "正在连接asr服务器,请等待..."
    : "请点击开始";
  elements.btnStart.disabled = true;
  elements.btnStop.disabled = true;
  elements.btnConnect.disabled = connecting;

  return connecting ? 1 : 0;
}
/**
 * Click handler for the stop button (also invoked by getConnState on a
 * connection error): flushes pending audio, tells the server that speech has
 * ended, schedules the socket to be closed, and finalizes the recording.
 *
 * Steps, in order:
 *  1. send any samples still waiting in sampleBuf and empty the buffer;
 *  2. send the end-of-speech request ("is_speaking": false);
 *  3. disable all three buttons and show a "recognizing" status;
 *  4. after 3000 ms close the socket and re-enable the connect button;
 *  5. stop the recorder and load the recording, converted to WAV, into the
 *     audio player.
 */
function stop() {
  const endOfSpeech = {
    "chunk_size": [5, 10, 5],
    "wav_name": "h5",
    "is_speaking": false,
    "chunk_interval": 10,
    "mode": getAsrMode(),
  };

  // Flush whatever is left over from the last partial chunk.
  const hasPendingSamples = sampleBuf.length > 0;
  if (hasPendingSamples) {
    wsconnecter.wsSend(sampleBuf);
    sampleBuf = new Int16Array();
  }
  wsconnecter.wsSend(JSON.stringify(endOfSpeech));

  // Lock the UI while the server finishes.
  elements.info_div.innerHTML = "发送完数据,请等候,正在识别...";
  for (const button of [elements.btnStop, elements.btnStart, elements.btnConnect]) {
    button.disabled = true;
  }

  // Close the connection after a fixed delay and let the user reconnect.
  const closeConnection = () => {
    wsconnecter.wsStop();
    elements.btnConnect.disabled = false;
    elements.info_div.innerHTML = "请点击连接";
  };
  setTimeout(closeConnection, 3000);

  // Convert the recorded PCM to WAV and hand it to the audio player.
  const showWav = (wavBlob) => {
    elements.audio_record.src = (window.URL || webkitURL).createObjectURL(wavBlob);
    elements.audio_record.controls = true;
  };
  const onRecorded = (pcmBlob) => {
    Recorder.pcm2wav({ sampleRate: 16000, bitRate: 16, blob: pcmBlob }, showWav);
  };
  const onRecordError = (errMsg) => console.log("录音错误: " + errMsg);
  rec.stop(onRecorded, onRecordError);
}
/**
 * Resets the result textarea and both accumulated-text variables to empty
 * strings. Called at the beginning of every connection attempt.
 */
function clear() {
  elements.varArea.value = "";
  rec_text = offline_text = "";
}
/**
 * Recorder onProcess callback: resamples the newest captured buffer to 16 kHz,
 * appends it to the pending samples, and sends every complete 960-sample chunk
 * to the server. Samples that do not fill a whole chunk stay in sampleBuf for
 * the next call (or for stop() to flush).
 *
 * The pending and new samples are merged with typed-array copies and the
 * chunks are cut by walking an offset, instead of spreading both arrays into a
 * plain array and re-slicing the remainder once per chunk.
 *
 * @param {Array} buffer - All buffers captured so far; only the last one is used.
 * @param {number} powerLevel - Unused.
 * @param {number} bufferDuration - Recorded duration in milliseconds, shown in
 *   the status line as seconds with one decimal.
 * @param {number} bufferSampleRate - Sample rate of the entries in `buffer`.
 * @param {number} newBufferIdx - Unused.
 */
function recProcess(buffer, powerLevel, bufferDuration, bufferSampleRate, newBufferIdx) {
  const CHUNK_SAMPLES = 960;

  const latest = buffer[buffer.length - 1];
  const resampled = Recorder.SampleData([latest], bufferSampleRate, 16000).data;

  // Pending samples first, then the newly resampled ones.
  const pending = new Int16Array(sampleBuf.length + resampled.length);
  pending.set(sampleBuf, 0);
  pending.set(resampled, sampleBuf.length);

  elements.info_div.innerHTML = "" + (bufferDuration / 1000).toFixed(1) + "s";

  // Send whole chunks; slice() copies, so each sent chunk owns its data.
  let offset = 0;
  for (; pending.length - offset >= CHUNK_SAMPLES; offset += CHUNK_SAMPLES) {
    wsconnecter.wsSend(pending.slice(offset, offset + CHUNK_SAMPLES));
  }
  sampleBuf = pending.slice(offset);
}
/**
 * WebSocket message handler: decodes one server message and updates the
 * result textarea.
 *
 * Results whose mode is "2pass-offline" or "offline" are run through
 * handleWithTimestamp, appended to offline_text, and replace the displayed
 * text. Any other mode is appended to the displayed text as-is.
 *
 * Compared with the previous version the payload is parsed once, a message
 * that is not valid JSON (or not a JSON object) is logged and ignored instead
 * of throwing inside the socket callback, and a message without a "text" field
 * contributes an empty string rather than the literal text "undefined".
 *
 * @param {{data: string}} jsonMsg - The WebSocket message event.
 */
function getJsonMessage(jsonMsg) {
  let payload;
  try {
    payload = JSON.parse(jsonMsg.data);
  } catch (err) {
    console.log("message parse error: " + err);
    return;
  }
  if (payload === null || typeof payload !== "object") {
    console.log("message ignored: " + jsonMsg.data);
    return;
  }

  const { text, mode, timestamp } = payload;
  const recognized = text == null ? "" : String(text);
  console.log("message: " + recognized);

  if (mode === "2pass-offline" || mode === "offline") {
    offline_text += handleWithTimestamp(recognized, timestamp);
    rec_text = offline_text;
  } else {
    rec_text += recognized;
  }
  elements.varArea.value = rec_text;
}
/**
 * Splits recognized text into segments at punctuation/spaces and prefixes each
 * segment with its start time in seconds, one "seconds:segment" line per
 * segment.
 *
 * Timestamp bookkeeping: a segment made only of ASCII letters consumes one
 * timestamp entry; any other segment consumes one entry per character.
 *
 * Fixes over the previous version:
 *  - the separator regex contained a bare "?" alternative, which is a regex
 *    syntax error and prevented the whole script from loading; the separators
 *    are now written as a character class with \u escapes for the full-width
 *    marks so they cannot be flattened to ASCII again;
 *  - when the timestamp list is shorter than the text needs, the segment is
 *    emitted without a time prefix instead of throwing a TypeError.
 *
 * @param {string} tmptext - Recognized text.
 * @param {?string} tmptime - JSON text of a list of [start, end] pairs; the
 *   start value is divided by 1000 for display. When null/undefined, the
 *   string "undefined", or when tmptext is empty, tmptext is returned unchanged.
 * @returns {string} The annotated text, or tmptext unchanged (see above).
 */
function handleWithTimestamp(tmptext, tmptime) {
  if (tmptime == null || tmptime === "undefined" || tmptext.length <= 0) {
    return tmptext;
  }

  // Separators: 。 ? , 、 plus ASCII "?", ".", "," and space.
  const segments = tmptext.split(/[\u3002\uFF1F\uFF0C\u3001?., ]/);
  const timestamps = JSON.parse(tmptime);

  let charIndex = 0;
  let annotated = "";
  for (const segment of segments) {
    if (segment === "undefined" || segment.length === 0) {
      continue;
    }
    const entry = timestamps[charIndex];
    const prefix = entry == null ? "" : entry[0] / 1000 + ":";
    annotated += prefix + segment + "\n";
    // Letter-only segments advance by one entry, others by one per character.
    charIndex += /^[a-zA-Z]+$/.test(segment) ? 1 : segment.length;
  }
  return annotated;
}
/**
 * Connection-state callback handed to the WebSocket wrapper.
 *
 * @param {number} connState - 0: connection opened; 1: connection closed;
 *   2: connection error. Any other value is ignored.
 */
function getConnState(connState) {
  switch (connState) {
    case 0:
      // Connected: only the start button becomes available.
      elements.info_div.innerHTML = '连接成功!请点击开始';
      elements.btnStart.disabled = false;
      elements.btnStop.disabled = true;
      elements.btnConnect.disabled = true;
      break;

    case 1:
      // Closed: nothing to do.
      break;

    case 2:
      // Error: wind down first, then override the button/status state that
      // stop() just set so the user can reconnect immediately.
      stop();
      console.log('connection error');
      alert("连接地址" + document.getElementById('wssip').value + "失败,请检查asr地址和端口。");
      elements.btnStart.disabled = true;
      elements.btnStop.disabled = true;
      elements.btnConnect.disabled = false;
      elements.info_div.innerHTML = '请点击连接';
      break;

    default:
      break;
  }
}
/**
 * Constructor for a small wrapper around one WebSocket connection to the
 * recognition server. Must be called with `new`.
 *
 * The instance exposes:
 *  - wsStart(): reads the address from the "wssip" input and opens a socket;
 *    returns 1 when a socket was created, 0 when the address does not look
 *    like ws:/wss: or the browser has no WebSocket support;
 *  - wsStop(): closes the socket if one was ever created;
 *  - wsSend(data): sends data only while the socket is open (readyState 1),
 *    otherwise silently drops it.
 *
 * @param {{msgHandle: function, stateHandle: function}} config
 *   msgHandle receives every message event; stateHandle receives 0 on open,
 *   1 on close and 2 on error.
 */
function WebSocketConnectMethod(config) {
  const { msgHandle, stateHandle } = config;
  let socket;

  // On open: send the handshake describing the session, then report state 0.
  const handleOpen = () => {
    const request = {
      "chunk_size": [5, 10, 5],
      "wav_name": "h5",
      "is_speaking": true,
      "chunk_interval": 10,
      "itn": getUseITN(),
      "mode": getAsrMode(),
    };
    const hotwords = getHotwords();
    if (hotwords != null) {
      request.hotwords = hotwords;
    }
    const handshake = JSON.stringify(request);
    console.log(handshake);
    socket.send(handshake);
    console.log("连接成功");
    stateHandle(0);
  };

  // On error: show the event in the status line, then report state 2.
  const handleError = (event) => {
    elements.info_div.innerHTML = "连接" + event;
    console.log(event);
    stateHandle(2);
  };

  this.wsStart = function () {
    const uri = document.getElementById('wssip').value;
    if (!uri.match(/wss:\S*|ws:\S*/)) {
      alert("请检查wss地址正确性");
      return 0;
    }
    console.log("Uri" + uri);

    if (!('WebSocket' in window)) {
      alert('当前浏览器不支持 WebSocket');
      return 0;
    }

    socket = new WebSocket(uri);
    socket.onopen = (event) => handleOpen(event);
    socket.onclose = (event) => {
      console.log("onclose ws!");
      stateHandle(1);
    };
    socket.onmessage = (event) => msgHandle(event);
    socket.onerror = (event) => handleError(event);
    return 1;
  };

  this.wsStop = function () {
    if (socket == null) {
      return;
    }
    console.log("stop ws!");
    socket.close();
  };

  this.wsSend = function (oneData) {
    if (socket != null && socket.readyState === 1) {
      socket.send(oneData);
    }
  };
}
</script>
</body>
</html>