0. SenseVoice安装
0.1 安装依赖包
# 安装基础依赖
pip install torch torchaudio
pip install numpy scipy
# 安装ModelScope和FunASR
pip install modelscope
pip install funasr
# 如果安装速度慢,可以使用国内镜像源
pip install -U funasr modelscope -i https://pypi.tuna.tsinghua.edu.cn/simple
依赖说明
- torch和torchaudio:PyTorch深度学习框架
- numpy和scipy:科学计算库
- modelscope:ModelScope平台SDK
- funasr:语音识别框架
0.2 通过ModelScope SDK安装
# Download the 'iic/SenseVoiceSmall' model via the ModelScope SDK; the value
# returned by snapshot_download is kept in model_dir for later use.
from modelscope import snapshot_download
model_dir = snapshot_download('iic/SenseVoiceSmall')
0.3 通过Git安装
# 克隆仓库
git clone https://www.modelscope.cn/iic/SenseVoiceSmall.git
0.4 初始化测试
创建测试脚本 test_init.py:
from funasr import AutoModel
def test_model():
    """Try to construct the SenseVoiceSmall model on CPU and report the outcome.

    Progress and failure messages are printed to stdout.

    Returns:
        bool: True when ``AutoModel(...)`` completed without raising,
        False when any exception occurred during construction.
    """
    try:
        print("正在初始化模型...")
        # The instance itself is not needed; only whether construction succeeds.
        AutoModel(
            model="iic/SenseVoiceSmall",
            trust_remote_code=True,
            device="cpu",
        )
        print("模型初始化成功!")
        return True
    except Exception as e:
        # Deliberately broad: this is a smoke test, so every failure is
        # reported to the user instead of crashing with a traceback.
        print(f"模型初始化失败: {e}")
        return False


if __name__ == "__main__":
    # Surface the result as the process exit status (0 = success, 1 = failure)
    # so shells and CI jobs can detect a broken installation.
    raise SystemExit(0 if test_model() else 1)
运行测试:
python test_init.py
安装说明
- 推荐使用ModelScope SDK安装,可以自动处理依赖关系
- 安装完成后,模型文件会保存在用户目录下的.cache文件夹中
- 如果使用Git安装,需要手动安装所有依赖
- 初始化测试可以验证安装是否成功
- 如果使用GPU,需要安装CUDA版本的PyTorch
安装注意事项
- 确保网络连接正常,因为需要下载模型文件
- 如果使用代理,请确保代理设置正确
- 安装过程中如果遇到依赖冲突,可以尝试创建新的虚拟环境
- 如果初始化测试失败,请检查错误信息并确保所有依赖都已正确安装
- 建议使用Python 3.8或更高版本(FunASR要求python>=3.8)
- 如果使用GPU,请确保CUDA版本与PyTorch版本匹配
1. Python环境部署
1.1 安装依赖
pip install -U funasr modelscope
1.2 创建服务类
创建 sensevoice_service.py:
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
import os
import json
class SenseVoiceService:
    """Singleton wrapper around a FunASR ``AutoModel`` used for transcription.

    Every call to ``SenseVoiceService(...)`` returns the same instance. The
    model is built only once, on the first successful ``__init__``; a
    ``config_path`` passed on later calls is ignored.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        Constructor arguments are accepted and ignored here because Python
        passes the same arguments to ``__new__`` and ``__init__``; without
        ``*args, **kwargs`` a call such as ``SenseVoiceService("config.json")``
        raises ``TypeError`` before ``__init__`` ever runs.
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, config_path=None):
        """Build the model on first construction.

        :param config_path: optional path to a JSON configuration file whose
            keys override the defaults returned by ``_load_config``.
        :raises Exception: whatever ``AutoModel`` raises is printed and
            re-raised; the instance then stays uninitialized so that a later
            construction retries.
        """
        if self._initialized:
            return
        try:
            print("正在初始化模型...")
            # _load_config always returns a dict containing every default
            # key, so plain indexing is safe here.
            config = self._load_config(config_path)
            self.model = AutoModel(
                model=config["model_path"],
                trust_remote_code=config["trust_remote_code"],
                remote_code=config["remote_code"],
                vad_model=config["vad_model"],
                vad_kwargs=config["vad_kwargs"],
                device=config["device"],
            )
            print("模型初始化完成!")
            self._initialized = True
        except Exception as e:
            print(f"模型初始化失败: {str(e)}")
            raise

    @staticmethod
    def _default_model_path():
        """Return the SenseVoiceSmall cache directory under the current user's home.

        Replaces a path that was hard-coded to one specific Windows account.
        Forward slashes are kept so the value has the same shape as before.
        """
        home = os.path.expanduser("~").replace(os.sep, "/")
        return home + "/.cache/modelscope/hub/models/iic/SenseVoiceSmall"

    def _load_config(self, config_path):
        """Load the configuration, falling back to defaults.

        :param config_path: path to a JSON file, or None.
        :return: dict with the keys ``model_path``, ``trust_remote_code``,
            ``remote_code``, ``vad_model``, ``vad_kwargs`` and ``device``.
            Values from the file override the defaults (shallow merge). If
            the file is missing, unreadable, not valid JSON, or not a JSON
            object, the defaults are returned unchanged.
        """
        default_config = {
            "model_path": self._default_model_path(),
            "trust_remote_code": True,
            "remote_code": "./model.py",
            "vad_model": "fsmn-vad",
            "vad_kwargs": {"max_single_segment_time": 30000},
            "device": "cpu",
        }
        if not config_path or not os.path.exists(config_path):
            return default_config
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict):
                raise ValueError("配置文件顶层必须是JSON对象")
        except Exception as e:
            # Deliberate best-effort: a broken config file must not prevent
            # the service from starting with defaults.
            print(f"加载配置文件失败: {str(e)}")
            return default_config
        return {**default_config, **loaded}

    def transcribe(self, audio_path, language="auto"):
        """Transcribe an audio file.

        :param audio_path: path to the audio file.
        :param language: language setting, one of "auto", "zh", "en", "yue",
            "ja", "ko", "nospeech".
        :return: dict ``{"success": bool, "text": str | None, "error": str | None}``.
            This method does not raise; any failure is reported through the
            ``error`` field with ``success`` set to False.
        """
        try:
            if not os.path.exists(audio_path):
                raise FileNotFoundError(f"音频文件 {audio_path} 不存在")
            res = self.model.generate(
                input=audio_path,
                cache={},
                language=language,
                use_itn=True,
                batch_size_s=60,
                merge_vad=True,
                merge_length_s=15,
            )
            if not res:
                # Give a readable error instead of "list index out of range".
                raise ValueError("模型未返回任何识别结果")
            text = rich_transcription_postprocess(res[0]["text"])
        except Exception as e:
            return {
                "success": False,
                "text": None,
                "error": str(e),
            }
        return {
            "success": True,
            "text": text,
            "error": None,
        }
2. Spring Boot集成
2.1 添加依赖
<dependency>
<groupId>org.python</groupId>
<artifactId>jython-standalone</artifactId>
<version>2.7.2</version>
</dependency>
2.2 创建Java服务类
package com.your.package.service;
import org.python.core.PyObject;
import org.python.util.PythonInterpreter;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
@Service
public class SenseVoiceService {
private PythonInterpreter interpreter;
private PyObject senseVoiceInstance;
@PostConstruct
public void init() {
try {
// 初始化Python解释器
interpreter = new PythonInterpreter();
// 添加Python路径
String pythonPath = System.getProperty("user.dir") + File.separator + "python";
interpreter.exec("import sys");
interpreter.exec("sys.path.append('" + pythonPath + "')");
// 导入Python模块
interpreter.exec("from sensevoice_service import SenseVoiceService");
// 创建Python服务实例
interpreter.exec("service = SenseVoiceService()");
senseVoiceInstance = interpreter.get("service");
} catch (Exception e) {
throw new RuntimeException("初始化SenseVoice服务失败", e);
}
}
public Map<String, Object> transcribe(String audioPath, String language) {
try {
// 调用Python方法
PyObject result = senseVoiceInstance.invoke("transcribe",
new PyObject[]{new PyString(audioPath), new PyString(language)});
// 转换结果为Java对象
return (Map<String, Object>) result.__tojava__(Map.class);
} catch (Exception e) {
Map<String, Object> errorResult = new HashMap<>();
errorResult.put("success", false);
errorResult.put("error", e.getMessage());
return errorResult;
}
}
}
2.3 创建Controller
package com.your.package.controller;
import com.your.package.service.SenseVoiceService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.util.Map;
/**
 * REST endpoint that accepts an uploaded audio file and returns the
 * transcription result produced by {@link SenseVoiceService}.
 */
@RestController
@RequestMapping("/api/sensevoice")
public class SenseVoiceController {
    @Autowired
    private SenseVoiceService senseVoiceService;

    /**
     * Saves the uploaded file to a temporary location, transcribes it and
     * removes the temporary file afterwards.
     *
     * @param file     uploaded audio file (multipart field {@code file})
     * @param language language code, defaults to {@code "auto"}
     * @return a map with the keys {@code success}, {@code text} and {@code error}
     */
    @PostMapping("/transcribe")
    public Map<String, Object> transcribe(
            @RequestParam("file") MultipartFile file,
            @RequestParam(value = "language", defaultValue = "auto") String language) {
        String audioPath;
        try {
            audioPath = saveUploadedFile(file);
        } catch (RuntimeException e) {
            // Report upload problems in the same shape as transcription errors.
            Map<String, Object> error = new java.util.HashMap<>();
            error.put("success", false);
            error.put("text", null);
            error.put("error", e.getMessage());
            return error;
        }
        try {
            return senseVoiceService.transcribe(audioPath, language);
        } finally {
            // Best-effort cleanup so uploads do not accumulate on disk.
            try {
                java.nio.file.Files.deleteIfExists(java.nio.file.Paths.get(audioPath));
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if the temp file cannot be removed.
            }
        }
    }

    /**
     * Copies the upload into a fresh temporary file.
     *
     * @param file uploaded file
     * @return absolute path of the saved copy
     * @throws IllegalArgumentException      if the upload is missing or empty
     * @throws java.io.UncheckedIOException  if the copy cannot be written
     */
    private String saveUploadedFile(MultipartFile file) {
        if (file == null || file.isEmpty()) {
            throw new IllegalArgumentException("上传的音频文件为空");
        }
        // Keep only a short alphanumeric extension from the client-supplied
        // name; the rest of the name is untrusted and never used in the path.
        String suffix = ".tmp";
        String original = file.getOriginalFilename();
        if (original != null) {
            int dot = original.lastIndexOf('.');
            if (dot >= 0 && original.substring(dot + 1).matches("[A-Za-z0-9]{1,8}")) {
                suffix = original.substring(dot).toLowerCase();
            }
        }
        try {
            java.nio.file.Path target = java.nio.file.Files.createTempFile("sensevoice-", suffix);
            try (java.io.InputStream in = file.getInputStream()) {
                java.nio.file.Files.copy(in, target,
                        java.nio.file.StandardCopyOption.REPLACE_EXISTING);
            }
            return target.toAbsolutePath().toString();
        } catch (java.io.IOException e) {
            throw new java.io.UncheckedIOException("保存上传文件失败", e);
        }
    }
}
2.4 配置文件结构
your-project/
├── src/
│ └── main/
│ ├── java/
│ │ └── com/your/package/
│ │ ├── controller/
│ │ ├── service/
│ │ └── Application.java
│ └── resources/
│ └── python/
│ ├── sensevoice_service.py
│ └── config.json
2.5 配置文件示例
{
"model_path": "path/to/your/model",
"device": "cpu",
"vad_model": "fsmn-vad",
"vad_kwargs": {
"max_single_segment_time": 30000
}
}
3. 使用说明
3.1 启动服务
启动Spring Boot应用后,可以通过以下方式调用服务:
curl -X POST -F "file=@audio.wav" -F "language=auto" http://localhost:8080/api/sensevoice/transcribe
3.2 返回结果格式
{
"success": true,
"text": "转录的文本内容",
"error": null
}
注意事项
- 确保Python环境已正确安装所有依赖
- 配置文件中的路径需要根据实际环境调整
- 文件上传路径需要有写入权限
- 建议添加适当的错误处理和日志记录
- 可以考虑添加缓存机制,避免重复处理相同的音频文件
常见问题
- 如果遇到Python模块导入错误,检查Python路径配置(Java服务会从"应用工作目录/python"加载 sensevoice_service.py,请确认该目录存在且包含此文件)
- 如果遇到内存不足,考虑调整JVM参数
- 如果遇到文件权限问题,检查文件保存路径的权限设置