Deploying a large model locally follows much the same pattern regardless of which model you pick.
Create the following files:
requirements.txt
langchain-core
langchain-community
langchain-ollama
python-dotenv
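Install the dependencies before running anything:

pip install -r requirements.txt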
.env
OLLAMA_MODEL=llama3
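main.py reads OLLAMA_MODEL from this file via python-dotenv and falls back to llama3 if it is missing, so you can switch to a smaller model such as phi3 by editing .env alone, without touching the code.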
main.py
import os
import sys

import requests  # used only to probe the local Ollama HTTP endpoint
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama


def check_ollama_connection():
    """Check whether the Ollama service is running."""
    try:
        response = requests.get("http://localhost:11434", timeout=5)
        if response.status_code == 200:
            print("Ollama service is up")
            return True
    except requests.ConnectionError:
        print("Cannot connect to the Ollama service")
        print("Make sure Ollama is running: `ollama serve`")
    return False


def check_model_available(model_name):
    """Check whether the model has been pulled locally."""
    try:
        response = requests.post(
            "http://localhost:11434/api/show",
            json={"name": model_name},
            timeout=5,
        )
        if response.status_code == 200:
            print(f"Model '{model_name}' is available")
            return True
    except requests.RequestException:
        pass
    print(f"Model '{model_name}' not found")
    print(f"Pull it first: `ollama pull {model_name}`")
    return False


# Read the model name from .env, falling back to llama3.
# Choose a smaller model such as 'phi3' if llama3 is too big for your machine.
load_dotenv()
MODEL_NAME = os.getenv("OLLAMA_MODEL", "llama3")

# Bail out early if the service or the model is not available
if not check_ollama_connection() or not check_model_available(MODEL_NAME):
    sys.exit(1)

# Initialize the chat model. This uses the langchain-ollama package from
# requirements.txt; the older langchain_community Ollama class is deprecated.
llm = ChatOllama(
    model=MODEL_NAME,
    base_url="http://localhost:11434",
    temperature=0.7,
)

# Build the chain: prompt -> model -> plain string.
# The system message replaces the old `system=` constructor argument.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a patient programming mentor who explains concepts in simple Chinese."),
    ("human", "Answer this programming question in simple Chinese: {input}"),
])
chain = prompt | llm | StrOutputParser()

# Single-turn question: the full answer comes back as one string
print("\nSample Q&A:")
response = chain.invoke({"input": "What does learning to program require?"})
print("\nAI answer:", response)

# Streaming demo: chunks are printed as the model generates them
print("\nStreaming demo (explaining neural networks):")
for chunk in chain.stream({"input": "Explain neural networks in simple Chinese"}):
    print(chunk, end="", flush=True)
print()
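Note the two calling styles: chain.invoke returns the complete answer in one string, while chain.stream yields chunks as the model generates them, which is what produces the token-by-token printout.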
Run it from the terminal:
python main.py
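main.py asks two fixed questions and exits. If you want an interactive session, here is a minimal sketch of a chat loop reusing the same chain (append it to main.py; note there is no conversation memory, so each question is answered independently):

# Minimal interactive loop on top of the chain defined above
while True:
    question = input("\nYou (empty line to quit): ").strip()
    if not question:
        break
    # Stream the answer token by token, same as the streaming demo
    for chunk in chain.stream({"input": question}):
        print(chunk, end="", flush=True)
    print()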