Skip to content

Commit 08f2327

Browse files
authored
Create vllm-performance-stream-qwen1.5-long.py
1 parent b676a90 commit 08f2327

File tree

1 file changed

+191
-0
lines changed

1 file changed

+191
-0
lines changed
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
import requests
2+
import json
3+
import time
4+
import numpy as np
5+
# ---------------------------------------------------------------------------
# Benchmark configuration and result accumulators.
# ---------------------------------------------------------------------------

# Chat-completions endpoint that every request is POSTed to.
# Alternative hosts that were used before (labels are the original author's,
# presumably the GPU type of each host):
#   a800: url = "http://10.193.195.xxx:9009/v1/chat/completions"
#   4090: url = "http://10.112.2.xxx:9009/v1/chat/completions"
# NOTE(review): this value looks like a web-proxy rewrite of a plain
# "http://10.112.64.xxx:9009/..." address and the "xxx" octet is a
# placeholder -- confirm and set the real host before running.
# h800:
url = "http://10.112.64.xxx:9009/v1/chat/completions"

# JSON file holding the list of prompts to replay.
# Previously used dataset: input_path = "/home/aicc/alpaca_data_1k.json"
input_path = "./alpaca_gpt4_data_input_1k.json"

# Load through a context manager so the file handle is closed deterministically,
# and pin the encoding so loading does not depend on the platform default.
with open(input_path, "r", encoding="utf-8") as prompt_file:
    list_str = json.load(prompt_file)

# Per-request latency from sending the request to the first content chunk.
first_token_time_list = []
# Per-request mean gap between consecutive content chunks.
avg_token_time_list = []

# Every individual inter-chunk gap across all requests.
intertoken_time_list = []
# Per-request wall time from sending the request to the last chunk.
total_time_list = []
# Per-request number of inter-chunk gaps that were measured.
gen_token_len_list = []

# Token counts taken from the "usage" object of the streamed response.
prompt_tokens_list = []
completion_tokens_list = []
total_tokens_list = []

# Number of prompts visited so far by the request loop.
count = 0

# ---------------------------------------------------------------------------
# Request loop: replay each prompt as a streaming chat completion and record
# time-to-first-chunk, inter-chunk gaps, end-to-end time and token usage.
# ---------------------------------------------------------------------------

# Fixed conversation history prepended to every request. It never changes, so
# it is built once instead of on every iteration.
_CONTEXT_MESSAGES = [
    {
        "role": "user",
        "content": "保持健康的秘诀"
    },
    {
        "role": "assistant",
        "content": "保持健康是一个综合性的概念,涉及到饮食、运动、休息、心理等多个方面。以下是一些保持健康的基本秘诀:\n\n1. 均衡饮食:保证营养均衡,吃足够的蔬菜、水果、全谷类、优质蛋白质(如鱼、鸡、豆腐等)和健康脂肪(如坚果、鱼油)。避免过多摄入糖分、盐分和饱和脂肪。\n\n2. 适量运动:每周至少150分钟的中等强度有氧运动,如快步走、游泳、骑自行车等,或者75分钟的高强度运动,以及每周两次以上的肌肉力量训练。\n\n3. 充足睡眠:成年人每晚应保证7-9小时的睡眠,良好的睡眠有助于身体修复和恢复。\n\n4. 戒烟限酒:避免吸烟,限制酒精摄入,过量饮酒对健康有害。\n\n5. 定期体检:定期进行身体检查,早期发现并处理可能的健康问题。\n\n6. 保持良好的心理状态:保持积极乐观的心态,学会应对压力,进行适当的心理调适,如冥想、瑜伽等。\n\n7. 保持良好的生活习惯:避免熬夜,保持良好的个人卫生,定期洗手,避免接触传染病。\n\n"
    },
    {
        "role": "user",
        "content": "如何保持良好的心态"
    },
    {
        "role": "assistant",
        "content": "保持良好的心态是身心健康的重要组成部分,以下是一些方法:\n\n1. **正念冥想**:这是一种训练注意力和觉察力的技巧,可以帮助你专注于当下,减少对过去或未来的忧虑,提升内心的平静和专注。\n\n2. **积极思考**:尝试用积极的角度看待问题,看待困难和挑战时,看到它们作为学习和成长的机会,而不是障碍。\n\n3. **健康的生活方式**:均衡饮食、规律作息、适度运动,这些都有助于保持身体和精神的活力,减少压力。\n\n4. **社交互动**:和朋友、家人保持良好的沟通,社交活动可以提供情感支持,减轻压力。\n\n5. **放松技巧**:如深呼吸、瑜伽、按摩、温泉浴等,可以帮助放松身心,缓解紧张和压力。\n\n6. **设定目标**:有目标的生活可以带来方向感和动力,但要确保目标是实际可行的,避免过度压力。\n\n7. **心理咨询**:如果你发现自己难以应对情绪或压力,不要犹豫寻求专业的心理咨询师的帮助。\n\n8. **自我接纳**:接受自己的优点和缺点,对自己宽容,减少自我批评。\n\n9. **时间管理**:合理安排时间,避免过度工作或学习,给自己留"
    },
]

_REQUEST_HEADERS = {
    'Content-Type': 'application/json'
}

# Field prefix that precedes each JSON payload in the streamed response.
_SSE_DATA_PREFIX = "data:"


def _extract_usage(chunk_text):
    """Find the token-usage object inside one decoded stream chunk.

    A chunk may hold several "data: ..." lines, so each line is parsed on its
    own instead of evaluating the chunk as a whole.

    Args:
        chunk_text: Decoded text of one chunk read from the response stream.

    Returns:
        A ``(payload_text, usage)`` pair, where ``payload_text`` is the JSON
        text of the event and ``usage`` is its "usage" dict containing
        "prompt_tokens". ``(None, None)`` when no line carries such an object.
    """
    # Split on "\n" only: str.splitlines() would also break on separators such
    # as U+2028 that may legitimately appear inside generated text.
    for raw_line in chunk_text.split("\n"):
        event = raw_line.strip()
        if event.startswith(_SSE_DATA_PREFIX):
            # Slice the prefix off; str.lstrip("data: ") strips a *character
            # set* and only worked because JSON payloads start with "{".
            event = event[len(_SSE_DATA_PREFIX):].strip()
        if not event or event == "[DONE]":
            # Blank separator or the end-of-stream sentinel of OpenAI-style
            # streams: nothing to parse.
            continue
        try:
            # json.loads replaces eval(): eval executes arbitrary server-sent
            # text and fails on the JSON literals null / true / false.
            parsed = json.loads(event)
        except ValueError:
            continue
        usage = parsed.get("usage") if isinstance(parsed, dict) else None
        if isinstance(usage, dict) and "prompt_tokens" in usage:
            return event, usage
    return None, None


for line in list_str:
    count += 1
    # Replay at most the first 1000 prompts.
    if count > 1000:
        break

    print("--------------------", str(count))

    # Skip empty prompts (they still count towards the 1000 limit).
    if len(line) == 0:
        continue

    content = f"{line}"

    payload = json.dumps({
        "model": "qwen1.5",
        "messages": _CONTEXT_MESSAGES + [
            {
                "role": "user",
                "content": content
            }
        ],
        "max_tokens": 256,
        "top_p": 0.85,
        "stream": True
    })

    start_time = time.perf_counter()
    start = start_time
    # Defined up front so an empty stream cannot leave it unset or stale from
    # the previous request.
    end_time = start_time

    i = 0
    gen_time_list = []
    # The context manager releases the connection even if reading fails.
    with requests.post(url, headers=_REQUEST_HEADERS, data=payload, stream=True) as response:
        response.raise_for_status()

        for chunk in response.iter_content(chunk_size=8192):
            end_time = time.perf_counter()
            # errors="replace": a multi-byte character split across two
            # chunks must not abort the whole benchmark run.
            result = chunk.decode('utf-8', errors='replace')
            print(result)
            # The chunk announcing the assistant role carries no generated
            # text; skipping it without resetting start_time keeps the
            # first-token latency measured from the moment of the request.
            if "assistant" in result and "role" in result:
                continue

            gen_time = end_time - start_time
            start_time = end_time

            i += 1
            if i == 1:
                first_token_time_list.append(gen_time)
                print("首Token时延:", round(gen_time, 4))
            else:
                gen_time_list.append(gen_time)

            # Cheap substring test first so that ordinary chunks are not
            # JSON-parsed inside the timed loop.
            if "usage" in result and "prompt_tokens" in result:
                result_new, usage = _extract_usage(result)
                if usage is None:
                    print("warning: chunk mentions usage but holds no parsable usage object")
                else:
                    print("------------------------", result_new)
                    prompt_tokens_list.append(int(usage["prompt_tokens"]))
                    completion_tokens_list.append(int(usage["completion_tokens"]))
                    total_tokens_list.append(int(usage["total_tokens"]))

    if i == 0:
        # Nothing measurable came back; recording a zero-length request would
        # only distort the statistics.
        print("warning: no content chunks received, request left out of the statistics")
        continue

    intertoken_time_list.extend(gen_time_list)
    # NOTE(review): this counts measured inter-chunk gaps, which equals the
    # number of generated tokens only if each chunk carries exactly one token.
    gen_token_len_list.append(len(gen_time_list))
    if gen_time_list:
        avg_token_time = sum(gen_time_list) / len(gen_time_list)
        print("Token间时延:", round(avg_token_time, 4))
        avg_token_time_list.append(avg_token_time)
    else:
        # A single content chunk yields no gap; avoid dividing by zero.
        print("Token间时延: n/a (fewer than two content chunks)")

    total_time = end_time - start
    print("端到端时延:", round(total_time, 4))
    total_time_list.append(total_time)

# ---------------------------------------------------------------------------
# Summary report over everything collected by the request loop.
# ---------------------------------------------------------------------------


def _print_distribution(title, samples, digits):
    """Print min, max, TP50/TP90/TP99 and mean of ``samples`` under ``title``.

    Args:
        title: Section header line, printed as-is.
        samples: List of numbers to summarise.
        digits: Decimal places used when rounding min, max and mean.
            Percentiles are printed unrounded.
    """
    print(title)
    if not samples:
        # min()/max()/division would raise on an empty list and abort the
        # remaining sections of the report.
        print("(no samples collected)")
        return
    values = np.array(samples)
    print("最小值:", round(min(samples), digits))
    print("最大值:", round(max(samples), digits))
    for percentile in (50, 90, 99):
        print(f"TP{percentile}:", np.percentile(values, percentile))
    print("平均:", round(sum(samples) / len(samples), digits))


print("Token输入输出长度---------------------")

if prompt_tokens_list and completion_tokens_list and total_tokens_list:
    print("平均输入token长度:", round(sum(prompt_tokens_list) / len(prompt_tokens_list), 5))

    arr_np = np.array(prompt_tokens_list)
    print("输入token长度-均值:", round(np.mean(arr_np), 5))
    print("输入token长度-方差:", round(np.var(arr_np), 5))

    print("平均输出token长度:", round(sum(completion_tokens_list) / len(completion_tokens_list), 5))
    print("平均总token长度:", round(sum(total_tokens_list) / len(total_tokens_list), 5))
else:
    # Happens when no streamed chunk contained a usage object; the latency
    # sections below are still worth printing.
    print("(no token usage collected)")

# Time from sending the request to the first content chunk.
_print_distribution("首Token时延---------------------", first_token_time_list, 5)

# Macro average: one mean inter-chunk gap per request.
_print_distribution("平均Token间时延-宏平均---------------------", avg_token_time_list, 4)

# Number of measured inter-chunk gaps per request.
_print_distribution("生成token长度---------------------", gen_token_len_list, 4)

# Micro average: every inter-chunk gap of every request pooled together.
_print_distribution("Token间时延-微平均---------------------", intertoken_time_list, 4)

# Wall time from sending the request to the last chunk.
_print_distribution("端到端时延---------------------", total_time_list, 4)

0 commit comments

Comments
 (0)