mirror of
https://github.com/QwenLM/Qwen.git
synced 2026-05-20 16:35:47 +08:00
add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support
This commit is contained in:
39
dcu-support/cli_demo_batch.py
Normal file
39
dcu-support/cli_demo_batch.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import argparse
|
||||
from fastllm_pytools import llm
|
||||
import time
|
||||
|
||||
def args_parser(argv=None):
    """Parse the command-line options of the fastllm batch demo.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so a
            plain ``args_parser()`` call behaves as before.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``path``
        attribute holds the value given to ``-p`` / ``--path``.

    Raises:
        SystemExit: raised by argparse when the required ``--path`` option
            is missing or the arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description='fastllm_chat_demo')
    # The option is required, so no default is given: argparse would never
    # fall back to it.
    parser.add_argument('-p', '--path', type=str, required=True, help='模型文件的路径')
    return parser.parse_args(argv)
|
||||
|
||||
if __name__ == "__main__":
    # Batch-inference demo: load the model given on the command line, run a
    # fixed list of prompts through it in one batch, print every response and
    # report the throughput of the batch call.
    args = args_parser()
    model_path = args.path

    # Four prompts repeated twice, giving a batch of eight requests.
    prompts = ["深圳有什么好玩的", "上海有什么好玩的", "晚上睡不着怎么办", "南京有什么好吃的"] * 2
    print(prompts)

    model = llm.model(model_path)

    # perf_counter() is a monotonic, high-resolution clock; unlike
    # time.time() it cannot jump backwards when the system clock is
    # adjusted, so the measured interval is never negative.
    t0 = time.perf_counter()
    responses, historys = model.response_batch(prompts)
    elapsed = time.perf_counter() - t0

    token_output_count = 0
    word_len = 0
    for i, res in enumerate(responses):
        # The length of the tokenizer's encoding of each response is used as
        # that response's output-token count.
        tokens = model.tokenizer_encode_string(res)
        token_output_count += len(tokens)
        word_len += len(res)

        print("batch index: ", i)
        print(res)
        print("")

    print("\ntoken/s: {:.2f}, character/s: {:.2f}".format(token_output_count/elapsed, word_len/elapsed))
|
||||
|
||||
Reference in New Issue
Block a user