add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support

2026-05-20 16:35:47 +08:00 · 2023-11-30 15:29:13 +08:00
parent 981c89b2a9
commit e8e15962d8
52 changed files with 6139 additions and 1435 deletions
--- a/dcu-support/cli_demo_batch.py
+++ b/dcu-support/cli_demo_batch.py
@@ -0,0 +1,39 @@
+import argparse
+from fastllm_pytools import llm
+import time
+
+def args_parser():
+    parser = argparse.ArgumentParser(description = 'fastllm_chat_demo')
+    parser.add_argument('-p', '--path', type = str, required = True, default = '', help = '模型文件的路径')
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = args_parser()
+
+    model_path = args.path
+
+    prompts = ["深圳有什么好玩的", "上海有什么好玩的", "晚上睡不着怎么办", "南京有什么好吃的"] * 2
+    print(prompts)
+
+    responses, historys = [], []
+    
+    model = llm.model(model_path)
+    
+    t0 = time.time()
+    responses, historys = model.response_batch(prompts)        
+    t1 = time.time()
+
+    token_output_count = 0
+    word_len = 0
+    for i, res in enumerate(responses):
+        tokens = model.tokenizer_encode_string(res)
+        token_output_count += len(tokens)
+        word_len += len(res)
+
+        print("batch index: ", i)
+        print(res)
+        print("")
+
+    print("\ntoken/s: {:.2f}, character/s: {:.2f}".format(token_output_count/(t1-t0), word_len/(t1-t0)))
+