Merge pull request #62 from QwenLM/add_streaming_demo

add streaming chat demo
2026-05-20 16:35:47 +08:00 · 2023-08-04 22:25:36 +08:00
parent 957578bd12 47d984e90e
commit a626f56bd6
1 changed files with 71 additions and 0 deletions
--- a/cli_demo.py
+++ b/cli_demo.py
@@ -0,0 +1,71 @@
+import os
+import platform
+import signal
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import GenerationConfig
+
+# Load the tokenizer that matches the chat model.
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
+# Enable exactly one of the model-loading lines below.
+# Before choosing BF16, we recommend checking that it is supported:
+# import torch
+# torch.cuda.is_bf16_supported()
+# use bf16
+# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, bf16=True).eval()
+# use fp16
+# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
+# use cpu only
+# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="cpu", trust_remote_code=True).eval()
+# use fp32
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
+model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat",
+                                                           trust_remote_code=True)  # generation length, top_p and other hyperparameters can be set here
+
+# Set to True by signal_handler(); main() checks and resets it between streamed chunks.
+stop_stream = False
+
+
+def signal_handler(signal, frame):
+    """Request that the reply currently being streamed is stopped.
+
+    Has the two-argument shape of a ``signal.signal`` handler. Neither
+    argument is inspected; the function only raises the module-level
+    ``stop_stream`` flag, which the chat loop polls.
+    """
+    global stop_stream
+    # The signal number and stack frame are part of the handler signature
+    # but carry no information this handler needs.
+    del signal, frame
+    stop_stream = True
+
+
+def clear_screen():
+    """Clear the terminal using the platform's shell command.
+
+    Runs ``cls`` when ``platform.system()`` reports ``"Windows"`` and
+    ``clear`` on every other platform.
+    """
+    shell_command = "cls" if platform.system() == "Windows" else "clear"
+    os.system(shell_command)
+
+
+def print_history(history):
+    """Print every past turn of the conversation to stdout.
+
+    Args:
+        history: Iterable of turns; item 0 of each turn is the user's
+            query and item 1 is the model's reply.
+    """
+    for turn in history:
+        user_text = turn[0]
+        reply_text = turn[1]
+        print(f"\nUser：{user_text}\nQwen-7B：{reply_text}")
+
+
+def main():
+    """Run the interactive streaming chat loop on the terminal.
+
+    Reads queries from stdin until the user types ``stop`` or input ends.
+    ``clear`` wipes the conversation history and the screen. Any other
+    input is passed to ``model.chat(..., stream=True)``; the screen is
+    redrawn with the history and the partial reply for every streamed
+    value, and the final (query, reply) pair is appended to the history.
+
+    Pressing Ctrl-C while a reply is streaming stops that reply and keeps
+    whatever text was produced so far; the program keeps running. Outside
+    of streaming, Ctrl-C keeps its previous behaviour.
+    """
+    global stop_stream
+    welcome = "欢迎使用 Qwen-7B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序"
+    history = []
+    clear_screen()
+    print(welcome)
+    while True:
+        try:
+            query = input("\nUser：")
+        except EOFError:
+            # End of input (Ctrl-D or a closed pipe): leave like "stop"
+            # instead of crashing with a traceback.
+            break
+        command = query.strip()
+        if command == "stop":
+            break
+        if command == "clear":
+            history = []
+            clear_screen()
+            print(welcome)
+            continue
+
+        # Start each turn clean so neither a stale stop request nor the
+        # previous reply can leak into this turn when nothing is streamed.
+        stop_stream = False
+        response = ''
+        # Route SIGINT to signal_handler only while streaming; without this
+        # registration the stop_stream check below could never trigger.
+        previous_handler = signal.signal(signal.SIGINT, signal_handler)
+        try:
+            for response in model.chat(tokenizer, query, history=history, stream=True):
+                if stop_stream:
+                    stop_stream = False
+                    break
+                clear_screen()
+                print_history(history)
+                print(f"\nUser: {query}")
+                print("\nQwen-7B：", end="")
+                print(response)
+        finally:
+            # signal.signal() returns None when the earlier handler was not
+            # installed from Python; that value cannot be re-installed.
+            if previous_handler is not None:
+                signal.signal(signal.SIGINT, previous_handler)
+
+        history.append((query, response))
+
+
+# Start the chat loop only when this file is executed as a script.
+if __name__ == "__main__":
+    main()