From df25dee1673af5f1768d8f8498d5af174069f0e2 Mon Sep 17 00:00:00 2001
From: wsl-wy <this@wysaid.org>
Date: Sat, 5 Aug 2023 20:48:26 +0800
Subject: [PATCH 01/10] Add Python web UI and a simple run shell script

---
 run_web_demo.sh |  45 ++++++++++++++++++++
 web_demo.py     | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100755 run_web_demo.sh
 create mode 100755 web_demo.py
diff --git a/run_web_demo.sh b/run_web_demo.sh
new file mode 100755
index 0000000..f3f9a8b
--- /dev/null
+++ b/run_web_demo.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+cd "$(dirname "$0")"
+thisDir=$(pwd)
+
+function performInstall() {
+    set -e
+
+    pushd "$thisDir"
+    pip3 install -r requirements.txt
+    pip3 install gradio mdtex2html scipy
+
+    if [[ ! -d flash-attention ]]; then
+        if ! git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention; then
+            echo "Clone flash-attention failed, please install it manually."
+            return 0
+        fi
+    fi
+
+    cd flash-attention &&
+        pip3 install . &&
+        pip3 install csrc/layer_norm &&
+        pip3 install csrc/rotary ||
+        echo "Install flash-attention failed, please install it manually."
+    popd
+}
+
+echo "Starting WebUI..."
+
+if ! python3 web_demo.py; then
+    echo "Run demo failed, install the deps and try again? (y/n)"
+    # auto perform install if in docker
+    if [[ -t 0 ]] && [[ -t 1 ]] && [[ ! -f "/.dockerenv" ]]; then
+        read doInstall
+    else
+        doInstall="y"
+    fi
+
+    if ! [[ "$doInstall" =~ y|Y ]]; then
+        exit 1
+    fi
+
+    echo "Installing deps, and try again..."
+    performInstall && python3 web_demo.py
+fi
diff --git a/web_demo.py b/web_demo.py
new file mode 100755
index 0000000..8b37b02
--- /dev/null
+++ b/web_demo.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+""" Ref: https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py """
+
+from transformers import AutoTokenizer
+import gradio as gr
+import mdtex2html
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import GenerationConfig
+import sys
+
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
+model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) 
+
+if len(sys.argv) > 1 and sys.argv[1] == "--exit":
+    exit(0)
+
+def postprocess(self, y):
+    if y is None:
+        return []
+    for i, (message, response) in enumerate(y):
+        y[i] = (
+            None if message is None else mdtex2html.convert((message)),
+            None if response is None else mdtex2html.convert(response),
+        )
+    return y
+
+
+gr.Chatbot.postprocess = postprocess
+
+
+def parse_text(text):
+    """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
+    lines = text.split("\n")
+    lines = [line for line in lines if line != ""]
+    count = 0
+    for i, line in enumerate(lines):
+        if "```" in line:
+            count += 1
+            items = line.split('`')
+            if count % 2 == 1:
+                lines[i] = f'<pre><code class="language-{items[-1]}">'
+            else:
+                lines[i] = f'<br></code></pre>'
+        else:
+            if i > 0:
+                if count % 2 == 1:
+                    line = line.replace("`", "\`")
+                    line = line.replace("<", "&lt;")
+                    line = line.replace(">", "&gt;")
+                    line = line.replace(" ", "&nbsp;")
+                    line = line.replace("*", "&ast;")
+                    line = line.replace("_", "&lowbar;")
+                    line = line.replace("-", "&#45;")
+                    line = line.replace(".", "&#46;")
+                    line = line.replace("!", "&#33;")
+                    line = line.replace("(", "&#40;")
+                    line = line.replace(")", "&#41;")
+                    line = line.replace("$", "&#36;")
+                lines[i] = "<br>"+line
+    text = "".join(lines)
+    return text
+
+def predict(input, chatbot, history, past_key_values):
+    print('Q: ' + parse_text(input))
+    chatbot.append((parse_text(input), ""))
+    fullResponse = "";
+
+    for response in model.chat(tokenizer, input, history=history, stream=True):
+        chatbot[-1] = (parse_text(input), parse_text(response))
+
+        yield chatbot, history, past_key_values
+        fullResponse = parse_text(response);
+    
+    print("A: " + parse_text(fullResponse))
+
+
+def reset_user_input():
+    return gr.update(value='')
+
+
+def reset_state():
+    return [], [], None
+
+
+with gr.Blocks() as demo:
+    gr.HTML("""<h1 align="center">通义千问 - QwenLM/Qwen-7B</h1>""")
+
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        with gr.Column(scale=4):
+            with gr.Column(scale=12):
+                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
+                    container=False)
+            with gr.Column(min_width=32, scale=1):
+                submitBtn = gr.Button("Submit", variant="primary")
+        with gr.Column(scale=1):
+            emptyBtn = gr.Button("Clear History")
+
+    history = gr.State([])
+    past_key_values = gr.State(None)
+
+    submitBtn.click(predict, [user_input, chatbot, history, past_key_values],
+                    [chatbot, history, past_key_values], show_progress=True)
+    submitBtn.click(reset_user_input, [], [user_input])
+    emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
+
+demo.queue().launch(share=False, inbrowser=True, server_port=80, server_name="0.0.0.0")

From ad66116fe5db6d7777965ccee2c1ed8fa165c896 Mon Sep 17 00:00:00 2001
From: wsl-wy <this@wysaid.org>
Date: Mon, 7 Aug 2023 21:01:42 +0800
Subject: [PATCH 02/10] Fix missing chat history

---
 web_demo.py | 56 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/web_demo.py b/web_demo.py
index 8b37b02..9f812cf 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -9,12 +9,23 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
 import sys
 
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
-model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) 
+tokenizer = AutoTokenizer.from_pretrained(
+    "Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen-7B-Chat",
+    device_map="auto",
+    offload_folder="offload",
+    trust_remote_code=True,
+    resume_download=True,
+).eval()
+model.generation_config = GenerationConfig.from_pretrained(
+    "Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True
+)
 
 if len(sys.argv) > 1 and sys.argv[1] == "--exit":
-    exit(0)
+    sys.exit(0)
+
 
 def postprocess(self, y):
     if y is None:
@@ -58,21 +69,26 @@ def parse_text(text):
                     line = line.replace("(", "&#40;")
                     line = line.replace(")", "&#41;")
                     line = line.replace("$", "&#36;")
-                lines[i] = "<br>"+line
+                lines[i] = "<br>" + line
     text = "".join(lines)
     return text
 
-def predict(input, chatbot, history, past_key_values):
+
+task_history = []
+
+
+def predict(input, chatbot):
     print('Q: ' + parse_text(input))
     chatbot.append((parse_text(input), ""))
-    fullResponse = "";
+    fullResponse = ""
 
-    for response in model.chat(tokenizer, input, history=history, stream=True):
+    for response in model.chat(tokenizer, input, history=task_history, stream=True):
         chatbot[-1] = (parse_text(input), parse_text(response))
 
-        yield chatbot, history, past_key_values
-        fullResponse = parse_text(response);
-    
+        yield chatbot
+        fullResponse = parse_text(response)
+
+    task_history.append((input, fullResponse))
     print("A: " + parse_text(fullResponse))
 
 
@@ -81,7 +97,8 @@ def reset_user_input():
 
 
 def reset_state():
-    return [], [], None
+    task_history = []
+    return []
 
 
 with gr.Blocks() as demo:
@@ -91,19 +108,16 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=4):
             with gr.Column(scale=12):
-                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
-                    container=False)
+                query = gr.Textbox(
+                    show_label=False, placeholder="Input...", lines=10
+                ).style(container=False)
             with gr.Column(min_width=32, scale=1):
                 submitBtn = gr.Button("Submit", variant="primary")
         with gr.Column(scale=1):
             emptyBtn = gr.Button("Clear History")
 
-    history = gr.State([])
-    past_key_values = gr.State(None)
-
-    submitBtn.click(predict, [user_input, chatbot, history, past_key_values],
-                    [chatbot, history, past_key_values], show_progress=True)
-    submitBtn.click(reset_user_input, [], [user_input])
-    emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
+    submitBtn.click(predict, [query, chatbot], [chatbot], show_progress=True)
+    submitBtn.click(reset_user_input, [], [query])
+    emptyBtn.click(reset_state, outputs=[chatbot], show_progress=True)
 
 demo.queue().launch(share=False, inbrowser=True, server_port=80, server_name="0.0.0.0")

From 92c5c47a4c6c63f8528758319c50bf2822f644ed Mon Sep 17 00:00:00 2001
From: wsl-wy <this@wysaid.org>
Date: Tue, 8 Aug 2023 00:45:59 +0800
Subject: [PATCH 03/10] Improve install shell script based on issue comment

---
 run_web_demo.sh | 78 ++++++++++++++++++++++++++++++++++---------------
 web_demo.py     | 41 ++++++++++++++------------
 2 files changed, 77 insertions(+), 42 deletions(-)

diff --git a/run_web_demo.sh b/run_web_demo.sh
index f3f9a8b..17529f3 100755
--- a/run_web_demo.sh
+++ b/run_web_demo.sh
@@ -3,43 +3,73 @@
 cd "$(dirname "$0")"
 thisDir=$(pwd)
 
+export INSTALL_DEPS=false
+export INSTALL_FLASH_ATTN=false
+
+declare -a PASS_THROUGH_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+    -h | --help)
+        echo "Usage: $0 [-h|--help] [--install-deps] [--install-flash-attn]"
+        exit 0
+        ;;
+    --install-deps)
+        export INSTALL_DEPS=true
+        shift
+        ;;
+    --install-flash-attn)
+        export INSTALL_FLASH_ATTN=true
+        shift
+        ;;
+    -)
+        shift
+        PASS_THROUGH_ARGS=($@)
+        break
+        ;;
+
+    *)
+        echo "Unknown option: $1"
+        exit 1
+        ;;
+    esac
+done
+
+echo "INSTALL_DEPS: $INSTALL_DEPS"
+echo "INSTALL_FLASH_ATTN: $INSTALL_FLASH_ATTN"
+echo "PASS_THROUGH_ARGS: ${PASS_THROUGH_ARGS[@]}"
+
 function performInstall() {
-    set -e
 
     pushd "$thisDir"
     pip3 install -r requirements.txt
-    pip3 install gradio mdtex2html scipy
+    pip3 install gradio mdtex2html scipy argparse
 
-    if [[ ! -d flash-attention ]]; then
-        if ! git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention; then
-            echo "Clone flash-attention failed, please install it manually."
-            return 0
+    if $INSTALL_FLASH_ATTN; then
+        if [[ ! -d flash-attention ]]; then
+            if ! git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention; then
+                echo "Clone flash-attention failed, please install it manually."
+                return 0
+            fi
         fi
+
+        cd flash-attention &&
+            pip3 install . &&
+            pip3 install csrc/layer_norm &&
+            pip3 install csrc/rotary ||
+            echo "Install flash-attention failed, please install it manually."
     fi
 
-    cd flash-attention &&
-        pip3 install . &&
-        pip3 install csrc/layer_norm &&
-        pip3 install csrc/rotary ||
-        echo "Install flash-attention failed, please install it manually."
     popd
 }
 
 echo "Starting WebUI..."
 
-if ! python3 web_demo.py; then
-    echo "Run demo failed, install the deps and try again? (y/n)"
-    # auto perform install if in docker
-    if [[ -t 0 ]] && [[ -t 1 ]] && [[ ! -f "/.dockerenv" ]]; then
-        read doInstall
+if ! python3 web_demo.py ${PASS_THROUGH_ARGS[@]}; then
+    if $INSTALL_DEPS; then
+        echo "Installing deps, and try again..."
+        performInstall && python3 web_demo.py ${PASS_THROUGH_ARGS[@]}
     else
-        doInstall="y"
+        echo "Please install deps manually, or use --install-deps to install deps automatically."
     fi
-
-    if ! [[ "$doInstall" =~ y|Y ]]; then
-        exit 1
-    fi
-
-    echo "Installing deps, and try again..."
-    performInstall && python3 web_demo.py
 fi
diff --git a/web_demo.py b/web_demo.py
index 9f812cf..27a12a7 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -7,21 +7,14 @@ import gradio as gr
 import mdtex2html
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
+from argparse import ArgumentParser
 import sys
 
-tokenizer = AutoTokenizer.from_pretrained(
-    "Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True
-)
-model = AutoModelForCausalLM.from_pretrained(
-    "Qwen/Qwen-7B-Chat",
-    device_map="auto",
-    offload_folder="offload",
-    trust_remote_code=True,
-    resume_download=True,
-).eval()
-model.generation_config = GenerationConfig.from_pretrained(
-    "Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True
-)
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
+
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", offload_folder="offload", trust_remote_code=True, resume_download=True).eval()
+
+model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
 
 if len(sys.argv) > 1 and sys.argv[1] == "--exit":
     sys.exit(0)
@@ -82,7 +75,7 @@ def predict(input, chatbot):
     chatbot.append((parse_text(input), ""))
     fullResponse = ""
 
-    for response in model.chat(tokenizer, input, history=task_history, stream=True):
+    for response in model.chat_stream(tokenizer, input, history=task_history):
         chatbot[-1] = (parse_text(input), parse_text(response))
 
         yield chatbot
@@ -108,9 +101,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=4):
             with gr.Column(scale=12):
-                query = gr.Textbox(
-                    show_label=False, placeholder="Input...", lines=10
-                ).style(container=False)
+                query = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
             with gr.Column(min_width=32, scale=1):
                 submitBtn = gr.Button("Submit", variant="primary")
         with gr.Column(scale=1):
@@ -120,4 +111,18 @@ with gr.Blocks() as demo:
     submitBtn.click(reset_user_input, [], [query])
     emptyBtn.click(reset_state, outputs=[chatbot], show_progress=True)
 
-demo.queue().launch(share=False, inbrowser=True, server_port=80, server_name="0.0.0.0")
+if len(sys.argv) > 1:
+
+    print("Call args:" + str(sys.argv))
+    parser = ArgumentParser()
+    parser.add_argument("--share", action="store_true", default=False)
+    parser.add_argument("--inbrowser", action="store_true", default=False)
+    parser.add_argument("--server_port", type=int, default=80)
+    parser.add_argument("--server_name", type=str, default="0.0.0.0")
+    args = parser.parse_args(sys.argv[1:])
+    print("Args:" + str(args))
+
+    print("Args:" + str(args))
+    demo.queue().launch(args)
+else:
+    demo.queue().launch()

From 1c9396dceb15cbe7e8210927b8aef83511626861 Mon Sep 17 00:00:00 2001
From: wsl-wy <this@wysaid.org>
Date: Tue, 8 Aug 2023 01:49:02 +0800
Subject: [PATCH 04/10] remove offload_folder to match cli_demo

---
 web_demo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web_demo.py b/web_demo.py
index 27a12a7..3bf485b 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -12,7 +12,7 @@ import sys
 
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
 
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", offload_folder="offload", trust_remote_code=True, resume_download=True).eval()
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, resume_download=True).eval()
 
 model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
 

From 164aabd0b90895ebf9a34da429ec8689552caa89 Mon Sep 17 00:00:00 2001
From: wsl-wy <this@wysaid.org>
Date: Tue, 8 Aug 2023 02:38:29 +0800
Subject: [PATCH 05/10] Improve argument parsing

---
 web_demo.py | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/web_demo.py b/web_demo.py
index 3bf485b..2a9c29b 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -10,14 +10,37 @@ from transformers.generation import GenerationConfig
 from argparse import ArgumentParser
 import sys
 
+print("Call args:" + str(sys.argv))
+parser = ArgumentParser()
+parser.add_argument("--share", action="store_true", default=False)
+parser.add_argument("--inbrowser", action="store_true", default=False)
+parser.add_argument("--server_port", type=int, default=80)
+parser.add_argument("--server_name", type=str, default="0.0.0.0")
+parser.add_argument("--exit", action="store_true", default=False)
+parser.add_argument("--model_revision", type=str, default="")
+args = parser.parse_args(sys.argv[1:])
+print("Args:" + str(args))
+
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
 
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, resume_download=True).eval()
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen-7B-Chat",
+    device_map="auto",
+    trust_remote_code=True,
+    resume_download=True,
+    **{"revision": args.model_revision} if args.model_revision is not None and args.model_revision != "" else {},
+).eval()
 
 model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
 
-if len(sys.argv) > 1 and sys.argv[1] == "--exit":
-    sys.exit(0)
+if 'exit' in args:
+    if args.exit:
+        sys.exit(0)
+    else:
+        del args.exit
+
+if 'model_revision' in args:
+    del args.model_revision
 
 
 def postprocess(self, y):
@@ -112,17 +135,6 @@ with gr.Blocks() as demo:
     emptyBtn.click(reset_state, outputs=[chatbot], show_progress=True)
 
 if len(sys.argv) > 1:
-
-    print("Call args:" + str(sys.argv))
-    parser = ArgumentParser()
-    parser.add_argument("--share", action="store_true", default=False)
-    parser.add_argument("--inbrowser", action="store_true", default=False)
-    parser.add_argument("--server_port", type=int, default=80)
-    parser.add_argument("--server_name", type=str, default="0.0.0.0")
-    args = parser.parse_args(sys.argv[1:])
-    print("Args:" + str(args))
-
-    print("Args:" + str(args))
-    demo.queue().launch(args)
+    demo.queue().launch(**vars(args))
 else:
     demo.queue().launch()

From 4a47bf559d56e6685b7c916df7cf5e1987347f9d Mon Sep 17 00:00:00 2001
From: wsl-wy <this@wysaid.org>
Date: Tue, 8 Aug 2023 02:52:43 +0800
Subject: [PATCH 06/10] Support passing 'None' as model_revision to use the
 default

---
 web_demo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web_demo.py b/web_demo.py
index 2a9c29b..32a1529 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -28,7 +28,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     trust_remote_code=True,
     resume_download=True,
-    **{"revision": args.model_revision} if args.model_revision is not None and args.model_revision != "" else {},
+    **{"revision": args.model_revision} if args.model_revision is not None and args.model_revision != "" and args.model_revision != "None" else {},
 ).eval()
 
 model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)

From 658e0acc4a559dc9260cab36feb06d84fed311ac Mon Sep 17 00:00:00 2001
From: Junyang Lin <justinlin930319@hotmail.com>
Date: Tue, 8 Aug 2023 17:27:38 +0800
Subject: [PATCH 07/10] Update web_demo.py

---
 web_demo.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/web_demo.py b/web_demo.py
index 32a1529..dd52bd3 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 
-""" Ref: https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py """
-
 from transformers import AutoTokenizer
 import gradio as gr
 import mdtex2html
@@ -58,7 +56,6 @@ gr.Chatbot.postprocess = postprocess
 
 
 def parse_text(text):
-    """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
     lines = text.split("\n")
     lines = [line for line in lines if line != ""]
     count = 0

From 5a8ce292d05871e5da6f6c133879078c7caed7db Mon Sep 17 00:00:00 2001
From: Junyang Lin <justinlin930319@hotmail.com>
Date: Tue, 8 Aug 2023 21:15:13 +0800
Subject: [PATCH 08/10] Delete run_web_demo.sh

---
 run_web_demo.sh | 75 -------------------------------------------------
 1 file changed, 75 deletions(-)
 delete mode 100755 run_web_demo.sh

diff --git a/run_web_demo.sh b/run_web_demo.sh
deleted file mode 100755
index 17529f3..0000000
--- a/run_web_demo.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-
-cd "$(dirname "$0")"
-thisDir=$(pwd)
-
-export INSTALL_DEPS=false
-export INSTALL_FLASH_ATTN=false
-
-declare -a PASS_THROUGH_ARGS=()
-
-while [[ $# -gt 0 ]]; do
-    case "$1" in
-    -h | --help)
-        echo "Usage: $0 [-h|--help] [--install-deps] [--install-flash-attn]"
-        exit 0
-        ;;
-    --install-deps)
-        export INSTALL_DEPS=true
-        shift
-        ;;
-    --install-flash-attn)
-        export INSTALL_FLASH_ATTN=true
-        shift
-        ;;
-    -)
-        shift
-        PASS_THROUGH_ARGS=($@)
-        break
-        ;;
-
-    *)
-        echo "Unknown option: $1"
-        exit 1
-        ;;
-    esac
-done
-
-echo "INSTALL_DEPS: $INSTALL_DEPS"
-echo "INSTALL_FLASH_ATTN: $INSTALL_FLASH_ATTN"
-echo "PASS_THROUGH_ARGS: ${PASS_THROUGH_ARGS[@]}"
-
-function performInstall() {
-
-    pushd "$thisDir"
-    pip3 install -r requirements.txt
-    pip3 install gradio mdtex2html scipy argparse
-
-    if $INSTALL_FLASH_ATTN; then
-        if [[ ! -d flash-attention ]]; then
-            if ! git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention; then
-                echo "Clone flash-attention failed, please install it manually."
-                return 0
-            fi
-        fi
-
-        cd flash-attention &&
-            pip3 install . &&
-            pip3 install csrc/layer_norm &&
-            pip3 install csrc/rotary ||
-            echo "Install flash-attention failed, please install it manually."
-    fi
-
-    popd
-}
-
-echo "Starting WebUI..."
-
-if ! python3 web_demo.py ${PASS_THROUGH_ARGS[@]}; then
-    if $INSTALL_DEPS; then
-        echo "Installing deps, and try again..."
-        performInstall && python3 web_demo.py ${PASS_THROUGH_ARGS[@]}
-    else
-        echo "Please install deps manually, or use --install-deps to install deps automatically."
-    fi
-fi

From c612df154a2c39d09af8a8b1fa97a65d8ec512e5 Mon Sep 17 00:00:00 2001
From: Junyang Lin <justinlin930319@hotmail.com>
Date: Tue, 8 Aug 2023 21:17:04 +0800
Subject: [PATCH 09/10] Update web_demo.py

---
 web_demo.py | 76 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 25 deletions(-)

diff --git a/web_demo.py b/web_demo.py
index dd52bd3..66ce079 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -19,25 +19,33 @@ parser.add_argument("--model_revision", type=str, default="")
 args = parser.parse_args(sys.argv[1:])
 print("Args:" + str(args))
 
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
+tokenizer = AutoTokenizer.from_pretrained(
+    "Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True
+)
 
 model = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen-7B-Chat",
     device_map="auto",
     trust_remote_code=True,
     resume_download=True,
-    **{"revision": args.model_revision} if args.model_revision is not None and args.model_revision != "" and args.model_revision != "None" else {},
+    **{"revision": args.model_revision}
+    if args.model_revision is not None
+    and args.model_revision != ""
+    and args.model_revision != "None"
+    else {},
 ).eval()
 
-model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True)
+model.generation_config = GenerationConfig.from_pretrained(
+    "Qwen/Qwen-7B-Chat", trust_remote_code=True, resume_download=True
+)
 
-if 'exit' in args:
+if "exit" in args:
     if args.exit:
         sys.exit(0)
     else:
         del args.exit
 
-if 'model_revision' in args:
+if "model_revision" in args:
     del args.model_revision
 
 
@@ -62,11 +70,11 @@ def parse_text(text):
     for i, line in enumerate(lines):
         if "```" in line:
             count += 1
-            items = line.split('`')
+            items = line.split("`")
             if count % 2 == 1:
                 lines[i] = f'<pre><code class="language-{items[-1]}">'
             else:
-                lines[i] = f'<br></code></pre>'
+                lines[i] = f"<br></code></pre>"
         else:
             if i > 0:
                 if count % 2 == 1:
@@ -90,46 +98,64 @@ def parse_text(text):
 task_history = []
 
 
-def predict(input, chatbot):
-    print('Q: ' + parse_text(input))
-    chatbot.append((parse_text(input), ""))
+def predict(query, chatbot):
+    print("User: " + parse_text(query))
+    chatbot.append((parse_text(query), ""))
     fullResponse = ""
 
-    for response in model.chat_stream(tokenizer, input, history=task_history):
-        chatbot[-1] = (parse_text(input), parse_text(response))
+    for response in model.chat_stream(tokenizer, query, history=task_history):
+        chatbot[-1] = (parse_text(query), parse_text(response))
 
         yield chatbot
         fullResponse = parse_text(response)
 
-    task_history.append((input, fullResponse))
-    print("A: " + parse_text(fullResponse))
+    task_history.append((query, fullResponse))
+    print("Qwen-7B-Chat: " + parse_text(fullResponse))
+    
+
+# Temporarily chat_stream does not support sampling, and thus regenerate does not work for now.
+def regenerate(chatbot):
+    if not task_history:
+        yield chatbot
+        return
+    item = task_history.pop(-1)
+    chatbot.pop(-1)
+    yield from predict(item[0], chatbot)
 
 
 def reset_user_input():
-    return gr.update(value='')
+    return gr.update(value="")
 
 
 def reset_state():
-    task_history = []
+    task_history.clear()
     return []
 
 
 with gr.Blocks() as demo:
-    gr.HTML("""<h1 align="center">通义千问 - QwenLM/Qwen-7B</h1>""")
+    gr.Markdown("""<p align="center"><img src="https://modelscope.cn/api/v1/models/qwen/Qwen-7B-Chat/repo?Revision=master&FilePath=assets/logo.jpeg&View=true" style="height: 80px"/><p>""")
+    gr.Markdown("""<center><font size=8>Qwen-7B-Chat Bot</center>""")
+    gr.Markdown(
+        """<center><font size=3>This WebUI is based on Qwen-7B-Chat, developed by Alibaba Cloud. (本WebUI基于Qwen-7B-Chat打造，实现聊天机器人功能。)</center>"""
+    )
+    gr.Markdown(
+        """<center><font size=4>Qwen-7B <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖 <a> | <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a>&nbsp ｜ Qwen-7B-Chat <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖 <a>| <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a>&nbsp ｜ &nbsp<a href="https://github.com/QwenLM/Qwen-7B/blob/main/tech_memo.md">Report</a></center>"""
+    )
+    
+    chatbot = gr.Chatbot(lines=10, label='Qwen-7B-Chat', elem_classes="control-height")
+    query = gr.Textbox(lines=2, label='Input')
 
-    chatbot = gr.Chatbot()
     with gr.Row():
-        with gr.Column(scale=4):
-            with gr.Column(scale=12):
-                query = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
-            with gr.Column(min_width=32, scale=1):
-                submitBtn = gr.Button("Submit", variant="primary")
-        with gr.Column(scale=1):
-            emptyBtn = gr.Button("Clear History")
+        emptyBtn = gr.Button("🧹 Clear History (清除历史对话)")
+        submitBtn = gr.Button("🚀 Submit (发送)")
 
     submitBtn.click(predict, [query, chatbot], [chatbot], show_progress=True)
     submitBtn.click(reset_user_input, [], [query])
     emptyBtn.click(reset_state, outputs=[chatbot], show_progress=True)
+    
+    gr.Markdown(
+        """<font size=2>Note: This demo is governed by the original license of Qwen-7B. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, including hate speech, violence, pornography, deception, etc. (注：本演示受Qwen-7B的许可协议限制。我们强烈建议，用户不应传播及不应允许他人传播以下内容，包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。)"""
+    )
 
 if len(sys.argv) > 1:
     demo.queue().launch(**vars(args))

From 18cc7e5897c388d8e0c2f54496072259d1f7f6cd Mon Sep 17 00:00:00 2001
From: Junyang Lin <justinlin930319@hotmail.com>
Date: Tue, 8 Aug 2023 23:01:53 +0800
Subject: [PATCH 10/10] add regenerate button

---
 web_demo.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/web_demo.py b/web_demo.py
index 66ce079..e5f15a1 100755
--- a/web_demo.py
+++ b/web_demo.py
@@ -111,9 +111,8 @@ def predict(query, chatbot):
 
     task_history.append((query, fullResponse))
     print("Qwen-7B-Chat: " + parse_text(fullResponse))
-    
 
-# Temporarily chat_stream does not support sampling, and thus regenerate does not work for now.
+
 def regenerate(chatbot):
     if not task_history:
         yield chatbot
@@ -139,20 +138,22 @@ with gr.Blocks() as demo:
         """<center><font size=3>This WebUI is based on Qwen-7B-Chat, developed by Alibaba Cloud. (本WebUI基于Qwen-7B-Chat打造，实现聊天机器人功能。)</center>"""
     )
     gr.Markdown(
-        """<center><font size=4>Qwen-7B <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖 <a> | <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a>&nbsp ｜ Qwen-7B-Chat <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖 <a>| <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a>&nbsp ｜ &nbsp<a href="https://github.com/QwenLM/Qwen-7B/blob/main/tech_memo.md">Report</a></center>"""
+        """<center><font size=4>Qwen-7B <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖 <a> | <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a>&nbsp ｜ Qwen-7B-Chat <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖 <a>| <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a>&nbsp ｜ &nbsp<a href="https://github.com/QwenLM/Qwen-7B">Github</a></center>"""
     )
-    
+
     chatbot = gr.Chatbot(lines=10, label='Qwen-7B-Chat', elem_classes="control-height")
     query = gr.Textbox(lines=2, label='Input')
 
     with gr.Row():
-        emptyBtn = gr.Button("🧹 Clear History (清除历史对话)")
+        emptyBtn = gr.Button("🧹 Clear History (清除历史)")
         submitBtn = gr.Button("🚀 Submit (发送)")
+        regenBtn = gr.Button("🤔️ Regenerate (重试)")
 
     submitBtn.click(predict, [query, chatbot], [chatbot], show_progress=True)
     submitBtn.click(reset_user_input, [], [query])
     emptyBtn.click(reset_state, outputs=[chatbot], show_progress=True)
-    
+    regenBtn.click(regenerate, [chatbot], [chatbot], show_progress=True)
+
     gr.Markdown(
         """<font size=2>Note: This demo is governed by the original license of Qwen-7B. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, including hate speech, violence, pornography, deception, etc. (注：本演示受Qwen-7B的许可协议限制。我们强烈建议，用户不应传播及不应允许他人传播以下内容，包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。)"""
     )