add modelscope links for int8 models

This commit is contained in:
lukeming.lkm
2023-11-06 11:47:32 +08:00
parent c00209f932
commit 845dc08474
4 changed files with 72 additions and 36 deletions

View File

@@ -17,8 +17,8 @@
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
@@ -205,10 +205,10 @@ from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
# Downloading model checkpoint to a local dir model_dir
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
# model_dir = snapshot_download('qwen/Qwen-7B')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
# model_dir = snapshot_download('qwen/Qwen-14B')
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
# Loading local checkpoints
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
@@ -229,9 +229,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
response, history = model.chat(tokenizer, "你好", history=None)
print(response)