mirror of
https://github.com/QwenLM/Qwen.git
synced 2026-05-20 08:25:47 +08:00
add modelscope links for int8 models
This commit is contained in:
39
README.md
39
README.md
@@ -17,8 +17,8 @@
|
||||
|
||||
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|
||||
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
|
||||
|
||||
|
||||
@@ -205,10 +205,10 @@ from modelscope import snapshot_download
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# Downloading model checkpoint to a local dir model_dir
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
|
||||
|
||||
# Loading local checkpoints
|
||||
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
|
||||
@@ -229,9 +229,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
|
||||
from modelscope import GenerationConfig
|
||||
|
||||
# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
|
||||
response, history = model.chat(tokenizer, "你好", history=None)
|
||||
print(response)
|
||||
@@ -365,6 +365,11 @@ We illustrate the model performance of both BF16, Int8 and Int4 models on the be
|
||||
| Qwen-14B-Chat (Int4) | 63.3 | 69.0 | 59.8 | 45.7 |
|
||||
|
||||
### Quantization of KV cache
|
||||
|
||||
> NOTE: Please be aware that due to the internal mechanism of Hugging Face, the support files for this functionality
|
||||
> (i.e., `cache_autogptq_cuda_256.cpp` and `cache_autogptq_cuda_kernel_256.cu`) may be missing. Please manually download
|
||||
> them from the Hugging Face Hub and place them into the same folder as the other module files.
|
||||
|
||||
The attention KV cache can be quantized and compressed for storage to achieve a higher sample throughput. The parameters `use_cache_quantization` and `use_cache_kernel` are provided to control the KV cache quantization behavior.
|
||||
When use_cache_quantization=True and use_cache_kernel=True, kv-cache-quantization will be enabled.
|
||||
The specific use method is as follows:
|
||||
@@ -610,6 +615,9 @@ sh finetune/finetune_qlora_ds.sh
|
||||
|
||||
For Q-LoRA, we advise you to load our provided quantized model, e.g., Qwen-7B-Chat-Int4. You **SHOULD NOT** use the bf16 models. Different from full-parameter finetuning and LoRA, only fp16 is supported for Q-LoRA. For single-GPU training, we have to use DeepSpeed for mixed-precision training because we observed errors caused by torch amp. Besides, the issues with special tokens described for LoRA also apply to Q-LoRA. However, since we only provide Int4 models for the chat models, which have already learned the special tokens of the ChatML format, you do not need to worry about these layers. Note that the layers of the Int4 model should not be trainable, and thus if you introduce new special tokens in your training, Q-LoRA might not work.
|
||||
|
||||
> NOTE: Please be aware that due to the internal mechanisms of Hugging Face, certain non-Python files (e.g., `*.cpp` and `*.cu`)
|
||||
> may be missing from the saved checkpoint. You may need to manually copy them to the directory containing other files.
|
||||
|
||||
Different from full-parameter finetuning, the training of both LoRA and Q-LoRA only saves the adapter parameters. Suppose your training starts from Qwen-7B, you can load the finetuned model for inference as shown below:
|
||||
|
||||
```python
|
||||
@@ -639,6 +647,19 @@ merged_model = model.merge_and_unload()
|
||||
merged_model.save_pretrained(new_model_directory, max_shard_size="2048MB", safe_serialization=True)
|
||||
```
|
||||
|
||||
The `new_model_directory` directory will contain the merged model weights and module files. Please note that `*.cu` and `*.cpp` files may be missing from the saved files. If you wish to use the KV cache functionality, please copy them manually. Besides, the tokenizer files are not saved in the new directory in this step. You can copy the tokenizer files or use the following code:
|
||||
```python
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
path_to_adapter, # path to the output directory
|
||||
trust_remote_code=True
|
||||
)
|
||||
|
||||
tokenizer.save_pretrained(new_model_directory)
|
||||
```
|
||||
|
||||
|
||||
Note: For multi-GPU training, you need to specify the proper hyperparameters for distributed training based on your machine. Besides, we advise you to specify your maximum sequence length with the argument `--model_max_length`, based on your consideration of data, memory footprint, and training speed.
|
||||
|
||||
|
||||
|
||||
33
README_CN.md
33
README_CN.md
@@ -17,8 +17,8 @@
|
||||
|
||||
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|
||||
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
|
||||
我们开源了**Qwen**（通义千问）系列工作，当前开源模型的参数规模为70亿（7B）和140亿（14B）。本次开源包括基础模型**Qwen**，即**Qwen-7B**和**Qwen-14B**，以及对话模型**Qwen-Chat**，即**Qwen-7B-Chat**和**Qwen-14B-Chat**。模型链接在表格中，请点击了解详情。同时，我们公开了我们的<b><a href="https://arxiv.org/abs/2309.16609">技术报告</a></b>，请点击上方论文链接查看。
|
||||
|
||||
@@ -196,10 +196,10 @@ from modelscope import snapshot_download
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# Downloading model checkpoint to a local dir model_dir
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
|
||||
|
||||
# Loading local checkpoints
|
||||
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
|
||||
@@ -220,9 +220,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
|
||||
from modelscope import GenerationConfig
|
||||
|
||||
# 可选的模型包括: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
|
||||
response, history = model.chat(tokenizer, "你好", history=None)
|
||||
print(response)
|
||||
@@ -360,6 +360,8 @@ response, history = model.chat(tokenizer, "Hi", history=None)
|
||||
|
||||
### KV cache้ๅ
|
||||
|
||||
> ๆณจๆ๏ผ็ฑไบHugging Face็ๅ
้จๅฎ็ฐ๏ผๆฌๅ่ฝ็ๆฏๆๆไปถ`cache_autogptq_cuda_356.cpp`ไธ`cache_autogptq_cuda_kernel_245.cu`ๅฏ่ฝๆฒก่ขซไธ่ฝฝใๅฆ้ๅผๅฏไฝฟ็จ๏ผ่ฏทๆๅจไป็ธๅ
ณไฝ็ฝฎไธ่ฝฝ๏ผๅนถๆพ็ฝฎๅฐ็ธๅบๆไปถไธญใ
|
||||
|
||||
ๅจๆจกๅinferๆถ๏ผๅฏไปฅๅฐไธญ้ด็ปๆkeyไปฅๅvalue็ๅผ้ๅๅๅ็ผฉๅญๅจ๏ผ่ฟๆ ทไพฟๅฏไปฅๅจ็ธๅ็ๅกไธๅญๅจๆดๅค็keyไปฅๅvalue๏ผๅขๅ ๆ ทๆฌๅๅใ
|
||||
|
||||
ๆไพuse_cache_quantizationไปฅๅuse_cache_kernelไธคไธชๅๆฐๅฏนๆจกๅๆงๅถ๏ผๅฝuse_cache_quantizationไปฅๅuse_cache_kernelๅๅผๅฏๆถ๏ผๅฐๅฏๅจkv-cache้ๅ็ๅ่ฝใๅ
ทไฝไฝฟ็จๅฆไธ๏ผ
|
||||
@@ -594,6 +596,8 @@ sh finetune/finetune_qlora_ds.sh
|
||||
|
||||
ๆไปฌๅปบ่ฎฎไฝ ไฝฟ็จๆไปฌๆไพ็Int4้ๅๆจกๅ่ฟ่ก่ฎญ็ป๏ผๅณQwen-7B-Chat-Int4ใ่ฏท**ไธ่ฆไฝฟ็จ**้้ๅๆจกๅ๏ผไธๅ
จๅๆฐๅพฎ่ฐไปฅๅLoRAไธๅ๏ผQ-LoRAไป
ๆฏๆfp16ใๆณจๆ๏ผ็ฑไบๆไปฌๅ็ฐtorch ampๆฏๆ็fp16ๆททๅ็ฒพๅบฆ่ฎญ็ปๅญๅจ้ฎ้ข๏ผๅ ๆญคๅฝๅ็ๅๅก่ฎญ็ปQ-LoRAๅฟ
้กปไฝฟ็จDeepSpeedใๆญคๅค๏ผไธ่ฟฐLoRAๅ
ณไบ็นๆฎtoken็้ฎ้ขๅจQ-LoRAไพ็ถๅญๅจใๅนถไธ๏ผInt4ๆจกๅ็ๅๆฐๆ ๆณ่ขซ่ฎพไธบๅฏ่ฎญ็ป็ๅๆฐใๆๅนธ็ๆฏ๏ผๆไปฌๅชๆไพไบChatๆจกๅ็Int4ๆจกๅ๏ผๅ ๆญคไฝ ไธ็จๆ
ๅฟ่ฟไธช้ฎ้ขใไฝๆฏ๏ผๅฆๆไฝ ๆงๆ่ฆๅจQ-LoRAไธญๅผๅ
ฅๆฐ็็นๆฎtoken๏ผๅพๆฑๆญ๏ผๆไปฌๆ ๆณไฟ่ฏไฝ ่ฝๆๅ่ฎญ็ปใ
|
||||
|
||||
> ๆณจๆ๏ผ็ฑไบHugging Face็ๅ
้จๅฎ็ฐ๏ผๆจกๅๅจไฟๅญๆถ๏ผไธไบ้Pythonๆไปถๆชไฟๅญ๏ผไพๅฆ`*.cpp`ไธ`*.cu`๏ผ๏ผๅฆ้่ฆๆฏๆ็ธๅ
ณๅ่ฝ๏ผ่ฏทๆๅจๅคๅถๆๅ
ณๆไปถใ
|
||||
|
||||
ไธๅ
จๅๆฐๅพฎ่ฐไธๅ๏ผLoRAๅQ-LoRA็่ฎญ็ปๅช้ๅญๅจadapter้จๅ็ๅๆฐใๅๅฆไฝ ้่ฆไฝฟ็จLoRA่ฎญ็ปๅ็ๆจกๅ๏ผไฝ ้่ฆไฝฟ็จๅฆไธๆนๆณใๅ่ฎพไฝ ไฝฟ็จQwen-7B่ฎญ็ปๆจกๅ๏ผไฝ ๅฏไปฅ็จๅฆไธไปฃ็ ่ฏปๅๆจกๅ๏ผ
|
||||
|
||||
```python
|
||||
@@ -623,6 +627,17 @@ merged_model = model.merge_and_unload()
|
||||
merged_model.save_pretrained(new_model_directory, max_shard_size="2048MB", safe_serialization=True)
|
||||
```
|
||||
|
||||
`new_model_directory`็ฎๅฝๅฐๅ
ๅซๅๅนถๅ็ๆจกๅๅๆฐไธ็ธๅ
ณๆจกๅไปฃ็ ใ่ฏทๆณจๆ`*.cu`ๅ`*.cpp`ๆไปถๅฏ่ฝๆฒก่ขซไฟๅญ๏ผ่ฏทๆๅจๅคๅถใๅฆๅค๏ผ`merge_and_unload`ไป
ไฟๅญๆจกๅ๏ผๅนถๆชไฟๅญtokenizer๏ผๅฆๆ้่ฆ๏ผ่ฏทๅคๅถ็ธๅ
ณๆไปถๆไฝฟ็จไปฅไปฅไธไปฃ็ ไฟๅญ
|
||||
```python
|
||||
from transformers import AutoTokenizer
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
path_to_adapter, # path to the output directory
|
||||
trust_remote_code=True
|
||||
)
|
||||
tokenizer.save_pretrained(new_model_directory)
|
||||
```
|
||||
|
||||
|
||||
注意：分布式训练需要根据你的需求和机器指定正确的分布式训练超参数。此外，你需要根据你的数据、显存情况和训练速度预期，使用`--model_max_length`设定你的数据长度。
|
||||
|
||||
### 显存占用及训练速度
|
||||
|
||||
18
README_FR.md
18
README_FR.md
@@ -17,8 +17,8 @@
|
||||
|
||||
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|
||||
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
|
||||
|
||||
|
||||
@@ -205,10 +205,10 @@ from modelscope import snapshot_download
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# Downloading model checkpoint to a local dir model_dir
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
|
||||
|
||||
# Loading local checkpoints
|
||||
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
|
||||
@@ -229,9 +229,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
|
||||
from modelscope import GenerationConfig
|
||||
|
||||
# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
|
||||
response, history = model.chat(tokenizer, "你好", history=None)
|
||||
print(response)
|
||||
|
||||
18
README_JA.md
18
README_JA.md
@@ -22,8 +22,8 @@
|
||||
|
||||
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|
||||
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-7B">🤗</a> |
|
||||
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">🤗</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">🤖</a> <a href="https://huggingface.co/Qwen/Qwen-14B">🤗</a> |
|
||||
|
||||
|
||||
|
||||
@@ -200,10 +200,10 @@ from modelscope import snapshot_download
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# Downloading model checkpoint to a local dir model_dir
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B')
|
||||
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
|
||||
# model_dir = snapshot_download('qwen/Qwen-14B')
|
||||
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
|
||||
|
||||
# Loading local checkpoints
|
||||
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
|
||||
@@ -224,9 +224,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
|
||||
from modelscope import GenerationConfig
|
||||
|
||||
# Model names："Qwen/Qwen-7B-Chat"、"Qwen/Qwen-14B-Chat"
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
|
||||
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
|
||||
|
||||
response, history = model.chat(tokenizer, "你好", history=None)
|
||||
print(response)
|
||||
|
||||
Reference in New Issue
Block a user