Merge pull request #583 from QwenLM/update_readme_1106

add modelscope links for int8 models
This commit is contained in:
Yang An
2023-11-06 11:58:52 +08:00
committed by GitHub
4 changed files with 72 additions and 36 deletions

View File

@@ -17,8 +17,8 @@
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
@@ -205,10 +205,10 @@ from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
# Downloading model checkpoint to a local dir model_dir
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
# model_dir = snapshot_download('qwen/Qwen-7B')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
# model_dir = snapshot_download('qwen/Qwen-14B')
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
# Loading local checkpoints
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
@@ -229,9 +229,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
response, history = model.chat(tokenizer, "ไฝ ๅฅฝ", history=None)
print(response)
@@ -365,6 +365,11 @@ We illustrate the model performance of both BF16, Int8 and Int4 models on the be
| Qwen-14B-Chat (Int4) | 63.3 | 69.0 | 59.8 | 45.7 |
### Quantization of KV cache
> NOTE: Please be aware that due to the internal mechanism of Hugging Face, the support files for this functionality
> (i.e., `cache_autogptq_cuda_256.cpp` and `cache_autogptq_cuda_kernel_256.cu`) may be missing. Please manually download
> them from the Hugging Face Hub and place them into the same folder as the other module files.
Attention KV cache can be quantized and compressed for storage to achieve higher sample throughput. The parameters `use_cache_quantization` and `use_cache_kernel` control the KV-cache quantization behavior.
When use_cache_quantization=True and use_cache_kernel=True, kv-cache-quantization will be enabled.
The specific use method is as follows:
@@ -610,6 +615,9 @@ sh finetune/finetune_qlora_ds.sh
For Q-LoRA, we advise you to load our provided quantized model, e.g., Qwen-7B-Chat-Int4. You **SHOULD NOT** use the bf16 models. Unlike full-parameter finetuning and LoRA, Q-LoRA supports only fp16. For single-GPU training, we have to use DeepSpeed for mixed-precision training because we observed errors caused by torch amp. Besides, the issues with the special tokens in LoRA still exist for Q-LoRA. However, since we only provide Int4 versions of the chat models, which have already learned the special tokens of the ChatML format, you do not need to worry about the layers. Note that the layers of the Int4 model should not be trainable, and thus if you introduce new special tokens in your training, Q-LoRA might not work.
> NOTE: Please be aware that due to the internal mechanisms of Hugging Face, certain non-Python files (e.g., `*.cpp` and `*.cu`)
> may be missing from the saved checkpoint. You may need to manually copy them to the directory containing other files.
Unlike full-parameter finetuning, the training of both LoRA and Q-LoRA only saves the adapter parameters. Supposing your training starts from Qwen-7B, you can load the finetuned model for inference as shown below:
```python
@@ -639,6 +647,19 @@ merged_model = model.merge_and_unload()
merged_model.save_pretrained(new_model_directory, max_shard_size="2048MB", safe_serialization=True)
```
The `new_model_directory` directory will contain the merged model weights and module files. Please note that `*.cu` and `*.cpp` files may be missing in the saved files. If you wish to use the KV cache functionality, please manually copy them. Besides, the tokenizer files are not saved in the new directory in this step. You can copy the tokenizer files or use the following code:
```python
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
path_to_adapter, # path to the output directory
trust_remote_code=True
)
tokenizer.save_pretrained(new_model_directory)
```
Note: For multi-GPU training, you need to specify the proper hyperparameters for distributed training based on your machine. Besides, we advise you to specify your maximum sequence length with the argument `--model_max_length`, based on your consideration of data, memory footprint, and training speed.

View File

@@ -17,8 +17,8 @@
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
我们开源了**Qwen**（通义千问）系列工作，当前开源模型的参数规模为70亿（7B）和140亿（14B）。本次开源包括基础模型**Qwen**，即**Qwen-7B**和**Qwen-14B**，以及对话模型**Qwen-Chat**，即**Qwen-7B-Chat**和**Qwen-14B-Chat**。模型链接在表格中，请点击了解详情。同时，我们公开了我们的<b><a href="https://arxiv.org/abs/2309.16609">技术报告</a></b>，请点击上方论文链接查看。
@@ -196,10 +196,10 @@ from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
# Downloading model checkpoint to a local dir model_dir
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
# model_dir = snapshot_download('qwen/Qwen-7B')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
# model_dir = snapshot_download('qwen/Qwen-14B')
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
# Loading local checkpoints
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
@@ -220,9 +220,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
# ๅฏ้€‰็š„ๆจกๅž‹ๅŒ…ๆ‹ฌ: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
response, history = model.chat(tokenizer, "ไฝ ๅฅฝ", history=None)
print(response)
@@ -360,6 +360,8 @@ response, history = model.chat(tokenizer, "Hi", history=None)
### KV cache้‡ๅŒ–
> ๆณจๆ„๏ผš็”ฑไบŽHugging Face็š„ๅ†…้ƒจๅฎž็Žฐ๏ผŒๆœฌๅŠŸ่ƒฝ็š„ๆ”ฏๆŒๆ–‡ไปถ`cache_autogptq_cuda_356.cpp`ไธŽ`cache_autogptq_cuda_kernel_245.cu`ๅฏ่ƒฝๆฒก่ขซไธ‹่ฝฝใ€‚ๅฆ‚้œ€ๅผ€ๅฏไฝฟ็”จ๏ผŒ่ฏทๆ‰‹ๅŠจไปŽ็›ธๅ…ณไฝ็ฝฎไธ‹่ฝฝ๏ผŒๅนถๆ”พ็ฝฎๅˆฐ็›ธๅบ”ๆ–‡ไปถไธญใ€‚
ๅœจๆจกๅž‹inferๆ—ถ๏ผŒๅฏไปฅๅฐ†ไธญ้—ด็ป“ๆžœkeyไปฅๅŠvalue็š„ๅ€ผ้‡ๅŒ–ๅŽๅŽ‹็ผฉๅญ˜ๅ‚จ๏ผŒ่ฟ™ๆ ทไพฟๅฏไปฅๅœจ็›ธๅŒ็š„ๅกไธŠๅญ˜ๅ‚จๆ›ดๅคš็š„keyไปฅๅŠvalue๏ผŒๅขžๅŠ ๆ ทๆœฌๅžๅใ€‚
ๆไพ›use_cache_quantizationไปฅๅŠuse_cache_kernelไธคไธชๅ‚ๆ•ฐๅฏนๆจกๅž‹ๆŽงๅˆถ๏ผŒๅฝ“use_cache_quantizationไปฅๅŠuse_cache_kernelๅ‡ๅผ€ๅฏๆ—ถ๏ผŒๅฐ†ๅฏๅŠจkv-cache้‡ๅŒ–็š„ๅŠŸ่ƒฝใ€‚ๅ…ทไฝ“ไฝฟ็”จๅฆ‚ไธ‹๏ผš
@@ -594,6 +596,8 @@ sh finetune/finetune_qlora_ds.sh
ๆˆ‘ไปฌๅปบ่ฎฎไฝ ไฝฟ็”จๆˆ‘ไปฌๆไพ›็š„Int4้‡ๅŒ–ๆจกๅž‹่ฟ›่กŒ่ฎญ็ปƒ๏ผŒๅณQwen-7B-Chat-Int4ใ€‚่ฏท**ไธ่ฆไฝฟ็”จ**้ž้‡ๅŒ–ๆจกๅž‹๏ผไธŽๅ…จๅ‚ๆ•ฐๅพฎ่ฐƒไปฅๅŠLoRAไธๅŒ๏ผŒQ-LoRAไป…ๆ”ฏๆŒfp16ใ€‚ๆณจๆ„๏ผŒ็”ฑไบŽๆˆ‘ไปฌๅ‘็Žฐtorch ampๆ”ฏๆŒ็š„fp16ๆททๅˆ็ฒพๅบฆ่ฎญ็ปƒๅญ˜ๅœจ้—ฎ้ข˜๏ผŒๅ› ๆญคๅฝ“ๅ‰็š„ๅ•ๅก่ฎญ็ปƒQ-LoRAๅฟ…้กปไฝฟ็”จDeepSpeedใ€‚ๆญคๅค–๏ผŒไธŠ่ฟฐLoRAๅ…ณไบŽ็‰นๆฎŠtoken็š„้—ฎ้ข˜ๅœจQ-LoRAไพ็„ถๅญ˜ๅœจใ€‚ๅนถไธ”๏ผŒInt4ๆจกๅž‹็š„ๅ‚ๆ•ฐๆ— ๆณ•่ขซ่ฎพไธบๅฏ่ฎญ็ปƒ็š„ๅ‚ๆ•ฐใ€‚ๆ‰€ๅนธ็š„ๆ˜ฏ๏ผŒๆˆ‘ไปฌๅชๆไพ›ไบ†Chatๆจกๅž‹็š„Int4ๆจกๅž‹๏ผŒๅ› ๆญคไฝ ไธ็”จๆ‹…ๅฟƒ่ฟ™ไธช้—ฎ้ข˜ใ€‚ไฝ†ๆ˜ฏ๏ผŒๅฆ‚ๆžœไฝ ๆ‰งๆ„่ฆๅœจQ-LoRAไธญๅผ•ๅ…ฅๆ–ฐ็š„็‰นๆฎŠtoken๏ผŒๅพˆๆŠฑๆญ‰๏ผŒๆˆ‘ไปฌๆ— ๆณ•ไฟ่ฏไฝ ่ƒฝๆˆๅŠŸ่ฎญ็ปƒใ€‚
> ๆณจๆ„๏ผš็”ฑไบŽHugging Face็š„ๅ†…้ƒจๅฎž็Žฐ๏ผŒๆจกๅž‹ๅœจไฟๅญ˜ๆ—ถ๏ผŒไธ€ไบ›้žPythonๆ–‡ไปถๆœชไฟๅญ˜๏ผˆไพ‹ๅฆ‚`*.cpp`ไธŽ`*.cu`๏ผ‰๏ผŒๅฆ‚้œ€่ฆๆ”ฏๆŒ็›ธๅ…ณๅŠŸ่ƒฝ๏ผŒ่ฏทๆ‰‹ๅŠจๅคๅˆถๆœ‰ๅ…ณๆ–‡ไปถใ€‚
ไธŽๅ…จๅ‚ๆ•ฐๅพฎ่ฐƒไธๅŒ๏ผŒLoRAๅ’ŒQ-LoRA็š„่ฎญ็ปƒๅช้œ€ๅญ˜ๅ‚จadapter้ƒจๅˆ†็š„ๅ‚ๆ•ฐใ€‚ๅ‡ๅฆ‚ไฝ ้œ€่ฆไฝฟ็”จLoRA่ฎญ็ปƒๅŽ็š„ๆจกๅž‹๏ผŒไฝ ้œ€่ฆไฝฟ็”จๅฆ‚ไธ‹ๆ–นๆณ•ใ€‚ๅ‡่ฎพไฝ ไฝฟ็”จQwen-7B่ฎญ็ปƒๆจกๅž‹๏ผŒไฝ ๅฏไปฅ็”จๅฆ‚ไธ‹ไปฃ็ ่ฏปๅ–ๆจกๅž‹๏ผš
```python
@@ -623,6 +627,17 @@ merged_model = model.merge_and_unload()
merged_model.save_pretrained(new_model_directory, max_shard_size="2048MB", safe_serialization=True)
```
`new_model_directory`็›ฎๅฝ•ๅฐ†ๅŒ…ๅซๅˆๅนถๅŽ็š„ๆจกๅž‹ๅ‚ๆ•ฐไธŽ็›ธๅ…ณๆจกๅž‹ไปฃ็ ใ€‚่ฏทๆณจๆ„`*.cu`ๅ’Œ`*.cpp`ๆ–‡ไปถๅฏ่ƒฝๆฒก่ขซไฟๅญ˜๏ผŒ่ฏทๆ‰‹ๅŠจๅคๅˆถใ€‚ๅฆๅค–๏ผŒ`merge_and_unload`ไป…ไฟๅญ˜ๆจกๅž‹๏ผŒๅนถๆœชไฟๅญ˜tokenizer๏ผŒๅฆ‚ๆœ‰้œ€่ฆ๏ผŒ่ฏทๅคๅˆถ็›ธๅ…ณๆ–‡ไปถๆˆ–ไฝฟ็”จไปฅไปฅไธ‹ไปฃ็ ไฟๅญ˜
```python
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
path_to_adapter, # path to the output directory
trust_remote_code=True
)
tokenizer.save_pretrained(new_model_directory)
```
注意：分布式训练需要根据你的需求和机器指定正确的分布式训练超参数。此外，你需要根据你的数据、显存情况和训练速度预期，使用`--model_max_length`设定你的数据长度。
### 显存占用及训练速度

View File

@@ -17,8 +17,8 @@
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
@@ -205,10 +205,10 @@ from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
# Downloading model checkpoint to a local dir model_dir
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
# model_dir = snapshot_download('qwen/Qwen-7B')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
# model_dir = snapshot_download('qwen/Qwen-14B')
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
# Loading local checkpoints
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
@@ -229,9 +229,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
response, history = model.chat(tokenizer, "ไฝ ๅฅฝ", history=None)
print(response)

View File

@@ -22,8 +22,8 @@
| | Qwen-Chat | Qwen-Chat (Int4) | Qwen-Chat (Int8) | Qwen |
|-----|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:|
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
| 7B | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-7B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-7B">๐Ÿค—</a> |
| 14B | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int4">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B-Chat-Int8">๐Ÿค—</a> | <a href="https://modelscope.cn/models/qwen/Qwen-14B/summary">๐Ÿค–</a> <a href="https://huggingface.co/Qwen/Qwen-14B">๐Ÿค—</a> |
@@ -200,10 +200,10 @@ from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
# Downloading model checkpoint to a local dir model_dir
# model_dir = snapshot_download('qwen/Qwen-7B', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat', revision='v1.1.4')
# model_dir = snapshot_download('qwen/Qwen-14B', revision='v1.0.4')
model_dir = snapshot_download('qwen/Qwen-14B-Chat', revision='v1.0.4')
# model_dir = snapshot_download('qwen/Qwen-7B')
# model_dir = snapshot_download('qwen/Qwen-7B-Chat')
# model_dir = snapshot_download('qwen/Qwen-14B')
model_dir = snapshot_download('qwen/Qwen-14B-Chat')
# Loading local checkpoints
# trust_remote_code is still set as True since we still load codes from local dir instead of transformers
@@ -224,9 +224,9 @@ from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", revision='v1.0.5', device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", revision='v1.0.5', trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True) # ๅฏๆŒ‡ๅฎšไธๅŒ็š„็”Ÿๆˆ้•ฟๅบฆใ€top_p็ญ‰็›ธๅ…ณ่ถ…ๅ‚
response, history = model.chat(tokenizer, "ไฝ ๅฅฝ", history=None)
print(response)