mirror of
https://github.com/QwenLM/Qwen.git
synced 2026-05-20 08:25:47 +08:00
add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support
This commit is contained in:
218
dcu-support/package/fastllm_pytools/torch2flm.py
Normal file
218
dcu-support/package/fastllm_pytools/torch2flm.py
Normal file
@@ -0,0 +1,218 @@
|
||||
import struct
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
def writeString(fo, s):
|
||||
fo.write(struct.pack('i', len(s)))
|
||||
fo.write(s.encode())
|
||||
|
||||
def writeKeyValue(fo, key, value):
|
||||
writeString(fo, key)
|
||||
writeString(fo, value)
|
||||
|
||||
fastllm_data_type_dict = {
|
||||
"int4": 8,
|
||||
"int8": 3,
|
||||
"float16": 7,
|
||||
"float32": 0,
|
||||
}
|
||||
fastllm_weight_type_dict = {
|
||||
"linear": 1,
|
||||
"embedding": 2
|
||||
}
|
||||
|
||||
v = np.random.randint(-127, 127, [10, 20]);
|
||||
temp = v;
|
||||
c_max = np.expand_dims(np.abs(v).max(axis = -1), -1)
|
||||
c_scale = c_max / 127.0
|
||||
v = (v / c_scale + 128.5).clip(1, 255).astype(np.uint8)
|
||||
|
||||
def write_int8(fo, v):
|
||||
c_max = np.expand_dims(np.abs(v).max(axis = -1), -1).clip(0.1, 1e100)
|
||||
c_scale = c_max / 127.0
|
||||
v = (v / c_scale + 128.5).clip(1, 255).astype(np.uint8)
|
||||
fo.write(struct.pack('i', 3))
|
||||
fo.write(struct.pack('i', 0))
|
||||
for i in range(c_max.shape[0]):
|
||||
fo.write(struct.pack('f', -c_max[i][0]));
|
||||
fo.write(struct.pack('f', c_max[i][0]));
|
||||
fo.write(v.data)
|
||||
|
||||
def write_int4(fo, v):
|
||||
# c_min = np.expand_dims(-np.abs(v).max(axis = -1), -1)
|
||||
# c_max = np.expand_dims(np.abs(v).max(axis = -1), -1)
|
||||
# c_scale = c_max / 7.0
|
||||
# c_min = c_scale * -8.0
|
||||
|
||||
c_min = np.expand_dims(v.min(axis = -1), -1)
|
||||
c_max = np.expand_dims(v.max(axis = -1), -1)
|
||||
c_scale = (c_max - c_min) / 15.0
|
||||
c_zero = np.round(0.0 - c_min / c_scale)
|
||||
c_zero = c_zero.clip(0, 15)
|
||||
c_min = -c_scale * c_zero
|
||||
|
||||
v = (v - c_min) / c_scale
|
||||
v = (v + 0.5).astype(np.int8).clip(0, 15).astype(np.uint8)
|
||||
v = v[:, 0::2] * 16 + v[:, 1::2]
|
||||
fo.write(struct.pack('i', 8))
|
||||
fo.write(struct.pack('i', 0))
|
||||
for i in range(c_min.shape[0]):
|
||||
fo.write(struct.pack('f', c_min[i][0]));
|
||||
fo.write(struct.pack('f', c_max[i][0]));
|
||||
fo.write(v.data)
|
||||
|
||||
def tofile(exportPath,
|
||||
model,
|
||||
tokenizer = None,
|
||||
pre_prompt = None,
|
||||
user_role = None,
|
||||
bot_role = None,
|
||||
history_sep = None,
|
||||
dtype = "float16"):
|
||||
if (dtype not in fastllm_data_type_dict):
|
||||
print("dtype should in ", list(fastllm_data_type_dict.keys()))
|
||||
exit(0)
|
||||
|
||||
dict = model.state_dict()
|
||||
fo = open(exportPath, "wb")
|
||||
|
||||
# 0. version id
|
||||
fo.write(struct.pack('i', 2))
|
||||
|
||||
# 0.1 model info
|
||||
if model.config.model_type == "chatglm" and model.config.transformers_version == "4.30.2":
|
||||
model.config.model_type = "chatglm3"
|
||||
modelInfo = model.config.__dict__
|
||||
if model.generation_config is not None:
|
||||
modelInfo.update(model.generation_config.__dict__)
|
||||
if ("model_type" not in modelInfo):
|
||||
print("unknown model_type.")
|
||||
exit(0)
|
||||
|
||||
if (pre_prompt):
|
||||
modelInfo["pre_prompt"] = pre_prompt
|
||||
if (user_role):
|
||||
modelInfo["user_role"] = user_role
|
||||
if (bot_role):
|
||||
modelInfo["bot_role"] = bot_role
|
||||
if (history_sep):
|
||||
modelInfo["history_sep"] = history_sep
|
||||
if (modelInfo["model_type"] == "baichuan" and hasattr(model, "model") and hasattr(model.model, "get_alibi_mask")):
|
||||
# Baichuan 2代
|
||||
modelInfo["use_alibi"] = "1"
|
||||
modelInfo["pre_prompt"] = ""
|
||||
modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.user_token_id) + ">") if hasattr(model.generation_config, "user_token_id") else "";
|
||||
modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.assistant_token_id) + ">") if hasattr(model.generation_config, "assistant_token_id") else "";
|
||||
modelInfo["history_sep"] = ""
|
||||
if (modelInfo["model_type"] == "baichuan" and modelInfo["vocab_size"] == 125696):
|
||||
# Baichuan 2代 7B
|
||||
modelInfo["pre_prompt"] = ""
|
||||
modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.user_token_id) + ">") if hasattr(model.generation_config, "user_token_id") else "";
|
||||
modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.assistant_token_id) + ">") if hasattr(model.generation_config, "assistant_token_id") else "";
|
||||
modelInfo["history_sep"] = ""
|
||||
if modelInfo["model_type"] == "qwen":
|
||||
if modelInfo["chat_format"] == "chatml":
|
||||
modelInfo["im_end_id"] = tokenizer.im_end_id
|
||||
modelInfo["im_start_id"] = tokenizer.im_start_id
|
||||
|
||||
modelInfo["tokenizer_use_score"] = "1" # 分词带分数
|
||||
|
||||
if hasattr(model, "peft_config"):
|
||||
adapter_size = len(model.peft_config)
|
||||
modelInfo["peft_size"] = adapter_size
|
||||
|
||||
fo.write(struct.pack('i', len(modelInfo)))
|
||||
for it in modelInfo.keys():
|
||||
writeKeyValue(fo, str(it), str(modelInfo[it]))
|
||||
|
||||
if hasattr(model, "peft_config"):
|
||||
for adapter_name in model.peft_config.keys():
|
||||
adapter_dict = model.peft_config[adapter_name].__dict__
|
||||
writeString(fo, adapter_name)
|
||||
fo.write(struct.pack('i', len(adapter_dict)))
|
||||
for it in adapter_dict.keys():
|
||||
writeKeyValue(fo, str(it), str(adapter_dict[it]))
|
||||
|
||||
# 1. vocab
|
||||
if (tokenizer):
|
||||
if (hasattr(tokenizer, "tokenizer")):
|
||||
if (modelInfo['model_type'] == "qwen"):
|
||||
pass
|
||||
else:
|
||||
tokenizer = tokenizer.tokenizer
|
||||
if (hasattr(tokenizer, "sp_model")):
|
||||
piece_size = tokenizer.sp_model.piece_size()
|
||||
fo.write(struct.pack('i', piece_size))
|
||||
for i in range(piece_size):
|
||||
s = tokenizer.sp_model.id_to_piece(i).encode()
|
||||
fo.write(struct.pack('i', len(s)))
|
||||
for c in s:
|
||||
fo.write(struct.pack('i', c))
|
||||
fo.write(struct.pack('i', i))
|
||||
fo.write(struct.pack('f', float(tokenizer.sp_model.get_score(i))))
|
||||
else:
|
||||
vocab = tokenizer.get_vocab()
|
||||
fo.write(struct.pack('i', len(vocab)))
|
||||
for v in vocab.keys():
|
||||
if (modelInfo['model_type'] == "qwen"):
|
||||
s = v
|
||||
elif (modelInfo["model_type"] == "moss"):
|
||||
s = [(ord(c) if c not in tokenizer.byte_decoder else tokenizer.byte_decoder[c]) for c in v]
|
||||
else:
|
||||
s = v.encode()
|
||||
fo.write(struct.pack('i', len(s)))
|
||||
for c in s:
|
||||
fo.write(struct.pack('i', c))
|
||||
fo.write(struct.pack('i', vocab[v]))
|
||||
fo.write(struct.pack('f', 1.0))
|
||||
else:
|
||||
fo.write(struct.pack('i', 0))
|
||||
|
||||
weight_type_dict = {}
|
||||
module_dict = {}
|
||||
for key, m in model.named_modules():
|
||||
if (isinstance(m, torch.nn.Linear)):
|
||||
weight_type_dict[key + ".weight"] = "linear"
|
||||
module_dict[key + ".weight"] = m
|
||||
if (isinstance(m, torch.nn.Embedding)):
|
||||
weight_type_dict[key] = "embedding"
|
||||
|
||||
# 2. weight
|
||||
fo.write(struct.pack('i', len(dict)))
|
||||
tot = 0
|
||||
for key in dict:
|
||||
ori_data_type = 0
|
||||
ori_np_data_type = np.float32
|
||||
cur_weight_type = 0
|
||||
if (key in weight_type_dict and weight_type_dict[key] in fastllm_weight_type_dict):
|
||||
cur_weight_type = fastllm_weight_type_dict[weight_type_dict[key]]
|
||||
to_data_type = 0
|
||||
if (cur_weight_type == 1):
|
||||
to_data_type = fastllm_data_type_dict[dtype]
|
||||
if (to_data_type == 7):
|
||||
ori_data_type = 7
|
||||
ori_np_data_type = np.float16
|
||||
|
||||
cur = dict[key].numpy().astype(ori_np_data_type)
|
||||
|
||||
if hasattr(model, "peft_config"):
|
||||
weight_name = key.replace('base_model.model.', '')
|
||||
fo.write(struct.pack('i', len(weight_name)))
|
||||
fo.write(weight_name.encode())
|
||||
else:
|
||||
fo.write(struct.pack('i', len(key)))
|
||||
fo.write(key.encode())
|
||||
fo.write(struct.pack('i', len(cur.shape)))
|
||||
for i in cur.shape:
|
||||
fo.write(struct.pack('i', i))
|
||||
if (to_data_type == 3):
|
||||
write_int8(fo, cur)
|
||||
elif (to_data_type == 8):
|
||||
write_int4(fo, cur)
|
||||
else:
|
||||
fo.write(struct.pack('i', to_data_type))
|
||||
fo.write(cur.data)
|
||||
tot += 1
|
||||
print("output (", tot, "/", len(dict), end = " )\r")
|
||||
print("\nfinish.")
|
||||
fo.close()
|
||||
Reference in New Issue
Block a user