mirror of
https://github.com/QwenLM/Qwen.git
synced 2026-05-20 16:35:47 +08:00
release latest models
This commit is contained in:
@@ -12,7 +12,7 @@ cd ../../
|
||||
# Qwen-7B
|
||||
python evaluate_ceval.py -d data/ceval/
|
||||
|
||||
# Qwen-7B-Chat
|
||||
# Qwen-7B-Chat (We only provide 0-shot reproduction scripts; the 5-shot results were obtained with OpenCompass (https://github.com/InternLM/opencompass).)
|
||||
pip install thefuzz
|
||||
python evaluate_chat_ceval.py -d data/ceval/
|
||||
```
|
||||
@@ -29,7 +29,7 @@ cd ../../
|
||||
# Qwen-7B
|
||||
python evaluate_mmlu.py -d data/mmlu/data/
|
||||
|
||||
# Qwen-7B-Chat
|
||||
# Qwen-7B-Chat (We only provide 0-shot reproduction scripts; the 5-shot results were obtained with OpenCompass (https://github.com/InternLM/opencompass).)
|
||||
pip install thefuzz
|
||||
python evaluate_chat_mmlu.py -d data/mmlu/data/
|
||||
```
|
||||
@@ -73,9 +73,8 @@ This program exists to run untrusted model-generated code. Users are strongly en
|
||||
# Qwen-7B
|
||||
python evaluate_gsm8k.py
|
||||
|
||||
# Qwen-7B-Chat
|
||||
# Qwen-7B-Chat (We only provide 0-shot reproduction scripts; the 5-shot results were obtained with OpenCompass (https://github.com/InternLM/opencompass).)
|
||||
python evaluate_chat_gsm8k.py # zeroshot
|
||||
python evaluate_chat_gsm8k.py --use-fewshot # fewshot
|
||||
```
|
||||
|
||||
- PLUGIN
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
import re
|
||||
import textwrap
|
||||
import argparse
|
||||
@@ -19,6 +18,7 @@ evaluate_functional_correctness HumanEval_res.jsonl
|
||||
|
||||
DEVICE = "cuda:0"
|
||||
|
||||
|
||||
def extract_code(text, entry_point):
|
||||
# 正则表达式匹配代码块
|
||||
code_block_pattern = re.compile(
|
||||
@@ -99,7 +99,26 @@ if __name__ == "__main__":
|
||||
f = jsonlines.open(args.sample_input_file)
|
||||
with f_output as output:
|
||||
for jobj in tqdm.tqdm(f, desc="task_idx"):
|
||||
prompt = "Help me fill the following code.\n" + jobj["prompt"]
|
||||
# use humanevalpack prompt
|
||||
signature = re.search(
|
||||
rf"def\s+({jobj['entry_point']}.*?):\s*\n", jobj["prompt"]
|
||||
).group(1)
|
||||
description = "\n".join(
|
||||
[
|
||||
line.strip()
|
||||
for line in re.search(
|
||||
rf"(?:\"\"\"|''')(.*?)(?:\"\"\"|''')", jobj["prompt"], re.DOTALL
|
||||
)
|
||||
.group(1)
|
||||
.split("\n")
|
||||
]
|
||||
)
|
||||
prompt = (
|
||||
f"Write a Python function `{signature}` to solve the following problem:\n"
|
||||
f"{description}\n"
|
||||
f"{jobj['prompt']}"
|
||||
)
|
||||
|
||||
task_id = jobj["task_id"]
|
||||
answer, response = generate_sample(
|
||||
model, tokenizer, prompt, jobj["entry_point"]
|
||||
|
||||
Reference in New Issue
Block a user