release latest models

This commit is contained in:
yangapku
2023-09-25 10:41:59 +08:00
parent fb3180d8f0
commit fc57dea277
13 changed files with 938 additions and 235 deletions

View File

@@ -12,7 +12,7 @@ cd ../../
# Qwen-7B
python evaluate_ceval.py -d data/ceval/
# Qwen-7B-Chat
# Qwen-7B-Chat (We only provide 0-shot reproduction scripts. 5-shot results are obtained by OpenCompass (https://github.com/InternLM/opencompass).)
pip install thefuzz
python evaluate_chat_ceval.py -d data/ceval/
```
@@ -29,7 +29,7 @@ cd ../../
# Qwen-7B
python evaluate_mmlu.py -d data/mmlu/data/
# Qwen-7B-Chat
# Qwen-7B-Chat (We only provide 0-shot reproduction scripts. 5-shot results are obtained by OpenCompass (https://github.com/InternLM/opencompass).)
pip install thefuzz
python evaluate_chat_mmlu.py -d data/mmlu/data/
```
@@ -73,9 +73,8 @@ This program exists to run untrusted model-generated code. Users are strongly en
# Qwen-7B
python evaluate_gsm8k.py
# Qwen-7B-Chat
# Qwen-7B-Chat (We only provide 0-shot reproduction scripts. 5-shot results are obtained by OpenCompass (https://github.com/InternLM/opencompass).)
python evaluate_chat_gsm8k.py # zeroshot
python evaluate_chat_gsm8k.py --use-fewshot # fewshot
```
- PLUGIN

View File

@@ -1,4 +1,3 @@
import re
import textwrap
import argparse
@@ -19,6 +18,7 @@ evaluate_functional_correctness HumanEval_res.jsonl
DEVICE = "cuda:0"
def extract_code(text, entry_point):
# Use a regular expression to match code blocks
code_block_pattern = re.compile(
@@ -99,7 +99,26 @@ if __name__ == "__main__":
f = jsonlines.open(args.sample_input_file)
with f_output as output:
for jobj in tqdm.tqdm(f, desc="task_idx"):
prompt = "Help me fill the following code.\n" + jobj["prompt"]
# Use the HumanEvalPack prompt format
signature = re.search(
rf"def\s+({jobj['entry_point']}.*?):\s*\n", jobj["prompt"]
).group(1)
description = "\n".join(
[
line.strip()
for line in re.search(
rf"(?:\"\"\"|''')(.*?)(?:\"\"\"|''')", jobj["prompt"], re.DOTALL
)
.group(1)
.split("\n")
]
)
prompt = (
f"Write a Python function `{signature}` to solve the following problem:\n"
f"{description}\n"
f"{jobj['prompt']}"
)
task_id = jobj["task_id"]
answer, response = generate_sample(
model, tokenizer, prompt, jobj["entry_point"]