Skip to content

Commit 167cfdc

Browse files
zhulinJulia24zhulin1
and
zhulin1
authored Jul 3, 2024
[ci] update daily testcase (#1285)
* Update daily-run-test.yml
* Create eval_regression_chat.py
* Delete .github/scripts/.github/scripts/eval_regression_chat.py
* Create eval_regression_chat.py
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update oc_score_baseline.yaml
* Update oc_score_assert.py
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update oc_score_baseline.yaml
* Update oc_score_assert.py
* Update oc_score_assert.py
* fix lint
* update
* update
* update
* update
* update
* update
* update
* update
* update
* Update daily-run-test.yml
* update

---------

Co-authored-by: zhulin1 <[email protected]>
1 parent 28eba6f commit 167cfdc

7 files changed

+371
-55
lines changed
 
+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from mmengine.config import read_base
2+
3+
with read_base():
4+
# choose a list of datasets
5+
from ...configs.datasets.gsm8k.gsm8k_gen_17d0dc import \
6+
gsm8k_datasets # noqa: F401, E501
7+
from ...configs.datasets.race.race_ppl import \
8+
race_datasets # noqa: F401, E501
9+
from ...configs.models.deepseek.hf_deepseek_moe_16b_base import \
10+
models as hf_deepseek_moe_16b_base_model # noqa: F401, E501
11+
# read base models (hf, lmdeploy and vllm backends)
12+
from ...configs.models.deepseek.lmdeploy_deepseek_7b_base import \
13+
models as lmdeploy_deepseek_7b_base_model # noqa: F401, E501
14+
from ...configs.models.deepseek.vllm_deepseek_moe_16b_base import \
15+
models as vllm_deepseek_moe_16b_base_model # noqa: F401, E501
16+
from ...configs.models.gemma.hf_gemma_2b import \
17+
models as hf_gemma_2b_model # noqa: F401, E501
18+
from ...configs.models.gemma.hf_gemma_7b import \
19+
models as hf_gemma_7b_model # noqa: F401, E501
20+
from ...configs.models.hf_internlm.lmdeploy_internlm2_1_8b import \
21+
models as lmdeploy_internlm2_1_8b_model # noqa: F401, E501
22+
from ...configs.models.hf_internlm.lmdeploy_internlm2_7b import \
23+
models as lmdeploy_internlm2_7b_model # noqa: F401, E501
24+
from ...configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \
25+
models as lmdeploy_internlm2_base_7b_model # noqa: F401, E501
26+
from ...configs.models.hf_llama.lmdeploy_llama3_8b import \
27+
models as lmdeploy_llama3_8b_model # noqa: F401, E501
28+
from ...configs.models.mistral.hf_mistral_7b_v0_2 import \
29+
models as hf_mistral_7b_v0_2_model # noqa: F401, E501
30+
from ...configs.models.mistral.vllm_mistral_7b_v0_2 import \
31+
models as vllm_mistral_7b_v0_2_model # noqa: F401, E501
32+
from ...configs.models.qwen.hf_qwen1_5_moe_a2_7b import \
33+
models as hf_qwen1_5_moe_a2_7b_model # noqa: F401, E501
34+
from ...configs.models.qwen.hf_qwen2_0_5b import \
35+
models as hf_qwen2_0_5b_model # noqa: F401, E501
36+
from ...configs.models.qwen.lmdeploy_qwen2_1_5b import \
37+
models as lmdeploy_qwen2_1_5b_model # noqa: F401, E501
38+
from ...configs.models.qwen.lmdeploy_qwen2_7b import \
39+
models as lmdeploy_qwen2_7b_model # noqa: F401, E501
40+
from ...configs.models.qwen.vllm_qwen1_5_0_5b import \
41+
models as vllm_qwen1_5_0_5b_model # noqa: F401, E501
42+
from ...configs.models.yi.hf_yi_1_5_6b import \
43+
models as hf_yi_1_5_6b_model # noqa: F401, E501
44+
from ...configs.models.yi.hf_yi_1_5_9b import \
45+
models as hf_yi_1_5_9b_model # noqa: F401, E501
46+
from ...configs.summarizers.medium import summarizer # noqa: F401, E501
47+
48+
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
49+
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
50+
51+
for d in datasets:
52+
d['reader_cfg']['test_range'] = '[0:100]'
+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from mmengine.config import read_base
2+
3+
with read_base():
4+
# choose a list of datasets
5+
from ...configs.datasets.gsm8k.gsm8k_gen import \
6+
gsm8k_datasets # noqa: F401, E501
7+
from ...configs.datasets.race.race_gen import \
8+
race_datasets # noqa: F401, E501
9+
# read hf models - chat models
10+
from ...configs.models.baichuan.hf_baichuan2_7b_chat import \
11+
models as hf_baichuan2_7b_chat_model # noqa: F401, E501
12+
from ...configs.models.chatglm.hf_glm4_9b_chat import \
13+
models as hf_glm4_9b_chat_model # noqa: F401, E501
14+
from ...configs.models.deepseek.hf_deepseek_7b_chat import \
15+
models as hf_deepseek_7b_chat_model # noqa: F401, E501
16+
from ...configs.models.deepseek.hf_deepseek_moe_16b_chat import \
17+
models as hf_deepseek_moe_16b_chat_model # noqa: F401, E501
18+
from ...configs.models.deepseek.vllm_deepseek_7b_chat import \
19+
models as vllm_deepseek_7b_chat_model # noqa: F401, E501
20+
from ...configs.models.gemma.hf_gemma_2b_it import \
21+
models as hf_gemma_2b_it_model # noqa: F401, E501
22+
from ...configs.models.gemma.hf_gemma_7b_it import \
23+
models as hf_gemma_7b_it_model # noqa: F401, E501
24+
from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \
25+
models as lmdeploy_internlm2_chat_1_8b_model # noqa: F401, E501
26+
from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \
27+
models as lmdeploy_internlm2_chat_1_8b_sft_model # noqa: F401, E501
28+
from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_7b import \
29+
models as lmdeploy_internlm2_chat_7b_model # noqa: F401, E501
30+
from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_7b_sft import \
31+
models as lmdeploy_internlm2_chat_7b_sft_model # noqa: F401, E501
32+
from ...configs.models.hf_internlm.vllm_internlm2_chat_7b import \
33+
models as vllm_internlm2_chat_7b_model # noqa: F401, E501
34+
from ...configs.models.hf_llama.hf_llama3_8b_instruct import \
35+
models as hf_llama3_8b_instruct_model # noqa: F401, E501
36+
from ...configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
37+
models as lmdeploy_llama3_8b_instruct_model # noqa: F401, E501
38+
from ...configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \
39+
models as hf_mistral_7b_instruct_v0_2_model # noqa: F401, E501
40+
from ...configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \
41+
models as vllm_mistral_7b_instruct_v0_2_model # noqa: F401, E501
42+
from ...configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \
43+
models as hf_minicpm_2b_dpo_fp32_model # noqa: F401, E501
44+
from ...configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \
45+
models as hf_minicpm_2b_sft_bf16_model # noqa: F401, E501
46+
from ...configs.models.openbmb.hf_minicpm_2b_sft_fp32 import \
47+
models as hf_minicpm_2b_sft_fp32_model # noqa: F401, E501
48+
from ...configs.models.phi.hf_phi_3_mini_4k_instruct import \
49+
models as hf_phi_3_mini_4k_instruct_model # noqa: F401, E501
50+
from ...configs.models.phi.hf_phi_3_small_8k_instruct import \
51+
models as hf_phi_3_mini_8k_instruct_model # noqa: F401, E501
52+
from ...configs.models.qwen.hf_qwen1_5_0_5b_chat import \
53+
models as hf_qwen1_5_0_5b_chat_model # noqa: F401, E501
54+
from ...configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
55+
models as lmdeploy_qwen2_1_5b_instruct_model # noqa: F401, E501
56+
from ...configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
57+
models as lmdeploy_qwen2_7b_instruct_model # noqa: F401, E501
58+
from ...configs.models.qwen.vllm_qwen1_5_0_5b_chat import \
59+
models as vllm_qwen1_5_0_5b_chat_model # noqa: F401, E501
60+
from ...configs.models.yi.hf_yi_1_5_6b_chat import \
61+
models as hf_yi_1_5_6b_chat_model # noqa: F401, E501
62+
from ...configs.models.yi.hf_yi_1_5_9b_chat import \
63+
models as hf_yi_1_5_9b_chat_model # noqa: F401, E501
64+
from ...configs.summarizers.medium import summarizer # noqa: F401, E501
65+
66+
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
67+
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
68+
69+
for d in datasets:
70+
d['reader_cfg']['test_range'] = '[0:100]'

‎.github/scripts/oc_score_assert.py

+47-14
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,26 @@
66

77
output_path = 'regression_result_daily'
88

9-
model_list = ['internlm2-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
10-
dataset_list = [
11-
'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
12-
'openbookqa_fact'
9+
chat_model_list = [
10+
'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
11+
'gemma-2b-it-hf', 'gemma-7b-it-hf', 'internlm2-chat-1.8b-turbomind',
12+
'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
13+
'internlm2-chat-7b-sft-turbomind', 'llama-3-8b-instruct-hf',
14+
'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
15+
'minicpm-2b-dpo-fp32-hf', 'minicpm-2b-sft-bf16-hf',
16+
'minicpm-2b-sft-fp32-hf', 'phi-3-mini-4k-instruct-hf',
17+
'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-turbomind',
18+
'qwen2-7b-instruct-turbomind', 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf'
1319
]
20+
base_model_list = [
21+
'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind', 'gemma-2b-hf',
22+
'gemma-7b-hf', 'internlm2-1.8b-turbomind', 'internlm2-7b-turbomind',
23+
'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
24+
'mistral-7b-v0.2-hf', 'qwen1.5-moe-a2.7b-hf', 'qwen2-0.5b-hf',
25+
'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind', 'yi-1.5-6b-hf',
26+
'yi-1.5-9b-hf'
27+
]
28+
dataset_list = ['gsm8k', 'race-middle', 'race-high']
1429

1530

1631
@pytest.fixture()
@@ -32,10 +47,28 @@ def result_scores():
3247

3348
@pytest.mark.usefixtures('result_scores')
3449
@pytest.mark.usefixtures('baseline_scores')
50+
@pytest.mark.chat
3551
class TestChat:
3652
"""Test cases for chat model."""
3753

38-
@pytest.mark.parametrize('model, dataset', [(p1, p2) for p1 in model_list
54+
@pytest.mark.parametrize('model, dataset', [(p1, p2)
55+
for p1 in chat_model_list
56+
for p2 in dataset_list])
57+
def test_model_dataset_score(self, baseline_scores, result_scores, model,
58+
dataset):
59+
base_score = baseline_scores.get(model).get(dataset)
60+
result_score = result_scores.get(model).get(dataset)
61+
assert_score(result_score, base_score)
62+
63+
64+
@pytest.mark.usefixtures('result_scores')
65+
@pytest.mark.usefixtures('baseline_scores')
66+
@pytest.mark.base
67+
class TestBase:
68+
"""Test cases for base model."""
69+
70+
@pytest.mark.parametrize('model, dataset', [(p1, p2)
71+
for p1 in base_model_list
3972
for p2 in dataset_list])
4073
def test_model_dataset_score(self, baseline_scores, result_scores, model,
4174
dataset):
@@ -47,13 +80,13 @@ def test_model_dataset_score(self, baseline_scores, result_scores, model,
4780
def assert_score(score, baseline):
4881
if score is None or score == '-':
4982
assert False, 'value is none'
50-
if float(score) < (baseline * 1.03) and float(score) > (baseline * 0.97):
51-
print(score + ' between ' + str(baseline * 0.97) + ' and ' +
52-
str(baseline * 1.03))
83+
if float(score) <= (baseline + 5) and float(score) >= (baseline - 5):
84+
print(score + ' between ' + str(baseline - 5) + ' and ' +
85+
str(baseline + 5))
5386
assert True
5487
else:
5588
assert False, score + ' not between ' + str(
56-
baseline * 0.97) + ' and ' + str(baseline * 1.03)
89+
baseline - 5) + ' and ' + str(baseline + 5)
5790

5891

5992
def find_csv_files(directory):
@@ -62,11 +95,11 @@ def find_csv_files(directory):
6295
for file in files:
6396
if file.endswith('.csv'):
6497
csv_files.append(os.path.join(root, file))
65-
if len(csv_files) > 1:
66-
raise 'have more than 1 result file, please check the result manually'
67-
if len(csv_files) == 0:
68-
return None
69-
return csv_files[0]
98+
99+
csv_files_with_time = {f: os.path.getctime(f) for f in csv_files}
100+
sorted_csv_files = sorted(csv_files_with_time.items(), key=lambda x: x[1])
101+
latest_csv_file = sorted_csv_files[-1][0]
102+
return latest_csv_file
70103

71104

72105
def read_csv_file(file_path):
+180-31
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,180 @@
1-
internlm-7b-hf:
2-
ARC-c: 34.24
3-
chid-dev: 79.70
4-
chid-test: 81.12
5-
openai_humaneval: 10.98
6-
openbookqa: 47.20
7-
openbookqa_fact: 74.00
8-
9-
internlm-chat-7b-hf:
10-
ARC-c: 36.95
11-
chid-dev: 71.78
12-
chid-test: 76.87
13-
openai_humaneval: 21.34
14-
openbookqa: 66.6
15-
openbookqa_fact: 80.4
16-
17-
chatglm3-6b-base-hf:
18-
ARC-c: 44.41
19-
chid-dev: 78.22
20-
chid-test: 78.57
21-
openai_humaneval: 20.73
22-
openbookqa: 78.40
23-
openbookqa_fact: 92.00
24-
25-
internlm2-7b-hf:
26-
ARC-c: 36.27
27-
chid-dev: 55.94
28-
chid-test: 53.70
29-
openai_humaneval: 45.12
30-
openbookqa: 80.00
31-
openbookqa_fact: 86.40
1+
baichuan2-7b-chat-hf:
2+
gsm8k: 30
3+
race-middle: 74
4+
race-high: 79
5+
6+
deepseek-7b-chat-hf:
7+
gsm8k: 60
8+
race-middle: 74
9+
race-high: 80
10+
11+
deepseek-moe-16b-chat-hf:
12+
gsm8k: 62
13+
race-middle: 62
14+
race-high: 70
15+
16+
gemma-2b-it-hf:
17+
gsm8k: 14
18+
race-middle: 62
19+
race-high: 52
20+
21+
gemma-7b-it-hf:
22+
gsm8k: 39
23+
race-middle: 74
24+
race-high: 71
25+
26+
internlm2-chat-1.8b-turbomind:
27+
gsm8k: 40
28+
race-middle: 82
29+
race-high: 83
30+
31+
internlm2-chat-1.8b-sft-turbomind:
32+
gsm8k: 32
33+
race-middle: 81
34+
race-high: 83
35+
36+
internlm2-chat-7b-turbomind:
37+
gsm8k: 69
38+
race-middle: 90
39+
race-high: 88
40+
41+
internlm2-chat-7b-sft-turbomind:
42+
gsm8k: 71
43+
race-middle: 91
44+
race-high: 92
45+
46+
llama-3-8b-instruct-hf:
47+
gsm8k: 77
48+
race-middle: 85
49+
race-high: 87
50+
51+
llama-3-8b-instruct-turbomind:
52+
gsm8k: 77
53+
race-middle: 85
54+
race-high: 89
55+
56+
mistral-7b-instruct-v0.2-hf:
57+
gsm8k: 48
58+
race-middle: 82
59+
race-high: 78
60+
61+
minicpm-2b-dpo-fp32-hf:
62+
gsm8k: 58
63+
race-middle: 66
64+
race-high: 74
65+
66+
minicpm-2b-sft-bf16-hf:
67+
gsm8k: 58
68+
race-middle: 75
69+
race-high: 81
70+
71+
minicpm-2b-sft-fp32-hf:
72+
gsm8k: 58
73+
race-middle: 75
74+
race-high: 81
75+
76+
phi-3-mini-4k-instruct-hf:
77+
gsm8k: 67
78+
race-middle: 81
79+
race-high: 84
80+
81+
qwen1.5-0.5b-chat-hf:
82+
gsm8k: 5
83+
race-middle: 55
84+
race-high: 50
85+
86+
qwen2-1.5b-instruct-turbomind:
87+
gsm8k: 60
88+
race-middle: 77
89+
race-high: 86
90+
91+
qwen2-7b-instruct-turbomind:
92+
gsm8k: 88
93+
race-middle: 87
94+
race-high: 89
95+
96+
yi-1.5-6b-chat-hf:
97+
gsm8k: 72
98+
race-middle: 88
99+
race-high: 86
100+
101+
yi-1.5-9b-chat-hf:
102+
gsm8k: 81
103+
race-middle: 89
104+
race-high: 91
105+
106+
deepseek-moe-16b-base-hf:
107+
gsm8k: 25
108+
race-middle: 35
109+
race-high: 23
110+
111+
112+
deepseek-7b-base-turbomind:
113+
gsm8k: 21
114+
race-middle: 42
115+
race-high: 42
116+
117+
gemma-2b-hf:
118+
gsm8k: 19
119+
race-middle: 33
120+
race-high: 26
121+
122+
gemma-7b-hf:
123+
gsm8k: 65
124+
race-middle: 59
125+
race-high: 66
126+
127+
internlm2-1.8b-turbomind:
128+
gsm8k: 27
129+
race-middle: 75
130+
race-high: 72
131+
132+
internlm2-7b-turbomind:
133+
gsm8k: 67
134+
race-middle: 78
135+
race-high: 76
136+
137+
internlm2-base-7b-turbomind:
138+
gsm8k: 39
139+
race-middle: 75
140+
race-high: 81
141+
142+
llama-3-8b-turbomind:
143+
gsm8k: 52
144+
race-middle: 63
145+
race-high: 70
146+
147+
mistral-7b-v0.2-hf:
148+
gsm8k: 43
149+
race-middle: 42
150+
race-high: 60
151+
152+
qwen1.5-moe-a2.7b-hf:
153+
gsm8k: 64
154+
race-middle: 78
155+
race-high: 90
156+
157+
qwen2-0.5b-hf:
158+
gsm8k: 35
159+
race-middle: 52
160+
race-high: 48
161+
162+
qwen2-1.5b-turbomind:
163+
gsm8k: 57
164+
race-middle: 64
165+
race-high: 78
166+
167+
qwen2-7b-turbomind:
168+
gsm8k: 83
169+
race-middle: 88
170+
race-high: 88
171+
172+
yi-1.5-6b-hf:
173+
gsm8k: 59
174+
race-middle: 81
175+
race-high: 89
176+
177+
yi-1.5-9b-hf:
178+
gsm8k: 77
179+
race-middle: 90
180+
race-high: 90

‎.github/workflows/daily-run-test.yml

+20-8
Original file line numberDiff line numberDiff line change
@@ -31,34 +31,46 @@ jobs:
3131
eval "$(conda shell.bash hook)"
3232
conda create -y --name ${{env.CONDA_ENV}} python=3.10
3333
conda activate ${{env.CONDA_ENV}}
34-
pip install torch torchvision torchaudio --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
35-
pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-*.whl
34+
pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
35+
pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.0.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
36+
FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.5.8+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl --index-url https://download.pytorch.org/whl/cu118
37+
pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
38+
pip install xformers==0.0.25.post1 --cache-dir ${{env.PIP_CACHE_PATH}} --extra-index-url https://download.pytorch.org/whl/cu118
3639
conda info --envs
3740
- name: Prepare - Pip install code
3841
run: |
3942
eval "$(conda shell.bash hook)"
4043
conda activate ${{env.CONDA_ENV}}
4144
pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
42-
pip install human_eval transformers protobuf --cache-dir ${{env.PIP_CACHE_PATH}}
45+
pip install human_eval transformers protobuf pytest --cache-dir ${{env.PIP_CACHE_PATH}}
4346
conda info --envs
4447
- name: Prepare - prepare data and hf model
4548
run: |
4649
cp -r ${{env.USERSPACE_PREFIX}}/data .
4750
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
4851
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
49-
- name: Run test
52+
- name: Run chat model test
5053
run: |
5154
eval "$(conda shell.bash hook)"
5255
conda activate ${{env.CONDA_ENV}}
5356
conda info --envs
5457
rm -rf regression_result_daily
5558
export from_tf=TRUE
56-
python3 run.py --models hf_internlm_chat_7b hf_internlm2_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
57-
- name: Get result
59+
rm -rf /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary
60+
python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }} --reuse
61+
cp -r /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary regression_result_daily
62+
python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
63+
- name: Run base model test
5864
run: |
5965
eval "$(conda shell.bash hook)"
60-
pip install pytest --cache-dir ${{env.PIP_CACHE_PATH}}
61-
python -m pytest -s -v --color=yes .github/scripts/oc_score_assert.py
66+
conda activate ${{env.CONDA_ENV}}
67+
conda info --envs
68+
rm -rf regression_result_daily
69+
export from_tf=TRUE
70+
rm -rf /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary
71+
python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }} --reuse
72+
cp -r /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary regression_result_daily
73+
python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
6274
- name: Remove Conda Env
6375
if: always()
6476
run: |

‎.github/workflows/pr-run-test.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
environment: 'prod'
3232
timeout-minutes: 30
3333
steps:
34-
- name: Clone repository
34+
- name: Checkout repository
3535
uses: actions/checkout@v2
3636
- name: Prepare - Install opencompass
3737
run: |

‎configs/models/deepseek/vllm_deepseek_moe_16b_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
models = [
44
dict(
55
type=VLLM,
6-
abbr='deepseek-moe-16b-base-hf',
6+
abbr='deepseek-moe-16b-base-vllm',
77
path='deepseek-ai/deepseek-moe-16b-base',
88
model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.6),
99
max_out_len=1024,

0 commit comments

Comments
 (0)
Please sign in to comment.