Skip to content

Commit 6042b88

Browse files
[CI] update dailytest scheduler and baseline's score (#1898)
1 parent bdb2d46 commit 6042b88

4 files changed

+27
-27
lines changed

.github/scripts/eval_regression_api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
abbr='lmdeploy-api-test',
2525
type=OpenAISDK,
2626
key='EMPTY',
27-
openai_api_base='http://localhost:23333/v1',
27+
openai_api_base='http://0.0.0.0:23333/v1',
2828
path='internlm2',
2929
tokenizer_path='internlm/internlm2_5-7b-chat',
3030
rpm_verbose=True,

.github/scripts/oc_score_baseline_fullbench.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ internlm2_5-7b-chat-hf_fullbench:
4242
alpaca_eval_total: 20
4343
arenahard_score: 50
4444
Followbench_naive_average: 1
45-
CompassArena_naive_average: 44.00
45+
CompassArena_naive_average: 43
4646
mtbench101_avg: 7.8
4747
wildbench_average: -12.78
4848
simpleqa_accuracy_given_attempted: 0
@@ -58,7 +58,7 @@ internlm2_5-7b-chat-hf_fullbench:
5858
alpaca_eval_helpful_base: 20
5959
compassarena_language_naive_average: 35
6060
compassarena_knowledge_naive_average: 55
61-
compassarena_reason_v2_naive_average: 45.00
61+
compassarena_reason_v2_naive_average: 40
6262
compassarena_math_v2_naive_average: 55
6363
compassarena_creationv2_zh_naive_average: 30
6464
followbench_llmeval_en_HSR_AVG: 1

.github/scripts/oc_score_baseline_testrange.yaml

+16-16
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ chat:
66
gsm8k_accuracy: 71.88
77
race-high_accuracy: 90.62
88
glm-4-9b-chat-vllm:
9-
gsm8k_accuracy: 65.62
9+
gsm8k_accuracy: 71.88
1010
race-high_accuracy: 90.62
1111
deepseek-7b-chat-hf:
1212
gsm8k_accuracy: 46.88
@@ -63,7 +63,7 @@ chat:
6363
gsm8k_accuracy: 84.38
6464
race-high_accuracy: 90.62
6565
llama-3_2-3b-instruct-hf:
66-
gsm8k_accuracy: 65.62
66+
gsm8k_accuracy: 68.75
6767
race-high_accuracy: 81.25
6868
llama-3-8b-instruct-hf:
6969
gsm8k_accuracy: 68.75
@@ -75,7 +75,7 @@ chat:
7575
gsm8k_accuracy: 78.12
7676
race-high_accuracy: 90.62
7777
llama-3_2-3b-instruct-turbomind:
78-
gsm8k_accuracy: 62.50
78+
gsm8k_accuracy: 65.62
7979
race-high_accuracy: 81.25
8080
llama-3-8b-instruct-turbomind:
8181
gsm8k_accuracy: 71.88
@@ -226,25 +226,25 @@ base:
226226
race-high_accuracy: 25
227227
winogrande_accuracy: 68.75
228228
gemma2-2b-hf:
229-
gsm8k_accuracy: 28.12
229+
gsm8k_accuracy: 31.25
230230
GPQA_diamond_accuracy: 3.12
231231
race-high_accuracy: 56.25
232-
winogrande_accuracy: 71.88
232+
winogrande_accuracy: 75.00
233233
gemma2-9b-hf:
234-
gsm8k_accuracy: 68.75
234+
gsm8k_accuracy: 75.00
235235
GPQA_diamond_accuracy: 0
236-
race-high_accuracy: 81.25
237-
winogrande_accuracy: 84.38
236+
race-high_accuracy: 84.38
237+
winogrande_accuracy: 81.25
238238
gemma-2b-hf:
239-
gsm8k_accuracy: 18.75
239+
gsm8k_accuracy: 21.88
240240
GPQA_diamond_accuracy: 3.12
241-
race-high_accuracy: 25
241+
race-high_accuracy: 21.88
242242
winogrande_accuracy: 53.12
243243
gemma-7b-hf:
244244
gsm8k_accuracy: 56.25
245-
GPQA_diamond_accuracy: 6.25
245+
GPQA_diamond_accuracy: 3.12
246246
race-high_accuracy: 65.62
247-
winogrande_accuracy: 78.12
247+
winogrande_accuracy: 71.88
248248
gemma-2b-vllm:
249249
gsm8k_accuracy: 15.62
250250
GPQA_diamond_accuracy: 3.12
@@ -441,10 +441,10 @@ base:
441441
race-high_accuracy: 93.75
442442
winogrande_accuracy: 87.5
443443
deepseek-v2-turbomind:
444-
gsm8k_accuracy: 71.88
445-
GPQA_diamond_accuracy: 3.12
446-
race-high_accuracy: 81.25
447-
winogrande_accuracy: 75
444+
gsm8k_accuracy: 65.62
445+
GPQA_diamond_accuracy: 15.62
446+
race-high_accuracy: 93.75
447+
winogrande_accuracy: 84.38
448448
llama-3-70b-hf:
449449
gsm8k_accuracy: 62.5
450450
GPQA_diamond_accuracy: 3.12

.github/workflows/daily-run-test.yml

+8-8
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ on:
4444
type: string
4545
default: "['base_objective','chat_objective','chat_subjective','base_long_context','chat_long_context']"
4646
schedule:
47-
- cron: '15 14 * * *'
47+
- cron: '15 14 * * 0,2'
4848

4949
env:
5050
HF_DATASETS_OFFLINE: 1
@@ -87,7 +87,7 @@ jobs:
8787
name: my-artifact-${{ github.run_id }}
8888

8989
build-pypi-lmdeploy:
90-
if: ${{!cancelled() && (github.event_name != 'schedule' && inputs.build_lmdeploy)}}
90+
if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.build_lmdeploy)}}
9191
strategy:
9292
matrix:
9393
pyver: [py310]
@@ -127,7 +127,7 @@ jobs:
127127
needs: ['build-pypi', 'build-pypi-lmdeploy']
128128
runs-on: volc_cu12
129129
environment: 'prod'
130-
timeout-minutes: 240 #4hours
130+
timeout-minutes: 120 #2hours
131131
steps:
132132
- name: Clone repository
133133
uses: actions/checkout@v2
@@ -148,7 +148,7 @@ jobs:
148148
uses: nick-fields/retry@v3
149149
with:
150150
max_attempts: 1
151-
timeout_minutes: 240
151+
timeout_minutes: 120
152152
command: |
153153
. ${{env.CONDA_PATH}}/bin/activate
154154
conda create -y --name ${{env.CONDA_ENV}} python=3.10
@@ -211,7 +211,7 @@ jobs:
211211
uses: nick-fields/retry@v3
212212
with:
213213
max_attempts: 1
214-
timeout_minutes: 120
214+
timeout_minutes: 180
215215
command: |
216216
. ${{env.CONDA_PATH}}/bin/activate
217217
conda activate ${{env.CONDA_ENV}}
@@ -230,7 +230,7 @@ jobs:
230230
regression_func: ${{fromJSON(github.event.inputs.regression_func_local || '["cmd","api","chat_sub_fullbench"]')}}
231231
runs-on: volc_cu12_local
232232
environment: 'prod'
233-
timeout-minutes: 240 #4hours
233+
timeout-minutes: 480 #8hours
234234
steps:
235235
- name: Clone repository
236236
uses: actions/checkout@v2
@@ -306,7 +306,7 @@ jobs:
306306
function_type: ${{fromJSON(github.event.inputs.fullbench_eval || '["base_objective","chat_objective","chat_subjective","base_long_context","chat_long_context"]')}}
307307
runs-on: volc_cu12
308308
environment: 'prod'
309-
timeout-minutes: 360 #6hours
309+
timeout-minutes: 480 #8hours
310310
steps:
311311
- name: Clone repository
312312
uses: actions/checkout@v2
@@ -323,7 +323,7 @@ jobs:
323323
uses: nick-fields/retry@v3
324324
with:
325325
max_attempts: 1
326-
timeout_minutes: 360
326+
timeout_minutes: 480
327327
command: |
328328
. ${{env.CONDA_PATH}}/bin/activate
329329
conda activate ${{env.CONDA_ENV}}

0 commit comments

Comments
 (0)