Commit fb6a0df

zhulinJulia24 and zhulin1 authored on Sep 4, 2024

[ci] fix test env for vllm and add vllm baselines (#1481)

Co-authored-by: zhulin1 <[email protected]>

1 parent da74cbf · commit fb6a0df

File tree: 8 files changed, +142 −49 lines changed
.github/scripts/oc_score_assert.py (+20 −14)

```diff
@@ -8,25 +8,29 @@
 
 chat_model_list = [
     'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
-    'gemma-2b-it-hf', 'gemma-7b-it-hf', 'internlm2_5-7b-chat-hf',
-    'internlm2_5-7b-chat-turbomind', 'internlm2-chat-1.8b-turbomind',
-    'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
-    'internlm2-chat-7b-sft-turbomind', 'internlm2_5-7b-chat-turbomind',
-    'llama-3-8b-instruct-hf', 'llama-3-8b-instruct-turbomind',
-    'mistral-7b-instruct-v0.2-hf', 'minicpm-2b-dpo-fp32-hf',
+    'deepseek-7b-chat-vllm', 'gemma-2b-it-hf', 'gemma-7b-it-hf',
+    'internlm2_5-7b-chat-hf', 'internlm2_5-7b-chat-turbomind',
+    'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind',
+    'internlm2-chat-7b-turbomind', 'internlm2-chat-7b-sft-turbomind',
+    'internlm2-chat-7b-vllm', 'llama-3-8b-instruct-hf',
+    'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
+    'mistral-7b-instruct-v0.2-vllm', 'minicpm-2b-dpo-fp32-hf',
     'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf',
-    'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf',
-    'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind',
+    'phi-3-mini-4k-instruct-hf', 'phi-3-small-8k-instruct-hf',
+    'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-turbomind',
+    'qwen2-7b-instruct-turbomind', 'qwen1.5-0.5b-chat-vllm',
     'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf', 'lmdeploy-api-test'
 ]
 base_model_list = [
-    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind', 'gemma-2b-hf',
-    'gemma-7b-hf', 'internlm2-1.8b-turbomind', 'internlm2-7b-turbomind',
-    'internlm2_5-7b-turbomind', 'internlm2_5-7b-hf',
-    'internlm2-base-7b-turbomind', 'internlm2-base-7b-hf',
-    'llama-3-8b-turbomind', 'mistral-7b-v0.2-hf', 'qwen1.5-moe-a2.7b-hf',
+    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind',
+    'deepseek-moe-16b-base-vllm', 'gemma-2b-hf', 'gemma-7b-hf',
+    'internlm2_5-7b-hf', 'internlm2-7b-hf', 'internlm2-base-7b-hf',
+    'internlm2_5-7b-turbomind', 'internlm2-1.8b-turbomind',
+    'internlm2-7b-turbomind', 'internlm2-base-7b-hf',
+    'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
+    'mistral-7b-v0.2-hf', 'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf',
     'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind',
-    'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
+    'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
 ]
 dataset_list = ['gsm8k', 'race-middle', 'race-high']
 
@@ -75,6 +79,8 @@ class TestBase:
                               for p2 in dataset_list])
     def test_model_dataset_score(self, baseline_scores, result_scores, model,
                                  dataset):
+        if model == 'mistral-7b-v0.2-vllm' and dataset == 'race-high':
+            return
         base_score = baseline_scores.get(model).get(dataset)
         result_score = result_scores.get(model).get(dataset)
         assert_score(result_score, base_score)
```
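The guard added here returns early for the `mistral-7b-v0.2-vllm` / `race-high` combination, skipping its baseline check. `assert_score` itself is defined elsewhere in this file; as a rough sketch of the kind of tolerance check it performs (the signature and tolerance here are assumptions, not the actual helper):

```python
def assert_score(result_score, baseline_score, rela_tol=3):
    """Hypothetical baseline check: pass when the measured score is
    within +/- rela_tol points of the recorded baseline."""
    assert result_score is not None, 'missing result score'
    assert baseline_score is not None, 'missing baseline score'
    assert abs(result_score - baseline_score) <= rela_tol, (
        f'score {result_score} deviates from baseline '
        f'{baseline_score} by more than {rela_tol}')
```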

.github/scripts/oc_score_baseline.yaml (+55 −10)

```diff
@@ -18,6 +18,11 @@ deepseek-moe-16b-chat-hf:
   race-middle: 62
   race-high: 70
 
+deepseek-7b-chat-vllm:
+  gsm8k: 63
+  race-middle: 74
+  race-high: 79
+
 gemma-2b-it-hf:
   gsm8k: 14
   race-middle: 62
@@ -58,6 +63,11 @@ internlm2-chat-7b-sft-turbomind:
   race-middle: 91
   race-high: 92
 
+internlm2-chat-7b-vllm:
+  gsm8k: 63
+  race-middle: 90
+  race-high: 91
+
 llama-3-8b-instruct-hf:
   gsm8k: 77
   race-middle: 85
@@ -73,6 +83,11 @@ mistral-7b-instruct-v0.2-hf:
   race-middle: 82
   race-high: 78
 
+mistral-7b-instruct-v0.2-vllm:
+  gsm8k: 49
+  race-middle: 81
+  race-high: 77
+
 minicpm-2b-dpo-fp32-hf:
   gsm8k: 58
   race-middle: 66
@@ -93,6 +108,11 @@ phi-3-mini-4k-instruct-hf:
   race-middle: 81
   race-high: 84
 
+phi-3-small-8k-instruct-hf:
+  gsm8k: 88
+  race-middle: 89
+  race-high: 88
+
 qwen1.5-0.5b-chat-hf:
   gsm8k: 5
   race-middle: 55
@@ -108,6 +128,11 @@ qwen2-7b-instruct-turbomind:
   race-middle: 87
   race-high: 89
 
+qwen1.5-0.5b-chat-vllm:
+  gsm8k: 5
+  race-middle: 57
+  race-high: 51
+
 yi-1.5-6b-chat-hf:
   gsm8k: 72
   race-middle: 88
@@ -118,21 +143,26 @@ yi-1.5-9b-chat-hf:
   race-middle: 89
   race-high: 91
 
-deepseek-moe-16b-base-hf:
-  gsm8k: 25
-  race-middle: 35
-  race-high: 23
-
 lmdeploy-api-test:
   gsm8k: 90
   race-middle: 95
   race-high: 96
 
+deepseek-moe-16b-base-hf:
+  gsm8k: 25
+  race-middle: 35
+  race-high: 23
+
 deepseek-7b-base-turbomind:
   gsm8k: 21
   race-middle: 42
   race-high: 42
 
+deepseek-moe-16b-base-vllm:
+  gsm8k: 22
+  race-middle: 35
+  race-high: 20
+
 gemma-2b-hf:
   gsm8k: 19
   race-middle: 33
@@ -148,6 +178,16 @@ internlm2_5-7b-hf:
   race-middle: 92
   race-high: 91
 
+internlm2-7b-hf:
+  gsm8k: 65
+  race-middle: 77
+  race-high: 72
+
+internlm2-base-7b-hf:
+  gsm8k: 5
+  race-middle: 71
+  race-high: 74
+
 internlm2_5-7b-turbomind:
   gsm8k: 73
   race-middle: 90
@@ -163,11 +203,6 @@ internlm2-7b-turbomind:
   race-middle: 78
   race-high: 76
 
-internlm2-base-7b-hf:
-  gsm8k: 2
-  race-middle: 71
-  race-high: 74
-
 internlm2-base-7b-turbomind:
   gsm8k: 39
   race-middle: 75
@@ -183,6 +218,11 @@ mistral-7b-v0.2-hf:
   race-middle: 42
   race-high: 60
 
+mistral-7b-v0.2-vllm:
+  gsm8k: 45
+  race-middle: 42
+  race-high: 58
+
 qwen1.5-moe-a2.7b-hf:
   gsm8k: 64
   race-middle: 78
@@ -203,6 +243,11 @@ qwen2-7b-turbomind:
   race-middle: 88
   race-high: 88
 
+qwen1.5-0.5b-vllm:
+  gsm8k: 12
+  race-middle: 54
+  race-high: 59
+
 yi-1.5-6b-hf:
   gsm8k: 59
   race-middle: 81
```
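These baselines reach the test above through its `baseline_scores` fixture as a nested `{model: {dataset: score}}` mapping. A minimal sketch of such a loader, assuming PyYAML and the in-repo path (the fixture's real implementation may differ):

```python
import yaml  # PyYAML

def load_baseline_scores(path='.github/scripts/oc_score_baseline.yaml'):
    """Read the nested {model: {dataset: score}} baseline mapping."""
    with open(path, encoding='utf-8') as f:
        return yaml.safe_load(f)

baselines = load_baseline_scores()
print(baselines['mistral-7b-instruct-v0.2-vllm']['gsm8k'])  # -> 49
```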

.github/workflows/daily-run-test.yml (+35 −13)

```diff
@@ -18,33 +18,55 @@ env:
   HF_DATASETS_OFFLINE: 1
   TRANSFORMERS_OFFLINE: 1
   HF_HUB_OFFLINE: 1
+  TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas
 
 jobs:
+  build-pypi:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.7
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.7
+      - name: Build lagent
+        run: |
+          pip install wheel
+          python setup.py sdist bdist_wheel
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          if-no-files-found: error
+          path: dist/*
+          retention-days: 1
+          name: my-artifact-${{ github.run_id }}
+
   daily_run_test:
+    needs: build-pypi
     runs-on: self-hosted
     environment: 'prod'
-    timeout-minutes: 240 #4hours
+    timeout-minutes: 420 #7hours
     steps:
       - name: Clone repository
         uses: actions/checkout@v2
+      - name: Download Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: my-artifact-${{ github.run_id }}
       - name: Prepare - create conda env and install torch
         run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}} python=3.10
          conda activate ${{env.CONDA_ENV}}
+          pip install opencompass*.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.2+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.5.8+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
-          pip install bitsandbytes
-          pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
-          pip install xformers==0.0.25.post1 --cache-dir ${{env.PIP_CACHE_PATH}}
-          conda info --envs
-      - name: Prepare - Pip install code
-        run: |
-          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
-          pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
-          pip install human_eval transformers protobuf pytest --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.5+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip uninstall torch torchvision torchaudio -y
+          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
+          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
           conda info --envs
       - name: Prepare - prepare data and hf model
         run: |
```
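The pinned wheels have to agree with each other: torch 2.4.0 matches the `torch2.4` flash-attn build and the vllm 0.5.5 wheel, all compiled for cu118. A throwaway sanity check along these lines could catch a drifted environment before a 7-hour run starts — the pins are copied from the workflow above, but the script itself is not part of this commit:

```python
from importlib.metadata import PackageNotFoundError, version

# Pins copied from the workflow step above; update when the wheels are bumped.
EXPECTED = {'torch': '2.4.0', 'torchvision': '0.19.0',
            'torchaudio': '2.4.0', 'vllm': '0.5.5'}

def check_env():
    for pkg, want in EXPECTED.items():
        try:
            got = version(pkg)
        except PackageNotFoundError:
            raise SystemExit(f'{pkg} is not installed')
        # startswith tolerates local version suffixes such as "+cu118"
        if not got.startswith(want):
            raise SystemExit(f'{pkg}=={got}, expected {want}*')
    print('environment pins OK')

if __name__ == '__main__':
    check_env()
```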

docs/en/advanced_guides/evaluation_turbomind.md (+1 −1)

```diff
@@ -73,6 +73,6 @@ You are expected to get the evaluation results after the inference and evaluation.
 **Note**:
 
 - If you want to pass more arguments for `engine_config` and `gen_config` in the evaluation config file, please refer to [TurbomindEngineConfig](https://lmdeploy.readthedocs.io/en/latest/inference/pipeline.html#turbomindengineconfig)
-  and [EngineGenerationConfig](https://lmdeploy.readthedocs.io/en/latest/inference/pipeline.html#generationconfig)
+  and [GenerationConfig](https://lmdeploy.readthedocs.io/en/latest/inference/pipeline.html#generationconfig)
 - If you evaluate the InternLM Chat model, please use configuration file `eval_internlm_chat_turbomind.py`
 - If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by changing to the setting `models = [internlm_7b]` in the last line.
```
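As a concrete illustration of the note above, an evaluation config can forward both dicts to lmdeploy. A sketch only — the `engine_config`/`gen_config` field names follow the linked docs, while the model path and values are placeholders:

```python
from opencompass.models import TurboMindModel

models = [
    dict(
        type=TurboMindModel,
        abbr='internlm2-chat-7b-turbomind',
        path='internlm/internlm2-chat-7b',  # placeholder model path
        # forwarded to lmdeploy's TurbomindEngineConfig
        engine_config=dict(session_len=8192, max_batch_size=8, tp=1),
        # forwarded to lmdeploy's GenerationConfig
        gen_config=dict(top_k=1, top_p=0.8, temperature=1.0,
                        max_new_tokens=1024),
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]
```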

docs/zh_cn/advanced_guides/evaluation_turbomind.md (+1 −1)

The Chinese version of the same guide receives the matching link fix (content translated here):

```diff
@@ -70,6 +70,6 @@ python run.py configs/eval_internlm_turbomind.py -w outputs/turbomind/internlm-2
 
 **Note:**
 
-- If you want to pass more arguments through the `engine_config` and `gen_config` fields in the evaluation config file, please refer to [TurbomindEngineConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#turbomindengineconfig) and [EngineGenerationConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#generationconfig)
+- If you want to pass more arguments through the `engine_config` and `gen_config` fields in the evaluation config file, please refer to [TurbomindEngineConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#turbomindengineconfig) and [GenerationConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#generationconfig)
 - If you evaluate the InternLM Chat model, please use the configuration file `eval_internlm_chat_turbomind.py`
 - If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py`, setting the `models` field to `models = [internlm_7b]`
```

opencompass/models/lmdeploy_pytorch.py (+19 −3)

```diff
@@ -60,8 +60,8 @@ def __init__(self,
             engine_config.thread_safe = True
 
         if gen_config is not None:
-            from lmdeploy.messages import EngineGenerationConfig
-            gen_config = EngineGenerationConfig(**gen_config)
+            from lmdeploy.messages import GenerationConfig
+            gen_config = GenerationConfig(**gen_config)
 
         self.logger = get_logger()
         tm_model = tm.Engine(path, engine_config)
@@ -70,6 +70,22 @@ def __init__(self,
             tm_model.create_instance() for i in range(concurrency)
         ]
         self.generator_ids = [i + 1 for i in range(concurrency)]
+
+        from transformers import GenerationConfig
+        try:
+            generation_config = GenerationConfig.from_pretrained(path)
+        except Exception:
+            generation_config = None
+        if generation_config and hasattr(generation_config, 'eos_token_id'):
+            if gen_config.stop_words is None:
+                stop_words = []
+                if isinstance(generation_config.eos_token_id, int):
+                    stop_words.append(generation_config.eos_token_id)
+                else:
+                    assert isinstance(generation_config.eos_token_id, list)
+                    for token_id in generation_config.eos_token_id:
+                        stop_words.append(token_id)
+                gen_config.stop_words = stop_words
         self.gen_config = gen_config
         self.end_str = end_str
         self.major_version, self.minor_version, _ = version_info
@@ -135,7 +151,7 @@ def _generate(self,
             prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
-            gen_config (EngineGenerationConfig, optional): Generation
+            gen_config (GenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
             end_str (str, optional): Whether to trim generated strings
                 with end_str if the model has special ending strings
```
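The block added to `__init__` normalizes Hugging Face's `eos_token_id` convention — a single int or a list of ints — into one flat stop-word list, and only when the caller has not set `stop_words` explicitly. The same idea in isolation (a sketch; `GenerationConfig` here is the `transformers` class, and the empty-list fallback is an assumption):

```python
from transformers import GenerationConfig

def eos_stop_words(model_path: str) -> list:
    """Collect eos token ids from a HF generation config as a flat list."""
    try:
        cfg = GenerationConfig.from_pretrained(model_path)
    except Exception:
        return []  # no generation_config.json shipped with the model
    eos = getattr(cfg, 'eos_token_id', None)
    if eos is None:
        return []
    # HF stores either a single id or a list of ids
    return [eos] if isinstance(eos, int) else list(eos)
```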

opencompass/models/turbomind.py (+3 −3)

```diff
@@ -113,8 +113,8 @@ def generate(self,
         gen_config['stop_words'] = list(set(stop_words))
         gen_config.setdefault('min_new_tokens', 1)
 
-        from lmdeploy.messages import EngineGenerationConfig
-        gen_config = EngineGenerationConfig(**gen_config)
+        from lmdeploy.messages import GenerationConfig
+        gen_config = GenerationConfig(**gen_config)
 
         results = []
         for batch_input in batch_inputs:
@@ -160,7 +160,7 @@ def _generate(self,
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
-            gen_config (EngineGenerationConfig, optional): Generation
+            gen_config (GenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
             end_str (str, optional): Whether to trim generated strings
                 with end_str if the model has special ending strings
```
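All three model wrappers change for the same reason: lmdeploy renamed `EngineGenerationConfig` to `GenerationConfig`. Code that must run against both old and new lmdeploy releases could hedge with an import fallback — a sketch of that pattern, not something this commit does:

```python
try:
    # newer lmdeploy: the engine consumes GenerationConfig directly
    from lmdeploy.messages import GenerationConfig
except ImportError:
    # older releases exposed the engine-level config under another name
    from lmdeploy.messages import EngineGenerationConfig as GenerationConfig

gen_config = GenerationConfig(max_new_tokens=512, top_k=1)
```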

opencompass/models/turbomind_with_tf_above_v4_33.py (+8 −4)

```diff
@@ -115,21 +115,25 @@ def generate(self,
         batch_messages = [messages[i:i + self.concurrency] for i in range(0, len(messages), self.concurrency)]
 
         stop_words = list(set(self.stop_words + stopping_criteria))
+        encode_stop_words = []
+        if stop_words is not None and len(stop_words) > 0:
+            for words in stop_words:
+                encode_stop_words += self.tokenizer.encode(words, add_bos=False)
+
         DEFAULT_GEN_CONFIG = {
             'max_new_tokens': max_out_len,
             'min_new_tokens': 1,
             'top_k': 1,
-            'stop_words': stop_words,
+            'stop_words': encode_stop_words,
         }
         gen_config = copy.deepcopy(DEFAULT_GEN_CONFIG)
         gen_config.update(self.gen_config)
         if do_sample:
             gen_config['top_k'] = 1000
             gen_config['temperature'] = temperature
 
-        from lmdeploy.messages import EngineGenerationConfig, GenerationConfig
+        from lmdeploy.messages import GenerationConfig
         gen_config = GenerationConfig(**gen_config)
-        gen_config = EngineGenerationConfig.From(gen_config, self.tokenizer)
 
         results = []
         for batch_message in batch_messages:
@@ -160,7 +164,7 @@ def _generate(self,
             prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
-            gen_config (EngineGenerationConfig, optional): Generation
+            gen_config (GenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
         Returns:
             str: The generated string.
```
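The behavioral change in this file is that stop words now reach lmdeploy as token ids rather than raw strings: `EngineGenerationConfig.From(gen_config, self.tokenizer)`, which used to do that conversion, is gone, so the wrapper encodes them itself. The encoding step in isolation (a sketch; the tokenizer is assumed to expose the `encode(text, add_bos=...)` interface used above):

```python
def encode_stop_words(tokenizer, stop_words):
    """Flatten string stop words into a single list of token ids.

    Note: each multi-token stop word contributes all of its token ids
    individually, mirroring the encoding loop in the diff above.
    """
    token_ids = []
    for word in stop_words or []:
        token_ids += tokenizer.encode(word, add_bos=False)
    return token_ids
```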
