Skip to content

Commit 9fa80b0

Browse files
author
Fengzhe Zhou
authored May 27, 2024
[Feat] Update charm summary (#1194)
1 parent 608ff58 commit 9fa80b0

File tree

5 files changed

+153
-36
lines changed

5 files changed

+153
-36
lines changed
 

‎configs/_internal

-1
This file was deleted.

‎configs/datasets/CHARM/charm_rea_gen_f8fca2.py

+5-35
Original file line numberDiff line numberDiff line change
@@ -32,41 +32,11 @@
3232
Other_template = '请按照给定的例子回答问题。\n{_hint}\n\nQ:{{input}}\nA:'
3333

3434
settings = [
35-
(
36-
'Direct',
37-
'',
38-
dataset_path_ZH,
39-
fewshot_example_path_ZH,
40-
Other_template,
41-
),
42-
(
43-
'ZH-CoT',
44-
'让我们一步一步来思考。',
45-
dataset_path_ZH,
46-
fewshot_example_path_ZH,
47-
Other_template,
48-
),
49-
(
50-
'EN-CoT',
51-
"Let's think step by step.",
52-
dataset_path_ZH,
53-
fewshot_example_path_ZH,
54-
Other_template,
55-
),
56-
(
57-
'XLT',
58-
"""You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""",
59-
dataset_path_ZH,
60-
fewshot_example_path_ZH,
61-
XLT_template,
62-
),
63-
(
64-
'Translate-EN',
65-
"Let's think step by step.",
66-
dataset_path_TransEn,
67-
fewshot_example_path_TransEn,
68-
Translate_EN_template,
69-
),
35+
('Direct', '', dataset_path_ZH, fewshot_example_path_ZH, Other_template),
36+
('ZH-CoT', '让我们一步一步来思考。', dataset_path_ZH, fewshot_example_path_ZH, Other_template),
37+
('EN-CoT', "Let's think step by step.", dataset_path_ZH, fewshot_example_path_ZH, Other_template),
38+
('XLT', """You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""", dataset_path_ZH, fewshot_example_path_ZH, XLT_template),
39+
('Translate-EN', "Let's think step by step.", dataset_path_TransEn, fewshot_example_path_TransEn, Translate_EN_template),
7040
]
7141

7242
charm_rea_datasets = []

‎configs/eval_charm.py

+20
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,26 @@
2727

2828
# from .models.hf_llama.hf_llama3_8b_instruct import models as llama3_8b_instruct_model
2929
# from .models.hf_llama.hf_llama3_70b_instruct import models as llama3_70b_instruct_model
30+
from .summarizers.charm_rea import summarizer
3031

3132
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
3233
work_dir = './outputs/CHARM/chat/'
34+
35+
# dataset version metric mode internlm2-chat-7b-turbomind
36+
# ------------------------------------------------------------- --------- ------------- ------ -----------------------------
37+
# charm-rea-Direct - naive_average gen 49.51
38+
# charm-rea-ZH-CoT - naive_average gen 61.33
39+
# charm-rea-EN-CoT - naive_average gen 54.55
40+
# charm-rea-XLT - naive_average gen 58.46
41+
# charm-rea-Translate-EN - naive_average gen 56.15
42+
# - - - -
43+
# charm-rea-Chinese_Direct - naive_average gen 47.14
44+
# charm-rea-Chinese_ZH-CoT - naive_average gen 58.40
45+
# charm-rea-Chinese_EN-CoT - naive_average gen 48.31
46+
# charm-rea-Chinese_XLT - naive_average gen 53.57
47+
# charm-rea-Chinese_Translate-EN - naive_average gen 48.21
48+
# charm-rea-Global_Direct - naive_average gen 51.88
49+
# charm-rea-Global_ZH-CoT - naive_average gen 64.26
50+
# charm-rea-Global_EN-CoT - naive_average gen 60.79
51+
# charm-rea-Global_XLT - naive_average gen 63.36
52+
# charm-rea-Global_Translate-EN - naive_average gen 64.10

‎configs/summarizers/charm_rea.py

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from mmengine.config import read_base
2+
3+
with read_base():
4+
from .groups.charm_rea import charm_rea_summary_groups
5+
6+
# Summarizer configuration for the CHARM reasoning ("charm-rea") results.
#
# `dataset_abbrs` fixes the order of the rows in the summary table:
#   1. one row per prompt setting,
#   2. one row per (region, prompt) pair,
#   3. one row per (prompt, region, task) triple.
# The '' entries sit between those sections; presumably they are rendered as
# separator rows in the report -- confirm against the summarizer in use.
#
# The per-task names are generated inside a comprehension on purpose: its loop
# variables stay local to the comprehension, so no extra module-level names
# are added to this config file.
summarizer = dict(
    dataset_abbrs=[
        # Section 1: one row per prompt setting.
        'charm-rea-Direct',
        'charm-rea-ZH-CoT',
        'charm-rea-EN-CoT',
        'charm-rea-XLT',
        'charm-rea-Translate-EN',
        '',
        # Section 2: one row per region and prompt setting.
        'charm-rea-Chinese_Direct',
        'charm-rea-Chinese_ZH-CoT',
        'charm-rea-Chinese_EN-CoT',
        'charm-rea-Chinese_XLT',
        'charm-rea-Chinese_Translate-EN',
        'charm-rea-Global_Direct',
        'charm-rea-Global_ZH-CoT',
        'charm-rea-Global_EN-CoT',
        'charm-rea-Global_XLT',
        'charm-rea-Global_Translate-EN',
        '',
        # Section 3: every task, ordered by prompt, then region, then task.
        *[
            f'charm-rea-{region}_{task}_{prompt}'
            for prompt in ('Direct', 'ZH-CoT', 'EN-CoT', 'XLT', 'Translate-EN')
            for region in ('Chinese', 'Global')
            for task in (
                'Anachronisms_Judgment',
                'Movie_and_Music_Recommendation',
                'Natural_Language_Inference',
                'Reading_Comprehension',
                'Sequence_Understanding',
                'Sport_Understanding',
                'Time_Understanding',
            )
        ],
    ],
    # Concatenate every list in scope whose name ends with '_summary_groups'
    # (here: the `charm_rea_summary_groups` imported under `read_base()`).
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)
‎configs/summarizers/groups/charm_rea.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Building blocks of the CHARM reasoning dataset abbreviations, which have the
# form 'charm-rea-<region>_<task>_<prompt>'.
charm_tasks = [
    'Anachronisms_Judgment',
    'Movie_and_Music_Recommendation',
    'Natural_Language_Inference',
    'Reading_Comprehension',
    'Sequence_Understanding',
    'Sport_Understanding',
    'Time_Understanding',
]
regions = ['Chinese', 'Global']
prompts = ['Direct', 'ZH-CoT', 'EN-CoT', 'XLT', 'Translate-EN']


# Each entry is {'name': <group abbreviation>, 'subsets': [<member abbreviations>]}.
charm_rea_summary_groups = []

# First level: for every prompt and region, one group
# 'charm-rea-<region>_<prompt>' whose subsets are that region's task datasets.
for prompt in prompts:
    for region in regions:
        subsets = [f'charm-rea-{region}_{task}_{prompt}' for task in charm_tasks]
        charm_rea_summary_groups.append(
            dict(name=f'charm-rea-{region}_{prompt}', subsets=subsets))

# Second level: for every prompt, one group 'charm-rea-<prompt>' whose subsets
# are the region-level groups created by the loop above.
for prompt in prompts:
    subsets = [f'charm-rea-{region}_{prompt}' for region in regions]
    charm_rea_summary_groups.append(
        dict(name=f'charm-rea-{prompt}', subsets=subsets))

0 commit comments

Comments
 (0)
Please sign in to comment.