[Feature] Update MathBench & WikiBench for FullBench #1521

Merged · 7 commits · Sep 18, 2024
81 changes: 81 additions & 0 deletions configs/datasets/MathBench/mathbench_2024_gen_50a320.py
@@ -0,0 +1,81 @@
from mmengine.config import read_base
from copy import deepcopy
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, math_postprocess_v2
from opencompass.utils.text_postprocessors import first_option_postprocess

with read_base():
    from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets

# Max for this dataset is 4
num_shot = 0
# Generate reasoning path or not, only for single choice
with_reasoning = True
# Use circular evaluation or not
with_circular_eval = True
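# (For context: CircularEvaluator implements CircularEval, where each
# multiple-choice question is re-asked with rotated option orders; the
# stricter circular metric counts a question only if every rotation is
# answered correctly.)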
# Use PPL mode in single choice test or not
use_ppl_single_choice = False

assert 0 <= num_shot <= 4
if num_shot == 0:
    prompts = zero_shot_prompts
else:
    prompts = {name: p[-2 * num_shot - 2:] for name, p in few_shot_prompts.items()}
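# (Assumed layout of few_shot_prompts, per mathbench_prompt.py: each list
# holds (HUMAN, BOT) demonstration pairs followed by a final question/answer
# pair, so this slice keeps the last num_shot pairs plus that trailing pair.)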

mathbench_datasets = []
for _split in mathbench_sets:
    for _name in mathbench_sets[_split]:
        if 'single_choice' in _name:
            if with_reasoning:
                template_round = prompts[_name + '_with_reasoning']
            else:
                template_round = prompts[_name]
        else:
            template_round = prompts[_name]

        if 'single_choice' in _name:
            pred_postprocessor = dict(type=first_option_postprocess, options='ABCD')
        else:
            pred_postprocessor = dict(type=math_postprocess_v2)

        if 'single_choice' in _name and with_circular_eval:
            evaluator = dict(type=CircularEvaluator)
        else:
            evaluator = dict(type=AccEvaluator)

        # assemble the final config
        mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')
        if use_ppl_single_choice and 'single_choice' in _name and not with_reasoning:
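            # PPL mode: build one template per candidate option by substituting
            # the letter into the final turn; PPLInferencer scores each completed
            # prompt and the most likely option becomes the prediction.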
            template = {}
            for answer in ['A', 'B', 'C', 'D']:
                one_template_round = deepcopy(template_round)
                one_template_round['round'][-1]['prompt'] = one_template_round['round'][-1]['prompt'].format(answer=answer)
                template[answer] = dict(round=one_template_round)
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=template),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=PPLInferencer),
            )
        else:
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer, max_out_len=2048),
            )
        mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor)

        mathbench_datasets.append(
            dict(
                abbr='mathbench-' + _split + '-' + _name,
                type=MathBenchDataset,
                path=f'data/mathbench_v1/{_split}',
                name=_name,
                with_circular=with_circular_eval,
                reader_cfg=mathbench_reader_cfg,
                infer_cfg=mathbench_infer_cfg,
                eval_cfg=mathbench_eval_cfg,
            )
        )
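For orientation, the mathbench_datasets list assembled above is consumed like any other OpenCompass dataset config. A minimal usage sketch (hypothetical top-level eval config; the import path is assumed from this file's location):

from mmengine.config import read_base

with read_base():
    # Pull in the per-split MathBench configs defined in the file above.
    from .datasets.MathBench.mathbench_2024_gen_50a320 import mathbench_datasets

# OpenCompass runners read the aggregated `datasets` list.
datasets = [*mathbench_datasets]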
6 changes: 6 additions & 0 deletions configs/datasets/MathBench/mathbench_prompt.py
@@ -11,6 +11,12 @@
    'single_choice_en': [
        dict(role='HUMAN', prompt='Question: Here is a multiple-choice question about mathematics. Please provide the correct answer option directly.\nHere is the question you need to answer:\n{question}\nThe answer is:'),
    ],
+    'cloze_en': [
+        dict(role='HUMAN', prompt='{question}\nPlease reason step by step, and put your final answer within \\boxed{}.'),
+    ],
+    'cloze_cn': [
+        dict(role='HUMAN', prompt='{question}\n请一步一步推理,并在最后用\\boxed{}给出你的答案。'),
+    ]
}

few_shot_prompts = {
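A note on the new cloze prompts: the {question} placeholder is substituted by OpenCompass's prompt templating, while the braces in \\boxed{} are a literal formatting instruction to the model. A quick rendering sketch (str.replace is used here only because str.format would trip over the literal braces; this is not how OpenCompass renders templates):

# Illustration of how the cloze_en prompt reads once {question} is filled in.
prompt = '{question}\nPlease reason step by step, and put your final answer within \\boxed{}.'
print(prompt.replace('{question}', 'What is 2 + 2?'))
# What is 2 + 2?
# Please reason step by step, and put your final answer within \boxed{}.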
2 changes: 1 addition & 1 deletion configs/datasets/wikibench/wikibench_gen.py
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
-    from .wikibench_gen_f96ece import wikibench_datasets  # noqa: F401, F403
+    from .wikibench_gen_0978ad import wikibench_datasets  # noqa: F401, F403
Collaborator: Shouldn't we create a new prompt version while keeping the old one? Removing the existing file might cause a backward-compatibility (BC) issue.
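To make the concern concrete, here is a minimal sketch of the kind of downstream config that breaks (hypothetical user config, not part of this PR):

from mmengine.config import read_base

# A config pinned to the old variant fails to parse once the file is gone;
# keeping wikibench_gen_f96ece.py alongside the new version avoids this.
with read_base():
    from .wikibench_gen_f96ece import wikibench_datasets  # breaks if the file is removed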

@@ -7,7 +7,7 @@


single_choice_prompts = {
-    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识给出正确的答案选项。\n下面是你要回答的题目:\n{question}\n答案选项:',
+    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识一步步推理,并在最后用“所以答案为选项X”给出答案,其中“X”为选项A,B,C,D中你认为正确的选项。\n下面是你要回答的题目:\n{question}\n让我们一步步推理:',
}

wikibench_sets = {
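Since the rewritten prompt asks for step-by-step reasoning that ends with “所以答案为选项X” (“so the answer is option X”), the final letter has to be pulled out of free-form text. A sketch of that extraction, assuming the first_option_postprocess imported for MathBench above covers this phrasing:

from opencompass.utils.text_postprocessors import first_option_postprocess

# Hypothetical model output following the new chain-of-thought prompt:
pred = '……因此,所以答案为选项B'
print(first_option_postprocess(pred, options='ABCD'))  # expected: 'B'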
81 changes: 81 additions & 0 deletions opencompass/configs/datasets/MathBench/mathbench_2024_gen_50a320.py
@@ -0,0 +1,81 @@
from mmengine.config import read_base
from copy import deepcopy
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, math_postprocess_v2
from opencompass.utils.text_postprocessors import first_option_postprocess

with read_base():
    from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets

# Max for this dataset is 4
num_shot = 0
# Generate reasoning path or not, only for single choice
with_reasoning = True
# Use circular evaluation or not
with_circular_eval = True
# Use PPL mode in single choice test or not
use_ppl_single_choice = False

assert 0 <= num_shot <= 4
if num_shot == 0:
    prompts = zero_shot_prompts
else:
    prompts = {name: p[-2 * num_shot - 2:] for name, p in few_shot_prompts.items()}

mathbench_datasets = []
for _split in mathbench_sets:
    for _name in mathbench_sets[_split]:
        if 'single_choice' in _name:
            if with_reasoning:
                template_round = prompts[_name + '_with_reasoning']
            else:
                template_round = prompts[_name]
        else:
            template_round = prompts[_name]

        if 'single_choice' in _name:
            pred_postprocessor = dict(type=first_option_postprocess, options='ABCD')
        else:
            pred_postprocessor = dict(type=math_postprocess_v2)

        if 'single_choice' in _name and with_circular_eval:
            evaluator = dict(type=CircularEvaluator)
        else:
            evaluator = dict(type=AccEvaluator)

        # assemble the final config
        mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')
        if use_ppl_single_choice and 'single_choice' in _name and not with_reasoning:
            template = {}
            for answer in ['A', 'B', 'C', 'D']:
                one_template_round = deepcopy(template_round)
                one_template_round['round'][-1]['prompt'] = one_template_round['round'][-1]['prompt'].format(answer=answer)
                template[answer] = dict(round=one_template_round)
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=template),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=PPLInferencer),
            )
        else:
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer, max_out_len=2048),
            )
        mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor)

        mathbench_datasets.append(
            dict(
                abbr='mathbench-' + _split + '-' + _name,
                type=MathBenchDataset,
                path=f'data/mathbench_v1/{_split}',
                name=_name,
                with_circular=with_circular_eval,
                reader_cfg=mathbench_reader_cfg,
                infer_cfg=mathbench_infer_cfg,
                eval_cfg=mathbench_eval_cfg,
            )
        )
6 changes: 6 additions & 0 deletions opencompass/configs/datasets/MathBench/mathbench_prompt.py
@@ -11,6 +11,12 @@
    'single_choice_en': [
        dict(role='HUMAN', prompt='Question: Here is a multiple-choice question about mathematics. Please provide the correct answer option directly.\nHere is the question you need to answer:\n{question}\nThe answer is:'),
    ],
+    'cloze_en': [
+        dict(role='HUMAN', prompt='{question}\nPlease reason step by step, and put your final answer within \\boxed{}.'),
+    ],
+    'cloze_cn': [
+        dict(role='HUMAN', prompt='{question}\n请一步一步推理,并在最后用\\boxed{}给出你的答案。'),
+    ]
}

few_shot_prompts = {
2 changes: 1 addition & 1 deletion opencompass/configs/datasets/wikibench/wikibench_gen.py
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
-    from .wikibench_gen_f96ece import wikibench_datasets  # noqa: F401, F403
+    from .wikibench_gen_0978ad import wikibench_datasets  # noqa: F401, F403
@@ -7,7 +7,7 @@


single_choice_prompts = {
-    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识给出正确的答案选项。\n下面是你要回答的题目:\n{question}\n答案选项:',
+    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识一步步推理,并在最后用“所以答案为选项X”给出答案,其中“X”为选项A,B,C,D中你认为正确的选项。\n下面是你要回答的题目:\n{question}\n让我们一步步推理:',
}

wikibench_sets = {
4 changes: 4 additions & 0 deletions opencompass/utils/datasets_info.py
@@ -408,6 +408,10 @@
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu_pro.zip",
"md5": "e3200c7380f4cea5f13c768f2815fabb",
},
"WikiBench": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/WikiBench.zip",
Collaborator: Is this an internal dataset?
"md5": "f19f9857517148c876d9cf1b6f4c63b1",
},
"/Longbench": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/Longbench.zip",
"md5": "ab0cb9e520ae5cfb899bf38b564249bb",