from mmengine.config import read_base
from copy import deepcopy
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, math_postprocess_v2
from opencompass.utils.text_postprocessors import first_option_postprocess

with read_base():
    from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets

# Maximum number of few-shot examples for this dataset is 4
num_shot = 0
# Whether to generate a reasoning path (chain of thought); only used for single-choice questions
with_reasoning = True
# Whether to use circular evaluation
with_circular_eval = True
# Whether to use PPL (perplexity) mode for the single-choice test
use_ppl_single_choice = False

assert 0 <= num_shot <= 4
if num_shot == 0:
    prompts = zero_shot_prompts
else:
    # Each few-shot prompt is a flat list of dialogue rounds; keep the last
    # `num_shot` examples (two rounds each) plus the final two query rounds.
    prompts = {name: p[- 2 * num_shot - 2:] for name, p in few_shot_prompts.items()}

mathbench_datasets = []
for _split in mathbench_sets:
    for _name in mathbench_sets[_split]:
        # Pick the prompt template; single-choice sets have an optional
        # chain-of-thought ("with_reasoning") variant.
        if 'single_choice' in _name and with_reasoning:
            template_round = prompts[_name + '_with_reasoning']
        else:
            template_round = prompts[_name]

        # Single-choice predictions are reduced to an option letter;
        # open-ended answers are normalized by the math postprocessor.
        if 'single_choice' in _name:
            pred_postprocessor = dict(type=first_option_postprocess, options='ABCD')
        else:
            pred_postprocessor = dict(type=math_postprocess_v2)

        # Circular evaluation scores single-choice questions over all rotated
        # option orders; otherwise plain accuracy is used.
        if 'single_choice' in _name and with_circular_eval:
            evaluator = dict(type=CircularEvaluator)
        else:
            evaluator = dict(type=AccEvaluator)
        # assemble the final config
        mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')
        if use_ppl_single_choice and 'single_choice' in _name and not with_reasoning:
            # PPL mode: build one template per option letter so the
            # PPLInferencer can score each candidate answer by perplexity.
            template = {}
            for answer in ['A', 'B', 'C', 'D']:
                one_template_round = deepcopy(template_round)
                one_template_round[-1]['prompt'] = one_template_round[-1]['prompt'].format(answer=answer)
                template[answer] = dict(round=one_template_round)
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=template),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=PPLInferencer),
            )
        else:
            # Generation mode: free-form decoding, with answers extracted by
            # the postprocessor configured above.
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer, max_out_len=2048),
            )
        mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor)

        mathbench_datasets.append(
            dict(
                abbr='mathbench-' + _split + '-' + _name,
                type=MathBenchDataset,
                path=f'data/mathbench_v1/{_split}',
                name=_name,
                with_circular=with_circular_eval,
                reader_cfg=mathbench_reader_cfg,
                infer_cfg=mathbench_infer_cfg,
                eval_cfg=mathbench_eval_cfg,
            )
        )
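
# Usage sketch (an assumption, not defined in this config file): the
# `mathbench_datasets` list above is normally imported into an OpenCompass
# entry config and assigned to `datasets`, alongside a `models` list. The
# relative import paths below are placeholders.
#
#     with read_base():
#         from .mathbench_gen import mathbench_datasets
#         from ..models.example_model import models  # hypothetical model config
#
#     datasets = mathbench_datasets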