|
| 1 | +from opencompass.openicl.icl_prompt_template import PromptTemplate |
| 2 | +from opencompass.openicl.icl_retriever import ZeroRetriever |
| 3 | +from opencompass.openicl.icl_inferencer import GenInferencer |
| 4 | +from opencompass.datasets import CaLMDataset, CaLMEvaluator |
| 5 | + |
# Lookup table: CaLM task name (including its '_CN' / '_EN' language suffix)
# -> directory prefix that is prepended to the task's JSON file name when the
# dataset path is built.
#
# The table is written as "directory -> task stems" and then expanded into
# one entry per language.  Expansion order is directory, then stem, then
# CN before EN, so the resulting dict has the same keys, values and insertion
# order as a hand-written flat table would.
_calm_task_groups = {
    # association/
    'association/correlation/': (
        'CORR-B_correlation',
    ),
    'association/explaining_away_effect/': (
        'EAE-B_exp-away',
    ),
    # causal_discovery/
    'causal_discovery/abstract_reasoning/': (
        'AR-B_CaLM-AR',
    ),
    'causal_discovery/causal_attribution/': (
        'CA-B_FA',
        'CA-B_FP',
    ),
    'causal_discovery/event_causality_identification/': (
        'ECI-B_CTB',
        'ECI-B_ESC',
        'ECI-B_MAVEN-ERE',
    ),
    'causal_discovery/pairwise_causal_discovery/': (
        'PCD-B_COPA',
        'PCD-B_E-CARE',
        'PCD-C_COPA',
        'PCD-C_E-CARE',
    ),
    # counterfactual/
    'counterfactual/actual_causality/': (
        'AC-B_causal_judgement',
    ),
    'counterfactual/causal_explanation_generation/': (
        'CEG-O_E-CARE',
    ),
    'counterfactual/counterfactual_reasoning/': (
        'CR-B_det-counterfactual',
        'CR-C_CRASS',
    ),
    'counterfactual/effect_of_the_treatment_on_the_treated/': (
        'ETT-B_ETT-natural',
        'ETT-P_ETT-basic',
        'ETT-P_ETT-hard',
    ),
    'counterfactual/natural_direct_effect/': (
        'NDE-B_NDE-natural',
        'NDE-P_NDE-basic',
        'NDE-P_NDE-hard',
    ),
    'counterfactual/natural_indirect_effect/': (
        'NIE-B_NIE-natural',
        'NIE-P_NIE-basic',
        'NIE-P_NIE-hard',
    ),
    'counterfactual/probability_of_necessity/': (
        'PN-P_PN-basic',
        'PN-P_PN-hard',
    ),
    'counterfactual/probability_of_sufficiency/': (
        'PS-P_PS-basic',
        'PS-P_PS-hard',
    ),
    # intervention/
    'intervention/average_treatment_effect/': (
        'ATE-B_ATE-natural',
        'ATE-P_ATE-basic',
        'ATE-P_ATE-hard',
    ),
    'intervention/backdoor_adjustment_set/': (
        'BAS-B_backadj',
        'BAS-C_max-BAS',
        'BAS-C_min-BAS',
        'BAS-C_mix-BAS',
    ),
    'intervention/causal_effect_identification/': (
        'CEI-B_0.2-UC',
        'CEI-B_0.4-UC',
        'CEI-B_0.6-UC',
        'CEI-B_0.8-UC',
    ),
    'intervention/collider_bias/': (
        'CB-B_collider-bias',
    ),
    'intervention/controlled_direct_effect/': (
        'CDE-B_CDE-natural',
        'CDE-P_CDE-basic',
        'CDE-P_CDE-hard',
    ),
    'intervention/frontdoor_adjustment_set/': (
        'FAS-C_FAS',
    ),
    'intervention/instrumental_variable/': (
        'IV-C_CaLM-IV',
    ),
}

task_hiearchy_dict = {
    f'{_stem}_{_lang}': _directory
    for _directory, _stems in _calm_task_groups.items()
    for _stem in _stems
    for _lang in ('CN', 'EN')
}

# Drop the scaffolding so that only ``task_hiearchy_dict`` remains visible
# in the module namespace.
del _calm_task_groups
| 124 | + |
# Reader settings shared by every CaLM dataset entry: the prompt is filled
# from the 'question' column and the reference is read from 'gt_item'.
calm_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'gt_item',
}
| 128 | + |
# Unique task stems, i.e. task names with the trailing '_CN' / '_EN' removed.
# Sorted on purpose: iterating a plain ``set`` of strings yields an order that
# depends on per-process string hash randomization, which would make the order
# of ``calm_datasets`` differ from one run to the next.
calm_all_sets = sorted({key[:-3] for key in task_hiearchy_dict})

# One dataset entry per (task stem, prompt style).  The 'basic' style is paired
# with the '_EN' task file and 'basic-CN' with the '_CN' task file.
calm_datasets = []
for _name in calm_all_sets:
    for _prompt_style in ['basic', 'basic-CN']:
        _task_name = _name + ('_CN' if _prompt_style.endswith('-CN') else '_EN')
        _path = f'./data/calm/{task_hiearchy_dict[_task_name]}{_task_name}.json'

        # Zero-shot generation: the question text is the whole prompt.
        calm_infer_cfg = dict(
            prompt_template=dict(
                type=PromptTemplate,
                template='{question}'),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer, max_out_len=500))

        # The evaluator is told which task and prompt style produced the
        # predictions.
        calm_eval_cfg = dict(evaluator=dict(
            type=CaLMEvaluator,
            core_metrics=True,
            error_analysis=True,
            prompt_style=_prompt_style,
            task=_task_name))

        calm_datasets.append(
            dict(
                abbr=f'calm_{_task_name}',
                type=CaLMDataset,
                path=_path,
                prompt_style=_prompt_style,
                reader_cfg=calm_reader_cfg,
                infer_cfg=calm_infer_cfg,
                eval_cfg=calm_eval_cfg)
        )

# Remove the loop temporaries so they do not linger in the module namespace.
del _prompt_style, _task_name, _path, _name
0 commit comments