For larger parameterized models like Llama-7B, refer to other examples provided.
In OpenCompass, each evaluation task consists of the model to be evaluated and the dataset. The entry point for evaluation is `run.py`. Users can select the model and dataset to be tested either via command line or configuration files.
````{tab} Command Line (Custom HF Model)
For HuggingFace models, users can set model parameters directly through the command line without additional configuration files. For instance, for the `facebook/opt-125m` model, you can evaluate it with the following command:
```bash
python run.py --datasets siqa_gen winograd_ppl \
--hf-type base \
--hf-path facebook/opt-125m
```

Note that this approach evaluates only one model at a time, while the other approaches can evaluate multiple models in a single run.
```{caution}
`--hf-num-gpus` does not specify the actual number of GPUs used during evaluation, but rather the minimum number of GPUs this model requires. [More](faq.md#how-does-opencompass-allocate-gpus)
```

:::{dropdown} More detailed example
:animate: fade-in-slide-down

```bash
python run.py --datasets siqa_gen winograd_ppl \
--hf-type base \  # HuggingFace model type, base or chat
--hf-path facebook/opt-125m \  # HuggingFace model path
--tokenizer-path facebook/opt-125m \  # HuggingFace tokenizer path (can be omitted if the same as the model path)
--tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \  # Arguments to construct the tokenizer
--model-kwargs device_map='auto' \  # Arguments to construct the model
--max-seq-len 2048 \  # Maximum sequence length the model can accept
--max-out-len 100 \  # Maximum number of tokens to generate
--min-out-len 100 \  # Minimum number of tokens to generate
--batch-size 64 \  # Batch size
--hf-num-gpus 1  # Number of GPUs required to run the model
```

```{seealso}
For all HuggingFace-related parameters supported by `run.py`, please read [Launching an Evaluation Task](../user_guides/experimentation.md#launching-an-evaluation-task).
```

:::

````

````{tab} Command Line
Users can combine the models and datasets they want to test using `--models` and `--datasets`.
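As an illustrative sketch of this usage (the model config names `hf_opt_125m` and `hf_opt_350m` here are assumptions and must match model config files shipped with OpenCompass):

```shell
python run.py --models hf_opt_125m hf_opt_350m --datasets siqa_gen winograd_ppl
```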
If you want to evaluate other models, please check out the "Command Line (Custom HF Model)" tab.
````
````{tab} Configuration File
In addition to configuring the experiment through the command line, OpenCompass also allows users to write the full configuration of the experiment in a configuration file and run it directly through `run.py`. The configuration file is organized in Python format and must include the `datasets` and `models` fields.
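As a minimal sketch of what such a configuration file can look like (the imported config module and variable names below are assumptions and must match the config files in your OpenCompass checkout):

```python
# eval_demo.py -- hypothetical configuration file for `run.py`
from mmengine.config import read_base

# OpenCompass configs inherit other config files via read_base()
with read_base():
    # Dataset configs: how each benchmark is loaded and evaluated
    from .datasets.siqa.siqa_gen import siqa_datasets
    from .datasets.winograd.winograd_ppl import winograd_datasets
    # Model config for the HuggingFace facebook/opt-125m model
    from .models.opt.hf_opt_125m import opt125m

# `run.py` requires these two fields
datasets = [*siqa_datasets, *winograd_datasets]
models = [opt125m]
```

Assuming the file is saved under `configs/`, it could then be launched with `python run.py configs/eval_demo.py`.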