From 1731ae8ca19f08c344380b82f57d6bd2991c35f5 Mon Sep 17 00:00:00 2001 From: Shannon Shen <22512825+lolipopshock@users.noreply.github.com> Date: Fri, 5 Aug 2022 14:06:33 -0400 Subject: [PATCH 1/4] improve automodel design 1. raise warning for no available models 2. identify models based on dataset names --- src/layoutparser/models/auto_layoutmodel.py | 70 +++++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/layoutparser/models/auto_layoutmodel.py b/src/layoutparser/models/auto_layoutmodel.py index 3c35d99..89bad69 100644 --- a/src/layoutparser/models/auto_layoutmodel.py +++ b/src/layoutparser/models/auto_layoutmodel.py @@ -13,26 +13,57 @@ # limitations under the License. from typing import Optional, Dict, Union, List -from .detectron2.layoutmodel import Detectron2LayoutModel -from .paddledetection.layoutmodel import PaddleDetectionLayoutModel -from .effdet.layoutmodel import EfficientDetLayoutModel +from collections import defaultdict + from .model_config import ( is_lp_layout_model_config_any_format, ) +from ..file_utils import ( + is_effdet_available, + is_detectron2_available, + is_paddle_available, +) + +ALL_AVAILABLE_BACKENDS = dict() +ALL_AVAILABLE_DATASETS = defaultdict(list) + +if is_effdet_available(): + from .effdet.layoutmodel import EfficientDetLayoutModel + from .effdet.catalog import MODEL_CATALOG as _effdet_model_catalog + + # fmt: off + ALL_AVAILABLE_BACKENDS[EfficientDetLayoutModel.DETECTOR_NAME] = EfficientDetLayoutModel + for dataset_name in _effdet_model_catalog: + ALL_AVAILABLE_DATASETS[dataset_name].append(EfficientDetLayoutModel.DETECTOR_NAME) + # fmt: on + +if is_detectron2_available(): + from .detectron2.layoutmodel import Detectron2LayoutModel + from .detectron2.catalog import MODEL_CATALOG as _detectron2_model_catalog + + # fmt: off + ALL_AVAILABLE_BACKENDS[Detectron2LayoutModel.DETECTOR_NAME] = Detectron2LayoutModel + for dataset_name in _detectron2_model_catalog: + ALL_AVAILABLE_DATASETS[dataset_name].append(Detectron2LayoutModel.DETECTOR_NAME) + # fmt: on + +if is_paddle_available(): + from .paddledetection.layoutmodel import PaddleDetectionLayoutModel + from .paddledetection.catalog import MODEL_CATALOG as _paddle_model_catalog -ALL_AVAILABLE_BACKENDS = { - Detectron2LayoutModel.DETECTOR_NAME: Detectron2LayoutModel, - PaddleDetectionLayoutModel.DETECTOR_NAME: PaddleDetectionLayoutModel, - EfficientDetLayoutModel.DETECTOR_NAME: EfficientDetLayoutModel, -} + # fmt: off + ALL_AVAILABLE_BACKENDS[PaddleDetectionLayoutModel.DETECTOR_NAME] = PaddleDetectionLayoutModel + for dataset_name in _paddle_model_catalog: + ALL_AVAILABLE_DATASETS[dataset_name].append(PaddleDetectionLayoutModel.DETECTOR_NAME) + # fmt: on def AutoLayoutModel( config_path: str, model_path: Optional[str] = None, - label_map: Optional[Dict]=None, - device: Optional[str]=None, - extra_config: Optional[Union[Dict, List]]=None, + label_map: Optional[Dict] = None, + device: Optional[str] = None, + extra_config: Optional[Union[Dict, List]] = None, ) -> "BaseLayoutModel": """[summary] @@ -50,7 +81,7 @@ def AutoLayoutModel( Defaults to `None`. device(:obj:`str`, optional): Whether to use cuda or cpu devices. If not set, LayoutParser will - automatically determine the device to initialize the models on. + automatically determine the device to initialize the models on. extra_config (:obj:`dict`, optional): Extra configuration passed used for initializing the layout model. @@ -59,6 +90,8 @@ def AutoLayoutModel( """ if not is_lp_layout_model_config_any_format(config_path): raise ValueError(f"Invalid model config_path {config_path}") + + # Try to search for the model keywords for backend_name in ALL_AVAILABLE_BACKENDS: if backend_name in config_path: return ALL_AVAILABLE_BACKENDS[backend_name]( @@ -68,3 +101,16 @@ def AutoLayoutModel( extra_config=extra_config, device=device, ) + + # Try to search for the dataset keywords + for dataset_name in ALL_AVAILABLE_DATASETS: + if dataset_name in config_path: + return ALL_AVAILABLE_BACKENDS[ALL_AVAILABLE_DATASETS[dataset_name][0]]( + config_path, + model_path=model_path, + label_map=label_map, + extra_config=extra_config, + device=device, + ) + + raise ValueError(f"No available model found for {config_path}") \ No newline at end of file From 1000513e1da9fa86674b97eae3a818b7ca563bc3 Mon Sep 17 00:00:00 2001 From: Shannon Shen <22512825+lolipopshock@users.noreply.github.com> Date: Fri, 5 Aug 2022 14:06:41 -0400 Subject: [PATCH 2/4] more tests for automodels --- tests/test_model.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/tests/test_model.py b/tests/test_model.py index 42ace8b..1fae065 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -44,11 +44,6 @@ "lp://MFD/tf_efficientdet_d1/config", ] -AUTOMODEL_CONFIGS = [ - "lp://detectron2/PubLayNet/faster_rcnn_R_50_FPN_3x/config", - "lp://paddledetection/PubLayNet/ppyolov2_r50vd_dcn_365e/config", - "lp://efficientdet/PubLayNet/tf_efficientdet_d0/config", -] def _construct_valid_config_variations(config, backend_name): dataset_name, arch_name, identifier = config[len("lp://") :].split("/") @@ -152,8 +147,36 @@ def test_EffDetModel(is_large_scale=False): EfficientDetLayoutModel, ALL_EFFDET_MODEL_CONFIGS[0] ) + def test_AutoModel(): - for config in AUTOMODEL_CONFIGS: + + # Full configs + auto_model_config_1 = [ + "lp://detectron2/PubLayNet/faster_rcnn_R_50_FPN_3x/config", + "lp://paddledetection/PubLayNet/ppyolov2_r50vd_dcn_365e/config", + "lp://efficientdet/PubLayNet/tf_efficientdet_d0/config", + ] + for config in auto_model_config_1: model = AutoLayoutModel(config) image = cv2.imread("tests/fixtures/model/test_model_image.jpg") - layout = model.detect(image) \ No newline at end of file + layout = model.detect(image) + + # Dataset name only + # It will use the first available model + auto_model_config_2 = [ + "lp://PubLayNet", + "lp://MFD", + ] + for config in auto_model_config_1: + model = AutoLayoutModel(config) + model.DETECTOR_NAME == "efficientdet" + + # Automodel name that doesn't work + + # 1. No available backend for the model + with pytest.raises(ValueError): + model = AutoLayoutModel("lp://prima") + + # 2. completely Invalid name + with pytest.raises(ValueError): + model = AutoLayoutModel("lp://test") From ad5e12b07b0a9ac3b997cef24f311be8a28ebb37 Mon Sep 17 00:00:00 2001 From: Shannon Shen <22512825+lolipopshock@users.noreply.github.com> Date: Fri, 5 Aug 2022 14:17:22 -0400 Subject: [PATCH 3/4] fix paddle detection protobuf error? --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a62a8b..8fa7eb1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,6 +61,8 @@ jobs: pip install pytest pip install -e ".[paddledetection]" pytest tests_deps/test_only_paddledetection.py + env: + PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python test_all_methods_all_backends: needs: [test_only_effdet_backend, test_only_detectron2_backend, test_only_paddledetection_backend] From e18e21574bdbea9059bc0c386106cd69bf8ff006 Mon Sep 17 00:00:00 2001 From: Shannon Shen <22512825+lolipopshock@users.noreply.github.com> Date: Sat, 6 Aug 2022 13:46:28 -0400 Subject: [PATCH 4/4] Case matters --- tests/test_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index 1fae065..016df9d 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -177,6 +177,6 @@ def test_AutoModel(): with pytest.raises(ValueError): model = AutoLayoutModel("lp://prima") - # 2. completely Invalid name + # 2. Completely invalid name with pytest.raises(ValueError): model = AutoLayoutModel("lp://test")