@@ -595,35 +595,53 @@ def _detect_primary_key(self, data):
595
595
596
596
return None
597
597
598
- def _detect_columns (self , data ):
598
+ def _detect_columns (self , data , table_name = None ):
599
599
"""Detect the columns' sdtypes from the data.
600
600
601
601
Args:
602
602
data (pandas.DataFrame):
603
603
The data to be analyzed.
604
+ table_name (str):
605
+ The name of the table to be analyzed. Defaults to ``None``.
604
606
"""
605
607
old_columns = data .columns
606
608
data .columns = data .columns .astype (str )
607
609
for field in data :
608
- column_data = data [field ]
609
- clean_data = column_data .dropna ()
610
- dtype = clean_data .infer_objects ().dtype .kind
611
-
612
- sdtype = self ._detect_pii_column (field )
613
- if sdtype is None :
614
- if dtype in self ._DTYPES_TO_SDTYPES :
615
- sdtype = self ._DTYPES_TO_SDTYPES [dtype ]
616
- elif dtype in ['i' , 'f' , 'u' ]:
617
- sdtype = self ._determine_sdtype_for_numbers (column_data )
618
-
619
- elif dtype == 'O' :
620
- sdtype = self ._determine_sdtype_for_objects (column_data )
610
+ try :
611
+ column_data = data [field ]
612
+ clean_data = column_data .dropna ()
613
+ dtype = clean_data .infer_objects ().dtype .kind
621
614
615
+ sdtype = self ._detect_pii_column (field )
622
616
if sdtype is None :
623
- raise InvalidMetadataError (
624
- f"Unsupported data type for column '{ field } ' (kind: { dtype } )."
625
- "The valid data types are: 'object', 'int', 'float', 'datetime', 'bool'."
626
- )
617
+ if dtype in self ._DTYPES_TO_SDTYPES :
618
+ sdtype = self ._DTYPES_TO_SDTYPES [dtype ]
619
+ elif dtype in ['i' , 'f' , 'u' ]:
620
+ sdtype = self ._determine_sdtype_for_numbers (column_data )
621
+
622
+ elif dtype == 'O' :
623
+ sdtype = self ._determine_sdtype_for_objects (column_data )
624
+
625
+ if sdtype is None :
626
+ table_str = f"table '{ table_name } ' " if table_name else ''
627
+ error_message = (
628
+ f"Unsupported data type for { table_str } column '{ field } ' (kind: { dtype } "
629
+ "). The valid data types are: 'object', 'int', 'float', 'datetime',"
630
+ " 'bool'."
631
+ )
632
+ raise InvalidMetadataError (error_message )
633
+
634
+ except Exception as e :
635
+ error_type = type (e ).__name__
636
+ if error_type == 'InvalidMetadataError' :
637
+ raise e
638
+
639
+ table_str = f"table '{ table_name } ' " if table_name else ''
640
+ error_message = (
641
+ f"Unable to detect metadata for { table_str } column '{ field } ' due to an invalid "
642
+ f'data format.\n { error_type } : { e } '
643
+ )
644
+ raise InvalidMetadataError (error_message ) from e
627
645
628
646
column_dict = {'sdtype' : sdtype }
629
647
sdtype_in_reference = sdtype in self ._REFERENCE_TO_SDTYPE .values ()
0 commit comments