102
102
import static com .google .common .collect .ImmutableSet .toImmutableSet ;
103
103
import static com .google .common .collect .Iterables .getOnlyElement ;
104
104
import static io .airlift .slice .Slices .utf8Slice ;
105
+ import static io .trino .parquet .writer .ParquetWriter .SUPPORTED_BLOOM_FILTER_TYPES ;
105
106
import static io .trino .plugin .base .io .ByteBuffers .getWrappedBytes ;
106
107
import static io .trino .plugin .hive .HiveMetadata .TABLE_COMMENT ;
107
108
import static io .trino .plugin .iceberg .ColumnIdentity .createColumnIdentity ;
118
119
import static io .trino .plugin .iceberg .IcebergTableProperties .LOCATION_PROPERTY ;
119
120
import static io .trino .plugin .iceberg .IcebergTableProperties .ORC_BLOOM_FILTER_COLUMNS_PROPERTY ;
120
121
import static io .trino .plugin .iceberg .IcebergTableProperties .ORC_BLOOM_FILTER_FPP_PROPERTY ;
122
+ import static io .trino .plugin .iceberg .IcebergTableProperties .PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY ;
121
123
import static io .trino .plugin .iceberg .IcebergTableProperties .PARTITIONING_PROPERTY ;
122
124
import static io .trino .plugin .iceberg .IcebergTableProperties .SORTED_BY_PROPERTY ;
123
125
import static io .trino .plugin .iceberg .IcebergTableProperties .getPartitioning ;
167
169
import static org .apache .iceberg .TableProperties .OBJECT_STORE_PATH ;
168
170
import static org .apache .iceberg .TableProperties .ORC_BLOOM_FILTER_COLUMNS ;
169
171
import static org .apache .iceberg .TableProperties .ORC_BLOOM_FILTER_FPP ;
172
+ import static org .apache .iceberg .TableProperties .PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX ;
170
173
import static org .apache .iceberg .TableProperties .WRITE_DATA_LOCATION ;
171
174
import static org .apache .iceberg .TableProperties .WRITE_LOCATION_PROVIDER_IMPL ;
172
175
import static org .apache .iceberg .TableProperties .WRITE_METADATA_LOCATION ;
@@ -303,6 +306,12 @@ public static Map<String, Object> getIcebergTableProperties(Table icebergTable)
303
306
properties .put (ORC_BLOOM_FILTER_FPP_PROPERTY , Double .parseDouble (orcBloomFilterFpp .get ()));
304
307
}
305
308
309
+ // iceberg Parquet format bloom filter properties
310
+ Set <String > parquetBloomFilterColumns = getParquetBloomFilterColumns (icebergTable .properties ());
311
+ if (!parquetBloomFilterColumns .isEmpty ()) {
312
+ properties .put (PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY , ImmutableList .copyOf (parquetBloomFilterColumns ));
313
+ }
314
+
306
315
return properties .buildOrThrow ();
307
316
}
308
317
@@ -319,6 +328,14 @@ public static Optional<String> getOrcBloomFilterColumns(Map<String, String> prop
319
328
return orcBloomFilterColumns ;
320
329
}
321
330
331
+ public static Set <String > getParquetBloomFilterColumns (Map <String , String > properties )
332
+ {
333
+ return properties .entrySet ().stream ()
334
+ .filter (entry -> entry .getKey ().startsWith (PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX ) && "true" .equals (entry .getValue ()))
335
+ .map (entry -> entry .getKey ().substring (PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX .length ()))
336
+ .collect (toImmutableSet ());
337
+ }
338
+
322
339
public static Optional <String > getOrcBloomFilterFpp (Map <String , String > properties )
323
340
{
324
341
return Stream .of (
@@ -776,14 +793,24 @@ public static Map<String, String> createTableProperties(ConnectorTableMetadata t
776
793
propertiesBuilder .put (FORMAT_VERSION , Integer .toString (IcebergTableProperties .getFormatVersion (tableMetadata .getProperties ())));
777
794
778
795
// iceberg ORC format bloom filter properties used by create table
779
- List <String > columns = IcebergTableProperties .getOrcBloomFilterColumns (tableMetadata .getProperties ());
780
- if (!columns .isEmpty ()) {
796
+ List <String > orcBloomFilterColumns = IcebergTableProperties .getOrcBloomFilterColumns (tableMetadata .getProperties ());
797
+ if (!orcBloomFilterColumns .isEmpty ()) {
781
798
checkFormatForProperty (fileFormat .toIceberg (), FileFormat .ORC , ORC_BLOOM_FILTER_COLUMNS_PROPERTY );
782
- validateOrcBloomFilterColumns (tableMetadata , columns );
783
- propertiesBuilder .put (ORC_BLOOM_FILTER_COLUMNS , Joiner .on ("," ).join (columns ));
799
+ validateOrcBloomFilterColumns (tableMetadata , orcBloomFilterColumns );
800
+ propertiesBuilder .put (ORC_BLOOM_FILTER_COLUMNS , Joiner .on ("," ).join (orcBloomFilterColumns ));
784
801
propertiesBuilder .put (ORC_BLOOM_FILTER_FPP , String .valueOf (IcebergTableProperties .getOrcBloomFilterFpp (tableMetadata .getProperties ())));
785
802
}
786
803
804
+ // iceberg Parquet format bloom filter properties used by create table
805
+ List <String > parquetBloomFilterColumns = IcebergTableProperties .getParquetBloomFilterColumns (tableMetadata .getProperties ());
806
+ if (!parquetBloomFilterColumns .isEmpty ()) {
807
+ checkFormatForProperty (fileFormat .toIceberg (), FileFormat .PARQUET , PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY );
808
+ validateParquetBloomFilterColumns (tableMetadata , parquetBloomFilterColumns );
809
+ for (String column : parquetBloomFilterColumns ) {
810
+ propertiesBuilder .put (PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + column , "true" );
811
+ }
812
+ }
813
+
787
814
if (tableMetadata .getComment ().isPresent ()) {
788
815
propertiesBuilder .put (TABLE_COMMENT , tableMetadata .getComment ().get ());
789
816
}
@@ -884,6 +911,21 @@ private static void validateOrcBloomFilterColumns(ConnectorTableMetadata tableMe
884
911
}
885
912
}
886
913
914
+ private static void validateParquetBloomFilterColumns (ConnectorTableMetadata tableMetadata , List <String > parquetBloomFilterColumns )
915
+ {
916
+ Map <String , Type > columnTypes = tableMetadata .getColumns ().stream ()
917
+ .collect (toImmutableMap (ColumnMetadata ::getName , ColumnMetadata ::getType ));
918
+ for (String column : parquetBloomFilterColumns ) {
919
+ Type type = columnTypes .get (column );
920
+ if (type == null ) {
921
+ throw new TrinoException (INVALID_TABLE_PROPERTY , format ("Parquet Bloom filter column %s not present in schema" , column ));
922
+ }
923
+ if (!SUPPORTED_BLOOM_FILTER_TYPES .contains (type )) {
924
+ throw new TrinoException (INVALID_TABLE_PROPERTY , format ("Parquet Bloom filter column %s has unsupported type %s" , column , type .getDisplayName ()));
925
+ }
926
+ }
927
+ }
928
+
887
929
public static int parseVersion (String metadataFileName )
888
930
throws TrinoException
889
931
{
0 commit comments