diff --git a/.mvn/modernizer/violations.xml b/.mvn/modernizer/violations.xml
index eddc9222574f..2fe8904bdb85 100644
--- a/.mvn/modernizer/violations.xml
+++ b/.mvn/modernizer/violations.xml
@@ -150,42 +150,6 @@
         <comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
     </violation>
 
-    <violation>
-        <name>com/amazonaws/services/glue/model/Table.getStorageDescriptor:()Lcom/amazonaws/services/glue/model/StorageDescriptor;</name>
-        <version>1.1</version>
-        <comment>Storage descriptor is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getStorageDescriptor</comment>
-    </violation>
-
-    <violation>
-        <name>com/amazonaws/services/glue/model/Table.getTableType:()Ljava/lang/String;</name>
-        <version>1.1</version>
-        <comment>Table type is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getTableType</comment>
-    </violation>
-
-    <violation>
-        <name>com/amazonaws/services/glue/model/Column.getParameters:()Ljava/util/Map;</name>
-        <version>1.1</version>
-        <comment>Column parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getColumnParameters</comment>
-    </violation>
-
-    <violation>
-        <name>com/amazonaws/services/glue/model/Table.getParameters:()Ljava/util/Map;</name>
-        <version>1.1</version>
-        <comment>Table parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getTableParameters</comment>
-    </violation>
-
-    <violation>
-        <name>com/amazonaws/services/glue/model/Partition.getParameters:()Ljava/util/Map;</name>
-        <version>1.1</version>
-        <comment>Partition parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getPartitionParameters</comment>
-    </violation>
-
-    <violation>
-        <name>com/amazonaws/services/glue/model/SerDeInfo.getParameters:()Ljava/util/Map;</name>
-        <version>1.1</version>
-        <comment>SerDeInfo parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getSerDeInfoParameters</comment>
-    </violation>
-
     <violation>
         <name>org/apache/hadoop/fs/FileSystem.close:()V</name>
         <version>1.1</version>
diff --git a/plugin/trino-delta-lake/pom.xml b/plugin/trino-delta-lake/pom.xml
index c34a30e0a7e3..902b30bd4d55 100644
--- a/plugin/trino-delta-lake/pom.xml
+++ b/plugin/trino-delta-lake/pom.xml
@@ -14,11 +14,6 @@
     <description>Trino - Delta Lake connector</description>
 
     <dependencies>
-        <dependency>
-            <groupId>com.amazonaws</groupId>
-            <artifactId>aws-java-sdk-glue</artifactId>
-        </dependency>
-
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-core</artifactId>
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeMetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeMetastoreModule.java
index 3ca5e360247a..7f7d24a62e2b 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeMetastoreModule.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeMetastoreModule.java
@@ -17,7 +17,6 @@
 import io.airlift.configuration.AbstractConfigurationAwareModule;
 import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreModule;
 import io.trino.plugin.deltalake.metastore.glue.DeltaLakeGlueMetastoreModule;
-import io.trino.plugin.deltalake.metastore.glue.v1.DeltaLakeGlueV1MetastoreModule;
 import io.trino.plugin.deltalake.metastore.thrift.DeltaLakeThriftMetastoreModule;
 import io.trino.plugin.hive.metastore.CachingHiveMetastoreModule;
 import io.trino.plugin.hive.metastore.MetastoreTypeConfig;
@@ -32,7 +31,6 @@ protected void setup(Binder binder)
             case THRIFT -> new DeltaLakeThriftMetastoreModule();
             case FILE -> new DeltaLakeFileMetastoreModule();
             case GLUE -> new DeltaLakeGlueMetastoreModule();
-            case GLUE_V1 -> new DeltaLakeGlueV1MetastoreModule();
         });
 
         install(new CachingHiveMetastoreModule());
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueMetastoreTableFilterProvider.java
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueMetastoreTableFilterProvider.java deleted file mode 100644 index 654b251bbb8f..000000000000 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueMetastoreTableFilterProvider.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.deltalake.metastore.glue.v1; - -import com.amazonaws.services.glue.model.Table; -import com.google.inject.Inject; -import com.google.inject.Provider; -import io.trino.plugin.deltalake.metastore.glue.DeltaLakeGlueMetastoreConfig; -import io.trino.plugin.hive.util.HiveUtil; - -import java.util.function.Predicate; - -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; - -public class DeltaLakeGlueMetastoreTableFilterProvider - implements Provider> -{ - private final boolean hideNonDeltaLakeTables; - - @Inject - public DeltaLakeGlueMetastoreTableFilterProvider(DeltaLakeGlueMetastoreConfig config) - { - this.hideNonDeltaLakeTables = config.isHideNonDeltaLakeTables(); - } - - @Override - public Predicate get() - { - if (hideNonDeltaLakeTables) { - return table -> HiveUtil.isDeltaLakeTable(getTableParameters(table)); - } - return table -> true; - } -} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java deleted file mode 100644 index 2a68a126d8a9..000000000000 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.deltalake.metastore.glue.v1; - -import com.amazonaws.services.glue.model.Table; -import com.google.inject.Binder; -import com.google.inject.Key; -import com.google.inject.Scopes; -import com.google.inject.TypeLiteral; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename; -import io.trino.plugin.deltalake.MaxTableParameterLength; -import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; -import io.trino.plugin.deltalake.metastore.glue.DeltaLakeGlueMetastoreConfig; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.hive.metastore.glue.v1.ForGlueHiveMetastore; -import io.trino.plugin.hive.metastore.glue.v1.GlueMetastoreModule; - -import java.util.function.Predicate; - -import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; -import static org.weakref.jmx.guice.ExportBinder.newExporter; - -public class DeltaLakeGlueV1MetastoreModule - extends AbstractConfigurationAwareModule -{ - @Override - protected void setup(Binder binder) - { - configBinder(binder).bindConfig(DeltaLakeGlueMetastoreConfig.class); - - newOptionalBinder(binder, Key.get(new TypeLiteral>() {}, ForGlueHiveMetastore.class)) - .setBinding().toProvider(DeltaLakeGlueMetastoreTableFilterProvider.class); - - install(new GlueMetastoreModule()); - binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON); - newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName(); - binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeGlueV1MetastoreTableOperationsProvider.class).in(Scopes.SINGLETON); - binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true); - // Limit per Glue API docs (https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html#Glue-Type-TableInput-Parameters as of this writing) - binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(512000); - } -} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java deleted file mode 100644 index d3e0eed81a98..000000000000 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.deltalake.metastore.glue.v1; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.Table; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateTableRequest; -import com.google.common.collect.ImmutableMap; -import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.TableNotFoundException; - -import java.util.Optional; - -import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueInputConverter.convertGlueTableToTableInput; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static java.util.Objects.requireNonNull; - -public class DeltaLakeGlueV1MetastoreTableOperations - implements DeltaLakeTableOperations -{ - private final AWSGlueAsync glueClient; - private final GlueMetastoreStats stats; - - public DeltaLakeGlueV1MetastoreTableOperations(AWSGlueAsync glueClient, GlueMetastoreStats stats) - { - this.glueClient = requireNonNull(glueClient, "glueClient is null"); - this.stats = requireNonNull(stats, "stats is null"); - } - - @Override - public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional tableComment) - { - GetTableRequest getTableRequest = new GetTableRequest() - .withDatabaseName(schemaTableName.getSchemaName()) - .withName(schemaTableName.getTableName()); - Table currentTable; - try { - currentTable = glueClient.getTable(getTableRequest).getTable(); - } - catch (EntityNotFoundException e) { - throw new TableNotFoundException(schemaTableName); - } - String glueVersionId = currentTable.getVersionId(); - - TableInput tableInput = convertGlueTableToTableInput(currentTable); - ImmutableMap.Builder parameters = ImmutableMap.builder(); - parameters.putAll(getTableParameters(currentTable)); - parameters.putAll(tableMetadataParameters(version, schemaString, tableComment)); - tableInput.withParameters(parameters.buildKeepingLast()); - - UpdateTableRequest updateTableRequest = new UpdateTableRequest() - .withDatabaseName(schemaTableName.getSchemaName()) - .withTableInput(tableInput) - .withVersionId(glueVersionId); - stats.getUpdateTable().call(() -> glueClient.updateTable(updateTableRequest)); - } -} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperationsProvider.java deleted file mode 100644 index 1c81a7f7e04b..000000000000 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperationsProvider.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.deltalake.metastore.glue.v1; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.google.inject.Inject; -import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations; -import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.spi.connector.ConnectorSession; - -import static java.util.Objects.requireNonNull; - -public class DeltaLakeGlueV1MetastoreTableOperationsProvider - implements DeltaLakeTableOperationsProvider -{ - private final AWSGlueAsync glueClient; - private final GlueMetastoreStats stats; - - @Inject - public DeltaLakeGlueV1MetastoreTableOperationsProvider(AWSGlueAsync glueClient, GlueMetastoreStats stats) - { - this.glueClient = requireNonNull(glueClient, "glueClient is null"); - this.stats = requireNonNull(stats, "stats is null"); - } - - @Override - public DeltaLakeTableOperations createTableOperations(ConnectorSession session) - { - return new DeltaLakeGlueV1MetastoreTableOperations(glueClient, stats); - } -} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java index 2ebc98f68e5b..db8bf0cc1918 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java @@ -82,30 +82,6 @@ public void testThriftMetastore() .hasMessageContaining("Error: Configuration property 'delta.hide-non-delta-lake-tables' was not used"); } - @Test - public void testGlueV1Metastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "glue-v1", - "hive.metastore.glue.region", "us-east-2", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - - assertThatThrownBy(() -> factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "glue", - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .isInstanceOf(ApplicationConfigurationException.class) - .hasMessageContaining("Error: Configuration property 'hive.metastore.uri' was not used"); - } - @Test public void testGlueMetastore() { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeCleanUpGlueMetastore.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeCleanUpGlueMetastore.java index 00d56dd78887..5938e3570fee 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeCleanUpGlueMetastore.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeCleanUpGlueMetastore.java @@ -13,54 +13,43 @@ */ package io.trino.plugin.deltalake.metastore.glue; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import 
com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetDatabasesResult; import io.airlift.log.Logger; -import io.trino.plugin.hive.metastore.glue.AwsApiCallStats; import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.EntityNotFoundException; +import software.amazon.awssdk.services.glue.model.GetDatabasesResponse; +import java.time.Duration; +import java.time.Instant; import java.util.List; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; -import static java.lang.System.currentTimeMillis; -import static java.util.concurrent.TimeUnit.DAYS; - public class TestDeltaLakeCleanUpGlueMetastore { private static final Logger log = Logger.get(TestDeltaLakeCleanUpGlueMetastore.class); private static final String TEST_DATABASE_NAME_PREFIX = "test_"; + private static final Duration CLEANUP_THRESHOLD = Duration.ofDays(1); @Test public void cleanupOrphanedDatabases() { - AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.defaultClient(); - long creationTimeMillisThreshold = currentTimeMillis() - DAYS.toMillis(1); - List orphanedDatabases = getPaginatedResults( - glueClient::getDatabases, - new GetDatabasesRequest(), - GetDatabasesRequest::setNextToken, - GetDatabasesResult::getNextToken, - new AwsApiCallStats()) - .map(GetDatabasesResult::getDatabaseList) + GlueClient glueClient = GlueClient.create(); + Instant creationTimeThreshold = Instant.now().minus(CLEANUP_THRESHOLD); + List orphanedDatabases = glueClient.getDatabasesPaginator(_ -> {}).stream() + .map(GetDatabasesResponse::databaseList) .flatMap(List::stream) - .filter(glueDatabase -> glueDatabase.getName().startsWith(TEST_DATABASE_NAME_PREFIX) && - glueDatabase.getCreateTime().getTime() <= creationTimeMillisThreshold) - .map(com.amazonaws.services.glue.model.Database::getName) - .collect(toImmutableList()); + .filter(database -> database.name().startsWith(TEST_DATABASE_NAME_PREFIX)) + .filter(database -> database.createTime().isBefore(creationTimeThreshold)) + .map(Database::name) + .toList(); if (!orphanedDatabases.isEmpty()) { log.info("Found %s %s* databases that look orphaned, removing", orphanedDatabases.size(), TEST_DATABASE_NAME_PREFIX); orphanedDatabases.forEach(database -> { try { log.info("Deleting %s database", database); - glueClient.deleteDatabase(new DeleteDatabaseRequest() - .withName(database)); + glueClient.deleteDatabase(builder -> builder.name(database)); } catch (EntityNotFoundException e) { log.info("Database [%s] not found, could be removed by other cleanup process", database); diff --git a/plugin/trino-hive/pom.xml b/plugin/trino-hive/pom.xml index a00ad376d66e..5598fe12d48d 100644 --- a/plugin/trino-hive/pom.xml +++ b/plugin/trino-hive/pom.xml @@ -14,21 +14,6 @@ Trino - Hive connector - - com.amazonaws - aws-java-sdk-core - - - - com.amazonaws - aws-java-sdk-glue - - - - com.amazonaws - aws-java-sdk-sts - - com.fasterxml.jackson.core jackson-core @@ -104,11 +89,6 @@ opentelemetry-apache-httpclient-5.2 - - io.opentelemetry.instrumentation - opentelemetry-aws-sdk-1.11 - - io.opentelemetry.instrumentation opentelemetry-aws-sdk-2.2 diff --git 
a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastoreModule.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastoreModule.java index b57d17044b56..f069dec6f132 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastoreModule.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastoreModule.java @@ -48,7 +48,6 @@ protected void setup(Binder binder) case THRIFT -> new ThriftMetastoreModule(); case FILE -> new FileMetastoreModule(); case GLUE -> new GlueMetastoreModule(); - case GLUE_V1 -> new io.trino.plugin.hive.metastore.glue.v1.GlueMetastoreModule(); }); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreTypeConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreTypeConfig.java index fb77c387792e..0a2e3ab0df95 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreTypeConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreTypeConfig.java @@ -25,7 +25,6 @@ public enum MetastoreType THRIFT, FILE, GLUE, - GLUE_V1, } private MetastoreType metastoreType = THRIFT; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueConverter.java index 03785bde62f2..d9edc04537ae 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueConverter.java @@ -101,7 +101,7 @@ import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static java.util.Objects.requireNonNull; -final class GlueConverter +public final class GlueConverter { static final String PUBLIC_OWNER = "PUBLIC"; private static final Storage FAKE_PARQUET_STORAGE = new Storage( diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreStats.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreStats.java index a613b6e8f437..e92d993ccbe7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreStats.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreStats.java @@ -13,9 +13,6 @@ */ package io.trino.plugin.hive.metastore.glue; -import com.amazonaws.metrics.RequestMetricCollector; -import io.trino.plugin.hive.metastore.glue.v1.AwsSdkClientCoreStats; -import org.weakref.jmx.Flatten; import org.weakref.jmx.Managed; import org.weakref.jmx.Nested; @@ -51,8 +48,6 @@ public class GlueMetastoreStats private final AwsApiCallStats updateUserDefinedFunction = new AwsApiCallStats(); private final AwsApiCallStats deleteUserDefinedFunction = new AwsApiCallStats(); - private final AwsSdkClientCoreStats clientCoreStats = new AwsSdkClientCoreStats(); - @Managed @Nested public AwsApiCallStats getGetDatabases() @@ -255,16 +250,4 @@ public AwsApiCallStats getDeleteUserDefinedFunction() { return deleteUserDefinedFunction; } - - @Managed - @Flatten - public AwsSdkClientCoreStats getClientCoreStats() - { - return clientCoreStats; - } - - public RequestMetricCollector newRequestMetricsCollector() - { - return clientCoreStats.newRequestMetricCollector(); - } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsCurrentRegionHolder.java 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsCurrentRegionHolder.java deleted file mode 100644 index ce97fd7d6f7d..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsCurrentRegionHolder.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.regions.Region; -import com.amazonaws.regions.Regions; -import com.google.common.base.Suppliers; - -import java.util.function.Supplier; - -/** - * Caches the result of calling {@link Regions#getCurrentRegion()} since accessing EC2 instance - * metadata repeatedly can result in being throttled and prevent other metadata accessing operations - * such as refreshing instance credentials from working normally - */ -final class AwsCurrentRegionHolder -{ - private static final Supplier SUPPLIER = Suppliers.memoize(AwsCurrentRegionHolder::loadCurrentRegion); - - private AwsCurrentRegionHolder() {} - - /** - * Attempts to resolve the current region from EC2's instance metadata through {@link Regions#getCurrentRegion()}. - * An exception is thrown if the region cannot be resolved. - */ - public static Region getCurrentRegionFromEc2Metadata() - { - return SUPPLIER.get(); - } - - private static Region loadCurrentRegion() - { - Region result = Regions.getCurrentRegion(); - if (result == null) { - throw new IllegalStateException("Failed to resolve current AWS region from EC2 metadata"); - } - return result; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsSdkClientCoreStats.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsSdkClientCoreStats.java deleted file mode 100644 index 95517512bbc2..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsSdkClientCoreStats.java +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.metrics.RequestMetricCollector; -import com.amazonaws.util.AWSRequestMetrics; -import com.amazonaws.util.TimingInfo; -import com.google.errorprone.annotations.ThreadSafe; -import io.airlift.stats.CounterStat; -import io.airlift.stats.TimeStat; -import org.weakref.jmx.Managed; -import org.weakref.jmx.Nested; - -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; - -import static com.amazonaws.util.AWSRequestMetrics.Field.ClientExecuteTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientPoolAvailableCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientPoolLeasedCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientPoolPendingCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientRetryCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpRequestTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.RetryPauseTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.ThrottleException; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.MILLISECONDS; - -@ThreadSafe -public final class AwsSdkClientCoreStats -{ - private final CounterStat awsRequestCount = new CounterStat(); - private final CounterStat awsRetryCount = new CounterStat(); - private final CounterStat awsHttpClientRetryCount = new CounterStat(); - private final CounterStat awsThrottleExceptions = new CounterStat(); - private final TimeStat awsRequestTime = new TimeStat(MILLISECONDS); - private final TimeStat awsClientExecuteTime = new TimeStat(MILLISECONDS); - private final TimeStat awsClientRetryPauseTime = new TimeStat(MILLISECONDS); - private final AtomicLong awsHttpClientPoolAvailableCount = new AtomicLong(); - private final AtomicLong awsHttpClientPoolLeasedCount = new AtomicLong(); - private final AtomicLong awsHttpClientPoolPendingCount = new AtomicLong(); - - @Managed - @Nested - public CounterStat getAwsRequestCount() - { - return awsRequestCount; - } - - @Managed - @Nested - public CounterStat getAwsRetryCount() - { - return awsRetryCount; - } - - @Managed - @Nested - public CounterStat getAwsHttpClientRetryCount() - { - return awsHttpClientRetryCount; - } - - @Managed - @Nested - public CounterStat getAwsThrottleExceptions() - { - return awsThrottleExceptions; - } - - @Managed - @Nested - public TimeStat getAwsRequestTime() - { - return awsRequestTime; - } - - @Managed - @Nested - public TimeStat getAwsClientExecuteTime() - { - return awsClientExecuteTime; - } - - @Managed - @Nested - public TimeStat getAwsClientRetryPauseTime() - { - return awsClientRetryPauseTime; - } - - @Managed - public long getAwsHttpClientPoolAvailableCount() - { - return awsHttpClientPoolAvailableCount.get(); - } - - @Managed - public long getAwsHttpClientPoolLeasedCount() - { - return awsHttpClientPoolLeasedCount.get(); - } - - @Managed - public long getAwsHttpClientPoolPendingCount() - { - return awsHttpClientPoolPendingCount.get(); - } - - public AwsSdkClientCoreRequestMetricCollector newRequestMetricCollector() - { - return new AwsSdkClientCoreRequestMetricCollector(this); - } - - public static class AwsSdkClientCoreRequestMetricCollector - extends RequestMetricCollector - { - private final AwsSdkClientCoreStats stats; - - protected 
AwsSdkClientCoreRequestMetricCollector(AwsSdkClientCoreStats stats) - { - this.stats = requireNonNull(stats, "stats is null"); - } - - @Override - public void collectMetrics(Request request, Response response) - { - TimingInfo timingInfo = request.getAWSRequestMetrics().getTimingInfo(); - - Number requestCounts = timingInfo.getCounter(RequestCount.name()); - if (requestCounts != null) { - long count = requestCounts.longValue(); - stats.awsRequestCount.update(count); - if (count > 1) { - stats.awsRetryCount.update(count - 1); - } - } - - Number httpClientRetryCounts = timingInfo.getCounter(HttpClientRetryCount.name()); - if (httpClientRetryCounts != null) { - stats.awsHttpClientRetryCount.update(httpClientRetryCounts.longValue()); - } - - Number throttleExceptions = timingInfo.getCounter(ThrottleException.name()); - if (throttleExceptions != null) { - stats.awsThrottleExceptions.update(throttleExceptions.longValue()); - } - - Number httpClientPoolAvailableCount = timingInfo.getCounter(HttpClientPoolAvailableCount.name()); - if (httpClientPoolAvailableCount != null) { - stats.awsHttpClientPoolAvailableCount.set(httpClientPoolAvailableCount.longValue()); - } - - Number httpClientPoolLeasedCount = timingInfo.getCounter(HttpClientPoolLeasedCount.name()); - if (httpClientPoolLeasedCount != null) { - stats.awsHttpClientPoolLeasedCount.set(httpClientPoolLeasedCount.longValue()); - } - - Number httpClientPoolPendingCount = timingInfo.getCounter(HttpClientPoolPendingCount.name()); - if (httpClientPoolPendingCount != null) { - stats.awsHttpClientPoolPendingCount.set(httpClientPoolPendingCount.longValue()); - } - - recordSubTimingDurations(timingInfo, HttpRequestTime, stats.awsRequestTime); - recordSubTimingDurations(timingInfo, ClientExecuteTime, stats.awsClientExecuteTime); - recordSubTimingDurations(timingInfo, RetryPauseTime, stats.awsClientRetryPauseTime); - } - - private static void recordSubTimingDurations(TimingInfo timingInfo, AWSRequestMetrics.Field field, TimeStat timeStat) - { - List subTimings = timingInfo.getAllSubMeasurements(field.name()); - if (subTimings != null) { - for (TimingInfo subTiming : subTimings) { - Long endTimeNanos = subTiming.getEndTimeNanoIfKnown(); - if (endTimeNanos != null) { - timeStat.addNanos(endTimeNanos - subTiming.getStartTimeNano()); - } - } - } - } - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsSdkUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsSdkUtil.java deleted file mode 100644 index 43663809489c..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/AwsSdkUtil.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.common.collect.AbstractIterator; -import io.trino.plugin.hive.metastore.glue.AwsApiCallStats; - -import java.util.Iterator; -import java.util.function.BiConsumer; -import java.util.function.Function; -import java.util.stream.Stream; - -import static com.google.common.collect.Streams.stream; -import static java.util.Objects.requireNonNull; - -public final class AwsSdkUtil -{ - private AwsSdkUtil() {} - - /** - * Helper method to get all results from a paginated API. - * - * @param request request object reused for subsequent requests with - * {@code setNextToken} being used to set the next token in the request object - */ - public static Stream getPaginatedResults( - Function submission, - Request request, - BiConsumer setNextToken, - Function extractNextToken, - AwsApiCallStats stats) - { - requireNonNull(submission, "submission is null"); - requireNonNull(request, "request is null"); - requireNonNull(setNextToken, "setNextToken is null"); - requireNonNull(extractNextToken, "extractNextToken is null"); - - Iterator iterator = new AbstractIterator<>() - { - private String nextToken; - private boolean firstRequest = true; - - @Override - protected Result computeNext() - { - if (nextToken == null && !firstRequest) { - return endOfData(); - } - - setNextToken.accept(request, nextToken); - Result result = stats.call(() -> submission.apply(request)); - firstRequest = false; - nextToken = extractNextToken.apply(result); - return result; - } - }; - - return stream(iterator); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueColumnStatisticsProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueColumnStatisticsProvider.java deleted file mode 100644 index 7fe6b886bf62..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueColumnStatisticsProvider.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.ColumnStatistics; -import com.amazonaws.services.glue.model.ColumnStatisticsData; -import com.amazonaws.services.glue.model.ColumnStatisticsType; -import com.amazonaws.services.glue.model.DateColumnStatisticsData; -import com.amazonaws.services.glue.model.DecimalColumnStatisticsData; -import com.amazonaws.services.glue.model.DeleteColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.DeleteColumnStatisticsForTableRequest; -import com.amazonaws.services.glue.model.DoubleColumnStatisticsData; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionResult; -import com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest; -import com.amazonaws.services.glue.model.GetColumnStatisticsForTableResult; -import com.amazonaws.services.glue.model.LongColumnStatisticsData; -import com.amazonaws.services.glue.model.UpdateColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.UpdateColumnStatisticsForTableRequest; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; -import io.trino.metastore.Column; -import io.trino.metastore.HiveColumnStatistics; -import io.trino.metastore.Partition; -import io.trino.metastore.Table; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.spi.TrinoException; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.Executor; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Sets.difference; -import static io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.trino.metastore.Partitions.toPartitionValues; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_NOT_FOUND; -import static io.trino.plugin.hive.metastore.glue.v1.GlueStatConverter.fromGlueColumnStatistics; -import static io.trino.plugin.hive.metastore.glue.v1.GlueStatConverter.toGlueColumnStatistics; -import static java.util.concurrent.CompletableFuture.allOf; -import static java.util.concurrent.CompletableFuture.runAsync; -import static java.util.concurrent.CompletableFuture.supplyAsync; -import static java.util.stream.Collectors.toUnmodifiableList; - -public class DefaultGlueColumnStatisticsProvider - implements GlueColumnStatisticsProvider -{ - // Read limit for AWS Glue API GetColumnStatisticsForPartition - // https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetColumnStatisticsForPartition - private static final int GLUE_COLUMN_READ_STAT_PAGE_SIZE = 100; - - // Write limit for AWS Glue API UpdateColumnStatisticsForPartition - // https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-UpdateColumnStatisticsForPartition - private static final int GLUE_COLUMN_WRITE_STAT_PAGE_SIZE = 25; - - private 
final GlueMetastoreStats stats; - private final AWSGlueAsync glueClient; - private final Executor readExecutor; - private final Executor writeExecutor; - - public DefaultGlueColumnStatisticsProvider(AWSGlueAsync glueClient, Executor readExecutor, Executor writeExecutor, GlueMetastoreStats stats) - { - this.glueClient = glueClient; - this.readExecutor = readExecutor; - this.writeExecutor = writeExecutor; - this.stats = stats; - } - - @Override - public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames) - { - try { - List> columnChunks = Lists.partition(ImmutableList.copyOf(columnNames), GLUE_COLUMN_READ_STAT_PAGE_SIZE); - List> getStatsFutures = columnChunks.stream() - .map(partialColumns -> supplyAsync(() -> { - GetColumnStatisticsForTableRequest request = new GetColumnStatisticsForTableRequest() - .withDatabaseName(databaseName) - .withTableName(tableName) - .withColumnNames(partialColumns); - return stats.getGetColumnStatisticsForTable().call(() -> glueClient.getColumnStatisticsForTable(request)); - }, readExecutor)).collect(toImmutableList()); - - ImmutableMap.Builder columnStatsMapBuilder = ImmutableMap.builder(); - for (CompletableFuture future : getStatsFutures) { - GetColumnStatisticsForTableResult tableColumnsStats = getFutureValue(future, TrinoException.class); - for (ColumnStatistics columnStatistics : tableColumnsStats.getColumnStatisticsList()) { - columnStatsMapBuilder.put( - columnStatistics.getColumnName(), - fromGlueColumnStatistics(columnStatistics.getStatisticsData())); - } - } - return columnStatsMapBuilder.buildOrThrow(); - } - catch (RuntimeException ex) { - throw new TrinoException(HIVE_METASTORE_ERROR, ex); - } - } - - @Override - public Map> getPartitionColumnStatistics( - String databaseName, - String tableName, - Set partitionNames, - Set columnNames) - { - Map>> resultsForPartition = new HashMap<>(); - for (String partitionName : partitionNames) { - ImmutableList.Builder> futures = ImmutableList.builder(); - for (List columnBatch : Lists.partition(ImmutableList.copyOf(columnNames), GLUE_COLUMN_READ_STAT_PAGE_SIZE)) { - GetColumnStatisticsForPartitionRequest request = new GetColumnStatisticsForPartitionRequest() - .withDatabaseName(databaseName) - .withTableName(tableName) - .withColumnNames(columnBatch) - .withPartitionValues(toPartitionValues(partitionName)); - futures.add(supplyAsync(() -> stats.getGetColumnStatisticsForPartition().call(() -> glueClient.getColumnStatisticsForPartition(request)), readExecutor)); - } - resultsForPartition.put(partitionName, futures.build()); - } - - try { - ImmutableMap.Builder> partitionStatistics = ImmutableMap.builder(); - resultsForPartition.forEach((partitionName, futures) -> { - ImmutableMap.Builder columnStatsMapBuilder = ImmutableMap.builder(); - for (CompletableFuture getColumnStatisticsResultFuture : futures) { - GetColumnStatisticsForPartitionResult getColumnStatisticsResult = getFutureValue(getColumnStatisticsResultFuture); - getColumnStatisticsResult.getColumnStatisticsList().forEach(columnStatistics -> - columnStatsMapBuilder.put( - columnStatistics.getColumnName(), - fromGlueColumnStatistics(columnStatistics.getStatisticsData()))); - } - - partitionStatistics.put(partitionName, columnStatsMapBuilder.buildOrThrow()); - }); - - return partitionStatistics.buildOrThrow(); - } - catch (RuntimeException ex) { - if (ex.getCause() != null && ex.getCause() instanceof EntityNotFoundException) { - throw new TrinoException(HIVE_PARTITION_NOT_FOUND, ex.getCause()); - } - throw new 
TrinoException(HIVE_METASTORE_ERROR, ex); - } - } - - // Glue will accept null as min/max values but return 0 when reading - // to avoid incorrect stats we skip writes for column statistics that have min/max null - // this can be removed once glue fix this behaviour - private boolean isGlueWritable(ColumnStatistics stats) - { - ColumnStatisticsData statisticsData = stats.getStatisticsData(); - String columnType = stats.getStatisticsData().getType(); - if (columnType.equals(ColumnStatisticsType.DATE.toString())) { - DateColumnStatisticsData data = statisticsData.getDateColumnStatisticsData(); - return data.getMaximumValue() != null && data.getMinimumValue() != null; - } - if (columnType.equals(ColumnStatisticsType.DECIMAL.toString())) { - DecimalColumnStatisticsData data = statisticsData.getDecimalColumnStatisticsData(); - return data.getMaximumValue() != null && data.getMinimumValue() != null; - } - if (columnType.equals(ColumnStatisticsType.DOUBLE.toString())) { - DoubleColumnStatisticsData data = statisticsData.getDoubleColumnStatisticsData(); - return data.getMaximumValue() != null && data.getMinimumValue() != null; - } - if (columnType.equals(ColumnStatisticsType.LONG.toString())) { - LongColumnStatisticsData data = statisticsData.getLongColumnStatisticsData(); - return data.getMaximumValue() != null && data.getMinimumValue() != null; - } - return true; - } - - @Override - public void updateTableColumnStatistics(Table table, Map updatedTableColumnStatistics) - { - try { - List columnStats = toGlueColumnStatistics(table, updatedTableColumnStatistics).stream() - .filter(this::isGlueWritable) - .collect(toUnmodifiableList()); - - List> columnChunks = Lists.partition(columnStats, GLUE_COLUMN_WRITE_STAT_PAGE_SIZE); - - List> updateFutures = columnChunks.stream().map(columnChunk -> runAsync( - () -> stats.getUpdateColumnStatisticsForTable().call(() -> glueClient.updateColumnStatisticsForTable( - new UpdateColumnStatisticsForTableRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableName(table.getTableName()) - .withColumnStatisticsList(columnChunk))), this.writeExecutor)) - .collect(toUnmodifiableList()); - - Set removedStatistics = difference(ImmutableSet.copyOf(getAllColumns(table)), updatedTableColumnStatistics.keySet()); - List> deleteFutures = removedStatistics.stream() - .map(column -> runAsync(() -> stats.getDeleteColumnStatisticsForTable().call(() -> { - try { - glueClient.deleteColumnStatisticsForTable( - new DeleteColumnStatisticsForTableRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableName(table.getTableName()) - .withColumnName(column)); - } - catch (EntityNotFoundException _) { - } - return null; - }), this.writeExecutor)) - .collect(toUnmodifiableList()); - - ImmutableList> updateOperationsFutures = ImmutableList.>builder() - .addAll(updateFutures) - .addAll(deleteFutures) - .build(); - - getFutureValue(allOf(updateOperationsFutures.toArray(CompletableFuture[]::new))); - } - catch (RuntimeException ex) { - throw new TrinoException(HIVE_METASTORE_ERROR, ex); - } - } - - @Override - public void updatePartitionStatistics(Set partitionStatisticsUpdates) - { - List> updateFutures = new ArrayList<>(); - for (PartitionStatisticsUpdate update : partitionStatisticsUpdates) { - Partition partition = update.getPartition(); - Map updatedColumnStatistics = update.getColumnStatistics(); - - List columnStats = toGlueColumnStatistics(partition, updatedColumnStatistics).stream() - .filter(this::isGlueWritable) - .collect(toUnmodifiableList()); - - List> 
columnChunks = Lists.partition(columnStats, GLUE_COLUMN_WRITE_STAT_PAGE_SIZE); - columnChunks.forEach(columnChunk -> - updateFutures.add(runAsync(() -> stats.getUpdateColumnStatisticsForPartition().call(() -> - glueClient.updateColumnStatisticsForPartition( - new UpdateColumnStatisticsForPartitionRequest() - .withDatabaseName(partition.getDatabaseName()) - .withTableName(partition.getTableName()) - .withPartitionValues(partition.getValues()) - .withColumnStatisticsList(columnChunk))), - writeExecutor))); - - Set removedStatistics = difference(partition.getColumns().stream().map(Column::getName).collect(toImmutableSet()), updatedColumnStatistics.keySet()); - removedStatistics.forEach(column -> - updateFutures.add(runAsync(() -> stats.getDeleteColumnStatisticsForPartition().call(() -> - glueClient.deleteColumnStatisticsForPartition( - new DeleteColumnStatisticsForPartitionRequest() - .withDatabaseName(partition.getDatabaseName()) - .withTableName(partition.getTableName()) - .withPartitionValues(partition.getValues()) - .withColumnName(column))), - writeExecutor))); - } - try { - getFutureValue(allOf(updateFutures.toArray(CompletableFuture[]::new))); - } - catch (RuntimeException ex) { - if (ex.getCause() != null && ex.getCause() instanceof EntityNotFoundException) { - throw new TrinoException(HIVE_PARTITION_NOT_FOUND, ex.getCause()); - } - throw new TrinoException(HIVE_METASTORE_ERROR, ex); - } - } - - private Set getAllColumns(Table table) - { - ImmutableSet.Builder allColumns = ImmutableSet.builderWithExpectedSize(table.getDataColumns().size() + table.getPartitionColumns().size()); - table.getDataColumns().stream().map(Column::getName).forEach(allColumns::add); - table.getPartitionColumns().stream().map(Column::getName).forEach(allColumns::add); - return allColumns.build(); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueColumnStatisticsProviderFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueColumnStatisticsProviderFactory.java deleted file mode 100644 index 3d26fb8e9e8a..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueColumnStatisticsProviderFactory.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.google.inject.Inject; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; - -import java.util.concurrent.Executor; - -import static java.util.Objects.requireNonNull; - -public class DefaultGlueColumnStatisticsProviderFactory - implements GlueColumnStatisticsProviderFactory -{ - private final Executor statisticsReadExecutor; - private final Executor statisticsWriteExecutor; - - @Inject - public DefaultGlueColumnStatisticsProviderFactory( - @ForGlueColumnStatisticsRead Executor statisticsReadExecutor, - @ForGlueColumnStatisticsWrite Executor statisticsWriteExecutor) - { - this.statisticsReadExecutor = requireNonNull(statisticsReadExecutor, "statisticsReadExecutor is null"); - this.statisticsWriteExecutor = requireNonNull(statisticsWriteExecutor, "statisticsWriteExecutor is null"); - } - - @Override - public GlueColumnStatisticsProvider createGlueColumnStatisticsProvider(AWSGlueAsync glueClient, GlueMetastoreStats stats) - { - return new DefaultGlueColumnStatisticsProvider( - glueClient, - statisticsReadExecutor, - statisticsWriteExecutor, - stats); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueMetastoreTableFilterProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueMetastoreTableFilterProvider.java deleted file mode 100644 index 54f81a1eb184..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/DefaultGlueMetastoreTableFilterProvider.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.Table; -import com.google.inject.Inject; -import com.google.inject.Provider; -import io.trino.plugin.hive.HideDeltaLakeTables; - -import java.util.function.Predicate; - -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; -import static java.util.function.Predicate.not; - -public class DefaultGlueMetastoreTableFilterProvider - implements Provider> -{ - private final boolean hideDeltaLakeTables; - - @Inject - public DefaultGlueMetastoreTableFilterProvider(@HideDeltaLakeTables boolean hideDeltaLakeTables) - { - this.hideDeltaLakeTables = hideDeltaLakeTables; - } - - @Override - public Predicate
get() - { - if (hideDeltaLakeTables) { - return not(table -> isDeltaLakeTable(getTableParameters(table))); - } - return table -> true; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueColumnStatisticsRead.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueColumnStatisticsRead.java deleted file mode 100644 index d8c9b51c53df..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueColumnStatisticsRead.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.inject.BindingAnnotation; - -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import static java.lang.annotation.ElementType.FIELD; -import static java.lang.annotation.ElementType.METHOD; -import static java.lang.annotation.ElementType.PARAMETER; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({FIELD, PARAMETER, METHOD}) -@BindingAnnotation -public @interface ForGlueColumnStatisticsRead {} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueColumnStatisticsWrite.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueColumnStatisticsWrite.java deleted file mode 100644 index 240e313b4177..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueColumnStatisticsWrite.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.inject.BindingAnnotation; - -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import static java.lang.annotation.ElementType.FIELD; -import static java.lang.annotation.ElementType.METHOD; -import static java.lang.annotation.ElementType.PARAMETER; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({FIELD, PARAMETER, METHOD}) -@BindingAnnotation -public @interface ForGlueColumnStatisticsWrite {} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueHiveMetastore.java deleted file mode 100644 index 5cac1223d9ad..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ForGlueHiveMetastore.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.inject.BindingAnnotation; - -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import static java.lang.annotation.ElementType.FIELD; -import static java.lang.annotation.ElementType.METHOD; -import static java.lang.annotation.ElementType.PARAMETER; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({FIELD, PARAMETER, METHOD}) -@BindingAnnotation -public @interface ForGlueHiveMetastore {} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueCatalogIdRequestHandler.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueCatalogIdRequestHandler.java deleted file mode 100644 index 55df75c3a52b..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueCatalogIdRequestHandler.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.glue.model.BatchCreatePartitionRequest; -import com.amazonaws.services.glue.model.BatchGetPartitionRequest; -import com.amazonaws.services.glue.model.BatchUpdatePartitionRequest; -import com.amazonaws.services.glue.model.CreateDatabaseRequest; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.CreateUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.DeleteColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.DeleteColumnStatisticsForTableRequest; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.DeletePartitionRequest; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.DeleteUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetPartitionRequest; -import com.amazonaws.services.glue.model.GetPartitionsRequest; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.GetTablesRequest; -import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.GetUserDefinedFunctionsRequest; -import com.amazonaws.services.glue.model.UpdateColumnStatisticsForPartitionRequest; -import com.amazonaws.services.glue.model.UpdateColumnStatisticsForTableRequest; -import com.amazonaws.services.glue.model.UpdateDatabaseRequest; -import com.amazonaws.services.glue.model.UpdatePartitionRequest; -import com.amazonaws.services.glue.model.UpdateTableRequest; -import com.amazonaws.services.glue.model.UpdateUserDefinedFunctionRequest; - -import static java.util.Objects.requireNonNull; - -public class GlueCatalogIdRequestHandler - extends RequestHandler2 -{ - private final String catalogId; - - public GlueCatalogIdRequestHandler(String catalogId) - { - this.catalogId = requireNonNull(catalogId, "catalogId is null"); - } - - @Override - public AmazonWebServiceRequest beforeExecution(AmazonWebServiceRequest serviceRequest) - { - return switch (serviceRequest) { - case GetDatabasesRequest request -> request.withCatalogId(catalogId); - case GetDatabaseRequest request -> request.withCatalogId(catalogId); - case CreateDatabaseRequest request -> request.withCatalogId(catalogId); - case UpdateDatabaseRequest request -> request.withCatalogId(catalogId); - case DeleteDatabaseRequest request -> request.withCatalogId(catalogId); - case GetTablesRequest request -> request.withCatalogId(catalogId); - case GetTableRequest request -> request.withCatalogId(catalogId); - case CreateTableRequest request -> request.withCatalogId(catalogId); - case UpdateTableRequest request -> request.withCatalogId(catalogId); - case DeleteTableRequest request -> request.withCatalogId(catalogId); - case GetPartitionsRequest request -> request.withCatalogId(catalogId); - case GetPartitionRequest request -> request.withCatalogId(catalogId); - case UpdatePartitionRequest request -> request.withCatalogId(catalogId); - case DeletePartitionRequest request -> request.withCatalogId(catalogId); 
- case BatchGetPartitionRequest request -> request.withCatalogId(catalogId); - case BatchCreatePartitionRequest request -> request.withCatalogId(catalogId); - case BatchUpdatePartitionRequest request -> request.withCatalogId(catalogId); - case GetColumnStatisticsForTableRequest request -> request.withCatalogId(catalogId); - case UpdateColumnStatisticsForTableRequest request -> request.withCatalogId(catalogId); - case DeleteColumnStatisticsForTableRequest request -> request.withCatalogId(catalogId); - case GetColumnStatisticsForPartitionRequest request -> request.withCatalogId(catalogId); - case UpdateColumnStatisticsForPartitionRequest request -> request.withCatalogId(catalogId); - case DeleteColumnStatisticsForPartitionRequest request -> request.withCatalogId(catalogId); - case GetUserDefinedFunctionsRequest request -> request.withCatalogId(catalogId); - case GetUserDefinedFunctionRequest request -> request.withCatalogId(catalogId); - case CreateUserDefinedFunctionRequest request -> request.withCatalogId(catalogId); - case UpdateUserDefinedFunctionRequest request -> request.withCatalogId(catalogId); - case DeleteUserDefinedFunctionRequest request -> request.withCatalogId(catalogId); - default -> throw new IllegalArgumentException("Unsupported request: " + serviceRequest); - }; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java deleted file mode 100644 index 1172ea9ea4f7..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.metrics.RequestMetricCollector; -import com.amazonaws.retry.PredefinedBackoffStrategies.ExponentialBackoffStrategy; -import com.amazonaws.retry.PredefinedRetryPolicies; -import com.amazonaws.retry.RetryPolicy; -import com.amazonaws.retry.RetryPolicy.BackoffStrategy; -import com.amazonaws.retry.RetryPolicy.RetryCondition; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.ConcurrentModificationException; - -import java.util.Set; - -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.plugin.hive.metastore.glue.v1.AwsCurrentRegionHolder.getCurrentRegionFromEc2Metadata; - -public final class GlueClientUtil -{ - private GlueClientUtil() {} - - public static AWSGlueAsync createAsyncGlueClient( - GlueHiveMetastoreConfig config, - AWSCredentialsProvider credentialsProvider, - Set requestHandlers, - RequestMetricCollector metricsCollector) - { - RetryPolicy defaultRetryPolicy = PredefinedRetryPolicies.getDefaultRetryPolicy(); - - RetryCondition customRetryCondition = (requestContext, exception, retriesAttempted) -> - defaultRetryPolicy.getRetryCondition().shouldRetry(requestContext, exception, retriesAttempted) - || exception instanceof ConcurrentModificationException; - BackoffStrategy customBackoffStrategy = new ExponentialBackoffStrategy(20, 1500); - - RetryPolicy glueRetryPolicy = RetryPolicy.builder() - .withRetryMode(defaultRetryPolicy.getRetryMode()) - .withRetryCondition(customRetryCondition) - .withBackoffStrategy(customBackoffStrategy) - .withFastFailRateLimiting(defaultRetryPolicy.isFastFailRateLimiting()) - .withMaxErrorRetry(config.getMaxGlueErrorRetries()) - .build(); - - ClientConfiguration clientConfig = new ClientConfiguration() - .withMaxConnections(config.getMaxGlueConnections()) - .withRetryPolicy(glueRetryPolicy); - AWSGlueAsyncClientBuilder asyncGlueClientBuilder = AWSGlueAsyncClientBuilder.standard() - .withMetricsCollector(metricsCollector) - .withClientConfiguration(clientConfig); - - asyncGlueClientBuilder.setRequestHandlers(requestHandlers.toArray(RequestHandler2[]::new)); - - if (config.getGlueEndpointUrl().isPresent()) { - checkArgument(config.getGlueRegion().isPresent(), "Glue region must be set when Glue endpoint URL is set"); - asyncGlueClientBuilder.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration( - config.getGlueEndpointUrl().get(), - config.getGlueRegion().get())); - } - else if (config.getGlueRegion().isPresent()) { - asyncGlueClientBuilder.setRegion(config.getGlueRegion().get()); - } - else if (config.getPinGlueClientToCurrentRegion()) { - asyncGlueClientBuilder.setRegion(getCurrentRegionFromEc2Metadata().getName()); - } - - asyncGlueClientBuilder.setCredentials(credentialsProvider); - - return asyncGlueClientBuilder.build(); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueColumnStatisticsProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueColumnStatisticsProvider.java deleted file mode 100644 index d328b84c3bf5..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueColumnStatisticsProvider.java +++ /dev/null 
@@ -1,67 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import io.trino.metastore.HiveColumnStatistics; -import io.trino.metastore.Partition; -import io.trino.metastore.Table; - -import java.util.Map; -import java.util.Set; - -import static java.util.Objects.requireNonNull; - -public interface GlueColumnStatisticsProvider -{ - Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames); - - Map> getPartitionColumnStatistics( - String databaseName, - String tableName, - Set partitionNames, - Set columns); - - void updateTableColumnStatistics(Table table, Map columnStatistics); - - default void updatePartitionStatistics(Partition partition, Map columnStatistics) - { - updatePartitionStatistics(ImmutableSet.of(new PartitionStatisticsUpdate(partition, columnStatistics))); - } - - void updatePartitionStatistics(Set partitionStatisticsUpdates); - - class PartitionStatisticsUpdate - { - private final Partition partition; - private final Map columnStatistics; - - public PartitionStatisticsUpdate(Partition partition, Map columnStatistics) - { - this.partition = requireNonNull(partition, "partition is null"); - this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null")); - } - - public Partition getPartition() - { - return partition; - } - - public Map getColumnStatistics() - { - return columnStatistics; - } - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueColumnStatisticsProviderFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueColumnStatisticsProviderFactory.java deleted file mode 100644 index 3a8cceda0266..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueColumnStatisticsProviderFactory.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.AWSGlueAsync; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; - -public interface GlueColumnStatisticsProviderFactory -{ - GlueColumnStatisticsProvider createGlueColumnStatisticsProvider(AWSGlueAsync glueClient, GlueMetastoreStats stats); -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueCredentialsProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueCredentialsProvider.java deleted file mode 100644 index 715030d44a1f..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueCredentialsProvider.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.google.inject.Inject; -import com.google.inject.Provider; - -import static io.trino.plugin.hive.metastore.glue.v1.AwsCurrentRegionHolder.getCurrentRegionFromEc2Metadata; -import static java.lang.String.format; - -public class GlueCredentialsProvider - implements Provider -{ - private final AWSCredentialsProvider credentialsProvider; - - @Inject - public GlueCredentialsProvider(GlueHiveMetastoreConfig config) - { - if (config.getAwsCredentialsProvider().isPresent()) { - this.credentialsProvider = getCustomAWSCredentialsProvider(config.getAwsCredentialsProvider().get()); - } - else { - AWSCredentialsProvider provider; - if (config.getAwsAccessKey().isPresent() && config.getAwsSecretKey().isPresent()) { - provider = new AWSStaticCredentialsProvider( - new BasicAWSCredentials(config.getAwsAccessKey().get(), config.getAwsSecretKey().get())); - } - else { - provider = DefaultAWSCredentialsProviderChain.getInstance(); - } - if (config.getIamRole().isPresent()) { - AWSSecurityTokenServiceClientBuilder stsClientBuilder = AWSSecurityTokenServiceClientBuilder - .standard() - .withCredentials(provider); - - if (config.getGlueStsEndpointUrl().isPresent() && config.getGlueStsRegion().isPresent()) { - stsClientBuilder.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(config.getGlueStsEndpointUrl().get(), config.getGlueStsRegion().get())); - } - else if (config.getGlueStsRegion().isPresent()) { - stsClientBuilder.setRegion(config.getGlueStsRegion().get()); - } - else if (config.getPinGlueClientToCurrentRegion()) { - stsClientBuilder.setRegion(getCurrentRegionFromEc2Metadata().getName()); - } - - provider = new STSAssumeRoleSessionCredentialsProvider - .Builder(config.getIamRole().get(), "trino-session") 
- .withExternalId(config.getExternalId().orElse(null)) - .withStsClient(stsClientBuilder.build()) - .build(); - } - this.credentialsProvider = provider; - } - } - - @Override - public AWSCredentialsProvider get() - { - return credentialsProvider; - } - - private static AWSCredentialsProvider getCustomAWSCredentialsProvider(String providerClass) - { - try { - Object instance = Class.forName(providerClass).getConstructor().newInstance(); - if (!(instance instanceof AWSCredentialsProvider)) { - throw new RuntimeException("Invalid credentials provider class: " + instance.getClass().getName()); - } - return (AWSCredentialsProvider) instance; - } - catch (ReflectiveOperationException e) { - throw new RuntimeException(format("Error creating an instance of %s", providerClass), e); - } - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java deleted file mode 100644 index 8e24cdfcf873..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.AmazonServiceException; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.handlers.AsyncHandler; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.AccessDeniedException; -import com.amazonaws.services.glue.model.AlreadyExistsException; -import com.amazonaws.services.glue.model.BatchCreatePartitionRequest; -import com.amazonaws.services.glue.model.BatchCreatePartitionResult; -import com.amazonaws.services.glue.model.BatchGetPartitionRequest; -import com.amazonaws.services.glue.model.BatchGetPartitionResult; -import com.amazonaws.services.glue.model.BatchUpdatePartitionRequest; -import com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry; -import com.amazonaws.services.glue.model.BatchUpdatePartitionResult; -import com.amazonaws.services.glue.model.CreateDatabaseRequest; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.CreateUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.DatabaseInput; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.DeletePartitionRequest; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.DeleteUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.ErrorDetail; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.amazonaws.services.glue.model.GetDatabaseResult; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetDatabasesResult; -import 
com.amazonaws.services.glue.model.GetPartitionRequest; -import com.amazonaws.services.glue.model.GetPartitionResult; -import com.amazonaws.services.glue.model.GetPartitionsRequest; -import com.amazonaws.services.glue.model.GetPartitionsResult; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.GetTableResult; -import com.amazonaws.services.glue.model.GetTablesRequest; -import com.amazonaws.services.glue.model.GetTablesResult; -import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.GetUserDefinedFunctionsRequest; -import com.amazonaws.services.glue.model.GetUserDefinedFunctionsResult; -import com.amazonaws.services.glue.model.PartitionError; -import com.amazonaws.services.glue.model.PartitionInput; -import com.amazonaws.services.glue.model.PartitionValueList; -import com.amazonaws.services.glue.model.Segment; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateDatabaseRequest; -import com.amazonaws.services.glue.model.UpdatePartitionRequest; -import com.amazonaws.services.glue.model.UpdateTableRequest; -import com.amazonaws.services.glue.model.UpdateUserDefinedFunctionRequest; -import com.amazonaws.services.glue.model.UserDefinedFunctionInput; -import com.google.common.base.Stopwatch; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.inject.Inject; -import io.airlift.concurrent.MoreFutures; -import io.airlift.log.Logger; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.TrinoFileSystemFactory; -import io.trino.metastore.Column; -import io.trino.metastore.Database; -import io.trino.metastore.HiveBasicStatistics; -import io.trino.metastore.HiveColumnStatistics; -import io.trino.metastore.HiveMetastore; -import io.trino.metastore.HivePrincipal; -import io.trino.metastore.HivePrivilegeInfo; -import io.trino.metastore.HivePrivilegeInfo.HivePrivilege; -import io.trino.metastore.HiveType; -import io.trino.metastore.Partition; -import io.trino.metastore.PartitionStatistics; -import io.trino.metastore.PartitionWithStatistics; -import io.trino.metastore.Partitions; -import io.trino.metastore.PrincipalPrivileges; -import io.trino.metastore.SchemaAlreadyExistsException; -import io.trino.metastore.StatisticsUpdateMode; -import io.trino.metastore.Table; -import io.trino.metastore.TableAlreadyExistsException; -import io.trino.metastore.TableInfo; -import io.trino.plugin.hive.PartitionNotFoundException; -import io.trino.plugin.hive.metastore.glue.AwsApiCallStats; -import io.trino.plugin.hive.metastore.glue.GlueExpressionUtil; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.GluePartitionConverter; -import io.trino.spi.TrinoException; -import io.trino.spi.connector.ColumnNotFoundException; -import io.trino.spi.connector.SchemaNotFoundException; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.function.LanguageFunction; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.ConnectorIdentity; -import io.trino.spi.security.RoleGrant; -import jakarta.annotation.Nullable; -import org.weakref.jmx.Flatten; -import 
org.weakref.jmx.Managed; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.OptionalLong; -import java.util.Set; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.Future; -import java.util.function.Predicate; -import java.util.regex.Pattern; -import java.util.stream.Stream; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Strings.isNullOrEmpty; -import static com.google.common.collect.Comparators.lexicographical; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static io.trino.metastore.Partitions.toPartitionValues; -import static io.trino.metastore.Table.TABLE_COMMENT; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR; -import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; -import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.metastore.MetastoreUtil.getHiveBasicStatistics; -import static io.trino.plugin.hive.metastore.MetastoreUtil.metastoreFunctionName; -import static io.trino.plugin.hive.metastore.MetastoreUtil.toPartitionName; -import static io.trino.plugin.hive.metastore.MetastoreUtil.updateStatisticsParameters; -import static io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn; -import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; -import static io.trino.plugin.hive.metastore.glue.v1.GlueInputConverter.convertFunction; -import static io.trino.plugin.hive.metastore.glue.v1.GlueInputConverter.convertGlueTableToTableInput; -import static io.trino.plugin.hive.metastore.glue.v1.GlueInputConverter.convertPartition; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableType; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.mappedCopy; -import static io.trino.plugin.hive.util.HiveUtil.escapeSchemaName; -import static io.trino.spi.StandardErrorCode.ALREADY_EXISTS; -import static io.trino.spi.StandardErrorCode.FUNCTION_NOT_FOUND; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.security.PrincipalType.USER; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.NANOSECONDS; -import static java.util.function.Predicate.not; -import static java.util.function.UnaryOperator.identity; -import static java.util.stream.Collectors.toCollection; -import static java.util.stream.Collectors.toMap; - -public class GlueHiveMetastore - implements HiveMetastore -{ - private static final Logger log = Logger.get(GlueHiveMetastore.class); - - private static final String PUBLIC_ROLE_NAME = "public"; - private static final String DEFAULT_METASTORE_USER = "presto"; - private static final int BATCH_GET_PARTITION_MAX_PAGE_SIZE = 1000; - private static 
final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100; - private static final int BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE = 100; - private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000; - private static final int AWS_GLUE_GET_DATABASES_MAX_RESULTS = 100; - private static final int AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS = 100; - private static final int AWS_GLUE_GET_TABLES_MAX_RESULTS = 100; - private static final Comparator> PARTITION_VALUE_COMPARATOR = lexicographical(String.CASE_INSENSITIVE_ORDER); - - private final TrinoFileSystem fileSystem; - private final AWSGlueAsync glueClient; - private final Optional defaultDir; - private final int partitionSegments; - private final Executor partitionsReadExecutor; - private final GlueMetastoreStats stats; - private final GlueColumnStatisticsProvider columnStatisticsProvider; - private final boolean assumeCanonicalPartitionKeys; - private final Predicate tableFilter; - - @Inject - public GlueHiveMetastore( - TrinoFileSystemFactory fileSystemFactory, - GlueHiveMetastoreConfig glueConfig, - @ForGlueHiveMetastore Executor partitionsReadExecutor, - GlueColumnStatisticsProviderFactory columnStatisticsProviderFactory, - AWSGlueAsync glueClient, - @ForGlueHiveMetastore GlueMetastoreStats stats, - @ForGlueHiveMetastore Predicate tableFilter) - { - this.fileSystem = fileSystemFactory.create(ConnectorIdentity.ofUser(DEFAULT_METASTORE_USER)); - this.glueClient = requireNonNull(glueClient, "glueClient is null"); - this.defaultDir = glueConfig.getDefaultWarehouseDir(); - this.partitionSegments = glueConfig.getPartitionSegments(); - this.partitionsReadExecutor = requireNonNull(partitionsReadExecutor, "partitionsReadExecutor is null"); - this.assumeCanonicalPartitionKeys = glueConfig.isAssumeCanonicalPartitionKeys(); - this.tableFilter = requireNonNull(tableFilter, "tableFilter is null"); - this.stats = requireNonNull(stats, "stats is null"); - this.columnStatisticsProvider = columnStatisticsProviderFactory.createGlueColumnStatisticsProvider(glueClient, stats); - } - - @Managed - @Flatten - public GlueMetastoreStats getStats() - { - return stats; - } - - @Override - public List getAllDatabases() - { - try { - List databaseNames = getPaginatedResults( - glueClient::getDatabases, - new GetDatabasesRequest() - .withMaxResults(AWS_GLUE_GET_DATABASES_MAX_RESULTS), - GetDatabasesRequest::setNextToken, - GetDatabasesResult::getNextToken, - stats.getGetDatabases()) - .map(GetDatabasesResult::getDatabaseList) - .flatMap(List::stream) - .map(com.amazonaws.services.glue.model.Database::getName) - .collect(toImmutableList()); - return databaseNames; - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public Optional getDatabase(String databaseName) - { - try { - GetDatabaseResult result = stats.getGetDatabase().call(() -> - glueClient.getDatabase(new GetDatabaseRequest().withName(databaseName))); - return Optional.of(GlueToTrinoConverter.convertDatabase(result.getDatabase())); - } - catch (EntityNotFoundException e) { - return Optional.empty(); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void createDatabase(Database database) - { - if (database.getLocation().isEmpty() && defaultDir.isPresent()) { - Location location = Location.of(defaultDir.get()) - .appendPath(escapeSchemaName(database.getDatabaseName())); - database = Database.builder(database) - .setLocation(Optional.of(location.toString())) - .build(); - } - - try { - 
DatabaseInput databaseInput = GlueInputConverter.convertDatabase(database); - stats.getCreateDatabase().call(() -> - glueClient.createDatabase(new CreateDatabaseRequest().withDatabaseInput(databaseInput))); - } - catch (AlreadyExistsException e) { - // Do not throw SchemaAlreadyExistsException if this query has already created the database. - // This may happen when an actually successful metastore create call is retried - // because of a timeout on our side. - String expectedQueryId = database.getParameters().get(TRINO_QUERY_ID_NAME); - if (expectedQueryId != null) { - String existingQueryId = getDatabase(database.getDatabaseName()) - .map(Database::getParameters) - .map(parameters -> parameters.get(TRINO_QUERY_ID_NAME)) - .orElse(null); - if (expectedQueryId.equals(existingQueryId)) { - return; - } - } - throw new SchemaAlreadyExistsException(database.getDatabaseName(), e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - - if (database.getLocation().isPresent()) { - Location location = Location.of(database.getLocation().get()); - try { - fileSystem.createDirectory(location); - } - catch (IOException e) { - throw new TrinoException(HIVE_FILESYSTEM_ERROR, "Failed to create directory: " + location, e); - } - } - } - - @Override - public void dropDatabase(String databaseName, boolean deleteData) - { - Optional location = Optional.empty(); - if (deleteData) { - location = getDatabase(databaseName) - .orElseThrow(() -> new SchemaNotFoundException(databaseName)) - .getLocation(); - } - - try { - stats.getDeleteDatabase().call(() -> - glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(databaseName))); - } - catch (EntityNotFoundException e) { - throw new SchemaNotFoundException(databaseName, e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - - if (deleteData) { - location.map(Location::of).ifPresent(this::deleteDir); - } - } - - @Override - public void renameDatabase(String databaseName, String newDatabaseName) - { - try { - Database database = getDatabase(databaseName).orElseThrow(() -> new SchemaNotFoundException(databaseName)); - DatabaseInput renamedDatabase = GlueInputConverter.convertDatabase(database).withName(newDatabaseName); - stats.getUpdateDatabase().call(() -> - glueClient.updateDatabase(new UpdateDatabaseRequest() - .withName(databaseName) - .withDatabaseInput(renamedDatabase))); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void setDatabaseOwner(String databaseName, HivePrincipal principal) - { - throw new TrinoException(NOT_SUPPORTED, "setting the database owner is not supported by Glue"); - } - - @Override - public List getTables(String databaseName) - { - try { - return getGlueTables(databaseName) - .filter(tableFilter) - .map(table -> new TableInfo( - new SchemaTableName(databaseName, table.getName()), - TableInfo.ExtendedRelationType.fromTableTypeAndComment(getTableType(table), getTableParameters(table).get(TABLE_COMMENT)))) - .collect(toImmutableList()); - } - catch (EntityNotFoundException | AccessDeniedException e) { - // database does not exist or permission denied - return ImmutableList.of(); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public List getTableNamesWithParameters(String databaseName, String parameterKey, ImmutableSet parameterValues) - { - try { - return getGlueTables(databaseName) - 
.filter(tableFilter) - .filter(table -> parameterValues.contains(getTableParameters(table).get(parameterKey))) - .map(com.amazonaws.services.glue.model.Table::getName) - .collect(toImmutableList()); - } - catch (EntityNotFoundException | AccessDeniedException e) { - // database does not exist or permission denied - return ImmutableList.of(); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public Optional
getTable(String databaseName, String tableName) - { - try { - GetTableResult result = stats.getGetTable().call(() -> - glueClient.getTable(new GetTableRequest() - .withDatabaseName(databaseName) - .withName(tableName))); - return Optional.of(GlueToTrinoConverter.convertTable(result.getTable(), databaseName)); - } - catch (EntityNotFoundException e) { - return Optional.empty(); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - private Table getExistingTable(String databaseName, String tableName) - { - return getTable(databaseName, tableName) - .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - } - - @Override - public void createTable(Table table, PrincipalPrivileges principalPrivileges) - { - try { - TableInput input = GlueInputConverter.convertTable(table); - stats.getCreateTable().call(() -> - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableInput(input))); - } - catch (AlreadyExistsException e) { - // Do not throw TableAlreadyExistsException if this query has already created the table. - // This may happen when an actually successful metastore create call is retried - // because of a timeout on our side. - String expectedQueryId = table.getParameters().get(TRINO_QUERY_ID_NAME); - if (expectedQueryId != null) { - String existingQueryId = getTable(table.getDatabaseName(), table.getTableName()) - .map(Table::getParameters) - .map(parameters -> parameters.get(TRINO_QUERY_ID_NAME)) - .orElse(null); - if (expectedQueryId.equals(existingQueryId)) { - return; - } - } - throw new TableAlreadyExistsException(new SchemaTableName(table.getDatabaseName(), table.getTableName()), e); - } - catch (EntityNotFoundException e) { - throw new SchemaNotFoundException(table.getDatabaseName(), e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void dropTable(String databaseName, String tableName, boolean deleteData) - { - Table table = getExistingTable(databaseName, tableName); - DeleteTableRequest deleteTableRequest = new DeleteTableRequest() - .withDatabaseName(databaseName) - .withName(tableName); - try { - stats.getDeleteTable().call(() -> glueClient.deleteTable(deleteTableRequest)); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - - Optional location = table.getStorage().getOptionalLocation() - .filter(not(String::isEmpty)); - if (deleteData && isManagedTable(table) && location.isPresent()) { - deleteDir(Location.of(location.get())); - } - } - - private static boolean isManagedTable(Table table) - { - return table.getTableType().equals(MANAGED_TABLE.name()); - } - - private void deleteDir(Location path) - { - try { - fileSystem.deleteDirectory(path); - } - catch (Exception e) { - // don't fail if unable to delete path - log.warn(e, "Failed to delete path: %s", path); - } - } - - @Override - public void replaceTable(String databaseName, String tableName, Table newTable, PrincipalPrivileges principalPrivileges) - { - if (!tableName.equals(newTable.getTableName()) || !databaseName.equals(newTable.getDatabaseName())) { - throw new TrinoException(NOT_SUPPORTED, "Table rename is not yet supported by Glue service"); - } - try { - TableInput newTableInput = GlueInputConverter.convertTable(newTable); - stats.getUpdateTable().call(() -> - glueClient.updateTable(new UpdateTableRequest() - .withDatabaseName(databaseName) - 
.withTableInput(newTableInput))); - } - catch (EntityNotFoundException e) { - throw new TableNotFoundException(new SchemaTableName(databaseName, tableName), e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void renameTable(String databaseName, String tableName, String newDatabaseName, String newTableName) - { - boolean newTableCreated = false; - try { - GetTableRequest getTableRequest = new GetTableRequest().withDatabaseName(databaseName) - .withName(tableName); - GetTableResult glueTable = glueClient.getTable(getTableRequest); - TableInput tableInput = convertGlueTableToTableInput(glueTable.getTable()).withName(newTableName); - CreateTableRequest createTableRequest = new CreateTableRequest() - .withDatabaseName(newDatabaseName) - .withTableInput(tableInput); - stats.getCreateTable().call(() -> glueClient.createTable(createTableRequest)); - newTableCreated = true; - dropTable(databaseName, tableName, false); - } - catch (RuntimeException e) { - if (newTableCreated) { - try { - dropTable(databaseName, tableName, false); - } - catch (RuntimeException cleanupException) { - if (!cleanupException.equals(e)) { - e.addSuppressed(cleanupException); - } - } - } - throw e; - } - } - - @Override - public void commentTable(String databaseName, String tableName, Optional comment) - { - Table oldTable = getExistingTable(databaseName, tableName); - Table newTable = Table.builder(oldTable) - .setParameter(TABLE_COMMENT, comment) - .build(); - replaceTable(databaseName, tableName, newTable, null); - } - - @Override - public void setTableOwner(String databaseName, String tableName, HivePrincipal principal) - { - // TODO Add role support https://github.com/trinodb/trino/issues/5706 - if (principal.getType() != USER) { - throw new TrinoException(NOT_SUPPORTED, "Setting table owner type as a role is not supported"); - } - - try { - Table table = getExistingTable(databaseName, tableName); - TableInput newTableInput = GlueInputConverter.convertTable(table); - newTableInput.setOwner(principal.getName()); - - stats.getUpdateTable().call(() -> - glueClient.updateTable(new UpdateTableRequest() - .withDatabaseName(databaseName) - .withTableInput(newTableInput))); - } - catch (EntityNotFoundException e) { - throw new TableNotFoundException(new SchemaTableName(databaseName, tableName), e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames) - { - checkArgument(!columnNames.isEmpty(), "columnNames is empty"); - return columnStatisticsProvider.getTableColumnStatistics(databaseName, tableName, columnNames); - } - - @Override - public void updateTableStatistics(String databaseName, String tableName, OptionalLong acidWriteId, StatisticsUpdateMode mode, PartitionStatistics statisticsUpdate) - { - Table table = getExistingTable(databaseName, tableName); - if (acidWriteId.isPresent()) { - table = Table.builder(table).setWriteId(acidWriteId).build(); - } - // load current statistics - HiveBasicStatistics currentBasicStatistics = getHiveBasicStatistics(table.getParameters()); - Map currentColumnStatistics = getTableColumnStatistics( - databaseName, - tableName, - Stream.concat(table.getDataColumns().stream(), table.getPartitionColumns().stream()).map(Column::getName).collect(toImmutableSet())); - PartitionStatistics currentStatistics = new PartitionStatistics(currentBasicStatistics, 
currentColumnStatistics); - - PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(currentStatistics, statisticsUpdate); - - try { - TableInput tableInput = GlueInputConverter.convertTable(table); - final Map statisticsParameters = updateStatisticsParameters(table.getParameters(), updatedStatistics.basicStatistics()); - tableInput.setParameters(statisticsParameters); - table = Table.builder(table).setParameters(statisticsParameters).build(); - stats.getUpdateTable().call(() -> glueClient.updateTable(new UpdateTableRequest() - .withDatabaseName(databaseName) - .withTableInput(tableInput))); - columnStatisticsProvider.updateTableColumnStatistics(table, updatedStatistics.columnStatistics()); - } - catch (EntityNotFoundException e) { - throw new TableNotFoundException(new SchemaTableName(databaseName, tableName), e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void addColumn(String databaseName, String tableName, String columnName, HiveType columnType, String columnComment) - { - Table oldTable = getExistingTable(databaseName, tableName); - Table newTable = Table.builder(oldTable) - .addDataColumn(new Column(columnName, columnType, Optional.ofNullable(columnComment), ImmutableMap.of())) - .build(); - replaceTable(databaseName, tableName, newTable, null); - } - - @Override - public void renameColumn(String databaseName, String tableName, String oldColumnName, String newColumnName) - { - Table oldTable = getExistingTable(databaseName, tableName); - if (oldTable.getPartitionColumns().stream().anyMatch(c -> c.getName().equals(oldColumnName))) { - throw new TrinoException(NOT_SUPPORTED, "Renaming partition columns is not supported"); - } - - ImmutableList.Builder newDataColumns = ImmutableList.builder(); - for (Column column : oldTable.getDataColumns()) { - if (column.getName().equals(oldColumnName)) { - newDataColumns.add(new Column(newColumnName, column.getType(), column.getComment(), column.getProperties())); - } - else { - newDataColumns.add(column); - } - } - - Table newTable = Table.builder(oldTable) - .setDataColumns(newDataColumns.build()) - .build(); - replaceTable(databaseName, tableName, newTable, null); - } - - @Override - public void dropColumn(String databaseName, String tableName, String columnName) - { - verifyCanDropColumn(this, databaseName, tableName, columnName); - Table oldTable = getExistingTable(databaseName, tableName); - - if (oldTable.getColumn(columnName).isEmpty()) { - SchemaTableName name = new SchemaTableName(databaseName, tableName); - throw new ColumnNotFoundException(name, columnName); - } - - ImmutableList.Builder newDataColumns = ImmutableList.builder(); - oldTable.getDataColumns().stream() - .filter(fieldSchema -> !fieldSchema.getName().equals(columnName)) - .forEach(newDataColumns::add); - - Table newTable = Table.builder(oldTable) - .setDataColumns(newDataColumns.build()) - .build(); - replaceTable(databaseName, tableName, newTable, null); - } - - @Override - public void commentColumn(String databaseName, String tableName, String columnName, Optional comment) - { - Table table = getExistingTable(databaseName, tableName); - List dataColumns = table.getDataColumns(); - List partitionColumns = table.getPartitionColumns(); - - Optional matchingDataColumn = indexOfColumnWithName(dataColumns, columnName); - Optional matchingPartitionColumn = indexOfColumnWithName(partitionColumns, columnName); - - if (matchingDataColumn.isPresent() && 
matchingPartitionColumn.isPresent()) { - throw new TrinoException(HIVE_INVALID_METADATA, "Found two columns with names matching " + columnName); - } - if (matchingDataColumn.isEmpty() && matchingPartitionColumn.isEmpty()) { - throw new ColumnNotFoundException(table.getSchemaTableName(), columnName); - } - - Table updatedTable = Table.builder(table) - .setDataColumns(matchingDataColumn.map(index -> setColumnCommentForIndex(dataColumns, index, comment)).orElse(dataColumns)) - .setPartitionColumns(matchingPartitionColumn.map(index -> setColumnCommentForIndex(partitionColumns, index, comment)).orElse(partitionColumns)) - .build(); - - replaceTable(databaseName, tableName, updatedTable, null); - } - - private static Optional indexOfColumnWithName(List columns, String columnName) - { - Optional index = Optional.empty(); - for (int i = 0; i < columns.size(); i++) { - // Glue columns are always lowercase - if (columns.get(i).getName().equals(columnName)) { - index.ifPresent(_ -> { - throw new TrinoException(HIVE_INVALID_METADATA, "Found two columns with names matching " + columnName); - }); - index = Optional.of(i); - } - } - return index; - } - - private static List setColumnCommentForIndex(List columns, int indexToUpdate, Optional comment) - { - ImmutableList.Builder newColumns = ImmutableList.builder(); - for (int i = 0; i < columns.size(); i++) { - Column originalColumn = columns.get(i); - if (i == indexToUpdate) { - newColumns.add(new Column(originalColumn.getName(), originalColumn.getType(), comment, originalColumn.getProperties())); - } - else { - newColumns.add(originalColumn); - } - } - return newColumns.build(); - } - - @Override - public Optional> getPartitionNamesByFilter( - String databaseName, - String tableName, - List columnNames, - TupleDomain partitionKeysFilter) - { - if (partitionKeysFilter.isNone()) { - return Optional.of(ImmutableList.of()); - } - String expression = GlueExpressionUtil.buildGlueExpression(columnNames, partitionKeysFilter, assumeCanonicalPartitionKeys); - List> partitionValues = getPartitionValues(databaseName, tableName, expression); - return Optional.of(buildPartitionNames(columnNames, partitionValues)); - } - - private static List buildPartitionNames(List partitionColumns, List> partitions) - { - return mappedCopy(partitions, partition -> toPartitionName(partitionColumns, partition)); - } - - private List> getPartitionValues(String databaseName, String tableName, String expression) - { - if (partitionSegments == 1) { - return getPartitionValues(databaseName, tableName, expression, null); - } - - // Do parallel partition fetch. 
- CompletionService>> completionService = new ExecutorCompletionService<>(partitionsReadExecutor); - List> futures = new ArrayList<>(partitionSegments); - List> partitions = new ArrayList<>(); - try { - for (int i = 0; i < partitionSegments; i++) { - Segment segment = new Segment().withSegmentNumber(i).withTotalSegments(partitionSegments); - futures.add(completionService.submit(() -> getPartitionValues(databaseName, tableName, expression, segment))); - } - for (int i = 0; i < partitionSegments; i++) { - Future>> futurePartitions = completionService.take(); - partitions.addAll(futurePartitions.get()); - } - // All futures completed normally - futures.clear(); - } - catch (ExecutionException | InterruptedException e) { - if (e instanceof InterruptedException) { - Thread.currentThread().interrupt(); - } - throw new TrinoException(HIVE_METASTORE_ERROR, "Failed to fetch partitions from Glue Data Catalog", e); - } - finally { - // Ensure any futures still running are canceled in case of failure - futures.forEach(future -> future.cancel(true)); - } - - partitions.sort(PARTITION_VALUE_COMPARATOR); - return partitions; - } - - private List> getPartitionValues(String databaseName, String tableName, String expression, @Nullable Segment segment) - { - try { - // Reuse immutable field instances opportunistically between partitions - return getPaginatedResults( - glueClient::getPartitions, - new GetPartitionsRequest() - .withDatabaseName(databaseName) - .withTableName(tableName) - .withExpression(expression) - .withSegment(segment) - // We need the partition values, and not column schema which is very large - .withExcludeColumnSchema(true) - .withMaxResults(AWS_GLUE_GET_PARTITIONS_MAX_RESULTS), - GetPartitionsRequest::setNextToken, - GetPartitionsResult::getNextToken, - stats.getGetPartitions()) - .map(GetPartitionsResult::getPartitions) - .flatMap(List::stream) - .map(com.amazonaws.services.glue.model.Partition::getValues) - .collect(toImmutableList()); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public Optional getPartition(Table table, List partitionValues) - { - try { - GetPartitionResult result = stats.getGetPartition().call(() -> - glueClient.getPartition(new GetPartitionRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableName(table.getTableName()) - .withPartitionValues(partitionValues))); - return Optional.of(new GluePartitionConverter(table.getDatabaseName(), table.getTableName()).apply(result.getPartition())); - } - catch (EntityNotFoundException e) { - return Optional.empty(); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - /** - *
-     * Ex: Partition keys = ['a', 'b']
-     *     Partition names = ['a=1/b=2', 'a=2/b=2']
-     * </pre>
- * - * @param partitionNames List of full partition names - * @return Mapping of partition name to the partition object - */ - @Override - public Map> getPartitionsByNames(Table table, List partitionNames) - { - return getPartitionsByNamesInternal(table, partitionNames); - } - - private Map> getPartitionsByNamesInternal(Table table, Collection partitionNames) - { - requireNonNull(partitionNames, "partitionNames is null"); - if (partitionNames.isEmpty()) { - return ImmutableMap.of(); - } - - List partitions = batchGetPartition(table, partitionNames); - - Map> partitionNameToPartitionValuesMap = partitionNames.stream() - .collect(toMap(identity(), Partitions::toPartitionValues)); - Map, Partition> partitionValuesToPartitionMap = partitions.stream() - .collect(toMap(Partition::getValues, identity())); - - ImmutableMap.Builder> resultBuilder = ImmutableMap.builder(); - for (Entry> entry : partitionNameToPartitionValuesMap.entrySet()) { - Partition partition = partitionValuesToPartitionMap.get(entry.getValue()); - resultBuilder.put(entry.getKey(), Optional.ofNullable(partition)); - } - return resultBuilder.buildOrThrow(); - } - - private List batchGetPartition(Table table, Collection partitionNames) - { - List> batchGetPartitionFutures = new ArrayList<>(); - try { - List pendingPartitions = partitionNames.stream() - .map(partitionName -> new PartitionValueList().withValues(toPartitionValues(partitionName))) - .collect(toCollection(ArrayList::new)); - - ImmutableList.Builder resultsBuilder = ImmutableList.builderWithExpectedSize(partitionNames.size()); - - // Reuse immutable field instances opportunistically between partitions - GluePartitionConverter converter = new GluePartitionConverter(table.getDatabaseName(), table.getTableName()); - - while (!pendingPartitions.isEmpty()) { - for (List partitions : Lists.partition(pendingPartitions, BATCH_GET_PARTITION_MAX_PAGE_SIZE)) { - batchGetPartitionFutures.add(glueClient.batchGetPartitionAsync(new BatchGetPartitionRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableName(table.getTableName()) - .withPartitionsToGet(partitions), - new StatsRecordingAsyncHandler<>(stats.getGetPartitions()))); - } - pendingPartitions.clear(); - - for (Future future : batchGetPartitionFutures) { - BatchGetPartitionResult batchGetPartitionResult = future.get(); - List partitions = batchGetPartitionResult.getPartitions(); - List unprocessedKeys = batchGetPartitionResult.getUnprocessedKeys(); - - // In the unlikely scenario where batchGetPartition call cannot make progress on retrieving partitions, avoid infinite loop - // We fail only in case there are still unprocessedKeys. Case with empty partitions and empty unprocessedKeys is correct in case partitions from request are not found. - if (partitions.isEmpty() && !unprocessedKeys.isEmpty()) { - throw new TrinoException(HIVE_METASTORE_ERROR, "Cannot make progress retrieving partitions. 
Unable to retrieve partitions: " + unprocessedKeys); - } - - partitions.stream() - .map(converter) - .forEach(resultsBuilder::add); - pendingPartitions.addAll(unprocessedKeys); - } - batchGetPartitionFutures.clear(); - } - - return resultsBuilder.build(); - } - catch (AmazonServiceException | InterruptedException | ExecutionException e) { - if (e instanceof InterruptedException) { - Thread.currentThread().interrupt(); - } - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - finally { - // Ensure any futures still running are canceled in case of failure - batchGetPartitionFutures.forEach(future -> future.cancel(true)); - } - } - - @Override - public void addPartitions(String databaseName, String tableName, List partitions) - { - try { - stats.getCreatePartitions().call(() -> { - List> futures = new ArrayList<>(); - - for (List partitionBatch : Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) { - List partitionInputs = mappedCopy(partitionBatch, GlueInputConverter::convertPartition); - futures.add(glueClient.batchCreatePartitionAsync( - new BatchCreatePartitionRequest() - .withDatabaseName(databaseName) - .withTableName(tableName) - .withPartitionInputList(partitionInputs), - new StatsRecordingAsyncHandler<>(stats.getBatchCreatePartition()))); - } - - for (Future future : futures) { - try { - BatchCreatePartitionResult result = future.get(); - propagatePartitionErrorToTrinoException(databaseName, tableName, result.getErrors()); - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - Set updates = partitions.stream() - .map(partitionWithStatistics -> new GlueColumnStatisticsProvider.PartitionStatisticsUpdate( - partitionWithStatistics.getPartition(), - partitionWithStatistics.getStatistics().columnStatistics())) - .collect(toImmutableSet()); - columnStatisticsProvider.updatePartitionStatistics(updates); - - return null; - }); - } - catch (AmazonServiceException | ExecutionException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - private static void propagatePartitionErrorToTrinoException(String databaseName, String tableName, List partitionErrors) - { - if (partitionErrors != null && !partitionErrors.isEmpty()) { - ErrorDetail errorDetail = partitionErrors.getFirst().getErrorDetail(); - String glueExceptionCode = errorDetail.getErrorCode(); - - switch (glueExceptionCode) { - case "AlreadyExistsException": - throw new TrinoException(ALREADY_EXISTS, errorDetail.getErrorMessage()); - case "EntityNotFoundException": - throw new TableNotFoundException(new SchemaTableName(databaseName, tableName), errorDetail.getErrorMessage()); - default: - throw new TrinoException(HIVE_METASTORE_ERROR, errorDetail.getErrorCode() + ": " + errorDetail.getErrorMessage()); - } - } - } - - @Override - public void dropPartition(String databaseName, String tableName, List parts, boolean deleteData) - { - Table table = getExistingTable(databaseName, tableName); - Partition partition = getPartition(table, parts) - .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), parts)); - - try { - stats.getDeletePartition().call(() -> - glueClient.deletePartition(new DeletePartitionRequest() - .withDatabaseName(databaseName) - .withTableName(tableName) - .withPartitionValues(parts))); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - - String partLocation = partition.getStorage().getLocation(); - if (deleteData 
&& isManagedTable(table) && !isNullOrEmpty(partLocation)) { - deleteDir(Location.of(partLocation)); - } - } - - @Override - public void alterPartition(String databaseName, String tableName, PartitionWithStatistics partition) - { - try { - PartitionInput newPartition = convertPartition(partition); - stats.getUpdatePartition().call(() -> - glueClient.updatePartition(new UpdatePartitionRequest() - .withDatabaseName(databaseName) - .withTableName(tableName) - .withPartitionInput(newPartition) - .withPartitionValueList(partition.getPartition().getValues()))); - columnStatisticsProvider.updatePartitionStatistics( - partition.getPartition(), - partition.getStatistics().columnStatistics()); - } - catch (EntityNotFoundException e) { - throw new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partition.getPartition().getValues(), e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public Map> getPartitionColumnStatistics( - String databaseName, - String tableName, - Set partitionNames, - Set columnNames) - { - checkArgument(!columnNames.isEmpty(), "columnNames is empty"); - return columnStatisticsProvider.getPartitionColumnStatistics(databaseName, tableName, partitionNames, columnNames); - } - - @Override - public void updatePartitionStatistics(Table table, StatisticsUpdateMode mode, Map partitionUpdates) - { - Iterables.partition(partitionUpdates.entrySet(), BATCH_CREATE_PARTITION_MAX_PAGE_SIZE) - .forEach(batch -> updatePartitionStatisticsBatch(table, mode, batch.stream().collect(toImmutableMap(Entry::getKey, Entry::getValue)))); - } - - private void updatePartitionStatisticsBatch(Table table, StatisticsUpdateMode mode, Map partitionUpdates) - { - // Missing partitions are ignored - Map partitions = getPartitionsByNamesInternal(table, partitionUpdates.keySet()).entrySet().stream() - .filter(entry -> entry.getValue().isPresent()) - .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().orElseThrow())); - Map> currentColumnStats = columnStatisticsProvider.getPartitionColumnStatistics( - table.getDatabaseName(), - table.getTableName(), - partitionUpdates.keySet(), - table.getDataColumns().stream().map(Column::getName).collect(toImmutableSet())); - - ImmutableList.Builder partitionUpdateRequests = ImmutableList.builder(); - ImmutableSet.Builder columnStatisticsUpdates = ImmutableSet.builder(); - partitions.forEach((partitionName, partition) -> { - PartitionStatistics update = partitionUpdates.get(partitionName); - - PartitionStatistics currentStatistics = new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), currentColumnStats.get(partitionName)); - PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(currentStatistics, update); - - Map updatedStatisticsParameters = updateStatisticsParameters(partition.getParameters(), updatedStatistics.basicStatistics()); - - partition = Partition.builder(partition).setParameters(updatedStatisticsParameters).build(); - Map updatedColumnStatistics = updatedStatistics.columnStatistics(); - - PartitionInput partitionInput = convertPartition(partition); - partitionInput.setParameters(partition.getParameters()); - - partitionUpdateRequests.add(new BatchUpdatePartitionRequestEntry() - .withPartitionValueList(partition.getValues()) - .withPartitionInput(partitionInput)); - columnStatisticsUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(partition, updatedColumnStatistics)); - }); - - List> 
partitionUpdateRequestsPartitioned = Lists.partition(partitionUpdateRequests.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE); - List> partitionUpdateRequestsFutures = new ArrayList<>(); - partitionUpdateRequestsPartitioned.forEach(partitionUpdateRequestsPartition -> { - // Update basic statistics - partitionUpdateRequestsFutures.add(glueClient.batchUpdatePartitionAsync(new BatchUpdatePartitionRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableName(table.getTableName()) - .withEntries(partitionUpdateRequestsPartition), - new StatsRecordingAsyncHandler<>(stats.getBatchUpdatePartition()))); - }); - - try { - // Update column statistics - columnStatisticsProvider.updatePartitionStatistics(columnStatisticsUpdates.build()); - // Don't block on the batch update call until the column statistics have finished updating - partitionUpdateRequestsFutures.forEach(MoreFutures::getFutureValue); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public boolean functionExists(String databaseName, String functionName, String signatureToken) - { - try { - stats.getGetUserDefinedFunction().call(() -> - glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest() - .withDatabaseName(databaseName) - .withFunctionName(metastoreFunctionName(functionName, signatureToken)))); - return true; - } - catch (EntityNotFoundException e) { - return false; - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public Collection getAllFunctions(String databaseName) - { - return getFunctionsByPattern(databaseName, "trino__.*"); - } - - @Override - public Collection getFunctions(String databaseName, String functionName) - { - return getFunctionsByPattern(databaseName, "trino__" + Pattern.quote(functionName) + "__.*"); - } - - private Collection getFunctionsByPattern(String databaseName, String functionNamePattern) - { - try { - return getPaginatedResults( - glueClient::getUserDefinedFunctions, - new GetUserDefinedFunctionsRequest() - .withDatabaseName(databaseName) - .withPattern(functionNamePattern) - .withMaxResults(AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS), - GetUserDefinedFunctionsRequest::setNextToken, - GetUserDefinedFunctionsResult::getNextToken, - stats.getGetUserDefinedFunctions()) - .map(GetUserDefinedFunctionsResult::getUserDefinedFunctions) - .flatMap(List::stream) - .map(GlueToTrinoConverter::convertFunction) - .collect(toImmutableList()); - } - catch (EntityNotFoundException | AccessDeniedException e) { - return ImmutableList.of(); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void createFunction(String databaseName, String functionName, LanguageFunction function) - { - if (functionName.contains("__")) { - throw new TrinoException(NOT_SUPPORTED, "Function names with double underscore are not supported"); - } - try { - UserDefinedFunctionInput functionInput = convertFunction(functionName, function); - stats.getCreateUserDefinedFunction().call(() -> - glueClient.createUserDefinedFunction(new CreateUserDefinedFunctionRequest() - .withDatabaseName(databaseName) - .withFunctionInput(functionInput))); - } - catch (AlreadyExistsException e) { - throw new TrinoException(ALREADY_EXISTS, "Function already exists", e); - } - catch (EntityNotFoundException e) { - throw new SchemaNotFoundException(databaseName, e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, 
e); - } - } - - @Override - public void replaceFunction(String databaseName, String functionName, LanguageFunction function) - { - try { - UserDefinedFunctionInput functionInput = convertFunction(functionName, function); - stats.getUpdateUserDefinedFunction().call(() -> - glueClient.updateUserDefinedFunction(new UpdateUserDefinedFunctionRequest() - .withDatabaseName(databaseName) - .withFunctionName(metastoreFunctionName(functionName, function.signatureToken())) - .withFunctionInput(functionInput))); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void dropFunction(String databaseName, String functionName, String signatureToken) - { - try { - stats.getDeleteUserDefinedFunction().call(() -> - glueClient.deleteUserDefinedFunction(new DeleteUserDefinedFunctionRequest() - .withDatabaseName(databaseName) - .withFunctionName(metastoreFunctionName(functionName, signatureToken)))); - } - catch (EntityNotFoundException e) { - throw new TrinoException(FUNCTION_NOT_FOUND, "Function not found", e); - } - catch (AmazonServiceException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); - } - } - - @Override - public void createRole(String role, String grantor) - { - throw new TrinoException(NOT_SUPPORTED, "createRole is not supported by Glue"); - } - - @Override - public void dropRole(String role) - { - throw new TrinoException(NOT_SUPPORTED, "dropRole is not supported by Glue"); - } - - @Override - public Set listRoles() - { - return ImmutableSet.of(PUBLIC_ROLE_NAME); - } - - @Override - public void grantRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new TrinoException(NOT_SUPPORTED, "grantRoles is not supported by Glue"); - } - - @Override - public void revokeRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new TrinoException(NOT_SUPPORTED, "revokeRoles is not supported by Glue"); - } - - @Override - public Set listRoleGrants(HivePrincipal principal) - { - if (principal.getType() == USER) { - return ImmutableSet.of(new RoleGrant(principal.toTrinoPrincipal(), PUBLIC_ROLE_NAME, false)); - } - return ImmutableSet.of(); - } - - @Override - public void grantTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new TrinoException(NOT_SUPPORTED, "grantTablePrivileges is not supported by Glue"); - } - - @Override - public void revokeTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new TrinoException(NOT_SUPPORTED, "revokeTablePrivileges is not supported by Glue"); - } - - @Override - public Set listTablePrivileges(String databaseName, String tableName, Optional tableOwner, Optional principal) - { - return ImmutableSet.of(); - } - - @Override - public void checkSupportsTransactions() - { - throw new TrinoException(NOT_SUPPORTED, "Glue does not support ACID tables"); - } - - private Stream getGlueTables(String databaseName) - { - return getPaginatedResults( - glueClient::getTables, - new GetTablesRequest() - .withDatabaseName(databaseName) - .withMaxResults(AWS_GLUE_GET_TABLES_MAX_RESULTS), - GetTablesRequest::setNextToken, - GetTablesResult::getNextToken, - stats.getGetTables()) - .map(GetTablesResult::getTableList) - .flatMap(List::stream); - } - - static class StatsRecordingAsyncHandler - implements AsyncHandler - { - 
private final AwsApiCallStats stats; - private final Stopwatch stopwatch; - - public StatsRecordingAsyncHandler(AwsApiCallStats stats) - { - this.stats = requireNonNull(stats, "stats is null"); - this.stopwatch = Stopwatch.createStarted(); - } - - @Override - public void onError(Exception e) - { - stats.recordCall(stopwatch.elapsed(NANOSECONDS), true); - } - - @Override - public void onSuccess(AmazonWebServiceRequest request, Object o) - { - stats.recordCall(stopwatch.elapsed(NANOSECONDS), false); - } - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastoreConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastoreConfig.java deleted file mode 100644 index 2fde27caaa62..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastoreConfig.java +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import io.airlift.configuration.Config; -import io.airlift.configuration.ConfigDescription; -import io.airlift.configuration.ConfigSecuritySensitive; -import io.airlift.configuration.DefunctConfig; -import io.airlift.configuration.LegacyConfig; -import jakarta.validation.constraints.AssertTrue; -import jakarta.validation.constraints.Max; -import jakarta.validation.constraints.Min; - -import java.util.Optional; - -@DefunctConfig("hive.metastore.glue.use-instance-credentials") -public class GlueHiveMetastoreConfig -{ - private Optional glueRegion = Optional.empty(); - private Optional glueEndpointUrl = Optional.empty(); - private Optional glueStsRegion = Optional.empty(); - private Optional glueStsEndpointUrl = Optional.empty(); - private Optional glueProxyApiId = Optional.empty(); - private boolean pinGlueClientToCurrentRegion; - private int maxGlueErrorRetries = 10; - private int maxGlueConnections = 30; - private Optional defaultWarehouseDir = Optional.empty(); - private Optional iamRole = Optional.empty(); - private Optional externalId = Optional.empty(); - private Optional awsAccessKey = Optional.empty(); - private Optional awsSecretKey = Optional.empty(); - private Optional awsCredentialsProvider = Optional.empty(); - private Optional catalogId = Optional.empty(); - private int partitionSegments = 5; - private int getPartitionThreads = 20; - private int readStatisticsThreads = 5; - private int writeStatisticsThreads = 20; - private boolean assumeCanonicalPartitionKeys; - private boolean skipArchive; - - public Optional getGlueRegion() - { - return glueRegion; - } - - @Config("hive.metastore.glue.region") - @ConfigDescription("AWS Region for Glue Data Catalog") - public GlueHiveMetastoreConfig setGlueRegion(String region) - { - this.glueRegion = Optional.ofNullable(region); - return this; - } - - public Optional getGlueEndpointUrl() - { - return glueEndpointUrl; - } - - @Config("hive.metastore.glue.endpoint-url") - @ConfigDescription("Glue API endpoint URL") - public 
GlueHiveMetastoreConfig setGlueEndpointUrl(String glueEndpointUrl) - { - this.glueEndpointUrl = Optional.ofNullable(glueEndpointUrl); - return this; - } - - public Optional getGlueStsRegion() - { - return glueStsRegion; - } - - @Config("hive.metastore.glue.sts.region") - @ConfigDescription("AWS STS signing region for Glue authentication") - public GlueHiveMetastoreConfig setGlueStsRegion(String glueStsRegion) - { - this.glueStsRegion = Optional.ofNullable(glueStsRegion); - return this; - } - - public Optional getGlueStsEndpointUrl() - { - return glueStsEndpointUrl; - } - - @Config("hive.metastore.glue.sts.endpoint") - @ConfigDescription("AWS STS endpoint for Glue authentication") - public GlueHiveMetastoreConfig setGlueStsEndpointUrl(String glueStsEndpointUrl) - { - this.glueStsEndpointUrl = Optional.ofNullable(glueStsEndpointUrl); - return this; - } - - public Optional getGlueProxyApiId() - { - return glueProxyApiId; - } - - @Config("hive.metastore.glue.proxy-api-id") - @ConfigDescription("ID of Glue Proxy API") - public GlueHiveMetastoreConfig setGlueProxyApiId(String glueProxyApiId) - { - this.glueProxyApiId = Optional.ofNullable(glueProxyApiId); - return this; - } - - public boolean getPinGlueClientToCurrentRegion() - { - return pinGlueClientToCurrentRegion; - } - - @Config("hive.metastore.glue.pin-client-to-current-region") - @ConfigDescription("Should the Glue client be pinned to the current EC2 region") - public GlueHiveMetastoreConfig setPinGlueClientToCurrentRegion(boolean pinGlueClientToCurrentRegion) - { - this.pinGlueClientToCurrentRegion = pinGlueClientToCurrentRegion; - return this; - } - - @Min(1) - public int getMaxGlueConnections() - { - return maxGlueConnections; - } - - @Config("hive.metastore.glue.max-connections") - @ConfigDescription("Max number of concurrent connections to Glue") - public GlueHiveMetastoreConfig setMaxGlueConnections(int maxGlueConnections) - { - this.maxGlueConnections = maxGlueConnections; - return this; - } - - @Min(0) - public int getMaxGlueErrorRetries() - { - return maxGlueErrorRetries; - } - - @Config("hive.metastore.glue.max-error-retries") - @ConfigDescription("Maximum number of error retries for the Glue client") - public GlueHiveMetastoreConfig setMaxGlueErrorRetries(int maxGlueErrorRetries) - { - this.maxGlueErrorRetries = maxGlueErrorRetries; - return this; - } - - public Optional getDefaultWarehouseDir() - { - return defaultWarehouseDir; - } - - @Config("hive.metastore.glue.default-warehouse-dir") - @ConfigDescription("Hive Glue metastore default warehouse directory") - public GlueHiveMetastoreConfig setDefaultWarehouseDir(String defaultWarehouseDir) - { - this.defaultWarehouseDir = Optional.ofNullable(defaultWarehouseDir); - return this; - } - - public Optional getIamRole() - { - return iamRole; - } - - @Config("hive.metastore.glue.iam-role") - @ConfigDescription("ARN of an IAM role to assume when connecting to Glue") - public GlueHiveMetastoreConfig setIamRole(String iamRole) - { - this.iamRole = Optional.ofNullable(iamRole); - return this; - } - - public Optional getExternalId() - { - return externalId; - } - - @Config("hive.metastore.glue.external-id") - @ConfigDescription("External ID for the IAM role trust policy when connecting to Glue") - public GlueHiveMetastoreConfig setExternalId(String externalId) - { - this.externalId = Optional.ofNullable(externalId); - return this; - } - - public Optional getAwsAccessKey() - { - return awsAccessKey; - } - - @Config("hive.metastore.glue.aws-access-key") - @ConfigDescription("Hive Glue 
metastore AWS access key") - public GlueHiveMetastoreConfig setAwsAccessKey(String awsAccessKey) - { - this.awsAccessKey = Optional.ofNullable(awsAccessKey); - return this; - } - - public Optional getAwsSecretKey() - { - return awsSecretKey; - } - - @Config("hive.metastore.glue.aws-secret-key") - @ConfigDescription("Hive Glue metastore AWS secret key") - @ConfigSecuritySensitive - public GlueHiveMetastoreConfig setAwsSecretKey(String awsSecretKey) - { - this.awsSecretKey = Optional.ofNullable(awsSecretKey); - return this; - } - - public Optional getCatalogId() - { - return catalogId; - } - - @Config("hive.metastore.glue.catalogid") - @ConfigDescription("Hive Glue metastore catalog id") - public GlueHiveMetastoreConfig setCatalogId(String catalogId) - { - this.catalogId = Optional.ofNullable(catalogId); - return this; - } - - public Optional getAwsCredentialsProvider() - { - return awsCredentialsProvider; - } - - @Config("hive.metastore.glue.aws-credentials-provider") - @ConfigDescription("Fully qualified name of the Java class to use for obtaining AWS credentials") - public GlueHiveMetastoreConfig setAwsCredentialsProvider(String awsCredentialsProvider) - { - this.awsCredentialsProvider = Optional.ofNullable(awsCredentialsProvider); - return this; - } - - @Min(1) - @Max(10) - public int getPartitionSegments() - { - return partitionSegments; - } - - @Config("hive.metastore.glue.partitions-segments") - @ConfigDescription("Number of segments for partitioned Glue tables") - public GlueHiveMetastoreConfig setPartitionSegments(int partitionSegments) - { - this.partitionSegments = partitionSegments; - return this; - } - - @Min(1) - public int getGetPartitionThreads() - { - return getPartitionThreads; - } - - @Config("hive.metastore.glue.get-partition-threads") - @ConfigDescription("Number of threads for parallel partition fetches from Glue") - public GlueHiveMetastoreConfig setGetPartitionThreads(int getPartitionThreads) - { - this.getPartitionThreads = getPartitionThreads; - return this; - } - - @Min(1) - public int getReadStatisticsThreads() - { - return readStatisticsThreads; - } - - @Config("hive.metastore.glue.read-statistics-threads") - @ConfigDescription("Number of threads for parallel statistics reads from Glue") - public GlueHiveMetastoreConfig setReadStatisticsThreads(int getReadStatisticsThreads) - { - this.readStatisticsThreads = getReadStatisticsThreads; - return this; - } - - @Min(1) - public int getWriteStatisticsThreads() - { - return writeStatisticsThreads; - } - - @Config("hive.metastore.glue.write-statistics-threads") - @ConfigDescription("Number of threads for parallel statistics writes to Glue") - public GlueHiveMetastoreConfig setWriteStatisticsThreads(int writeStatisticsThreads) - { - this.writeStatisticsThreads = writeStatisticsThreads; - return this; - } - - public boolean isAssumeCanonicalPartitionKeys() - { - return assumeCanonicalPartitionKeys; - } - - @Config("hive.metastore.glue.assume-canonical-partition-keys") - @ConfigDescription("Allow conversion of non-char types (eg BIGINT, timestamp) to canonical string formats") - public GlueHiveMetastoreConfig setAssumeCanonicalPartitionKeys(boolean assumeCanonicalPartitionKeys) - { - this.assumeCanonicalPartitionKeys = assumeCanonicalPartitionKeys; - return this; - } - - public boolean isSkipArchive() - { - return skipArchive; - } - - @Config("hive.metastore.glue.skip-archive") - @LegacyConfig("iceberg.glue.skip-archive") - @ConfigDescription("Skip archiving an old table version when updating a table in the Glue metastore") 
- public GlueHiveMetastoreConfig setSkipArchive(boolean skipArchive) - { - this.skipArchive = skipArchive; - return this; - } - - @AssertTrue(message = "Both hive.metastore.glue.region and hive.metastore.glue.endpoint-url must be provided when Glue proxy API ID is present") - public boolean isGlueProxyApiIdValid() - { - if (getGlueProxyApiId().isPresent()) { - return getGlueRegion().isPresent() && getGlueEndpointUrl().isPresent(); - } - return true; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastoreFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastoreFactory.java deleted file mode 100644 index 8d6412b08f5f..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastoreFactory.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.inject.Inject; -import io.opentelemetry.api.trace.Tracer; -import io.trino.metastore.HiveMetastore; -import io.trino.metastore.HiveMetastoreFactory; -import io.trino.metastore.tracing.TracingHiveMetastore; -import io.trino.spi.security.ConnectorIdentity; - -import java.util.Optional; - -public class GlueHiveMetastoreFactory - implements HiveMetastoreFactory -{ - private final HiveMetastore metastore; - - // Glue metastore does not support impersonation, so just use single shared instance - @Inject - public GlueHiveMetastoreFactory(GlueHiveMetastore metastore, Tracer tracer) - { - this.metastore = new TracingHiveMetastore(tracer, metastore); - } - - @Override - public boolean isImpersonationEnabled() - { - return false; - } - - @Override - public HiveMetastore createMetastore(Optional identity) - { - return metastore; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueInputConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueInputConverter.java deleted file mode 100644 index 9d16be3760ce..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueInputConverter.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.DatabaseInput; -import com.amazonaws.services.glue.model.Order; -import com.amazonaws.services.glue.model.PartitionInput; -import com.amazonaws.services.glue.model.PrincipalType; -import com.amazonaws.services.glue.model.ResourceType; -import com.amazonaws.services.glue.model.ResourceUri; -import com.amazonaws.services.glue.model.SerDeInfo; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UserDefinedFunctionInput; -import com.google.common.collect.ImmutableMap; -import io.airlift.json.JsonCodec; -import io.trino.metastore.Column; -import io.trino.metastore.Database; -import io.trino.metastore.HiveBucketProperty; -import io.trino.metastore.Partition; -import io.trino.metastore.PartitionStatistics; -import io.trino.metastore.PartitionWithStatistics; -import io.trino.metastore.Storage; -import io.trino.metastore.Table; -import io.trino.spi.function.LanguageFunction; - -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static io.trino.metastore.Table.TABLE_COMMENT; -import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; -import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView; -import static io.trino.plugin.hive.metastore.MetastoreUtil.metastoreFunctionName; -import static io.trino.plugin.hive.metastore.MetastoreUtil.toResourceUris; -import static io.trino.plugin.hive.metastore.MetastoreUtil.updateStatisticsParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getStorageDescriptor; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableTypeNullable; - -public final class GlueInputConverter -{ - static final JsonCodec LANGUAGE_FUNCTION_CODEC = JsonCodec.jsonCodec(LanguageFunction.class); - - private GlueInputConverter() {} - - public static DatabaseInput convertDatabase(Database database) - { - DatabaseInput input = new DatabaseInput(); - input.setName(database.getDatabaseName()); - input.setParameters(database.getParameters()); - database.getComment().ifPresent(input::setDescription); - database.getLocation().ifPresent(input::setLocationUri); - return input; - } - - public static TableInput convertTable(Table table) - { - Map tableParameters = table.getParameters(); - Optional comment = Optional.empty(); - if (!isTrinoView(table) && !isTrinoMaterializedView(table)) { - comment = Optional.ofNullable(tableParameters.get(TABLE_COMMENT)); - tableParameters = tableParameters.entrySet().stream() - .filter(entry -> !entry.getKey().equals(TABLE_COMMENT)) - .collect(toImmutableMap(Entry::getKey, Entry::getValue)); - } - - TableInput input = new TableInput(); - input.setName(table.getTableName()); - input.setOwner(table.getOwner().orElse(null)); - input.setTableType(table.getTableType()); - input.setStorageDescriptor(convertStorage(table.getStorage(), table.getDataColumns())); - input.setPartitionKeys(table.getPartitionColumns().stream().map(GlueInputConverter::convertColumn).collect(toImmutableList())); - input.setParameters(tableParameters); - table.getViewOriginalText().ifPresent(input::setViewOriginalText); - 
table.getViewExpandedText().ifPresent(input::setViewExpandedText); - comment.ifPresent(input::setDescription); - return input; - } - - public static TableInput convertGlueTableToTableInput(com.amazonaws.services.glue.model.Table glueTable) - { - return new TableInput() - .withName(glueTable.getName()) - .withDescription(glueTable.getDescription()) - .withOwner(glueTable.getOwner()) - .withLastAccessTime(glueTable.getLastAccessTime()) - .withLastAnalyzedTime(glueTable.getLastAnalyzedTime()) - .withRetention(glueTable.getRetention()) - .withStorageDescriptor(getStorageDescriptor(glueTable).orElse(null)) - .withPartitionKeys(glueTable.getPartitionKeys()) - .withViewOriginalText(glueTable.getViewOriginalText()) - .withViewExpandedText(glueTable.getViewExpandedText()) - .withTableType(getTableTypeNullable(glueTable)) - .withTargetTable(glueTable.getTargetTable()) - .withParameters(getTableParameters(glueTable)); - } - - public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) - { - PartitionInput input = convertPartition(partitionWithStatistics.getPartition()); - PartitionStatistics statistics = partitionWithStatistics.getStatistics(); - input.setParameters(updateStatisticsParameters(input.getParameters(), statistics.basicStatistics())); - return input; - } - - public static PartitionInput convertPartition(Partition partition) - { - PartitionInput input = new PartitionInput(); - input.setValues(partition.getValues()); - input.setStorageDescriptor(convertStorage(partition.getStorage(), partition.getColumns())); - input.setParameters(partition.getParameters()); - return input; - } - - private static StorageDescriptor convertStorage(Storage storage, List columns) - { - if (storage.isSkewed()) { - throw new IllegalArgumentException("Writing to skewed table/partition is not supported"); - } - SerDeInfo serdeInfo = new SerDeInfo() - .withSerializationLibrary(storage.getStorageFormat().getSerDeNullable()) - .withParameters(storage.getSerdeParameters()); - - StorageDescriptor sd = new StorageDescriptor(); - sd.setLocation(storage.getLocation()); - sd.setColumns(columns.stream().map(GlueInputConverter::convertColumn).collect(toImmutableList())); - sd.setSerdeInfo(serdeInfo); - sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable()); - sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable()); - sd.setParameters(ImmutableMap.of()); - - Optional bucketProperty = storage.getBucketProperty(); - if (bucketProperty.isPresent()) { - sd.setNumberOfBuckets(bucketProperty.get().bucketCount()); - sd.setBucketColumns(bucketProperty.get().bucketedBy()); - if (!bucketProperty.get().sortedBy().isEmpty()) { - sd.setSortColumns(bucketProperty.get().sortedBy().stream() - .map(column -> new Order().withColumn(column.columnName()).withSortOrder(column.order().getHiveOrder())) - .collect(toImmutableList())); - } - } - - return sd; - } - - private static com.amazonaws.services.glue.model.Column convertColumn(Column trinoColumn) - { - return new com.amazonaws.services.glue.model.Column() - .withName(trinoColumn.getName()) - .withType(trinoColumn.getType().toString()) - .withComment(trinoColumn.getComment().orElse(null)) - .withParameters(trinoColumn.getProperties()); - } - - public static UserDefinedFunctionInput convertFunction(String functionName, LanguageFunction function) - { - return new UserDefinedFunctionInput() - .withFunctionName(metastoreFunctionName(functionName, function.signatureToken())) - .withClassName("TrinoFunction") - 
.withOwnerType(PrincipalType.USER) - .withOwnerName(function.owner().orElse(null)) - .withResourceUris(toResourceUris(LANGUAGE_FUNCTION_CODEC.toJsonBytes(function)).stream() - .map(uri -> new ResourceUri() - .withResourceType(ResourceType.FILE) - .withUri(uri.getUri())) - .toList()); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueMetastoreModule.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueMetastoreModule.java deleted file mode 100644 index 1d1bbf384cf3..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueMetastoreModule.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.Table; -import com.google.inject.Binder; -import com.google.inject.Inject; -import com.google.inject.Injector; -import com.google.inject.Key; -import com.google.inject.Provider; -import com.google.inject.Scopes; -import com.google.inject.Singleton; -import com.google.inject.TypeLiteral; -import com.google.inject.multibindings.Multibinder; -import com.google.inject.multibindings.ProvidesIntoSet; -import io.airlift.concurrent.BoundedExecutor; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.opentelemetry.api.OpenTelemetry; -import io.opentelemetry.instrumentation.awssdk.v1_11.AwsSdkTelemetry; -import io.trino.metastore.HiveMetastoreFactory; -import io.trino.metastore.RawHiveMetastoreFactory; -import io.trino.plugin.hive.AllowHiveTableRename; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; - -import java.lang.annotation.Annotation; -import java.util.concurrent.Executor; -import java.util.concurrent.ExecutorService; -import java.util.function.Function; -import java.util.function.Predicate; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.util.concurrent.MoreExecutors.directExecutor; -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; -import static io.airlift.concurrent.Threads.daemonThreadsNamed; -import static io.airlift.configuration.ConditionalModule.conditionalModule; -import static io.trino.plugin.base.ClosingBinder.closingBinder; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static org.weakref.jmx.guice.ExportBinder.newExporter; - -public class GlueMetastoreModule - extends AbstractConfigurationAwareModule -{ - @Override - protected void setup(Binder binder) - { - GlueHiveMetastoreConfig glueConfig = buildConfigObject(GlueHiveMetastoreConfig.class); - Multibinder requestHandlers = newSetBinder(binder, RequestHandler2.class, ForGlueHiveMetastore.class); - 
glueConfig.getCatalogId().ifPresent(catalogId -> requestHandlers.addBinding().toInstance(new GlueCatalogIdRequestHandler(catalogId))); - glueConfig.getGlueProxyApiId().ifPresent(glueProxyApiId -> requestHandlers.addBinding() - .toInstance(new ProxyApiRequestHandler(glueProxyApiId))); - binder.bind(AWSCredentialsProvider.class).toProvider(GlueCredentialsProvider.class).in(Scopes.SINGLETON); - - newOptionalBinder(binder, Key.get(new TypeLiteral>() {}, ForGlueHiveMetastore.class)) - .setDefault().toProvider(DefaultGlueMetastoreTableFilterProvider.class).in(Scopes.SINGLETON); - - binder.bind(GlueHiveMetastore.class).in(Scopes.SINGLETON); - newOptionalBinder(binder, Key.get(HiveMetastoreFactory.class, RawHiveMetastoreFactory.class)) - .setDefault() - .to(GlueHiveMetastoreFactory.class) - .in(Scopes.SINGLETON); - - // export under the old name, for backwards compatibility - binder.bind(GlueHiveMetastoreFactory.class).in(Scopes.SINGLETON); - binder.bind(Key.get(GlueMetastoreStats.class, ForGlueHiveMetastore.class)).toInstance(new GlueMetastoreStats()); - binder.bind(AWSGlueAsync.class).toProvider(HiveGlueClientProvider.class).in(Scopes.SINGLETON); - closingBinder(binder).registerResource(AWSGlueAsync.class, AWSGlueAsync::shutdown); - newExporter(binder).export(GlueHiveMetastore.class).withGeneratedName(); - - binder.bind(Key.get(boolean.class, AllowHiveTableRename.class)).toInstance(false); - - newOptionalBinder(binder, GlueColumnStatisticsProviderFactory.class) - .setDefault().to(DefaultGlueColumnStatisticsProviderFactory.class).in(Scopes.SINGLETON); - - createExecutor(ForGlueHiveMetastore.class, "hive-glue-partitions-%s", GlueHiveMetastoreConfig::getGetPartitionThreads); - createExecutor(ForGlueColumnStatisticsRead.class, "hive-glue-statistics-read-%s", GlueHiveMetastoreConfig::getReadStatisticsThreads); - createExecutor(ForGlueColumnStatisticsWrite.class, "hive-glue-statistics-write-%s", GlueHiveMetastoreConfig::getWriteStatisticsThreads); - } - - @ProvidesIntoSet - @Singleton - @ForGlueHiveMetastore - public RequestHandler2 createSkipArchiveRequestHandler(GlueHiveMetastoreConfig config) - { - if (!config.isSkipArchive()) { - return new RequestHandler2() {}; - } - return new SkipArchiveRequestHandler(); - } - - @ProvidesIntoSet - @Singleton - @ForGlueHiveMetastore - public RequestHandler2 createTelemetryRequestHandler(OpenTelemetry openTelemetry) - { - return AwsSdkTelemetry.builder(openTelemetry) - .setCaptureExperimentalSpanAttributes(true) - .build() - .newRequestHandler(); - } - - private void createExecutor(Class annotationClass, String nameTemplate, Function threads) - { - install(conditionalModule( - GlueHiveMetastoreConfig.class, - config -> threads.apply(config) > 1, - binder -> { - binder.bind(Key.get(ExecutorService.class, annotationClass)).toInstance(newCachedThreadPool(daemonThreadsNamed(nameTemplate))); - binder.bind(Key.get(Executor.class, annotationClass)).toProvider(new BoundedExecutorProvider(annotationClass, threads)).in(Scopes.SINGLETON); - closingBinder(binder).registerExecutor(Key.get(ExecutorService.class, annotationClass)); - }, - binder -> binder.bind(Key.get(Executor.class, annotationClass)) - .toInstance(directExecutor()))); - } - - private static class BoundedExecutorProvider - implements Provider - { - private final Class annotationClass; - private final Function threads; - private Injector injector; - - public BoundedExecutorProvider(Class annotationClass, Function threads) - { - this.annotationClass = annotationClass; - this.threads = threads; - } - - @Inject - 
public void setInjector(Injector injector) - { - this.injector = injector; - } - - @Override - public BoundedExecutor get() - { - ExecutorService executor = injector.getInstance(Key.get(ExecutorService.class, annotationClass)); - int threads = this.threads.apply(injector.getInstance(GlueHiveMetastoreConfig.class)); - checkArgument(threads > 0, "Invalid number of threads: %s", threads); - return new BoundedExecutor(executor, threads); - } - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueStatConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueStatConverter.java deleted file mode 100644 index fc71676c5a36..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueStatConverter.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.BinaryColumnStatisticsData; -import com.amazonaws.services.glue.model.BooleanColumnStatisticsData; -import com.amazonaws.services.glue.model.ColumnStatistics; -import com.amazonaws.services.glue.model.ColumnStatisticsData; -import com.amazonaws.services.glue.model.ColumnStatisticsType; -import com.amazonaws.services.glue.model.DateColumnStatisticsData; -import com.amazonaws.services.glue.model.DecimalColumnStatisticsData; -import com.amazonaws.services.glue.model.DecimalNumber; -import com.amazonaws.services.glue.model.DoubleColumnStatisticsData; -import com.amazonaws.services.glue.model.LongColumnStatisticsData; -import com.amazonaws.services.glue.model.StringColumnStatisticsData; -import io.trino.hive.thrift.metastore.Decimal; -import io.trino.metastore.Column; -import io.trino.metastore.HiveColumnStatistics; -import io.trino.metastore.HiveType; -import io.trino.metastore.Partition; -import io.trino.metastore.Table; -import io.trino.metastore.type.PrimitiveTypeInfo; -import io.trino.metastore.type.TypeInfo; -import io.trino.spi.TrinoException; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.time.LocalDate; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalDouble; -import java.util.OptionalLong; -import java.util.concurrent.TimeUnit; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.metastore.HiveColumnStatistics.createBinaryColumnStatistics; -import static io.trino.metastore.HiveColumnStatistics.createBooleanColumnStatistics; -import static io.trino.metastore.HiveColumnStatistics.createDateColumnStatistics; -import static io.trino.metastore.HiveColumnStatistics.createDecimalColumnStatistics; -import static io.trino.metastore.HiveColumnStatistics.createDoubleColumnStatistics; -import static io.trino.metastore.HiveColumnStatistics.createIntegerColumnStatistics; -import 
static io.trino.metastore.HiveColumnStatistics.createStringColumnStatistics; -import static io.trino.metastore.type.Category.PRIMITIVE; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreNullsCount; - -public class GlueStatConverter -{ - private GlueStatConverter() {} - - private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1); - - public static List toGlueColumnStatistics(Partition partition, Map trinoColumnStats) - { - return partition.getColumns().stream() - .filter(column -> trinoColumnStats.containsKey(column.getName())) - .map(c -> toColumnStatistics(c, trinoColumnStats.get(c.getName()))) - .collect(toImmutableList()); - } - - public static List toGlueColumnStatistics(Table table, Map trinoColumnStats) - { - return trinoColumnStats.entrySet().stream() - .map(e -> toColumnStatistics(table.getColumn(e.getKey()).get(), e.getValue())) - .collect(toImmutableList()); - } - - private static ColumnStatistics toColumnStatistics(Column column, HiveColumnStatistics statistics) - { - ColumnStatistics columnStatistics = new ColumnStatistics(); - HiveType columnType = column.getType(); - columnStatistics.setColumnName(column.getName()); - columnStatistics.setColumnType(columnType.toString()); - ColumnStatisticsData catalogColumnStatisticsData = toGlueColumnStatisticsData(statistics, columnType); - columnStatistics.setStatisticsData(catalogColumnStatisticsData); - columnStatistics.setAnalyzedTime(new Date()); - return columnStatistics; - } - - public static HiveColumnStatistics fromGlueColumnStatistics(ColumnStatisticsData catalogColumnStatisticsData) - { - ColumnStatisticsType type = ColumnStatisticsType.fromValue(catalogColumnStatisticsData.getType()); - switch (type) { - case BINARY: { - BinaryColumnStatisticsData data = catalogColumnStatisticsData.getBinaryColumnStatisticsData(); - OptionalLong max = OptionalLong.of(data.getMaximumLength()); - OptionalDouble avg = OptionalDouble.of(data.getAverageLength()); - OptionalLong nulls = fromMetastoreNullsCount(data.getNumberOfNulls()); - return createBinaryColumnStatistics(max, avg, nulls); - } - case BOOLEAN: { - BooleanColumnStatisticsData catalogBooleanData = catalogColumnStatisticsData.getBooleanColumnStatisticsData(); - return createBooleanColumnStatistics( - OptionalLong.of(catalogBooleanData.getNumberOfTrues()), - OptionalLong.of(catalogBooleanData.getNumberOfFalses()), - fromMetastoreNullsCount(catalogBooleanData.getNumberOfNulls())); - } - case DATE: { - DateColumnStatisticsData data = catalogColumnStatisticsData.getDateColumnStatisticsData(); - Optional min = dateToLocalDate(data.getMinimumValue()); - Optional max = dateToLocalDate(data.getMaximumValue()); - OptionalLong nullsCount = fromMetastoreNullsCount(data.getNumberOfNulls()); - OptionalLong distinctValuesWithNullCount = OptionalLong.of(data.getNumberOfDistinctValues()); - return createDateColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount); - } - case DECIMAL: { - DecimalColumnStatisticsData data = catalogColumnStatisticsData.getDecimalColumnStatisticsData(); - Optional min = glueDecimalToBigDecimal(data.getMinimumValue()); - Optional max = glueDecimalToBigDecimal(data.getMaximumValue()); - OptionalLong distinctValuesWithNullCount = OptionalLong.of(data.getNumberOfDistinctValues()); - OptionalLong nullsCount = fromMetastoreNullsCount(data.getNumberOfNulls()); - return createDecimalColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount); - 
} - case DOUBLE: { - DoubleColumnStatisticsData data = catalogColumnStatisticsData.getDoubleColumnStatisticsData(); - OptionalDouble min = OptionalDouble.of(data.getMinimumValue()); - OptionalDouble max = OptionalDouble.of(data.getMaximumValue()); - OptionalLong nulls = fromMetastoreNullsCount(data.getNumberOfNulls()); - OptionalLong distinctValuesWithNullCount = OptionalLong.of(data.getNumberOfDistinctValues()); - return createDoubleColumnStatistics(min, max, nulls, distinctValuesWithNullCount); - } - case LONG: { - LongColumnStatisticsData data = catalogColumnStatisticsData.getLongColumnStatisticsData(); - OptionalLong min = OptionalLong.of(data.getMinimumValue()); - OptionalLong max = OptionalLong.of(data.getMaximumValue()); - OptionalLong nullsCount = fromMetastoreNullsCount(data.getNumberOfNulls()); - OptionalLong distinctValuesWithNullCount = OptionalLong.of(data.getNumberOfDistinctValues()); - return createIntegerColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount); - } - case STRING: { - StringColumnStatisticsData data = catalogColumnStatisticsData.getStringColumnStatisticsData(); - OptionalLong max = OptionalLong.of(data.getMaximumLength()); - OptionalDouble avg = OptionalDouble.of(data.getAverageLength()); - OptionalLong nullsCount = fromMetastoreNullsCount(data.getNumberOfNulls()); - OptionalLong distinctValuesWithNullCount = OptionalLong.of(data.getNumberOfDistinctValues()); - return createStringColumnStatistics(max, avg, nullsCount, distinctValuesWithNullCount); - } - } - - throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + catalogColumnStatisticsData); - } - - private static ColumnStatisticsData toGlueColumnStatisticsData(HiveColumnStatistics statistics, HiveType columnType) - { - TypeInfo typeInfo = columnType.getTypeInfo(); - checkArgument(typeInfo.getCategory() == PRIMITIVE, "Unsupported statistics type: %s", columnType); - - ColumnStatisticsData catalogColumnStatisticsData = new ColumnStatisticsData(); - - switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { - case BOOLEAN: { - BooleanColumnStatisticsData data = new BooleanColumnStatisticsData(); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - statistics.getBooleanStatistics().ifPresent(booleanStatistics -> { - booleanStatistics.getFalseCount().ifPresent(data::setNumberOfFalses); - booleanStatistics.getTrueCount().ifPresent(data::setNumberOfTrues); - }); - catalogColumnStatisticsData.setType(ColumnStatisticsType.BOOLEAN.toString()); - catalogColumnStatisticsData.setBooleanColumnStatisticsData(data); - break; - } - case BINARY: { - BinaryColumnStatisticsData data = new BinaryColumnStatisticsData(); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - data.setMaximumLength(statistics.getMaxValueSizeInBytes().orElse(0)); - data.setAverageLength(statistics.getAverageColumnLength().orElse(0)); - catalogColumnStatisticsData.setType(ColumnStatisticsType.BINARY.toString()); - catalogColumnStatisticsData.setBinaryColumnStatisticsData(data); - break; - } - case DATE: { - DateColumnStatisticsData data = new DateColumnStatisticsData(); - statistics.getDateStatistics().ifPresent(dateStatistics -> { - dateStatistics.getMin().ifPresent(value -> data.setMinimumValue(localDateToDate(value))); - dateStatistics.getMax().ifPresent(value -> data.setMaximumValue(localDateToDate(value))); - }); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - statistics.getDistinctValuesWithNullCount().ifPresent(data::setNumberOfDistinctValues); - 
catalogColumnStatisticsData.setType(ColumnStatisticsType.DATE.toString()); - catalogColumnStatisticsData.setDateColumnStatisticsData(data); - break; - } - case DECIMAL: { - DecimalColumnStatisticsData data = new DecimalColumnStatisticsData(); - statistics.getDecimalStatistics().ifPresent(decimalStatistics -> { - decimalStatistics.getMin().ifPresent(value -> data.setMinimumValue(bigDecimalToGlueDecimal(value))); - decimalStatistics.getMax().ifPresent(value -> data.setMaximumValue(bigDecimalToGlueDecimal(value))); - }); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - statistics.getDistinctValuesWithNullCount().ifPresent(data::setNumberOfDistinctValues); - catalogColumnStatisticsData.setType(ColumnStatisticsType.DECIMAL.toString()); - catalogColumnStatisticsData.setDecimalColumnStatisticsData(data); - break; - } - case FLOAT: - case DOUBLE: { - DoubleColumnStatisticsData data = new DoubleColumnStatisticsData(); - statistics.getDoubleStatistics().ifPresent(doubleStatistics -> { - doubleStatistics.getMin().ifPresent(data::setMinimumValue); - doubleStatistics.getMax().ifPresent(data::setMaximumValue); - }); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - statistics.getDistinctValuesWithNullCount().ifPresent(data::setNumberOfDistinctValues); - catalogColumnStatisticsData.setType(ColumnStatisticsType.DOUBLE.toString()); - catalogColumnStatisticsData.setDoubleColumnStatisticsData(data); - break; - } - case BYTE: - case SHORT: - case INT: - case LONG: - case TIMESTAMP: { - LongColumnStatisticsData data = new LongColumnStatisticsData(); - statistics.getIntegerStatistics().ifPresent(stats -> { - stats.getMin().ifPresent(data::setMinimumValue); - stats.getMax().ifPresent(data::setMaximumValue); - }); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - statistics.getDistinctValuesWithNullCount().ifPresent(data::setNumberOfDistinctValues); - catalogColumnStatisticsData.setType(ColumnStatisticsType.LONG.toString()); - catalogColumnStatisticsData.setLongColumnStatisticsData(data); - break; - } - case VARCHAR: - case CHAR: - case STRING: { - StringColumnStatisticsData data = new StringColumnStatisticsData(); - statistics.getNullsCount().ifPresent(data::setNumberOfNulls); - statistics.getDistinctValuesWithNullCount().ifPresent(data::setNumberOfDistinctValues); - data.setMaximumLength(statistics.getMaxValueSizeInBytes().orElse(0)); - data.setAverageLength(statistics.getAverageColumnLength().orElse(0)); - catalogColumnStatisticsData.setType(ColumnStatisticsType.STRING.toString()); - catalogColumnStatisticsData.setStringColumnStatisticsData(data); - break; - } - default: - throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics type: " + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); - } - return catalogColumnStatisticsData; - } - - private static DecimalNumber bigDecimalToGlueDecimal(BigDecimal decimal) - { - Decimal hiveDecimal = new Decimal((short) decimal.scale(), ByteBuffer.wrap(decimal.unscaledValue().toByteArray())); - DecimalNumber catalogDecimal = new DecimalNumber(); - catalogDecimal.setUnscaledValue(ByteBuffer.wrap(hiveDecimal.getUnscaled())); - catalogDecimal.setScale((int) hiveDecimal.getScale()); - return catalogDecimal; - } - - private static Optional glueDecimalToBigDecimal(DecimalNumber catalogDecimal) - { - if (catalogDecimal == null) { - return Optional.empty(); - } - Decimal decimal = new Decimal(); - decimal.setUnscaled(catalogDecimal.getUnscaledValue()); - decimal.setScale(catalogDecimal.getScale().shortValue()); 
- return Optional.of(new BigDecimal(new BigInteger(decimal.getUnscaled()), decimal.getScale())); - } - - private static Optional dateToLocalDate(Date date) - { - if (date == null) { - return Optional.empty(); - } - long daysSinceEpoch = date.getTime() / MILLIS_PER_DAY; - return Optional.of(LocalDate.ofEpochDay(daysSinceEpoch)); - } - - private static Date localDateToDate(LocalDate date) - { - long millisecondsSinceEpoch = date.toEpochDay() * MILLIS_PER_DAY; - return new Date(millisecondsSinceEpoch); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueToTrinoConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueToTrinoConverter.java deleted file mode 100644 index 3ab9906c170c..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueToTrinoConverter.java +++ /dev/null @@ -1,372 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.SerDeInfo; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.UserDefinedFunction; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.trino.hive.thrift.metastore.ResourceType; -import io.trino.hive.thrift.metastore.ResourceUri; -import io.trino.metastore.Column; -import io.trino.metastore.Database; -import io.trino.metastore.HiveBucketProperty; -import io.trino.metastore.HiveType; -import io.trino.metastore.Partition; -import io.trino.metastore.SortingColumn; -import io.trino.metastore.SortingColumn.Order; -import io.trino.metastore.Storage; -import io.trino.metastore.StorageFormat; -import io.trino.metastore.Table; -import io.trino.plugin.hive.HiveStorageFormat; -import io.trino.spi.TrinoException; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.function.LanguageFunction; -import io.trino.spi.security.PrincipalType; -import jakarta.annotation.Nullable; -import org.gaul.modernizer_maven_annotations.SuppressModernizer; - -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Optional; -import java.util.function.BiFunction; -import java.util.function.Function; -import java.util.function.UnaryOperator; - -import static com.google.common.base.MoreObjects.firstNonNull; -import static com.google.common.base.Strings.emptyToNull; -import static com.google.common.base.Strings.nullToEmpty; -import static io.trino.metastore.HiveType.HIVE_INT; -import static io.trino.metastore.Table.TABLE_COMMENT; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; -import static io.trino.plugin.hive.metastore.glue.v1.Memoizers.memoizeLast; -import static 
io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.decodeFunction; -import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; -import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; - -public final class GlueToTrinoConverter -{ - private static final String PUBLIC_OWNER = "PUBLIC"; - - private GlueToTrinoConverter() {} - - @SuppressModernizer // Usage of `Table.getStorageDescriptor` is not allowed. Only this method can call that. - public static Optional getStorageDescriptor(com.amazonaws.services.glue.model.Table glueTable) - { - return Optional.ofNullable(glueTable.getStorageDescriptor()); - } - - @SuppressModernizer // Usage of `Column.getParameters` is not allowed. Only this method can call that. - public static Map getColumnParameters(com.amazonaws.services.glue.model.Column glueColumn) - { - return firstNonNull(glueColumn.getParameters(), ImmutableMap.of()); - } - - public static String getTableType(com.amazonaws.services.glue.model.Table glueTable) - { - // Athena treats missing table type as EXTERNAL_TABLE. - return firstNonNull(getTableTypeNullable(glueTable), EXTERNAL_TABLE.name()); - } - - @Nullable - @SuppressModernizer // Usage of `Table.getTableType` is not allowed. Only this method can call that. - public static String getTableTypeNullable(com.amazonaws.services.glue.model.Table glueTable) - { - return glueTable.getTableType(); - } - - @SuppressModernizer // Usage of `Table.getParameters` is not allowed. Only this method can call that. - public static Map getTableParameters(com.amazonaws.services.glue.model.Table glueTable) - { - return firstNonNull(glueTable.getParameters(), ImmutableMap.of()); - } - - @SuppressModernizer // Usage of `Partition.getParameters` is not allowed. Only this method can call that. - public static Map getPartitionParameters(com.amazonaws.services.glue.model.Partition gluePartition) - { - return firstNonNull(gluePartition.getParameters(), ImmutableMap.of()); - } - - @SuppressModernizer // Usage of `SerDeInfo.getParameters` is not allowed. Only this method can call that. - public static Map getSerDeInfoParameters(com.amazonaws.services.glue.model.SerDeInfo glueSerDeInfo) - { - return firstNonNull(glueSerDeInfo.getParameters(), ImmutableMap.of()); - } - - public static Database convertDatabase(com.amazonaws.services.glue.model.Database glueDb) - { - return Database.builder() - .setDatabaseName(glueDb.getName()) - // Currently it's not possible to create a Glue database with empty location string "" - // (validation error detected: Value '' at 'database.locationUri' failed to satisfy constraint: Member must have length greater than or equal to 1) - // However, it has been observed that Glue databases with empty location do exist in the wild. 
- .setLocation(Optional.ofNullable(emptyToNull(glueDb.getLocationUri()))) - .setComment(Optional.ofNullable(glueDb.getDescription())) - .setParameters(firstNonNull(glueDb.getParameters(), ImmutableMap.of())) - .setOwnerName(Optional.of(PUBLIC_OWNER)) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - } - - public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName) - { - SchemaTableName table = new SchemaTableName(dbName, glueTable.getName()); - - String tableType = getTableType(glueTable); - - ImmutableMap.Builder parameters = ImmutableMap.builder(); - Optional description = Optional.ofNullable(glueTable.getDescription()); - description.ifPresent(comment -> parameters.put(TABLE_COMMENT, comment)); - getTableParameters(glueTable).entrySet().stream() - // If the description was set we may have two "comment"s, prefer the description field - .filter(entry -> description.isEmpty() || !entry.getKey().equals(TABLE_COMMENT)) - .forEach(parameters::put); - Map tableParameters = parameters.buildOrThrow(); - - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(table.getSchemaName()) - .setTableName(table.getTableName()) - .setOwner(Optional.ofNullable(glueTable.getOwner())) - .setTableType(tableType) - .setParameters(tableParameters) - .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText())) - .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText())); - - Optional storageDescriptor = getStorageDescriptor(glueTable); - - if (isIcebergTable(tableParameters) || - (storageDescriptor.isEmpty() && isTrinoMaterializedView(tableType, tableParameters))) { - // Iceberg tables do not need to read the StorageDescriptor field, but we still need to return dummy properties for compatibility - // Materialized views do not need to read the StorageDescriptor, but we still need to return dummy properties for compatibility - tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty(), ImmutableMap.of()))); - tableBuilder.getStorageBuilder().setStorageFormat(HiveStorageFormat.PARQUET.toStorageFormat()); - } - else if (isDeltaLakeTable(tableParameters)) { - tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty(), ImmutableMap.of()))); - tableBuilder.setPartitionColumns(ImmutableList.of()); - if (storageDescriptor.isEmpty()) { - tableBuilder.getStorageBuilder().setStorageFormat(HiveStorageFormat.PARQUET.toStorageFormat()); - } - else { - StorageDescriptor sd = storageDescriptor.get(); - if (sd.getSerdeInfo() == null) { - throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table SerdeInfo is null for table '%s' %s".formatted(table, glueTable)); - } - new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder()); - } - } - else { - if (storageDescriptor.isEmpty()) { - throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table StorageDescriptor is null for table '%s' %s".formatted(table, glueTable)); - } - StorageDescriptor sd = storageDescriptor.get(); - if (sd.getSerdeInfo() == null) { - throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table SerdeInfo is null for table '%s' %s".formatted(table, glueTable)); - } - boolean isCsv = HiveStorageFormat.CSV.getSerde().equals(sd.getSerdeInfo().getSerializationLibrary()); - tableBuilder.setDataColumns(convertColumns(table, sd.getColumns(), ColumnType.DATA, isCsv)); - if (glueTable.getPartitionKeys() != null) { - tableBuilder.setPartitionColumns(convertColumns(table, glueTable.getPartitionKeys(), 
ColumnType.PARTITION, isCsv)); - } - else { - tableBuilder.setPartitionColumns(ImmutableList.of()); - } - // No benefit to memoizing here, just reusing the implementation - new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder()); - } - - return tableBuilder.build(); - } - - private static Column convertColumn(SchemaTableName table, com.amazonaws.services.glue.model.Column glueColumn, ColumnType columnType, boolean isCsv) - { - // OpenCSVSerde deserializes columns from csv file into strings, so we set the column type from the metastore - // to string to avoid cast exceptions. - if (columnType == ColumnType.DATA && isCsv) { - //TODO(https://github.com/trinodb/trino/issues/7240) Add tests - return new Column(glueColumn.getName(), HiveType.HIVE_STRING, Optional.ofNullable(glueColumn.getComment()), getColumnParameters(glueColumn)); - } - return new Column(glueColumn.getName(), convertType(table, glueColumn), Optional.ofNullable(glueColumn.getComment()), getColumnParameters(glueColumn)); - } - - private static HiveType convertType(SchemaTableName table, com.amazonaws.services.glue.model.Column column) - { - try { - return HiveType.valueOf(column.getType().toLowerCase(Locale.ENGLISH)); - } - catch (IllegalArgumentException e) { - throw new TrinoException(HIVE_INVALID_METADATA, "Glue table '%s' column '%s' has invalid data type: %s".formatted(table, column.getName(), column.getType()), e); - } - } - - private static List convertColumns(SchemaTableName table, List glueColumns, ColumnType columnType, boolean isCsv) - { - return mappedCopy(glueColumns, glueColumn -> convertColumn(table, glueColumn, columnType, isCsv)); - } - - private static Function, Map> parametersConverter() - { - return memoizeLast(ImmutableMap::copyOf); - } - - private static boolean isNullOrEmpty(List list) - { - return list == null || list.isEmpty(); - } - - public static final class GluePartitionConverter - implements Function - { - private final BiFunction, Boolean, List> dataColumnsConverter; - private final Function, Map> parametersConverter = parametersConverter(); - private final StorageConverter storageConverter = new StorageConverter(); - private final String databaseName; - private final String tableName; - - public GluePartitionConverter(String databaseName, String tableName) - { - this.databaseName = requireNonNull(databaseName, "databaseName is null"); - this.tableName = requireNonNull(tableName, "tableName is null"); - this.dataColumnsConverter = memoizeLast((glueColumns, isCsv) -> convertColumns(new SchemaTableName(databaseName, tableName), glueColumns, ColumnType.DATA, isCsv)); - } - - @Override - public Partition apply(com.amazonaws.services.glue.model.Partition gluePartition) - { - requireNonNull(gluePartition.getStorageDescriptor(), "Partition StorageDescriptor is null"); - StorageDescriptor sd = gluePartition.getStorageDescriptor(); - - if (!databaseName.equals(gluePartition.getDatabaseName())) { - throw new IllegalArgumentException(format("Unexpected databaseName, expected: %s, but found: %s", databaseName, gluePartition.getDatabaseName())); - } - if (!tableName.equals(gluePartition.getTableName())) { - throw new IllegalArgumentException(format("Unexpected tableName, expected: %s, but found: %s", tableName, gluePartition.getTableName())); - } - boolean isCsv = sd.getSerdeInfo() != null && HiveStorageFormat.CSV.getSerde().equals(sd.getSerdeInfo().getSerializationLibrary()); - Partition.Builder partitionBuilder = Partition.builder() - .setDatabaseName(databaseName) - 
.setTableName(tableName) - .setValues(gluePartition.getValues()) // No memoization benefit - .setColumns(dataColumnsConverter.apply(sd.getColumns(), isCsv)) - .setParameters(parametersConverter.apply(getPartitionParameters(gluePartition))); - - storageConverter.setStorageBuilder(sd, partitionBuilder.getStorageBuilder()); - - return partitionBuilder.build(); - } - } - - private static final class StorageConverter - { - private final Function, List> bucketColumns = memoizeLast(ImmutableList::copyOf); - private final Function, List> sortColumns = memoizeLast(StorageConverter::createSortingColumns); - private final UnaryOperator> bucketProperty = memoizeLast(); - private final Function, Map> serdeParametersConverter = parametersConverter(); - private final StorageFormatConverter storageFormatConverter = new StorageFormatConverter(); - - public void setStorageBuilder(StorageDescriptor sd, Storage.Builder storageBuilder) - { - requireNonNull(sd.getSerdeInfo(), "StorageDescriptor SerDeInfo is null"); - SerDeInfo serdeInfo = sd.getSerdeInfo(); - - storageBuilder.setStorageFormat(storageFormatConverter.createStorageFormat(serdeInfo, sd)) - .setLocation(nullToEmpty(sd.getLocation())) - .setBucketProperty(convertToBucketProperty(sd)) - .setSkewed(sd.getSkewedInfo() != null && !isNullOrEmpty(sd.getSkewedInfo().getSkewedColumnNames())) - .setSerdeParameters(serdeParametersConverter.apply(getSerDeInfoParameters(serdeInfo))) - .build(); - } - - private Optional convertToBucketProperty(StorageDescriptor sd) - { - if (sd.getNumberOfBuckets() > 0) { - if (isNullOrEmpty(sd.getBucketColumns())) { - throw new TrinoException(HIVE_INVALID_METADATA, "Table/partition metadata has 'numBuckets' set, but 'bucketCols' is not set"); - } - List bucketColumns = this.bucketColumns.apply(sd.getBucketColumns()); - List sortedBy = this.sortColumns.apply(sd.getSortColumns()); - return bucketProperty.apply(Optional.of(new HiveBucketProperty(bucketColumns, sd.getNumberOfBuckets(), sortedBy))); - } - return Optional.empty(); - } - - private static List createSortingColumns(List sortColumns) - { - if (isNullOrEmpty(sortColumns)) { - return ImmutableList.of(); - } - return mappedCopy(sortColumns, column -> new SortingColumn(column.getColumn(), Order.fromMetastoreApiOrder(column.getSortOrder(), "unknown"))); - } - } - - private static final class StorageFormatConverter - { - private static final StorageFormat ALL_NULLS = StorageFormat.createNullable(null, null, null); - private final UnaryOperator serializationLib = memoizeLast(); - private final UnaryOperator inputFormat = memoizeLast(); - private final UnaryOperator outputFormat = memoizeLast(); - // Second phase to attempt memoization on the entire instance beyond just the fields - private final UnaryOperator storageFormat = memoizeLast(); - - public StorageFormat createStorageFormat(SerDeInfo serdeInfo, StorageDescriptor storageDescriptor) - { - String serializationLib = this.serializationLib.apply(serdeInfo.getSerializationLibrary()); - String inputFormat = this.inputFormat.apply(storageDescriptor.getInputFormat()); - String outputFormat = this.outputFormat.apply(storageDescriptor.getOutputFormat()); - if (serializationLib == null && inputFormat == null && outputFormat == null) { - return ALL_NULLS; - } - return this.storageFormat.apply(StorageFormat.createNullable(serializationLib, inputFormat, outputFormat)); - } - } - - public static LanguageFunction convertFunction(UserDefinedFunction function) - { - List uris = mappedCopy(function.getResourceUris(), uri -> new 
ResourceUri(ResourceType.FILE, uri.getUri())); - - LanguageFunction result = decodeFunction(function.getFunctionName(), uris); - - return new LanguageFunction( - result.signatureToken(), - result.sql(), - result.path(), - Optional.ofNullable(function.getOwnerName())); - } - - public static List mappedCopy(List list, Function mapper) - { - requireNonNull(list, "list is null"); - requireNonNull(mapper, "mapper is null"); - // Uses a pre-sized builder to avoid intermediate allocations and copies, which is especially significant when the - // number of elements is large and the size of the resulting list can be known in advance - ImmutableList.Builder builder = ImmutableList.builderWithExpectedSize(list.size()); - for (T item : list) { - builder.add(mapper.apply(item)); - } - return builder.build(); - } - - private enum ColumnType - { - DATA, - PARTITION, - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/HiveGlueClientProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/HiveGlueClientProvider.java deleted file mode 100644 index b823ac771323..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/HiveGlueClientProvider.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
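The comment inside mappedCopy above gives the rationale: when the result size is known, a pre-sized ImmutableList builder avoids the intermediate growth a plain collector performs. A self-contained sketch contrasting the two forms (class name and sample data are illustrative only):

import static com.google.common.collect.ImmutableList.toImmutableList;

import com.google.common.collect.ImmutableList;

import java.util.List;
import java.util.function.Function;

final class PreSizedCopyExample
{
    private PreSizedCopyExample() {}

    // Same contract as mappedCopy: copy a list through a mapper into an immutable list
    static <T, R> List<R> mappedCopy(List<T> list, Function<T, R> mapper)
    {
        // Size is known up front, so the builder allocates its backing storage once
        ImmutableList.Builder<R> builder = ImmutableList.builderWithExpectedSize(list.size());
        for (T item : list) {
            builder.add(mapper.apply(item));
        }
        return builder.build();
    }

    public static void main(String[] args)
    {
        List<String> input = List.of("a", "bb", "ccc");
        // The stream form is what the Iceberg call sites switch to later in this change;
        // it is shorter, but the collector grows its buffer as elements arrive
        List<Integer> viaStream = input.stream().map(String::length).collect(toImmutableList());
        List<Integer> viaBuilder = mappedCopy(input, String::length);
        System.out.println(viaStream.equals(viaBuilder)); // true
    }
}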
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.google.common.collect.ImmutableSet; -import com.google.inject.Inject; -import com.google.inject.Provider; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; - -import java.util.Set; - -import static io.trino.plugin.hive.metastore.glue.v1.GlueClientUtil.createAsyncGlueClient; -import static java.util.Objects.requireNonNull; - -public class HiveGlueClientProvider - implements Provider -{ - private final GlueMetastoreStats stats; - private final AWSCredentialsProvider credentialsProvider; - private final GlueHiveMetastoreConfig glueConfig; // TODO do not keep mutable config instance on a field - private final Set requestHandlers; - - @Inject - public HiveGlueClientProvider( - @ForGlueHiveMetastore GlueMetastoreStats stats, - AWSCredentialsProvider credentialsProvider, - @ForGlueHiveMetastore Set requestHandlers, - GlueHiveMetastoreConfig glueConfig) - { - this.stats = requireNonNull(stats, "stats is null"); - this.credentialsProvider = requireNonNull(credentialsProvider, "credentialsProvider is null"); - this.requestHandlers = ImmutableSet.copyOf(requireNonNull(requestHandlers, "requestHandlers is null")); - this.glueConfig = glueConfig; - } - - @Override - public AWSGlueAsync get() - { - return createAsyncGlueClient(glueConfig, credentialsProvider, requestHandlers, stats.newRequestMetricsCollector()); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/Memoizers.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/Memoizers.java deleted file mode 100644 index 43d4af02a23b..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/Memoizers.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import java.util.Objects; -import java.util.function.BiFunction; -import java.util.function.Function; -import java.util.function.UnaryOperator; - -import static java.util.Objects.requireNonNull; - -public final class Memoizers -{ - private Memoizers() {} - - public static UnaryOperator memoizeLast() - { - return new Simple<>(); - } - - public static Function memoizeLast(Function transform) - { - return new Transforming<>(transform); - } - - public static BiFunction memoizeLast(BiFunction transform) - { - requireNonNull(transform, "transform is null"); - Function, R> memoized = memoizeLast(pair -> transform.apply(pair.first, pair.second)); - return (a, b) -> memoized.apply(new Pair<>(a, b)); - } - - private static final class Simple - implements UnaryOperator - { - private T lastInput; - - @Override - public T apply(T input) - { - if (!Objects.equals(lastInput, input)) { - lastInput = input; - } - return lastInput; - } - } - - private static final class Transforming - implements Function - { - private final Function transform; - private I lastInput; - private O lastOutput; - private boolean inputSeen; - - private Transforming(Function transform) - { - this.transform = requireNonNull(transform, "transform is null"); - } - - @Override - public O apply(I input) - { - if (!inputSeen || !Objects.equals(lastInput, input)) { - lastOutput = transform.apply(input); - lastInput = input; - inputSeen = true; - } - return lastOutput; - } - } - - private record Pair(T first, U second) {} -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ProxyApiRequestHandler.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ProxyApiRequestHandler.java deleted file mode 100644 index 0d1c86c52553..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/ProxyApiRequestHandler.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.Request; -import com.amazonaws.handlers.RequestHandler2; - -import static java.util.Objects.requireNonNull; - -public class ProxyApiRequestHandler - extends RequestHandler2 -{ - private final String proxyApiId; - - public ProxyApiRequestHandler(String proxyApiId) - { - this.proxyApiId = requireNonNull(proxyApiId, "proxyApiId is null"); - } - - @Override - public AmazonWebServiceRequest beforeExecution(AmazonWebServiceRequest request) - { - request.putCustomRequestHeader("x-apigw-api-id", proxyApiId); - return request; - } - - @Override - public void beforeRequest(Request request) - { - // AWS Glue SDK will append "X-Amz-Target" header to requests (with "AWSGlue" prefix). - // This misleads API Gateway (Glue proxy) that it's not the target of the REST call. 
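The Memoizers utility above caches only the most recent input and output. That is all GluePartitionConverter needs, because consecutive partitions of one table usually carry equal column lists, parameters, and storage formats, and reusing the last result lets equal partitions share the same immutable instances (the memoization test later in this diff asserts exactly that with isSameAs). A minimal sketch of defining and using such a single-entry memoizer (names are illustrative):

import java.util.Objects;
import java.util.function.Function;

final class MemoizeLastExample
{
    private MemoizeLastExample() {}

    // Caches only the most recent (input, output) pair; an equal consecutive input reuses the previous output
    static <I, O> Function<I, O> memoizeLast(Function<I, O> transform)
    {
        return new Function<>()
        {
            private I lastInput;
            private O lastOutput;
            private boolean seen;

            @Override
            public O apply(I input)
            {
                if (!seen || !Objects.equals(lastInput, input)) {
                    lastOutput = transform.apply(input);
                    lastInput = input;
                    seen = true;
                }
                return lastOutput;
            }
        };
    }

    public static void main(String[] args)
    {
        Function<String, String> upper = memoizeLast(String::toUpperCase);
        String first = upper.apply("abc");
        String second = upper.apply(new String("abc")); // equal input, but a different instance
        System.out.println(first == second); // true: the cached result is reused
    }
}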
Therefore, we - // need to pass "X-Amz-Target" value in a special HTTP header that is translated back to "X-Amz-Target" - // when API Gateway makes request to AWSGlue. - request.getHeaders().put("X-Trino-Amz-Target-Proxy", request.getHeaders().remove("X-Amz-Target")); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/SkipArchiveRequestHandler.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/SkipArchiveRequestHandler.java deleted file mode 100644 index d5ba1bc434f2..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/SkipArchiveRequestHandler.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.glue.model.BatchGetPartitionRequest; -import com.amazonaws.services.glue.model.CreateDatabaseRequest; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetPartitionsRequest; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.GetTablesRequest; -import com.amazonaws.services.glue.model.UpdateTableRequest; - -public class SkipArchiveRequestHandler - extends RequestHandler2 -{ - @Override - public AmazonWebServiceRequest beforeExecution(AmazonWebServiceRequest request) - { - if (request instanceof UpdateTableRequest updateTableRequest) { - return updateTableRequest.withSkipArchive(true); - } - if (request instanceof CreateDatabaseRequest || - request instanceof DeleteDatabaseRequest || - request instanceof GetDatabasesRequest || - request instanceof GetDatabaseRequest || - request instanceof CreateTableRequest || - request instanceof DeleteTableRequest || - request instanceof GetTablesRequest || - request instanceof GetTableRequest || - // The following requests are required for migrate procedure - request instanceof GetPartitionsRequest || - request instanceof BatchGetPartitionRequest) { - return request; - } - throw new IllegalArgumentException("Unsupported request: " + request); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java index 46d2ce5575c9..f1caca016817 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java @@ -77,30 +77,6 @@ public void testThriftMetastore() .shutdown(); } - @Test - public void testGlueV1Metastore() - { - ConnectorFactory factory = getHiveConnectorFactory(); - - factory.create( - 
"test", - ImmutableMap.of( - "hive.metastore", "glue-v1", - "hive.metastore.glue.region", "us-east-2", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - - assertThatThrownBy(() -> factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "glue", - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .hasMessageContaining("Error: Configuration property 'hive.metastore.uri' was not used"); - } - @Test public void testGlueMetastore() { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueHiveMetastoreConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueHiveMetastoreConfig.java deleted file mode 100644 index 38c71002bbe4..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueHiveMetastoreConfig.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.Test; - -import java.util.Map; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; - -public class TestGlueHiveMetastoreConfig -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(GlueHiveMetastoreConfig.class) - .setGlueRegion(null) - .setGlueEndpointUrl(null) - .setGlueStsRegion(null) - .setGlueStsEndpointUrl(null) - .setGlueProxyApiId(null) - .setPinGlueClientToCurrentRegion(false) - .setMaxGlueConnections(30) - .setMaxGlueErrorRetries(10) - .setDefaultWarehouseDir(null) - .setIamRole(null) - .setExternalId(null) - .setAwsAccessKey(null) - .setAwsSecretKey(null) - .setAwsCredentialsProvider(null) - .setCatalogId(null) - .setPartitionSegments(5) - .setGetPartitionThreads(20) - .setAssumeCanonicalPartitionKeys(false) - .setSkipArchive(false) - .setReadStatisticsThreads(5) - .setWriteStatisticsThreads(20)); - } - - @Test - public void testExplicitPropertyMapping() - { - Map properties = ImmutableMap.builder() - .put("hive.metastore.glue.region", "us-east-1") - .put("hive.metastore.glue.endpoint-url", "http://foo.bar") - .put("hive.metastore.glue.sts.region", "us-east-3") - .put("hive.metastore.glue.sts.endpoint", "http://sts.foo.bar") - .put("hive.metastore.glue.proxy-api-id", "abc123") - .put("hive.metastore.glue.pin-client-to-current-region", "true") - .put("hive.metastore.glue.max-connections", "10") - .put("hive.metastore.glue.max-error-retries", "20") - .put("hive.metastore.glue.default-warehouse-dir", "/location") - .put("hive.metastore.glue.iam-role", "role") - .put("hive.metastore.glue.external-id", "external-id") - .put("hive.metastore.glue.aws-access-key", "ABC") - .put("hive.metastore.glue.aws-secret-key", "DEF") - 
.put("hive.metastore.glue.aws-credentials-provider", "custom") - .put("hive.metastore.glue.catalogid", "0123456789") - .put("hive.metastore.glue.partitions-segments", "10") - .put("hive.metastore.glue.get-partition-threads", "42") - .put("hive.metastore.glue.assume-canonical-partition-keys", "true") - .put("hive.metastore.glue.skip-archive", "true") - .put("hive.metastore.glue.read-statistics-threads", "42") - .put("hive.metastore.glue.write-statistics-threads", "43") - .buildOrThrow(); - - GlueHiveMetastoreConfig expected = new GlueHiveMetastoreConfig() - .setGlueRegion("us-east-1") - .setGlueEndpointUrl("http://foo.bar") - .setGlueStsRegion("us-east-3") - .setGlueStsEndpointUrl("http://sts.foo.bar") - .setGlueProxyApiId("abc123") - .setPinGlueClientToCurrentRegion(true) - .setMaxGlueConnections(10) - .setMaxGlueErrorRetries(20) - .setDefaultWarehouseDir("/location") - .setIamRole("role") - .setExternalId("external-id") - .setAwsAccessKey("ABC") - .setAwsSecretKey("DEF") - .setAwsCredentialsProvider("custom") - .setCatalogId("0123456789") - .setPartitionSegments(10) - .setGetPartitionThreads(42) - .setAssumeCanonicalPartitionKeys(true) - .setSkipArchive(true) - .setReadStatisticsThreads(42) - .setWriteStatisticsThreads(43); - - assertFullMapping(properties, expected); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueInputConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueInputConverter.java deleted file mode 100644 index b2c3c42da904..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueInputConverter.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.DatabaseInput; -import com.amazonaws.services.glue.model.PartitionInput; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UserDefinedFunction; -import com.amazonaws.services.glue.model.UserDefinedFunctionInput; -import com.google.common.collect.ImmutableList; -import io.airlift.slice.Slices; -import io.trino.metastore.Column; -import io.trino.metastore.Database; -import io.trino.metastore.HiveBucketProperty; -import io.trino.metastore.Partition; -import io.trino.metastore.Storage; -import io.trino.metastore.Table; -import io.trino.spi.function.LanguageFunction; -import org.junit.jupiter.api.Test; - -import java.util.HexFormat; -import java.util.List; -import java.util.Optional; -import java.util.Random; - -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getTrinoTestDatabase; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getTrinoTestPartition; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getTrinoTestTable; -import static org.assertj.core.api.Assertions.assertThat; - -public class TestGlueInputConverter -{ - private final Database testDb = getTrinoTestDatabase(); - private final Table testTbl = getTrinoTestTable(testDb.getDatabaseName()); - private final Partition testPartition = getTrinoTestPartition(testDb.getDatabaseName(), testTbl.getTableName(), ImmutableList.of("val1")); - - @Test - public void testConvertDatabase() - { - DatabaseInput dbInput = GlueInputConverter.convertDatabase(testDb); - - assertThat(dbInput.getName()).isEqualTo(testDb.getDatabaseName()); - assertThat(dbInput.getDescription()).isEqualTo(testDb.getComment().get()); - assertThat(dbInput.getLocationUri()).isEqualTo(testDb.getLocation().get()); - assertThat(dbInput.getParameters()).isEqualTo(testDb.getParameters()); - } - - @Test - public void testConvertTable() - { - TableInput tblInput = GlueInputConverter.convertTable(testTbl); - - assertThat(tblInput.getName()).isEqualTo(testTbl.getTableName()); - assertThat(tblInput.getOwner()).isEqualTo(testTbl.getOwner().orElse(null)); - assertThat(tblInput.getTableType()).isEqualTo(testTbl.getTableType()); - assertThat(tblInput.getParameters()).isEqualTo(testTbl.getParameters()); - assertColumnList(tblInput.getStorageDescriptor().getColumns(), testTbl.getDataColumns()); - assertColumnList(tblInput.getPartitionKeys(), testTbl.getPartitionColumns()); - assertStorage(tblInput.getStorageDescriptor(), testTbl.getStorage()); - assertThat(tblInput.getViewExpandedText()).isEqualTo(testTbl.getViewExpandedText().get()); - assertThat(tblInput.getViewOriginalText()).isEqualTo(testTbl.getViewOriginalText().get()); - } - - @Test - public void testConvertPartition() - { - PartitionInput partitionInput = GlueInputConverter.convertPartition(testPartition); - - assertThat(partitionInput.getParameters()).isEqualTo(testPartition.getParameters()); - assertStorage(partitionInput.getStorageDescriptor(), testPartition.getStorage()); - assertThat(partitionInput.getValues()).isEqualTo(testPartition.getValues()); - } - - @Test - public void testConvertFunction() - { - // random data to avoid compression, but deterministic for size assertion - String sql = HexFormat.of().formatHex(Slices.random(2000, new Random(0)).getBytes()); - LanguageFunction expected = new LanguageFunction("(integer,bigint,varchar)", sql, 
List.of(), Optional.of("owner")); - - UserDefinedFunctionInput input = GlueInputConverter.convertFunction("test_name", expected); - assertThat(input.getOwnerName()).isEqualTo(expected.owner().orElseThrow()); - - UserDefinedFunction function = new UserDefinedFunction() - .withOwnerName(input.getOwnerName()) - .withResourceUris(input.getResourceUris()); - LanguageFunction actual = GlueToTrinoConverter.convertFunction(function); - - assertThat(input.getResourceUris()).hasSize(3); - assertThat(actual).isEqualTo(expected); - - // verify that the owner comes from the metastore - function.setOwnerName("other"); - actual = GlueToTrinoConverter.convertFunction(function); - assertThat(actual.owner()).isEqualTo(Optional.of("other")); - } - - private static void assertColumnList(List actual, List expected) - { - if (expected == null) { - assertThat(actual).isNull(); - } - assertThat(actual).hasSize(expected.size()); - - for (int i = 0; i < expected.size(); i++) { - assertColumn(actual.get(i), expected.get(i)); - } - } - - private static void assertColumn(com.amazonaws.services.glue.model.Column actual, Column expected) - { - assertThat(actual.getName()).isEqualTo(expected.getName()); - assertThat(actual.getType()).isEqualTo(expected.getType().getHiveTypeName().toString()); - assertThat(actual.getComment()).isEqualTo(expected.getComment().get()); - } - - private static void assertStorage(StorageDescriptor actual, Storage expected) - { - assertThat(actual.getLocation()).isEqualTo(expected.getLocation()); - assertThat(actual.getSerdeInfo().getSerializationLibrary()).isEqualTo(expected.getStorageFormat().getSerde()); - assertThat(actual.getInputFormat()).isEqualTo(expected.getStorageFormat().getInputFormat()); - assertThat(actual.getOutputFormat()).isEqualTo(expected.getStorageFormat().getOutputFormat()); - - if (expected.getBucketProperty().isPresent()) { - HiveBucketProperty bucketProperty = expected.getBucketProperty().get(); - assertThat(actual.getBucketColumns()).isEqualTo(bucketProperty.bucketedBy()); - assertThat(actual.getNumberOfBuckets().intValue()).isEqualTo(bucketProperty.bucketCount()); - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java deleted file mode 100644 index 49356c25abe9..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.Partition; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.Table; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.trino.metastore.Column; -import io.trino.metastore.HiveBucketProperty; -import io.trino.metastore.Storage; -import io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.GluePartitionConverter; -import io.trino.spi.security.PrincipalType; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Optional; - -import static com.amazonaws.util.CollectionUtils.isNullOrEmpty; -import static io.trino.metastore.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getPartitionParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getStorageDescriptor; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableTypeNullable; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestColumn; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestDatabase; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestPartition; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestStorageDescriptor; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestTable; -import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestTrinoMaterializedView; -import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_METHOD; - -@TestInstance(PER_METHOD) -public class TestGlueToTrinoConverter -{ - private static final String PUBLIC_OWNER = "PUBLIC"; - - private Database testDatabase; - private Table testTable; - private Partition testPartition; - - @BeforeEach - public void setup() - { - testDatabase = getGlueTestDatabase(); - testTable = getGlueTestTable(testDatabase.getName()); - testPartition = getGlueTestPartition(testDatabase.getName(), testTable.getName(), ImmutableList.of("val1")); - } - - private static GluePartitionConverter createPartitionConverter(Table table) - { - return new GluePartitionConverter(table.getDatabaseName(), table.getName()); - } - - @Test - public void testConvertDatabase() - { - io.trino.metastore.Database trinoDatabase = GlueToTrinoConverter.convertDatabase(testDatabase); - assertThat(trinoDatabase.getDatabaseName()).isEqualTo(testDatabase.getName()); - assertThat(trinoDatabase.getLocation().get()).isEqualTo(testDatabase.getLocationUri()); - assertThat(trinoDatabase.getComment().get()).isEqualTo(testDatabase.getDescription()); - 
assertThat(trinoDatabase.getParameters()).isEqualTo(testDatabase.getParameters()); - assertThat(trinoDatabase.getOwnerName()).isEqualTo(Optional.of(PUBLIC_OWNER)); - assertThat(trinoDatabase.getOwnerType()).isEqualTo(Optional.of(PrincipalType.ROLE)); - } - - @Test - public void testConvertTable() - { - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getTableName()).isEqualTo(testTable.getName()); - assertThat(trinoTable.getDatabaseName()).isEqualTo(testDatabase.getName()); - assertThat(trinoTable.getTableType()).isEqualTo(getTableTypeNullable(testTable)); - assertThat(trinoTable.getOwner().orElse(null)).isEqualTo(testTable.getOwner()); - assertThat(trinoTable.getParameters()).isEqualTo(getTableParameters(testTable)); - assertColumnList(trinoTable.getDataColumns(), getStorageDescriptor(testTable).orElseThrow().getColumns()); - assertColumnList(trinoTable.getPartitionColumns(), testTable.getPartitionKeys()); - assertStorage(trinoTable.getStorage(), getStorageDescriptor(testTable).orElseThrow()); - assertThat(trinoTable.getViewOriginalText().get()).isEqualTo(testTable.getViewOriginalText()); - assertThat(trinoTable.getViewExpandedText().get()).isEqualTo(testTable.getViewExpandedText()); - } - - @Test - public void testConvertTableWithOpenCSVSerDe() - { - Table glueTable = getGlueTestTable(testDatabase.getName()); - glueTable.setStorageDescriptor(getGlueTestStorageDescriptor( - ImmutableList.of(getGlueTestColumn("int")), - "org.apache.hadoop.hive.serde2.OpenCSVSerde")); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(glueTable, testDatabase.getName()); - - assertThat(trinoTable.getTableName()).isEqualTo(glueTable.getName()); - assertThat(trinoTable.getDatabaseName()).isEqualTo(testDatabase.getName()); - assertThat(trinoTable.getTableType()).isEqualTo(getTableTypeNullable(glueTable)); - assertThat(trinoTable.getOwner().orElse(null)).isEqualTo(glueTable.getOwner()); - assertThat(trinoTable.getParameters()).isEqualTo(getTableParameters(glueTable)); - assertThat(trinoTable.getDataColumns()).hasSize(1); - assertThat(trinoTable.getDataColumns().get(0).getType()).isEqualTo(HIVE_STRING); - - assertColumnList(trinoTable.getPartitionColumns(), glueTable.getPartitionKeys()); - assertStorage(trinoTable.getStorage(), getStorageDescriptor(glueTable).orElseThrow()); - assertThat(trinoTable.getViewOriginalText().get()).isEqualTo(glueTable.getViewOriginalText()); - assertThat(trinoTable.getViewExpandedText().get()).isEqualTo(glueTable.getViewExpandedText()); - } - - @Test - public void testConvertTableWithoutTableType() - { - Table table = getGlueTestTable(testDatabase.getName()); - table.setTableType(null); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(table, testDatabase.getName()); - assertThat(trinoTable.getTableType()).isEqualTo(EXTERNAL_TABLE.name()); - } - - @Test - public void testConvertTableNullPartitions() - { - testTable.setPartitionKeys(null); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getPartitionColumns()).isEmpty(); - } - - @Test - public void testConvertTableUppercaseColumnType() - { - com.amazonaws.services.glue.model.Column uppercaseColumn = getGlueTestColumn().withType("String"); - getStorageDescriptor(testTable).orElseThrow().setColumns(ImmutableList.of(uppercaseColumn)); - GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - } - - @Test - public 
void testConvertPartition() - { - GluePartitionConverter converter = createPartitionConverter(testTable); - io.trino.metastore.Partition trinoPartition = converter.apply(testPartition); - assertThat(trinoPartition.getDatabaseName()).isEqualTo(testPartition.getDatabaseName()); - assertThat(trinoPartition.getTableName()).isEqualTo(testPartition.getTableName()); - assertColumnList(trinoPartition.getColumns(), testPartition.getStorageDescriptor().getColumns()); - assertThat(trinoPartition.getValues()).isEqualTo(testPartition.getValues()); - assertStorage(trinoPartition.getStorage(), testPartition.getStorageDescriptor()); - assertThat(trinoPartition.getParameters()).isEqualTo(getPartitionParameters(testPartition)); - } - - @Test - public void testPartitionConversionMemoization() - { - String fakeS3Location = "s3://some-fake-location"; - testPartition.getStorageDescriptor().setLocation(fakeS3Location); - // Second partition to convert with equal (but not aliased) values - Partition partitionTwo = getGlueTestPartition("" + testDatabase.getName(), "" + testTable.getName(), new ArrayList<>(testPartition.getValues())); - // Ensure storage fields match as well - partitionTwo.getStorageDescriptor().setColumns(new ArrayList<>(testPartition.getStorageDescriptor().getColumns())); - partitionTwo.getStorageDescriptor().setBucketColumns(new ArrayList<>(testPartition.getStorageDescriptor().getBucketColumns())); - partitionTwo.getStorageDescriptor().setLocation("" + fakeS3Location); - partitionTwo.getStorageDescriptor().setInputFormat("" + testPartition.getStorageDescriptor().getInputFormat()); - partitionTwo.getStorageDescriptor().setOutputFormat("" + testPartition.getStorageDescriptor().getOutputFormat()); - partitionTwo.getStorageDescriptor().setParameters(new HashMap<>(testPartition.getStorageDescriptor().getParameters())); - - GluePartitionConverter converter = createPartitionConverter(testTable); - io.trino.metastore.Partition trinoPartition = converter.apply(testPartition); - io.trino.metastore.Partition trinoPartition2 = converter.apply(partitionTwo); - - assertThat(trinoPartition).isNotSameAs(trinoPartition2); - assertThat(trinoPartition2.getDatabaseName()).isSameAs(trinoPartition.getDatabaseName()); - assertThat(trinoPartition2.getTableName()).isSameAs(trinoPartition.getTableName()); - assertThat(trinoPartition2.getColumns()).isSameAs(trinoPartition.getColumns()); - assertThat(trinoPartition2.getParameters()).isSameAs(trinoPartition.getParameters()); - assertThat(trinoPartition2.getValues()).isNotSameAs(trinoPartition.getValues()); - - Storage storage = trinoPartition.getStorage(); - Storage storage2 = trinoPartition2.getStorage(); - - assertThat(storage2.getStorageFormat()).isSameAs(storage.getStorageFormat()); - assertThat(storage2.getBucketProperty()).isSameAs(storage.getBucketProperty()); - assertThat(storage2.getSerdeParameters()).isSameAs(storage.getSerdeParameters()); - assertThat(storage2.getLocation()).isNotSameAs(storage.getLocation()); - } - - @Test - public void testDatabaseNullParameters() - { - testDatabase.setParameters(null); - assertThat(GlueToTrinoConverter.convertDatabase(testDatabase).getParameters()).isNotNull(); - } - - @Test - public void testTableNullParameters() - { - testTable.setParameters(null); - getStorageDescriptor(testTable).orElseThrow().getSerdeInfo().setParameters(null); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getParameters()).isNotNull(); - 
assertThat(trinoTable.getStorage().getSerdeParameters()).isNotNull(); - } - - @Test - public void testIcebergTableNullStorageDescriptor() - { - testTable.setParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE)); - testTable.setStorageDescriptor(null); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getDataColumns()).hasSize(1); - } - - @Test - public void testIcebergTableNonNullStorageDescriptor() - { - testTable.setParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE)); - assertThat(getStorageDescriptor(testTable)).isPresent(); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getDataColumns()).hasSize(1); - } - - @Test - public void testDeltaTableNullStorageDescriptor() - { - testTable.setParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER)); - testTable.setStorageDescriptor(null); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getDataColumns()).hasSize(1); - } - - @Test - public void testDeltaTableNonNullStorageDescriptor() - { - testTable.setParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER)); - assertThat(getStorageDescriptor(testTable)).isPresent(); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); - assertThat(trinoTable.getDataColumns()).hasSize(1); - } - - @Test - public void testIcebergMaterializedViewNullStorageDescriptor() - { - Table testMaterializedView = getGlueTestTrinoMaterializedView(testDatabase.getName()); - assertThat(getStorageDescriptor(testMaterializedView)).isEmpty(); - io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testMaterializedView, testDatabase.getName()); - assertThat(trinoTable.getDataColumns()).hasSize(1); - } - - @Test - public void testPartitionNullParameters() - { - testPartition.setParameters(null); - assertThat(createPartitionConverter(testTable).apply(testPartition).getParameters()).isNotNull(); - } - - private static void assertColumnList(List actual, List expected) - { - if (expected == null) { - assertThat(actual).isNull(); - } - assertThat(actual).hasSize(expected.size()); - - for (int i = 0; i < expected.size(); i++) { - assertColumn(actual.get(i), expected.get(i)); - } - } - - private static void assertColumn(Column actual, com.amazonaws.services.glue.model.Column expected) - { - assertThat(actual.getName()).isEqualTo(expected.getName()); - assertThat(actual.getType().getHiveTypeName().toString()).isEqualTo(expected.getType()); - assertThat(actual.getComment().get()).isEqualTo(expected.getComment()); - } - - private static void assertStorage(Storage actual, StorageDescriptor expected) - { - assertThat(actual.getLocation()).isEqualTo(expected.getLocation()); - assertThat(actual.getStorageFormat().getSerde()).isEqualTo(expected.getSerdeInfo().getSerializationLibrary()); - assertThat(actual.getStorageFormat().getInputFormat()).isEqualTo(expected.getInputFormat()); - assertThat(actual.getStorageFormat().getOutputFormat()).isEqualTo(expected.getOutputFormat()); - if (!isNullOrEmpty(expected.getBucketColumns())) { - HiveBucketProperty bucketProperty = actual.getBucketProperty().get(); - assertThat(bucketProperty.bucketedBy()).isEqualTo(expected.getBucketColumns()); - 
assertThat(bucketProperty.bucketCount()).isEqualTo(expected.getNumberOfBuckets().intValue()); - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestingMetastoreObjects.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestingMetastoreObjects.java deleted file mode 100644 index 918eb1a40580..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestingMetastoreObjects.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue.v1; - -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.Partition; -import com.amazonaws.services.glue.model.SerDeInfo; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.Table; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.trino.metastore.HiveType; -import io.trino.metastore.Storage; -import io.trino.metastore.StorageFormat; -import io.trino.plugin.hive.TableType; -import io.trino.spi.security.PrincipalType; - -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.ThreadLocalRandom; -import java.util.function.Consumer; - -import static io.trino.metastore.Table.TABLE_COMMENT; -import static io.trino.metastore.TableInfo.ICEBERG_MATERIALIZED_VIEW_COMMENT; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG; -import static java.lang.String.format; - -public final class TestingMetastoreObjects -{ - private TestingMetastoreObjects() {} - - // --------------- Glue Objects --------------- - - public static Database getGlueTestDatabase() - { - return new Database() - .withName("test-db" + generateRandom()) - .withDescription("database desc") - .withLocationUri("/db") - .withParameters(ImmutableMap.of()); - } - - public static Table getGlueTestTable(String dbName) - { - return new Table() - .withDatabaseName(dbName) - .withName("test-tbl" + generateRandom()) - .withOwner("owner") - .withParameters(ImmutableMap.of()) - .withPartitionKeys(ImmutableList.of(getGlueTestColumn())) - .withStorageDescriptor(getGlueTestStorageDescriptor()) - .withTableType(EXTERNAL_TABLE.name()) - .withViewOriginalText("originalText") - .withViewExpandedText("expandedText"); - } - - public static Table getGlueTestTrinoMaterializedView(String dbName) - { - return new Table() - .withDatabaseName(dbName) - .withName("test-mv" + generateRandom()) - .withOwner("owner") - .withParameters(ImmutableMap.of(PRESTO_VIEW_FLAG, "true", TABLE_COMMENT, ICEBERG_MATERIALIZED_VIEW_COMMENT)) - .withPartitionKeys() - .withStorageDescriptor(null) - .withTableType(TableType.VIRTUAL_VIEW.name()) - .withViewOriginalText("/* %s: base64encodedquery */".formatted(ICEBERG_MATERIALIZED_VIEW_COMMENT)) - 
.withViewExpandedText(ICEBERG_MATERIALIZED_VIEW_COMMENT); - } - - public static Column getGlueTestColumn() - { - return getGlueTestColumn("string"); - } - - public static Column getGlueTestColumn(String type) - { - return new Column() - .withName("test-col" + generateRandom()) - .withType(type) - .withComment("column comment"); - } - - public static StorageDescriptor getGlueTestStorageDescriptor() - { - return getGlueTestStorageDescriptor(ImmutableList.of(getGlueTestColumn()), "SerdeLib"); - } - - public static StorageDescriptor getGlueTestStorageDescriptor(List columns, String serde) - { - return new StorageDescriptor() - .withBucketColumns(ImmutableList.of("test-bucket-col")) - .withColumns(columns) - .withParameters(ImmutableMap.of()) - .withSerdeInfo(new SerDeInfo() - .withSerializationLibrary(serde) - .withParameters(ImmutableMap.of())) - .withInputFormat("InputFormat") - .withOutputFormat("OutputFormat") - .withLocation("/test-tbl") - .withNumberOfBuckets(1); - } - - public static Partition getGlueTestPartition(String dbName, String tblName, List values) - { - return new Partition() - .withDatabaseName(dbName) - .withTableName(tblName) - .withValues(values) - .withParameters(ImmutableMap.of()) - .withStorageDescriptor(getGlueTestStorageDescriptor()); - } - - // --------------- Trino Objects --------------- - - public static io.trino.metastore.Database getTrinoTestDatabase() - { - return io.trino.metastore.Database.builder() - .setDatabaseName("test-db" + generateRandom()) - .setComment(Optional.of("database desc")) - .setLocation(Optional.of("/db")) - .setParameters(ImmutableMap.of()) - .setOwnerName(Optional.of("PUBLIC")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - } - - public static io.trino.metastore.Table getTrinoTestTable(String dbName) - { - return io.trino.metastore.Table.builder() - .setDatabaseName(dbName) - .setTableName("test-tbl" + generateRandom()) - .setOwner(Optional.of("owner")) - .setParameters(ImmutableMap.of()) - .setTableType(TableType.EXTERNAL_TABLE.name()) - .setDataColumns(ImmutableList.of(getTrinoTestColumn())) - .setPartitionColumns(ImmutableList.of(getTrinoTestColumn())) - .setViewOriginalText(Optional.of("originalText")) - .setViewExpandedText(Optional.of("expandedText")) - .withStorage(STORAGE_CONSUMER).build(); - } - - public static io.trino.metastore.Partition getTrinoTestPartition(String dbName, String tblName, List values) - { - return io.trino.metastore.Partition.builder() - .setDatabaseName(dbName) - .setTableName(tblName) - .setValues(values) - .setColumns(ImmutableList.of(getTrinoTestColumn())) - .setParameters(ImmutableMap.of()) - .withStorage(STORAGE_CONSUMER).build(); - } - - public static io.trino.metastore.Column getTrinoTestColumn() - { - return new io.trino.metastore.Column("test-col" + generateRandom(), HiveType.HIVE_STRING, Optional.of("column comment"), Map.of()); - } - - private static final Consumer STORAGE_CONSUMER = storage -> - storage.setStorageFormat(StorageFormat.create("SerdeLib", "InputFormat", "OutputFormat")) - .setLocation("/test-tbl") - .setBucketProperty(Optional.empty()) - .setSerdeParameters(ImmutableMap.of()); - - private static String generateRandom() - { - return format("%04x", ThreadLocalRandom.current().nextInt()); - } -} diff --git a/plugin/trino-iceberg/pom.xml b/plugin/trino-iceberg/pom.xml index 776bb604ab5c..e791bc8f4f4a 100644 --- a/plugin/trino-iceberg/pom.xml +++ b/plugin/trino-iceberg/pom.xml @@ -29,16 +29,6 @@ - - com.amazonaws - aws-java-sdk-core - - - - com.amazonaws - 
aws-java-sdk-glue - - com.fasterxml.jackson.core jackson-core @@ -297,6 +287,21 @@ jmxutils + + software.amazon.awssdk + aws-core + + + + software.amazon.awssdk + glue + + + + software.amazon.awssdk + sdk-core + + com.fasterxml.jackson.core jackson-annotations @@ -339,12 +344,6 @@ provided - - com.amazonaws - aws-java-sdk-s3 - runtime - - io.airlift http-client @@ -419,12 +418,6 @@ runtime - - software.amazon.awssdk - aws-core - runtime - - software.amazon.awssdk regions @@ -437,12 +430,6 @@ runtime - - software.amazon.awssdk - sdk-core - runtime - - software.amazon.awssdk sts @@ -706,6 +693,7 @@ org.apache.parquet:parquet-common + software.amazon.awssdk:aws-core diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java index 545c05459c55..735aea3917a5 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java @@ -71,7 +71,6 @@ import static io.trino.metastore.TableInfo.ICEBERG_MATERIALIZED_VIEW_COMMENT; import static io.trino.plugin.hive.HiveMetadata.STORAGE_TABLE; import static io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.mappedCopy; import static io.trino.plugin.hive.util.HiveUtil.escapeTableName; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; @@ -356,9 +355,8 @@ protected SchemaTableName createMaterializedViewStorageTable( private List columnsForMaterializedView(ConnectorMaterializedViewDefinition definition, Map materializedViewProperties) { - Schema schemaWithTimestampTzPreserved = schemaFromMetadata(mappedCopy( - definition.getColumns(), - column -> { + Schema schemaWithTimestampTzPreserved = schemaFromMetadata(definition.getColumns().stream() + .map(column -> { Type type = typeManager.getType(column.getType()); if (type instanceof TimestampWithTimeZoneType timestampTzType && timestampTzType.getPrecision() <= 6) { // For now preserve timestamptz columns so that we can parse partitioning @@ -368,7 +366,8 @@ private List columnsForMaterializedView(ConnectorMaterializedVie type = typeForMaterializedViewStorageTable(type); } return new ColumnMetadata(column.getName(), type); - })); + }) + .collect(toImmutableList())); PartitionSpec partitionSpec = parsePartitionFields(schemaWithTimestampTzPreserved, getPartitioning(materializedViewProperties)); Set temporalPartitioningSources = partitionSpec.fields().stream() .flatMap(partitionField -> { @@ -382,9 +381,8 @@ private List columnsForMaterializedView(ConnectorMaterializedVie }) .collect(toImmutableSet()); - return mappedCopy( - definition.getColumns(), - column -> { + return definition.getColumns().stream() + .map(column -> { Type type = typeManager.getType(column.getType()); if (type instanceof TimestampWithTimeZoneType timestampTzType && timestampTzType.getPrecision() <= 6 && temporalPartitioningSources.contains(column.getName())) { // Apply point-in-time semantics to maintain partitioning capabilities @@ -394,7 +392,8 @@ private List columnsForMaterializedView(ConnectorMaterializedVie type = typeForMaterializedViewStorageTable(type); } return new ColumnMetadata(column.getName(), type); - }); + }) + .collect(toImmutableList()); } /** diff --git 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java index ec88e7f0618f..a66f46fca5e6 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java @@ -13,18 +13,6 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.AWSGlueException; -import com.amazonaws.services.glue.model.AlreadyExistsException; -import com.amazonaws.services.glue.model.ConcurrentModificationException; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.InvalidInputException; -import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.Table; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateTableRequest; import com.google.common.collect.ImmutableMap; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; import io.trino.plugin.iceberg.UnknownTableTypeException; @@ -39,6 +27,16 @@ import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.io.FileIO; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.AlreadyExistsException; +import software.amazon.awssdk.services.glue.model.ConcurrentModificationException; +import software.amazon.awssdk.services.glue.model.EntityNotFoundException; +import software.amazon.awssdk.services.glue.model.GlueException; +import software.amazon.awssdk.services.glue.model.InvalidInputException; +import software.amazon.awssdk.services.glue.model.ResourceNumberLimitExceededException; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; +import software.amazon.awssdk.services.glue.model.TableInput; import java.util.HashMap; import java.util.Map; @@ -48,9 +46,7 @@ import static com.google.common.base.Verify.verify; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getStorageDescriptor; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableType; +import static io.trino.plugin.hive.metastore.glue.GlueConverter.getTableType; import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_COMMIT_ERROR; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; @@ -68,7 +64,7 @@ public class GlueIcebergTableOperations { private final TypeManager typeManager; private final boolean cacheTableMetadata; - private final AWSGlueAsync glueClient; + private final GlueClient glueClient; private final GlueMetastoreStats stats; private final GetGlueTable getGlueTable; @@ -78,7 +74,7 @@ public class GlueIcebergTableOperations 
protected GlueIcebergTableOperations( TypeManager typeManager, boolean cacheTableMetadata, - AWSGlueAsync glueClient, + GlueClient glueClient, GlueMetastoreStats stats, GetGlueTable getGlueTable, FileIO fileIo, @@ -108,10 +104,10 @@ protected String getRefreshedLocation(boolean invalidateCaches) else { table = getTable(database, tableName, invalidateCaches); } - glueVersionId = table.getVersionId(); + glueVersionId = table.versionId(); String tableType = getTableType(table); - Map parameters = getTableParameters(table); + Map parameters = table.parameters(); if (!isMaterializedViewStorageTable && (isTrinoView(tableType, parameters) || isTrinoMaterializedView(tableType, parameters))) { // this is a Hive view or Trino/Presto view, or Trino materialized view, hence not a table // TODO table operations should not be constructed for views (remove exception-driven code path) @@ -135,13 +131,12 @@ protected void commitNewTable(TableMetadata metadata) String newMetadataLocation = writeNewMetadata(metadata, 0); TableInput tableInput = getTableInput(typeManager, tableName, owner, metadata, metadata.location(), newMetadataLocation, ImmutableMap.of(), cacheTableMetadata); - CreateTableRequest createTableRequest = new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput); try { - stats.getCreateTable().call(() -> glueClient.createTable(createTableRequest)); + stats.getCreateTable().call(() -> glueClient.createTable(x -> x + .databaseName(database) + .tableInput(tableInput))); } - catch (AWSGlueException e) { + catch (GlueException e) { switch (e) { case AlreadyExistsException _, EntityNotFoundException _, @@ -168,7 +163,7 @@ protected void commitToExistingTable(TableMetadata base, TableMetadata metadata) tableName, owner, metadata, - getStorageDescriptor(table).map(StorageDescriptor::getLocation).orElse(null), + Optional.ofNullable(table.storageDescriptor()).map(StorageDescriptor::location).orElse(null), newMetadataLocation, ImmutableMap.of(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation), cacheTableMetadata)); @@ -181,14 +176,14 @@ protected void commitMaterializedViewRefresh(TableMetadata base, TableMetadata m getTable(database, tableNameFrom(tableName), false), metadata, (table, newMetadataLocation) -> { - Map parameters = new HashMap<>(getTableParameters(table)); + Map parameters = new HashMap<>(table.parameters()); parameters.put(METADATA_LOCATION_PROP, newMetadataLocation); parameters.put(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation); return getMaterializedViewTableInput( - table.getName(), - table.getViewOriginalText(), - table.getOwner(), + table.name(), + table.viewOriginalText(), + table.owner(), parameters); }); } @@ -198,12 +193,11 @@ private void commitTableUpdate(Table table, TableMetadata metadata, BiFunction glueClient.updateTable(updateTableRequest)); + stats.getUpdateTable().call(() -> glueClient.updateTable(x -> x + .databaseName(database) + .tableInput(tableInput) + .versionId(glueVersionId))); } catch (ConcurrentModificationException e) { // CommitFailedException is handled as a special case in the Iceberg library. 
This commit will automatically retry diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java index 4b54259edf7a..62310372a5a0 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java @@ -13,7 +13,6 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; import com.google.inject.Inject; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; @@ -23,6 +22,7 @@ import io.trino.plugin.iceberg.fileio.ForwardingFileIo; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.type.TypeManager; +import software.amazon.awssdk.services.glue.GlueClient; import java.util.Optional; @@ -34,7 +34,7 @@ public class GlueIcebergTableOperationsProvider private final TypeManager typeManager; private final boolean cacheTableMetadata; private final TrinoFileSystemFactory fileSystemFactory; - private final AWSGlueAsync glueClient; + private final GlueClient glueClient; private final GlueMetastoreStats stats; @Inject @@ -43,7 +43,7 @@ public GlueIcebergTableOperationsProvider( IcebergGlueCatalogConfig catalogConfig, TrinoFileSystemFactory fileSystemFactory, GlueMetastoreStats stats, - AWSGlueAsync glueClient) + GlueClient glueClient) { this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.cacheTableMetadata = catalogConfig.isCacheTableMetadata(); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java index e95cb57ee231..21982b331060 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java @@ -13,9 +13,6 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.TableInput; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.trino.plugin.iceberg.TypeConverter; @@ -24,6 +21,9 @@ import org.apache.iceberg.TableMetadata; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.TableInput; import java.util.HashMap; import java.util.List; @@ -79,20 +79,22 @@ public static TableInput getTableInput( parameters.put(METADATA_LOCATION_PROP, newMetadataLocation); parameters.remove(TRINO_TABLE_METADATA_INFO_VALID_FOR); // no longer valid - StorageDescriptor storageDescriptor = new StorageDescriptor().withLocation(tableLocation); + StorageDescriptor.Builder storageDescriptor = StorageDescriptor.builder() + .location(tableLocation); - TableInput tableInput = new TableInput() - .withName(tableName) - .withOwner(owner.orElse(null)) + TableInput.Builder tableInput = TableInput.builder() + .name(tableName) + .owner(owner.orElse(null)) // Iceberg does not 
distinguish managed and external tables, all tables are treated the same and marked as EXTERNAL - .withTableType(EXTERNAL_TABLE.name()) - .withStorageDescriptor(storageDescriptor); + .tableType(EXTERNAL_TABLE.name()) + .storageDescriptor(storageDescriptor.build()); if (cacheTableMetadata) { // Store table metadata sufficient to answer information_schema.columns and system.metadata.table_comments queries, which are often queried in bulk by e.g. BI tools Optional> glueColumns = glueColumns(typeManager, metadata); - glueColumns.ifPresent(columns -> tableInput.withStorageDescriptor(storageDescriptor.withColumns(columns))); + glueColumns.ifPresent(columns -> tableInput.storageDescriptor( + storageDescriptor.columns(columns).build())); String comment = metadata.properties().get(TABLE_COMMENT); if (comment != null) { @@ -112,9 +114,9 @@ public static TableInput getTableInput( parameters.put(TRINO_TABLE_METADATA_INFO_VALID_FOR, newMetadataLocation); } - tableInput.withParameters(parameters); + tableInput.parameters(parameters); - return tableInput; + return tableInput.build(); } private static Optional> glueColumns(TypeManager typeManager, TableMetadata metadata) @@ -131,10 +133,10 @@ private static Optional> glueColumns(TypeManager typeManager, Table return Optional.empty(); } String trinoTypeId = TypeConverter.toTrinoType(icebergColumn.type(), typeManager).getTypeId().getId(); - Column column = new Column() - .withName(icebergColumn.name()) - .withType(glueTypeString) - .withComment(icebergColumn.doc()); + Column.Builder column = Column.builder() + .name(icebergColumn.name()) + .type(glueTypeString) + .comment(icebergColumn.doc()); ImmutableMap.Builder parameters = ImmutableMap.builder(); if (icebergColumn.isRequired()) { @@ -147,8 +149,8 @@ private static Optional> glueColumns(TypeManager typeManager, Table // Store type parameter for some (first) column so that we can later detect whether column parameters weren't erased by something. 
parameters.put(COLUMN_TRINO_TYPE_ID_PROPERTY, trinoTypeId); } - column.setParameters(parameters.buildOrThrow()); - glueColumns.add(column); + column.parameters(parameters.buildOrThrow()); + glueColumns.add(column.build()); firstColumn = false; } @@ -205,23 +207,25 @@ private static String toGlueTypeStringLossy(Type type) public static TableInput getViewTableInput(String viewName, String viewOriginalText, @Nullable String owner, Map parameters) { - return new TableInput() - .withName(viewName) - .withTableType(VIRTUAL_VIEW.name()) - .withViewOriginalText(viewOriginalText) - .withViewExpandedText(PRESTO_VIEW_EXPANDED_TEXT_MARKER) - .withOwner(owner) - .withParameters(parameters); + return TableInput.builder() + .name(viewName) + .tableType(VIRTUAL_VIEW.name()) + .viewOriginalText(viewOriginalText) + .viewExpandedText(PRESTO_VIEW_EXPANDED_TEXT_MARKER) + .owner(owner) + .parameters(parameters) + .build(); } public static TableInput getMaterializedViewTableInput(String viewName, String viewOriginalText, String owner, Map parameters) { - return new TableInput() - .withName(viewName) - .withTableType(VIRTUAL_VIEW.name()) - .withViewOriginalText(viewOriginalText) - .withViewExpandedText(ICEBERG_MATERIALIZED_VIEW_COMMENT) - .withOwner(owner) - .withParameters(parameters); + return TableInput.builder() + .name(viewName) + .tableType(VIRTUAL_VIEW.name()) + .viewOriginalText(viewOriginalText) + .viewExpandedText(ICEBERG_MATERIALIZED_VIEW_COMMENT) + .owner(owner) + .parameters(parameters) + .build(); } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/IcebergGlueCatalogModule.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/IcebergGlueCatalogModule.java index 3aeef110d388..7d4dbf1c5d0d 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/IcebergGlueCatalogModule.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/IcebergGlueCatalogModule.java @@ -13,25 +13,27 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.glue.model.Table; import com.google.inject.Binder; import com.google.inject.Key; import com.google.inject.Scopes; import com.google.inject.TypeLiteral; import com.google.inject.multibindings.Multibinder; import io.airlift.configuration.AbstractConfigurationAwareModule; +import io.airlift.units.Duration; +import io.trino.metastore.cache.CachingHiveMetastoreConfig; import io.trino.plugin.hive.HideDeltaLakeTables; +import io.trino.plugin.hive.metastore.CachingHiveMetastoreModule; +import io.trino.plugin.hive.metastore.glue.ForGlueHiveMetastore; +import io.trino.plugin.hive.metastore.glue.GlueHiveMetastoreConfig; +import io.trino.plugin.hive.metastore.glue.GlueMetastoreModule; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.hive.metastore.glue.v1.ForGlueHiveMetastore; -import io.trino.plugin.hive.metastore.glue.v1.GlueCredentialsProvider; -import io.trino.plugin.hive.metastore.glue.v1.GlueHiveMetastoreConfig; -import io.trino.plugin.hive.metastore.glue.v1.GlueMetastoreModule; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalogFactory; import io.trino.plugin.iceberg.procedure.MigrateProcedure; import io.trino.spi.procedure.Procedure; +import software.amazon.awssdk.services.glue.model.Table; +import java.util.concurrent.TimeUnit; import java.util.function.Predicate; import static 
com.google.inject.multibindings.Multibinder.newSetBinder; @@ -45,16 +47,20 @@ public class IcebergGlueCatalogModule @Override protected void setup(Binder binder) { - configBinder(binder).bindConfig(GlueHiveMetastoreConfig.class); configBinder(binder).bindConfigDefaults(GlueHiveMetastoreConfig.class, config -> config.setSkipArchive(true)); configBinder(binder).bindConfig(IcebergGlueCatalogConfig.class); binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON); newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName(); - binder.bind(AWSCredentialsProvider.class).toProvider(GlueCredentialsProvider.class).in(Scopes.SINGLETON); binder.bind(IcebergTableOperationsProvider.class).to(GlueIcebergTableOperationsProvider.class).in(Scopes.SINGLETON); binder.bind(TrinoCatalogFactory.class).to(TrinoGlueCatalogFactory.class).in(Scopes.SINGLETON); newExporter(binder).export(TrinoCatalogFactory.class).withGeneratedName(); + install(new CachingHiveMetastoreModule()); + configBinder(binder).bindConfigDefaults(CachingHiveMetastoreConfig.class, config -> { + // ensure caching metastore wrapper isn't created, as it's not leveraged by Iceberg + config.setStatsCacheTtl(new Duration(0, TimeUnit.SECONDS)); + }); + // Required to inject HiveMetastoreFactory for migrate procedure binder.bind(Key.get(boolean.class, HideDeltaLakeTables.class)).toInstance(false); newOptionalBinder(binder, Key.get(new TypeLiteral>() {}, ForGlueHiveMetastore.class)) diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java index 3d7a55d4e6e1..c5673e4a92e3 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java @@ -13,27 +13,6 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.AccessDeniedException; -import com.amazonaws.services.glue.model.AlreadyExistsException; -import com.amazonaws.services.glue.model.Column; -import com.amazonaws.services.glue.model.CreateDatabaseRequest; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.DatabaseInput; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetDatabasesResult; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.GetTablesRequest; -import com.amazonaws.services.glue.model.GetTablesResult; -import com.amazonaws.services.glue.model.StorageDescriptor; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateTableRequest; import com.google.common.cache.Cache; import com.google.common.collect.AbstractIterator; import com.google.common.collect.ImmutableList; @@ -88,6 +67,19 @@ import org.apache.iceberg.Transaction; import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.FileIO; +import 
software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.AccessDeniedException; +import software.amazon.awssdk.services.glue.model.AlreadyExistsException; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.DatabaseInput; +import software.amazon.awssdk.services.glue.model.EntityNotFoundException; +import software.amazon.awssdk.services.glue.model.GetDatabasesResponse; +import software.amazon.awssdk.services.glue.model.GetTablesResponse; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; +import software.amazon.awssdk.services.glue.model.TableInput; import java.io.IOException; import java.time.Duration; @@ -128,12 +120,8 @@ import static io.trino.plugin.hive.ViewReaderUtil.encodeViewData; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView; -import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getColumnParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getStorageDescriptor; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableType; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableTypeNullable; +import static io.trino.plugin.hive.metastore.glue.GlueConverter.getTableType; +import static io.trino.plugin.hive.metastore.glue.GlueConverter.getTableTypeNullable; import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; @@ -182,13 +170,13 @@ public class TrinoGlueCatalog private final boolean cacheTableMetadata; private final TrinoFileSystemFactory fileSystemFactory; private final Optional defaultSchemaLocation; - private final AWSGlueAsync glueClient; + private final GlueClient glueClient; private final GlueMetastoreStats stats; private final boolean hideMaterializedViewStorageTable; private final boolean isUsingSystemSecurity; private final Executor metadataFetchingExecutor; - private final Cache glueTableCache = EvictableCacheBuilder.newBuilder() + private final Cache glueTableCache = EvictableCacheBuilder.newBuilder() // Even though this is query-scoped, this still needs to be bounded. information_schema queries can access large number of tables. 
.maximumSize(Math.max(PER_QUERY_CACHES_SIZE, IcebergMetadata.GET_METADATA_BATCH_SIZE)) .build(); @@ -210,7 +198,7 @@ public TrinoGlueCatalog( boolean cacheTableMetadata, IcebergTableOperationsProvider tableOperationsProvider, String trinoVersion, - AWSGlueAsync glueClient, + GlueClient glueClient, GlueMetastoreStats stats, boolean isUsingSystemSecurity, Optional defaultSchemaLocation, @@ -240,13 +228,13 @@ public boolean namespaceExists(ConnectorSession session, String namespace) } return stats.getGetDatabase().call(() -> { try { - glueClient.getDatabase(new GetDatabaseRequest().withName(namespace)); + glueClient.getDatabase(x -> x.name(namespace)); return true; } catch (EntityNotFoundException e) { return false; } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } }); @@ -256,18 +244,14 @@ public boolean namespaceExists(ConnectorSession session, String namespace) public List listNamespaces(ConnectorSession session) { try { - return getPaginatedResults( - glueClient::getDatabases, - new GetDatabasesRequest(), - GetDatabasesRequest::setNextToken, - GetDatabasesResult::getNextToken, - stats.getGetDatabases()) - .map(GetDatabasesResult::getDatabaseList) - .flatMap(List::stream) - .map(com.amazonaws.services.glue.model.Database::getName) - .collect(toImmutableList()); - } - catch (AmazonServiceException e) { + return stats.getGetDatabases().call(() -> + glueClient.getDatabasesPaginator(_ -> {}).stream() + .map(GetDatabasesResponse::databaseList) + .flatMap(List::stream) + .map(Database::name) + .collect(toImmutableList())); + } + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } @@ -286,12 +270,12 @@ public void dropNamespace(ConnectorSession session, String namespace) try { glueTableCache.invalidateAll(); stats.getDeleteDatabase().call(() -> - glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(namespace))); + glueClient.deleteDatabase(x -> x.name(namespace))); } catch (EntityNotFoundException e) { throw new SchemaNotFoundException(namespace, e); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } @@ -300,22 +284,21 @@ public void dropNamespace(ConnectorSession session, String namespace) public Map loadNamespaceMetadata(ConnectorSession session, String namespace) { try { - GetDatabaseRequest getDatabaseRequest = new GetDatabaseRequest().withName(namespace); Database database = stats.getGetDatabase().call(() -> - glueClient.getDatabase(getDatabaseRequest).getDatabase()); + glueClient.getDatabase(x -> x.name(namespace)).database()); ImmutableMap.Builder metadata = ImmutableMap.builder(); - if (database.getLocationUri() != null) { - metadata.put(LOCATION_PROPERTY, database.getLocationUri()); + if (database.locationUri() != null) { + metadata.put(LOCATION_PROPERTY, database.locationUri()); } - if (database.getParameters() != null) { - metadata.putAll(database.getParameters()); + if (database.parameters() != null) { + metadata.putAll(database.parameters()); } return metadata.buildOrThrow(); } catch (EntityNotFoundException e) { throw new SchemaNotFoundException(namespace, e); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } @@ -334,28 +317,28 @@ public void createNamespace(ConnectorSession session, String namespace, Map - glueClient.createDatabase(new CreateDatabaseRequest() - .withDatabaseInput(createDatabaseInput(namespace, properties)))); + 
glueClient.createDatabase(x -> x + .databaseInput(createDatabaseInput(namespace, properties)))); } catch (AlreadyExistsException e) { throw new SchemaAlreadyExistsException(namespace, e); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } - private DatabaseInput createDatabaseInput(String namespace, Map properties) + private static DatabaseInput createDatabaseInput(String namespace, Map properties) { - DatabaseInput databaseInput = new DatabaseInput().withName(namespace); + DatabaseInput.Builder databaseInput = DatabaseInput.builder().name(namespace); properties.forEach((property, value) -> { switch (property) { - case LOCATION_PROPERTY -> databaseInput.setLocationUri((String) value); + case LOCATION_PROPERTY -> databaseInput.locationUri((String) value); default -> throw new IllegalArgumentException("Unrecognized property: " + property); } }); - return databaseInput; + return databaseInput.build(); } @Override @@ -379,7 +362,7 @@ public List listTables(ConnectorSession session, Optional nam @Override public List listIcebergTables(ConnectorSession session, Optional namespace) { - return listTables(session, namespace, table -> isIcebergTable(getTableParameters(table))).stream() + return listTables(session, namespace, table -> isIcebergTable(table.parameters())).stream() .map(TableInfo::tableName) .collect(toImmutableList()); } @@ -387,7 +370,7 @@ public List listIcebergTables(ConnectorSession session, Optiona private List listTables( ConnectorSession session, Optional namespace, - Predicate tablePredicate) + Predicate
tablePredicate) { List>> tasks = listNamespaces(session, namespace).stream() .map(glueNamespace -> (Callable>) () -> getGlueTablesWithExceptionHandling(glueNamespace) @@ -405,11 +388,11 @@ private List listTables( } } - private TableInfo mapToTableInfo(String glueNamespace, com.amazonaws.services.glue.model.Table table) + private TableInfo mapToTableInfo(String glueNamespace, Table table) { return new TableInfo( - new SchemaTableName(glueNamespace, table.getName()), - TableInfo.ExtendedRelationType.fromTableTypeAndComment(getTableType(table), getTableParameters(table).get(TABLE_COMMENT))); + new SchemaTableName(glueNamespace, table.name()), + TableInfo.ExtendedRelationType.fromTableTypeAndComment(getTableType(table), table.parameters().get(TABLE_COMMENT))); } @Override @@ -421,22 +404,22 @@ public Optional> streamRelationColumns( { ImmutableList.Builder unfilteredResult = ImmutableList.builder(); ImmutableList.Builder filteredResult = ImmutableList.builder(); - Map unprocessed = new HashMap<>(); + Map unprocessed = new HashMap<>(); listNamespaces(session, namespace).stream() .flatMap(glueNamespace -> getGlueTables(glueNamespace) - .map(table -> entry(new SchemaTableName(glueNamespace, table.getName()), table))) + .map(table -> entry(new SchemaTableName(glueNamespace, table.name()), table))) .forEach(entry -> { SchemaTableName name = entry.getKey(); - com.amazonaws.services.glue.model.Table table = entry.getValue(); + Table table = entry.getValue(); String tableType = getTableType(table); - Map tableParameters = getTableParameters(table); + Map tableParameters = table.parameters(); if (isTrinoMaterializedView(tableType, tableParameters)) { - IcebergMaterializedViewDefinition definition = decodeMaterializedViewData(table.getViewOriginalText()); + IcebergMaterializedViewDefinition definition = decodeMaterializedViewData(table.viewOriginalText()); unfilteredResult.add(RelationColumnsMetadata.forMaterializedView(name, toSpiMaterializedViewColumns(definition.columns()))); } else if (isTrinoView(tableType, tableParameters)) { - ConnectorViewDefinition definition = ViewReaderUtil.PrestoViewReader.decodeViewData(table.getViewOriginalText()); + ConnectorViewDefinition definition = ViewReaderUtil.PrestoViewReader.decodeViewData(table.viewOriginalText()); unfilteredResult.add(RelationColumnsMetadata.forView(name, definition.getColumns())); } else if (isRedirected.test(name)) { @@ -478,12 +461,12 @@ else if (!isIcebergTable(tableParameters)) { private void getColumnsFromIcebergMetadata( ConnectorSession session, - Map glueTables, // only Iceberg tables + Map glueTables, // only Iceberg tables UnaryOperator> relationFilter, Consumer resultsCollector) { for (SchemaTableName tableName : relationFilter.apply(glueTables.keySet())) { - com.amazonaws.services.glue.model.Table table = glueTables.get(tableName); + Table table = glueTables.get(tableName); // potentially racy with invalidation, but TrinoGlueCatalog is session-scoped uncheckedCacheGet(glueTableCache, tableName, () -> table); List columns; @@ -513,22 +496,22 @@ public Optional> streamRelationComments( ImmutableList.Builder unfilteredResult = ImmutableList.builder(); ImmutableList.Builder filteredResult = ImmutableList.builder(); - Map unprocessed = new HashMap<>(); + Map unprocessed = new HashMap<>(); listNamespaces(session, namespace).stream() .flatMap(glueNamespace -> getGlueTables(glueNamespace) - .map(table -> entry(new SchemaTableName(glueNamespace, table.getName()), table))) + .map(table -> entry(new SchemaTableName(glueNamespace, 
table.name()), table))) .forEach(entry -> { SchemaTableName name = entry.getKey(); - com.amazonaws.services.glue.model.Table table = entry.getValue(); + Table table = entry.getValue(); String tableType = getTableType(table); - Map tableParameters = getTableParameters(table); + Map tableParameters = table.parameters(); if (isTrinoMaterializedView(tableType, tableParameters)) { - Optional comment = decodeMaterializedViewData(table.getViewOriginalText()).comment(); + Optional comment = decodeMaterializedViewData(table.viewOriginalText()).comment(); unfilteredResult.add(RelationCommentMetadata.forRelation(name, comment)); } else if (isTrinoView(tableType, tableParameters)) { - Optional comment = ViewReaderUtil.PrestoViewReader.decodeViewData(table.getViewOriginalText()).getComment(); + Optional comment = ViewReaderUtil.PrestoViewReader.decodeViewData(table.viewOriginalText()).getComment(); unfilteredResult.add(RelationCommentMetadata.forRelation(name, comment)); } else if (isRedirected.test(name)) { @@ -574,12 +557,12 @@ else if (!isIcebergTable(tableParameters)) { private void getCommentsFromIcebergMetadata( ConnectorSession session, - Map glueTables, // only Iceberg tables + Map glueTables, // only Iceberg tables UnaryOperator> relationFilter, Consumer resultsCollector) { for (SchemaTableName tableName : relationFilter.apply(glueTables.keySet())) { - com.amazonaws.services.glue.model.Table table = glueTables.get(tableName); + Table table = glueTables.get(tableName); // potentially racy with invalidation, but TrinoGlueCatalog is session-scoped uncheckedCacheGet(glueTableCache, tableName, () -> table); Optional comment; @@ -665,42 +648,42 @@ private Optional> getCachedColumnMetadata(SchemaTableName t return Optional.empty(); } - com.amazonaws.services.glue.model.Table glueTable = getTable(tableName, false); + Table glueTable = getTable(tableName, false); return getCachedColumnMetadata(glueTable); } - private Optional> getCachedColumnMetadata(com.amazonaws.services.glue.model.Table glueTable) + private Optional> getCachedColumnMetadata(Table glueTable) { if (!cacheTableMetadata) { return Optional.empty(); } - Map tableParameters = getTableParameters(glueTable); + Map tableParameters = glueTable.parameters(); String metadataLocation = tableParameters.get(METADATA_LOCATION_PROP); String metadataValidForMetadata = tableParameters.get(TRINO_TABLE_METADATA_INFO_VALID_FOR); - Optional storageDescriptor = getStorageDescriptor(glueTable); + Optional storageDescriptor = Optional.ofNullable(glueTable.storageDescriptor()); if (metadataLocation == null || !metadataLocation.equals(metadataValidForMetadata) || storageDescriptor.isEmpty() || - storageDescriptor.get().getColumns() == null) { + !storageDescriptor.get().hasColumns()) { return Optional.empty(); } - List glueColumns = storageDescriptor.get().getColumns(); - if (glueColumns.stream().noneMatch(column -> getColumnParameters(column).containsKey(COLUMN_TRINO_TYPE_ID_PROPERTY))) { + List glueColumns = storageDescriptor.get().columns(); + if (glueColumns.stream().noneMatch(column -> column.parameters().containsKey(COLUMN_TRINO_TYPE_ID_PROPERTY))) { // No column has type parameter, maybe the parameters were erased return Optional.empty(); } ImmutableList.Builder columns = ImmutableList.builderWithExpectedSize(glueColumns.size()); for (Column glueColumn : glueColumns) { - Map columnParameters = getColumnParameters(glueColumn); - String trinoTypeId = columnParameters.getOrDefault(COLUMN_TRINO_TYPE_ID_PROPERTY, glueColumn.getType()); + Map columnParameters = 
glueColumn.parameters(); + String trinoTypeId = columnParameters.getOrDefault(COLUMN_TRINO_TYPE_ID_PROPERTY, glueColumn.type()); boolean notNull = parseBoolean(columnParameters.getOrDefault(COLUMN_TRINO_NOT_NULL_PROPERTY, "false")); Type type = typeManager.getType(TypeId.of(trinoTypeId)); columns.add(ColumnMetadata.builder() - .setName(glueColumn.getName()) + .setName(glueColumn.name()) .setType(type) - .setComment(Optional.ofNullable(glueColumn.getComment())) + .setComment(Optional.ofNullable(glueColumn.comment())) .setNullable(!notNull) .build()); } @@ -714,7 +697,7 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) try { deleteTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(HIVE_METASTORE_ERROR, e); } try { @@ -732,8 +715,8 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) @Override public void dropCorruptedTable(ConnectorSession session, SchemaTableName schemaTableName) { - com.amazonaws.services.glue.model.Table table = dropTableFromMetastore(session, schemaTableName); - String metadataLocation = getTableParameters(table).get(METADATA_LOCATION_PROP); + Table table = dropTableFromMetastore(session, schemaTableName); + String metadataLocation = table.parameters().get(METADATA_LOCATION_PROP); if (metadataLocation == null) { throw new TrinoException(ICEBERG_INVALID_METADATA, format("Table %s is missing [%s] property", schemaTableName, METADATA_LOCATION_PROP)); } @@ -807,18 +790,18 @@ public void unregisterTable(ConnectorSession session, SchemaTableName schemaTabl invalidateTableCache(schemaTableName); } - private com.amazonaws.services.glue.model.Table dropTableFromMetastore(ConnectorSession session, SchemaTableName schemaTableName) + private Table dropTableFromMetastore(ConnectorSession session, SchemaTableName schemaTableName) { - com.amazonaws.services.glue.model.Table table = getTableAndCacheMetadata(session, schemaTableName) + Table table = getTableAndCacheMetadata(session, schemaTableName) .orElseThrow(() -> new TableNotFoundException(schemaTableName)); - if (!isIcebergTable(getTableParameters(table))) { + if (!isIcebergTable(table.parameters())) { throw new UnknownTableTypeException(schemaTableName); } try { deleteTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(HIVE_METASTORE_ERROR, e); } return table; @@ -829,9 +812,9 @@ public void renameTable(ConnectorSession session, SchemaTableName from, SchemaTa { boolean newTableCreated = false; try { - com.amazonaws.services.glue.model.Table table = getTableAndCacheMetadata(session, from) + Table table = getTableAndCacheMetadata(session, from) .orElseThrow(() -> new TableNotFoundException(from)); - Map tableParameters = new HashMap<>(getTableParameters(table)); + Map tableParameters = new HashMap<>(table.parameters()); FileIO io = loadTable(session, from).io(); String metadataLocation = tableParameters.remove(METADATA_LOCATION_PROP); if (metadataLocation == null) { @@ -841,9 +824,9 @@ public void renameTable(ConnectorSession session, SchemaTableName from, SchemaTa TableInput tableInput = getTableInput( typeManager, to.getTableName(), - Optional.ofNullable(table.getOwner()), + Optional.ofNullable(table.owner()), metadata, - getStorageDescriptor(table).map(StorageDescriptor::getLocation).orElse(null), + 
Optional.ofNullable(table.storageDescriptor()).map(StorageDescriptor::location).orElse(null), metadataLocation, tableParameters, cacheTableMetadata); @@ -867,9 +850,9 @@ public void renameTable(ConnectorSession session, SchemaTableName from, SchemaTa } } - private Optional getTableAndCacheMetadata(ConnectorSession session, SchemaTableName schemaTableName) + private Optional
getTableAndCacheMetadata(ConnectorSession session, SchemaTableName schemaTableName) { - com.amazonaws.services.glue.model.Table table; + Table table; try { table = getTable(schemaTableName, false); } @@ -878,7 +861,7 @@ private Optional getTableAndCacheMetada } String tableType = getTableType(table); - Map parameters = getTableParameters(table); + Map parameters = table.parameters(); if (isIcebergTable(parameters) && !tableMetadataCache.asMap().containsKey(schemaTableName)) { if (viewCache.asMap().containsKey(schemaTableName) || materializedViewCache.asMap().containsKey(schemaTableName)) { throw new TrinoException(GENERIC_INTERNAL_ERROR, "Glue table cache inconsistency. Table cannot also be a view/materialized view"); @@ -919,10 +902,10 @@ else if (isTrinoView(tableType, parameters) && !viewCache.asMap().containsKey(sc try { TrinoViewUtil.getView( - Optional.ofNullable(table.getViewOriginalText()), + Optional.ofNullable(table.viewOriginalText()), tableType, parameters, - Optional.ofNullable(table.getOwner())) + Optional.ofNullable(table.owner())) .ifPresent(viewDefinition -> { // Note: this is racy from cache invalidation perspective, but it should not matter here uncheckedCacheGet(viewCache, schemaTableName, () -> viewDefinition); @@ -939,12 +922,9 @@ else if (isTrinoView(tableType, parameters) && !viewCache.asMap().containsKey(sc @Override public String defaultTableLocation(ConnectorSession session, SchemaTableName schemaTableName) { - GetDatabaseRequest getDatabaseRequest = new GetDatabaseRequest() - .withName(schemaTableName.getSchemaName()); String databaseLocation = stats.getGetDatabase().call(() -> - glueClient.getDatabase(getDatabaseRequest) - .getDatabase() - .getLocationUri()); + glueClient.getDatabase(x -> x.name(schemaTableName.getSchemaName())) + .database().locationUri()); String tableName = createNewTableName(schemaTableName.getTableName()); @@ -990,9 +970,9 @@ public void createView(ConnectorSession session, SchemaTableName schemaViewName, private void doCreateView(ConnectorSession session, SchemaTableName schemaViewName, TableInput viewTableInput, boolean replace) { - Optional existing = getTableAndCacheMetadata(session, schemaViewName); + Optional
existing = getTableAndCacheMetadata(session, schemaViewName); if (existing.isPresent()) { - if (!replace || !isTrinoView(getTableType(existing.get()), getTableParameters(existing.get()))) { + if (!replace || !isTrinoView(getTableType(existing.get()), existing.get().parameters())) { // TODO: ViewAlreadyExists is misleading if the name is used by a table https://github.com/trinodb/trino/issues/10037 throw new ViewAlreadyExistsException(schemaViewName); } @@ -1014,13 +994,13 @@ public void renameView(ConnectorSession session, SchemaTableName source, SchemaT { boolean newTableCreated = false; try { - com.amazonaws.services.glue.model.Table existingView = getTableAndCacheMetadata(session, source) + Table existingView = getTableAndCacheMetadata(session, source) .orElseThrow(() -> new TableNotFoundException(source)); viewCache.invalidate(source); TableInput viewTableInput = getViewTableInput( target.getTableName(), - existingView.getViewOriginalText(), - existingView.getOwner(), + existingView.viewOriginalText(), + existingView.owner(), createViewProperties(session, trinoVersion, TRINO_CREATED_BY_VALUE)); createTable(target.getSchemaName(), viewTableInput); newTableCreated = true; @@ -1058,7 +1038,7 @@ public void dropView(ConnectorSession session, SchemaTableName schemaViewName) viewCache.invalidate(schemaViewName); deleteTable(schemaViewName.getSchemaName(), schemaViewName.getTableName()); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(HIVE_METASTORE_ERROR, e); } } @@ -1076,16 +1056,16 @@ public Optional getView(ConnectorSession session, Schem return Optional.empty(); } - Optional table = getTableAndCacheMetadata(session, viewName); + Optional
table = getTableAndCacheMetadata(session, viewName); if (table.isEmpty()) { return Optional.empty(); } - com.amazonaws.services.glue.model.Table viewDefinition = table.get(); + Table viewDefinition = table.get(); return TrinoViewUtil.getView( - Optional.ofNullable(viewDefinition.getViewOriginalText()), + Optional.ofNullable(viewDefinition.viewOriginalText()), getTableType(viewDefinition), - getTableParameters(viewDefinition), - Optional.ofNullable(viewDefinition.getOwner())); + viewDefinition.parameters(), + Optional.ofNullable(viewDefinition.owner())); } @Override @@ -1137,7 +1117,7 @@ private void updateView(ConnectorSession session, SchemaTableName viewName, Conn try { updateTable(viewName.getSchemaName(), viewTableInput); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } @@ -1151,10 +1131,10 @@ public void createMaterializedView( boolean replace, boolean ignoreExisting) { - Optional existing = getTableAndCacheMetadata(session, viewName); + Optional
existing = getTableAndCacheMetadata(session, viewName); if (existing.isPresent()) { - if (!isTrinoMaterializedView(getTableType(existing.get()), getTableParameters(existing.get()))) { + if (!isTrinoMaterializedView(getTableType(existing.get()), existing.get().parameters())) { throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Existing table is not a Materialized View: " + viewName); } if (!replace) { @@ -1205,7 +1185,7 @@ private void createMaterializedViewWithStorageTable( SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, Map materializedViewProperties, - Optional existing) + Optional
existing) { // Create the storage table SchemaTableName storageTable = createMaterializedViewStorageTable(session, viewName, definition, materializedViewProperties); @@ -1262,16 +1242,16 @@ public void updateMaterializedViewColumnComment(ConnectorSession session, Schema private void updateMaterializedView(SchemaTableName viewName, ConnectorMaterializedViewDefinition newDefinition) { - com.amazonaws.services.glue.model.Table table = getTable(viewName, false); + Table table = getTable(viewName, false); TableInput materializedViewTableInput = getMaterializedViewTableInput( viewName.getTableName(), encodeMaterializedViewData(fromConnectorMaterializedViewDefinition(newDefinition)), - table.getOwner(), - getTableParameters(table)); + table.owner(), + table.parameters()); try { updateTable(viewName.getSchemaName(), materializedViewTableInput); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } @@ -1279,39 +1259,39 @@ private void updateMaterializedView(SchemaTableName viewName, ConnectorMateriali @Override public void dropMaterializedView(ConnectorSession session, SchemaTableName viewName) { - com.amazonaws.services.glue.model.Table view = getTableAndCacheMetadata(session, viewName) + Table view = getTableAndCacheMetadata(session, viewName) .orElseThrow(() -> new MaterializedViewNotFoundException(viewName)); - if (!isTrinoMaterializedView(getTableType(view), getTableParameters(view))) { - throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Not a Materialized View: " + view.getDatabaseName() + "." + view.getName()); + if (!isTrinoMaterializedView(getTableType(view), view.parameters())) { + throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Not a Materialized View: " + view.databaseName() + "." 
+ view.name()); } materializedViewCache.invalidate(viewName); dropMaterializedViewStorage(session, view); - deleteTable(view.getDatabaseName(), view.getName()); + deleteTable(view.databaseName(), view.name()); } - private void dropMaterializedViewStorage(ConnectorSession session, com.amazonaws.services.glue.model.Table view) + private void dropMaterializedViewStorage(ConnectorSession session, Table view) { - Map parameters = getTableParameters(view); + Map parameters = view.parameters(); String storageTableName = parameters.get(STORAGE_TABLE); if (storageTableName != null) { String storageSchema = Optional.ofNullable(parameters.get(STORAGE_SCHEMA)) - .orElse(view.getDatabaseName()); + .orElse(view.databaseName()); try { dropTable(session, new SchemaTableName(storageSchema, storageTableName)); } catch (TrinoException e) { - LOG.warn(e, "Failed to drop storage table '%s.%s' for materialized view '%s'", storageSchema, storageTableName, view.getName()); + LOG.warn(e, "Failed to drop storage table '%s.%s' for materialized view '%s'", storageSchema, storageTableName, view.name()); } } else { String storageMetadataLocation = parameters.get(METADATA_LOCATION_PROP); - checkState(storageMetadataLocation != null, "Storage location missing in definition of materialized view " + view.getName()); + checkState(storageMetadataLocation != null, "Storage location missing in definition of materialized view " + view.name()); try { dropMaterializedViewStorage(fileSystemFactory.create(session), storageMetadataLocation); } catch (IOException e) { - LOG.warn(e, "Failed to delete storage table metadata '%s' for materialized view '%s'", storageMetadataLocation, view.getName()); + LOG.warn(e, "Failed to delete storage table metadata '%s' for materialized view '%s'", storageMetadataLocation, view.name()); } } } @@ -1329,13 +1309,13 @@ protected Optional doGetMaterializedView(Co return Optional.empty(); } - Optional maybeTable = getTableAndCacheMetadata(session, viewName); + Optional
maybeTable = getTableAndCacheMetadata(session, viewName); if (maybeTable.isEmpty()) { return Optional.empty(); } - com.amazonaws.services.glue.model.Table table = maybeTable.get(); - if (!isTrinoMaterializedView(getTableType(table), getTableParameters(table))) { + Table table = maybeTable.get(); + if (!isTrinoMaterializedView(getTableType(table), table.parameters())) { return Optional.empty(); } @@ -1344,9 +1324,9 @@ protected Optional doGetMaterializedView(Co private ConnectorMaterializedViewDefinition createMaterializedViewDefinition( SchemaTableName viewName, - com.amazonaws.services.glue.model.Table table) + Table table) { - Map materializedViewParameters = getTableParameters(table); + Map materializedViewParameters = table.parameters(); String storageTable = materializedViewParameters.get(STORAGE_TABLE); String storageMetadataLocation = materializedViewParameters.get(METADATA_LOCATION_PROP); if ((storageTable == null) == (storageMetadataLocation == null)) { @@ -1361,7 +1341,7 @@ private ConnectorMaterializedViewDefinition createMaterializedViewDefinition( .orElse(viewName.getSchemaName()); storageTableName = new SchemaTableName(storageSchema, storageTable); - if (table.getViewOriginalText() == null) { + if (table.viewOriginalText() == null) { throw new TrinoException(ICEBERG_BAD_DATA, "Materialized view did not have original text " + viewName); } } @@ -1370,8 +1350,8 @@ private ConnectorMaterializedViewDefinition createMaterializedViewDefinition( } return getMaterializedViewDefinition( - Optional.ofNullable(table.getOwner()), - table.getViewOriginalText(), + Optional.ofNullable(table.owner()), + table.viewOriginalText(), storageTableName); } @@ -1381,17 +1361,17 @@ public Optional getMaterializedViewStorageTable(ConnectorSession sess String storageMetadataLocation; MaterializedViewData materializedViewData = materializedViewCache.getIfPresent(viewName); if (materializedViewData == null) { - Optional maybeTable = getTableAndCacheMetadata(session, viewName); + Optional
maybeTable = getTableAndCacheMetadata(session, viewName); if (maybeTable.isEmpty()) { return Optional.empty(); } - com.amazonaws.services.glue.model.Table materializedView = maybeTable.get(); - verify(isTrinoMaterializedView(getTableType(materializedView), getTableParameters(materializedView)), + Table materializedView = maybeTable.get(); + verify(isTrinoMaterializedView(getTableType(materializedView), materializedView.parameters()), "getMaterializedViewStorageTable received a table, not a materialized view"); // TODO getTableAndCacheMetadata saved the value in materializedViewCache, so we could just use that, except when conversion fails - storageMetadataLocation = getTableParameters(materializedView).get(METADATA_LOCATION_PROP); - checkState(storageMetadataLocation != null, "Storage location missing in definition of materialized view " + materializedView.getName()); + storageMetadataLocation = materializedView.parameters().get(METADATA_LOCATION_PROP); + checkState(storageMetadataLocation != null, "Storage location missing in definition of materialized view " + materializedView.name()); } else { storageMetadataLocation = materializedViewData.storageMetadataLocation @@ -1436,14 +1416,14 @@ public void renameMaterializedView(ConnectorSession session, SchemaTableName sou { boolean newTableCreated = false; try { - com.amazonaws.services.glue.model.Table glueTable = getTableAndCacheMetadata(session, source) + Table glueTable = getTableAndCacheMetadata(session, source) .orElseThrow(() -> new TableNotFoundException(source)); materializedViewCache.invalidate(source); - Map tableParameters = getTableParameters(glueTable); + Map tableParameters = glueTable.parameters(); if (!isTrinoMaterializedView(getTableType(glueTable), tableParameters)) { throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Not a Materialized View: " + source); } - TableInput tableInput = getMaterializedViewTableInput(target.getTableName(), glueTable.getViewOriginalText(), glueTable.getOwner(), tableParameters); + TableInput tableInput = getMaterializedViewTableInput(target.getTableName(), glueTable.viewOriginalText(), glueTable.owner(), tableParameters); createTable(target.getSchemaName(), tableInput); newTableCreated = true; deleteTable(source.getSchemaName(), source.getTableName()); @@ -1480,12 +1460,12 @@ public Optional redirectTable(ConnectorSession session, tableName.getSchemaName(), tableName.getTableName().substring(0, metadataMarkerIndex)); - Optional table = getTableAndCacheMetadata(session, new SchemaTableName(tableNameBase.getSchemaName(), tableNameBase.getTableName())); + Optional
table = getTableAndCacheMetadata(session, new SchemaTableName(tableNameBase.getSchemaName(), tableNameBase.getTableName())); if (table.isEmpty() || VIRTUAL_VIEW.name().equals(getTableTypeNullable(table.get()))) { return Optional.empty(); } - if (!isIcebergTable(getTableParameters(table.get()))) { + if (!isIcebergTable(table.get().parameters())) { // After redirecting, use the original table name, with "$partitions" and similar suffixes return Optional.of(new CatalogSchemaTableName(hiveCatalogName, tableName)); } @@ -1498,7 +1478,7 @@ protected void invalidateTableCache(SchemaTableName schemaTableName) tableMetadataCache.invalidate(schemaTableName); } - com.amazonaws.services.glue.model.Table getTable(SchemaTableName tableName, boolean invalidateCaches) + Table getTable(SchemaTableName tableName, boolean invalidateCaches) { if (invalidateCaches) { glueTableCache.invalidate(tableName); @@ -1507,15 +1487,16 @@ com.amazonaws.services.glue.model.Table getTable(SchemaTableName tableName, bool try { return uncheckedCacheGet(glueTableCache, tableName, () -> { try { - GetTableRequest getTableRequest = new GetTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withName(tableName.getTableName()); - return stats.getGetTable().call(() -> glueClient.getTable(getTableRequest).getTable()); + return stats.getGetTable().call(() -> + glueClient.getTable(x -> x + .databaseName(tableName.getSchemaName()) + .name(tableName.getTableName())) + .table()); } catch (EntityNotFoundException e) { throw new TableNotFoundException(tableName, e); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } }); @@ -1526,14 +1507,14 @@ com.amazonaws.services.glue.model.Table getTable(SchemaTableName tableName, bool } } - private Stream getGlueTablesWithExceptionHandling(String glueNamespace) + private Stream
<com.amazonaws.services.glue.model.Table> getGlueTablesWithExceptionHandling(String glueNamespace) + private Stream<Table> getGlueTablesWithExceptionHandling(String glueNamespace) { return stream(new AbstractIterator<>() { - private Iterator<com.amazonaws.services.glue.model.Table> delegate; + private Iterator<Table>
delegate; @Override - protected com.amazonaws.services.glue.model.Table computeNext() + protected Table computeNext() { boolean firstCall = (delegate == null); try { @@ -1558,50 +1539,47 @@ protected com.amazonaws.services.glue.model.Table computeNext() } return endOfData(); } - catch (AmazonServiceException e) { + catch (SdkException e) { throw new TrinoException(ICEBERG_CATALOG_ERROR, e); } } }); } - private Stream<com.amazonaws.services.glue.model.Table> getGlueTables(String glueNamespace) + private Stream<Table>
getGlueTables(String glueNamespace) { - return getPaginatedResults( - glueClient::getTables, - new GetTablesRequest().withDatabaseName(glueNamespace), - GetTablesRequest::setNextToken, - GetTablesResult::getNextToken, - stats.getGetTables()) - .map(GetTablesResult::getTableList) - .flatMap(List::stream); + return stats.getGetTables().call(() -> + glueClient.getTablesPaginator(x -> x.databaseName(glueNamespace)) + .stream() + .map(GetTablesResponse::tableList) + .flatMap(List::stream)); } private void createTable(String schemaName, TableInput tableInput) { glueTableCache.invalidateAll(); stats.getCreateTable().call(() -> - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(schemaName) - .withTableInput(tableInput))); + glueClient.createTable(x -> x + .databaseName(schemaName) + .tableInput(tableInput))); } private void updateTable(String schemaName, TableInput tableInput) { glueTableCache.invalidateAll(); stats.getUpdateTable().call(() -> - glueClient.updateTable(new UpdateTableRequest() - .withDatabaseName(schemaName) - .withTableInput(tableInput))); + glueClient.updateTable(x -> x + .databaseName(schemaName) + .tableInput(tableInput))); } private void deleteTable(String schema, String table) { glueTableCache.invalidateAll(); stats.getDeleteTable().call(() -> - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(schema) - .withName(table))); + glueClient.deleteTable(x -> x + .databaseName(schema) + .name(table))); } private record MaterializedViewData( diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java index cd17da539a22..c8b8a3ef24aa 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java @@ -13,13 +13,12 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; import com.google.inject.Inject; import io.airlift.concurrent.BoundedExecutor; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.hive.NodeVersion; +import io.trino.plugin.hive.metastore.glue.GlueHiveMetastoreConfig; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.hive.metastore.glue.v1.GlueHiveMetastoreConfig; import io.trino.plugin.hive.security.UsingSystemSecurity; import io.trino.plugin.iceberg.ForIcebergMetadata; import io.trino.plugin.iceberg.IcebergConfig; @@ -31,6 +30,7 @@ import io.trino.spi.type.TypeManager; import org.weakref.jmx.Flatten; import org.weakref.jmx.Managed; +import software.amazon.awssdk.services.glue.GlueClient; import java.util.Optional; import java.util.concurrent.Executor; @@ -49,7 +49,7 @@ public class TrinoGlueCatalogFactory private final IcebergTableOperationsProvider tableOperationsProvider; private final String trinoVersion; private final Optional defaultSchemaLocation; - private final AWSGlueAsync glueClient; + private final GlueClient glueClient; private final boolean isUniqueTableLocation; private final boolean hideMaterializedViewStorageTable; private final GlueMetastoreStats stats; @@ -68,7 +68,7 @@ public TrinoGlueCatalogFactory( IcebergGlueCatalogConfig catalogConfig, @UsingSystemSecurity boolean usingSystemSecurity, GlueMetastoreStats stats, - AWSGlueAsync glueClient, + GlueClient glueClient, @ForIcebergMetadata ExecutorService 
metadataExecutorService) { this.catalogName = requireNonNull(catalogName, "catalogName is null"); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/EqualityDeleteFilter.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/EqualityDeleteFilter.java index f1845de3c987..af093e5288b1 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/EqualityDeleteFilter.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/EqualityDeleteFilter.java @@ -13,10 +13,10 @@ */ package io.trino.plugin.iceberg.delete; -import com.amazonaws.annotation.ThreadSafe; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListenableFutureTask; +import com.google.errorprone.annotations.ThreadSafe; import io.trino.plugin.iceberg.IcebergColumnHandle; import io.trino.plugin.iceberg.delete.DeleteManager.DeletePageSourceProvider; import io.trino.spi.TrinoException; diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/AWSGlueAsyncAdapterProvider.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/AWSGlueAsyncAdapterProvider.java deleted file mode 100644 index e4084971e6eb..000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/AWSGlueAsyncAdapterProvider.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.iceberg.catalog.glue; - -import com.amazonaws.services.glue.AWSGlueAsync; - -public interface AWSGlueAsyncAdapterProvider -{ - AWSGlueAsync createAWSGlueAsyncAdapter(AWSGlueAsync delegate); -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java index 29109e31d2ce..79f79a3cd1f8 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java @@ -74,7 +74,7 @@ /* * The test currently uses AWS Default Credential Provider Chain, - * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default + * See https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/credentials-chain.html#credentials-default * on ways to set your AWS credentials which will be needed to run this test. 
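 * For a local run this typically means exporting AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_REGION, or relying on a shared credentials profile or instance/container role that the default chain picks up.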
*/ @Execution(SAME_THREAD) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java index 9527b8538ccb..f5aae5c13079 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java @@ -13,14 +13,6 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.Table; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateTableRequest; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.trino.filesystem.Location; @@ -43,6 +35,10 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.EntityNotFoundException; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; @@ -53,14 +49,13 @@ import java.util.List; import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getStorageDescriptor; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableType; +import static io.trino.plugin.hive.metastore.glue.GlueConverter.getTableTypeNullable; import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting; import static io.trino.testing.SystemEnvironmentUtils.requireEnv; import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; +import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -76,7 +71,7 @@ public class TestIcebergGlueCatalogConnectorSmokeTest { private final String bucketName; private final String schemaName; - private final AWSGlueAsync glueClient; + private final GlueClient glueClient; private final TrinoFileSystemFactory fileSystemFactory; public TestIcebergGlueCatalogConnectorSmokeTest() @@ -84,7 +79,7 @@ public TestIcebergGlueCatalogConnectorSmokeTest() super(FileFormat.PARQUET); this.bucketName = requireEnv("S3_BUCKET"); this.schemaName = "test_iceberg_smoke_" + randomNameSuffix(); - glueClient = AWSGlueAsyncClientBuilder.defaultClient(); + glueClient = GlueClient.create(); HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new 
HdfsConfig(), ImmutableSet.of()); HdfsConfiguration hdfsConfiguration = new DynamicHdfsConfiguration(initializer, ImmutableSet.of()); @@ -156,70 +151,57 @@ public void testRenameSchema() void testGlueTableLocation() { try (TestTable table = newTrinoTable("test_table_location", "AS SELECT 1 x")) { - String initialLocation = getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation(); - assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation()) + String initialLocation = getStorageDescriptor(getGlueTable(table.getName())).location(); + assertThat(getStorageDescriptor(getGlueTable(table.getName())).location()) // Using startsWith because the location has UUID suffix .startsWith("%s/%s.db/%s".formatted(schemaPath(), schemaName, table.getName())); assertUpdate("INSERT INTO " + table.getName() + " VALUES 2", 1); Table glueTable = getGlueTable(table.getName()); - assertThat(getStorageDescriptor(glueTable).orElseThrow().getLocation()) + assertThat(getStorageDescriptor(glueTable).location()) .isEqualTo(initialLocation); String newTableLocation = initialLocation + "_new"; updateTableLocation(glueTable, newTableLocation); assertUpdate("INSERT INTO " + table.getName() + " VALUES 3", 1); - assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation()) + assertThat(getStorageDescriptor(getGlueTable(table.getName())).location()) .isEqualTo(newTableLocation); assertUpdate("CALL system.unregister_table(CURRENT_SCHEMA, '" + table.getName() + "')"); assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '" + table.getName() + "', '" + initialLocation + "')"); - assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation()) + assertThat(getStorageDescriptor(getGlueTable(table.getName())).location()) .isEqualTo(initialLocation); } } private Table getGlueTable(String tableName) { - GetTableRequest request = new GetTableRequest().withDatabaseName(schemaName).withName(tableName); - return glueClient.getTable(request).getTable(); + return glueClient.getTable(x -> x.databaseName(schemaName).name(tableName)).table(); } private void updateTableLocation(Table table, String newLocation) { - TableInput tableInput = new TableInput() - .withName(table.getName()) - .withTableType(getTableType(table)) - .withStorageDescriptor(getStorageDescriptor(table).orElseThrow().withLocation(newLocation)) - .withParameters(getTableParameters(table)); - UpdateTableRequest updateTableRequest = new UpdateTableRequest() - .withDatabaseName(schemaName) - .withTableInput(tableInput); - glueClient.updateTable(updateTableRequest); + glueClient.updateTable(update -> update + .databaseName(schemaName) + .tableInput(input -> input + .name(table.name()) + .tableType(getTableTypeNullable(table)) + .storageDescriptor(getStorageDescriptor(table).toBuilder().location(newLocation).build()) + .parameters(table.parameters()))); } @Override protected void dropTableFromMetastore(String tableName) { - DeleteTableRequest deleteTableRequest = new DeleteTableRequest() - .withDatabaseName(schemaName) - .withName(tableName); - glueClient.deleteTable(deleteTableRequest); - GetTableRequest getTableRequest = new GetTableRequest() - .withDatabaseName(schemaName) - .withName(tableName); - assertThatThrownBy(() -> glueClient.getTable(getTableRequest)) + glueClient.deleteTable(x -> x.databaseName(schemaName).name(tableName)); + assertThatThrownBy(() -> getGlueTable(tableName)) .isInstanceOf(EntityNotFoundException.class); } @Override protected String 
getMetadataLocation(String tableName) { - GetTableRequest getTableRequest = new GetTableRequest() - .withDatabaseName(schemaName) - .withName(tableName); - return getTableParameters(glueClient.getTable(getTableRequest).getTable()) - .get("metadata_location"); + return getGlueTable(tableName).parameters().get("metadata_location"); } @Override @@ -273,4 +255,9 @@ protected boolean locationExists(String location) return !s3.listObjectsV2(request).contents().isEmpty(); } } + + private static StorageDescriptor getStorageDescriptor(Table table) + { + return requireNonNull(table.storageDescriptor()); + } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogMaterializedView.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogMaterializedView.java index 299f7720af5e..6b8ad26befe6 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogMaterializedView.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogMaterializedView.java @@ -13,23 +13,17 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.BatchDeleteTableRequest; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.GetTablesRequest; -import com.amazonaws.services.glue.model.GetTablesResult; -import com.amazonaws.services.glue.model.Table; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import io.trino.plugin.hive.metastore.glue.AwsApiCallStats; import io.trino.plugin.iceberg.BaseIcebergMaterializedViewTest; import io.trino.plugin.iceberg.IcebergQueryRunner; import io.trino.plugin.iceberg.SchemaInitializer; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import org.junit.jupiter.api.AfterAll; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.GetTablesResponse; +import software.amazon.awssdk.services.glue.model.Table; import java.io.File; import java.nio.file.Files; @@ -39,8 +33,6 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.trino.plugin.base.util.Closables.closeAllSuppress; -import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; import static io.trino.testing.TestingNames.randomNameSuffix; import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; @@ -96,12 +88,12 @@ protected String getSchemaDirectory() @Override protected String getStorageMetadataLocation(String materializedViewName) { - AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.defaultClient(); - Table table = glueClient.getTable(new GetTableRequest() - .withDatabaseName(schemaName) - .withName(materializedViewName)) - .getTable(); - return getTableParameters(table).get(METADATA_LOCATION_PROP); + return GlueClient.create() + .getTable(x -> x + .databaseName(schemaName) + .name(materializedViewName)) + .table() + .parameters().get(METADATA_LOCATION_PROP); } @AfterAll @@ -112,21 +104,17 @@ public void cleanup() private static void cleanUpSchema(String schema) { - AWSGlueAsync 
glueClient = AWSGlueAsyncClientBuilder.defaultClient(); - Set tableNames = getPaginatedResults( - glueClient::getTables, - new GetTablesRequest().withDatabaseName(schema), - GetTablesRequest::setNextToken, - GetTablesResult::getNextToken, - new AwsApiCallStats()) - .map(GetTablesResult::getTableList) + GlueClient glueClient = GlueClient.create(); + Set tableNames = glueClient + .getTablesPaginator(x -> x.databaseName(schema)) + .stream() + .map(GetTablesResponse::tableList) .flatMap(Collection::stream) - .map(Table::getName) + .map(Table::name) .collect(toImmutableSet()); - glueClient.batchDeleteTable(new BatchDeleteTableRequest() - .withDatabaseName(schema) - .withTablesToDelete(tableNames)); - glueClient.deleteDatabase(new DeleteDatabaseRequest() - .withName(schema)); + glueClient.batchDeleteTable(x -> x + .databaseName(schema) + .tablesToDelete(tableNames)); + glueClient.deleteDatabase(x -> x.name(schema)); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogSkipArchive.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogSkipArchive.java index 81e86808e7db..b9d7be3660fd 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogSkipArchive.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogSkipArchive.java @@ -13,17 +13,7 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.GetTableVersionsRequest; -import com.amazonaws.services.glue.model.GetTableVersionsResult; -import com.amazonaws.services.glue.model.Table; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.TableVersion; -import com.amazonaws.services.glue.model.UpdateTableRequest; import com.google.common.collect.ImmutableMap; -import io.trino.plugin.hive.metastore.glue.AwsApiCallStats; import io.trino.plugin.iceberg.IcebergQueryRunner; import io.trino.plugin.iceberg.SchemaInitializer; import io.trino.plugin.iceberg.fileio.ForwardingFileIo; @@ -36,6 +26,11 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.GetTableVersionsResponse; +import software.amazon.awssdk.services.glue.model.Table; +import software.amazon.awssdk.services.glue.model.TableInput; +import software.amazon.awssdk.services.glue.model.TableVersion; import java.io.File; import java.nio.file.Files; @@ -45,10 +40,7 @@ import java.util.Map; import java.util.Optional; -import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; -import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static io.trino.plugin.iceberg.catalog.glue.GlueIcebergUtil.getTableInput; import static io.trino.testing.TestingConnectorSession.SESSION; @@ -60,7 +52,7 @@ /* * The test currently uses AWS Default Credential Provider Chain, - * See 
https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default + * See https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/credentials-chain.html#credentials-default * on ways to set your AWS credentials which will be needed to run this test. */ @TestInstance(PER_CLASS) @@ -68,13 +60,13 @@ public class TestIcebergGlueCatalogSkipArchive extends AbstractTestQueryFramework { private final String schemaName = "test_iceberg_skip_archive_" + randomNameSuffix(); - private AWSGlueAsync glueClient; + private GlueClient glueClient; @Override protected QueryRunner createQueryRunner() throws Exception { - glueClient = AWSGlueAsyncClientBuilder.defaultClient(); + glueClient = GlueClient.create(); File schemaDirectory = Files.createTempDirectory("test_iceberg").toFile(); schemaDirectory.deleteOnExit(); @@ -103,14 +95,14 @@ public void testSkipArchive() try (TestTable table = newTrinoTable("test_skip_archive", "(col int)")) { List tableVersionsBeforeInsert = getTableVersions(schemaName, table.getName()); assertThat(tableVersionsBeforeInsert).hasSize(1); - String versionIdBeforeInsert = getOnlyElement(tableVersionsBeforeInsert).getVersionId(); + String versionIdBeforeInsert = getOnlyElement(tableVersionsBeforeInsert).versionId(); assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1); // Verify count of table versions isn't increased, but version id is changed List tableVersionsAfterInsert = getTableVersions(schemaName, table.getName()); assertThat(tableVersionsAfterInsert).hasSize(1); - String versionIdAfterInsert = getOnlyElement(tableVersionsAfterInsert).getVersionId(); + String versionIdAfterInsert = getOnlyElement(tableVersionsAfterInsert).versionId(); assertThat(versionIdBeforeInsert).isNotEqualTo(versionIdAfterInsert); } } @@ -124,14 +116,14 @@ public void testNotRemoveExistingArchive() TableVersion initialVersion = getOnlyElement(tableVersionsBeforeInsert); // Add a new archive using Glue client - Table glueTable = glueClient.getTable(new GetTableRequest().withDatabaseName(schemaName).withName(table.getName())).getTable(); - Map tableParameters = new HashMap<>(getTableParameters(glueTable)); + Table glueTable = glueClient.getTable(builder -> builder.databaseName(schemaName).name(table.getName())).table(); + Map tableParameters = new HashMap<>(glueTable.parameters()); String metadataLocation = tableParameters.remove(METADATA_LOCATION_PROP); FileIO io = new ForwardingFileIo(getFileSystemFactory(getDistributedQueryRunner()).create(SESSION)); TableMetadata metadata = TableMetadataParser.read(io, io.newInputFile(metadataLocation)); boolean cacheTableMetadata = new IcebergGlueCatalogConfig().isCacheTableMetadata(); TableInput tableInput = getTableInput(TESTING_TYPE_MANAGER, table.getName(), Optional.empty(), metadata, metadata.location(), metadataLocation, tableParameters, cacheTableMetadata); - glueClient.updateTable(new UpdateTableRequest().withDatabaseName(schemaName).withTableInput(tableInput)); + glueClient.updateTable(builder -> builder.databaseName(schemaName).tableInput(tableInput)); assertThat(getTableVersions(schemaName, table.getName())).hasSize(2); assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1); @@ -144,14 +136,13 @@ public void testNotRemoveExistingArchive() private List getTableVersions(String databaseName, String tableName) { - return getPaginatedResults( - glueClient::getTableVersions, - new GetTableVersionsRequest().withDatabaseName(databaseName).withTableName(tableName), - GetTableVersionsRequest::setNextToken, - 
GetTableVersionsResult::getNextToken, - new AwsApiCallStats()) - .map(GetTableVersionsResult::getTableVersions) + return glueClient + .getTableVersionsPaginator(x -> x + .databaseName(databaseName) + .tableName(tableName)) + .stream() + .map(GetTableVersionsResponse::tableVersions) .flatMap(Collection::stream) - .collect(toImmutableList()); + .toList(); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java index 5c7f7519b1a9..2595d91907f6 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java @@ -13,14 +13,18 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; import com.google.common.collect.ImmutableMap; +import com.google.inject.Binder; +import com.google.inject.multibindings.ProvidesIntoSet; +import io.airlift.configuration.AbstractConfigurationAwareModule; import io.airlift.log.Logger; import io.trino.Session; import io.trino.execution.Failure; import io.trino.metastore.Database; +import io.trino.plugin.hive.metastore.glue.ForGlueHiveMetastore; import io.trino.plugin.hive.metastore.glue.GlueHiveMetastore; import io.trino.plugin.iceberg.TestingIcebergPlugin; +import io.trino.plugin.iceberg.catalog.IcebergCatalogModule; import io.trino.spi.security.PrincipalType; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.QueryFailedException; @@ -29,13 +33,15 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.services.glue.model.UpdateTableRequest; -import java.lang.reflect.InvocationTargetException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Optional; -import static com.google.common.reflect.Reflection.newProxy; import static io.trino.plugin.hive.metastore.glue.TestingGlueHiveMetastore.createTestingGlueHiveMetastore; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.TestingSession.testSessionBuilder; @@ -46,7 +52,7 @@ /* * The test currently uses AWS Default Credential Provider Chain, - * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default + * See https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/credentials-chain.html#credentials-default * on ways to set your AWS credentials which will be needed to run this test. 
*/ @TestInstance(PER_CLASS) @@ -71,25 +77,14 @@ protected QueryRunner createQueryRunner() .build(); QueryRunner queryRunner = new StandaloneQueryRunner(session); - AWSGlueAsyncAdapterProvider awsGlueAsyncAdapterProvider = delegate -> newProxy(AWSGlueAsync.class, (proxy, method, methodArgs) -> { - Object result; - try { - result = method.invoke(delegate, methodArgs); - } - catch (InvocationTargetException e) { - throw e.getCause(); - } - if (method.getName().equals("updateTable")) { - throw new RuntimeException("Test-simulated Glue timeout exception"); - } - return result; - }); - Path dataDirectory = Files.createTempDirectory("iceberg_data"); dataDirectory.toFile().deleteOnExit(); - queryRunner.installPlugin(new TestingIcebergPlugin(dataDirectory, Optional.of(new TestingIcebergGlueCatalogModule(awsGlueAsyncAdapterProvider)))); - queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", ImmutableMap.of("fs.hadoop.enabled", "true")); + queryRunner.installPlugin(new TestingIcebergPlugin(dataDirectory, Optional.of(new TestingGlueCatalogModule()))); + queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", ImmutableMap.builder() + .put("iceberg.catalog.type", "glue") + .put("fs.hadoop.enabled", "true") + .buildOrThrow()); glueHiveMetastore = createTestingGlueHiveMetastore(dataDirectory, this::closeAfterClass); @@ -135,4 +130,30 @@ public void testInsertFailureDoesNotCorruptTheTableMetadata() }); assertQuery("SELECT * FROM " + tableName, "VALUES 'Trino', 'rocks'"); } + + private static class TestingGlueCatalogModule + extends AbstractConfigurationAwareModule + { + @Override + protected void setup(Binder binder) + { + install(new IcebergCatalogModule()); + } + + @ProvidesIntoSet + @ForGlueHiveMetastore + public ExecutionInterceptor createExecutionInterceptor() + { + return new ExecutionInterceptor() + { + @Override + public void afterExecution(Context.AfterExecution context, ExecutionAttributes executionAttributes) + { + if (context.request() instanceof UpdateTableRequest) { + throw new RuntimeException("Test-simulated Glue timeout exception"); + } + } + }; + } + } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalog.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalog.java index 4e30c6a7d8c4..96bb49bd278a 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalog.java @@ -13,11 +13,6 @@ */ package io.trino.plugin.iceberg.catalog.glue; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.CreateDatabaseRequest; -import com.amazonaws.services.glue.model.DatabaseInput; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.log.Logger; @@ -42,6 +37,7 @@ import io.trino.spi.security.TrinoPrincipal; import io.trino.spi.type.TestingTypeManager; import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.glue.GlueClient; import java.io.File; import java.io.IOException; @@ -80,7 +76,7 @@ protected TrinoCatalog createTrinoCatalog(boolean useUniqueTableLocations) private TrinoCatalog createGlueTrinoCatalog(boolean useUniqueTableLocations, boolean useSystemSecurity) { - AWSGlueAsync glueClient = 
AWSGlueAsyncClientBuilder.defaultClient(); + GlueClient glueClient = GlueClient.create(); IcebergGlueCatalogConfig catalogConfig = new IcebergGlueCatalogConfig(); return new TrinoGlueCatalog( new CatalogName("catalog_name"), @@ -113,11 +109,11 @@ public void testNonLowercaseGlueDatabase() // Trino schema names are always lowercase (until https://github.com/trinodb/trino/issues/17) String trinoSchemaName = databaseName.toLowerCase(ENGLISH); - AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.defaultClient(); - glueClient.createDatabase(new CreateDatabaseRequest() - .withDatabaseInput(new DatabaseInput() + GlueClient glueClient = GlueClient.create(); + glueClient.createDatabase(database -> database + .databaseInput(input -> input // Currently this is actually stored in lowercase - .withName(databaseName))); + .name(databaseName))); try { TrinoCatalog catalog = createTrinoCatalog(false); assertThat(catalog.namespaceExists(SESSION, databaseName)).as("catalog.namespaceExists(databaseName)") @@ -153,8 +149,7 @@ public void testNonLowercaseGlueDatabase() .contains(trinoSchemaName); } finally { - glueClient.deleteDatabase(new DeleteDatabaseRequest() - .withName(databaseName)); + glueClient.deleteDatabase(delete -> delete.name(databaseName)); } } @@ -218,7 +213,7 @@ public void testDefaultLocation() tmpDirectory.toFile().deleteOnExit(); TrinoFileSystemFactory fileSystemFactory = HDFS_FILE_SYSTEM_FACTORY; - AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.defaultClient(); + GlueClient glueClient = GlueClient.create(); IcebergGlueCatalogConfig catalogConfig = new IcebergGlueCatalogConfig(); TrinoCatalog catalogWithDefaultLocation = new TrinoGlueCatalog( new CatalogName("catalog_name"), diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestingGlueIcebergTableOperationsProvider.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestingGlueIcebergTableOperationsProvider.java deleted file mode 100644 index 085d8185877f..000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestingGlueIcebergTableOperationsProvider.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.iceberg.catalog.glue; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.google.common.collect.ImmutableSet; -import com.google.inject.Inject; -import io.trino.filesystem.TrinoFileSystemFactory; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.hive.metastore.glue.v1.GlueHiveMetastoreConfig; -import io.trino.plugin.iceberg.catalog.IcebergTableOperations; -import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; -import io.trino.plugin.iceberg.catalog.TrinoCatalog; -import io.trino.plugin.iceberg.fileio.ForwardingFileIo; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.type.TypeManager; - -import java.util.Optional; - -import static io.trino.plugin.hive.metastore.glue.v1.GlueClientUtil.createAsyncGlueClient; -import static java.util.Objects.requireNonNull; - -public class TestingGlueIcebergTableOperationsProvider - implements IcebergTableOperationsProvider -{ - private final TypeManager typeManager; - private final boolean cacheTableMetadata; - private final TrinoFileSystemFactory fileSystemFactory; - private final AWSGlueAsync glueClient; - private final GlueMetastoreStats stats; - - @Inject - public TestingGlueIcebergTableOperationsProvider( - TypeManager typeManager, - IcebergGlueCatalogConfig catalogConfig, - TrinoFileSystemFactory fileSystemFactory, - GlueMetastoreStats stats, - GlueHiveMetastoreConfig glueConfig, - AWSCredentialsProvider credentialsProvider, - AWSGlueAsyncAdapterProvider awsGlueAsyncAdapterProvider) - { - this.typeManager = requireNonNull(typeManager, "typeManager is null"); - this.cacheTableMetadata = catalogConfig.isCacheTableMetadata(); - this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); - this.stats = requireNonNull(stats, "stats is null"); - requireNonNull(glueConfig, "glueConfig is null"); - requireNonNull(credentialsProvider, "credentialsProvider is null"); - requireNonNull(awsGlueAsyncAdapterProvider, "awsGlueAsyncAdapterProvider is null"); - this.glueClient = awsGlueAsyncAdapterProvider.createAWSGlueAsyncAdapter( - createAsyncGlueClient(glueConfig, credentialsProvider, ImmutableSet.of(), stats.newRequestMetricsCollector())); - } - - @Override - public IcebergTableOperations createTableOperations( - TrinoCatalog catalog, - ConnectorSession session, - String database, - String table, - Optional owner, - Optional location) - { - return new GlueIcebergTableOperations( - typeManager, - cacheTableMetadata, - glueClient, - stats, - ((TrinoGlueCatalog) catalog)::getTable, - new ForwardingFileIo(fileSystemFactory.create(session)), - session, - database, - table, - owner, - location); - } -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestingIcebergGlueCatalogModule.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestingIcebergGlueCatalogModule.java deleted file mode 100644 index 2db57a9288f1..000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestingIcebergGlueCatalogModule.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.iceberg.catalog.glue; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.glue.model.Table; -import com.google.inject.Binder; -import com.google.inject.Key; -import com.google.inject.Scopes; -import com.google.inject.TypeLiteral; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.plugin.hive.HideDeltaLakeTables; -import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.hive.metastore.glue.v1.ForGlueHiveMetastore; -import io.trino.plugin.hive.metastore.glue.v1.GlueCredentialsProvider; -import io.trino.plugin.hive.metastore.glue.v1.GlueHiveMetastoreConfig; -import io.trino.plugin.hive.metastore.glue.v1.GlueMetastoreModule; -import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; -import io.trino.plugin.iceberg.catalog.TrinoCatalogFactory; - -import java.util.function.Predicate; - -import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; -import static java.util.Objects.requireNonNull; -import static org.weakref.jmx.guice.ExportBinder.newExporter; - -public class TestingIcebergGlueCatalogModule - extends AbstractConfigurationAwareModule -{ - private final AWSGlueAsyncAdapterProvider awsGlueAsyncAdapterProvider; - - public TestingIcebergGlueCatalogModule(AWSGlueAsyncAdapterProvider awsGlueAsyncAdapterProvider) - { - this.awsGlueAsyncAdapterProvider = requireNonNull(awsGlueAsyncAdapterProvider, "awsGlueAsyncAdapterProvider is null"); - } - - @Override - protected void setup(Binder binder) - { - configBinder(binder).bindConfig(GlueHiveMetastoreConfig.class); - configBinder(binder).bindConfigDefaults(GlueHiveMetastoreConfig.class, config -> config.setSkipArchive(true)); - configBinder(binder).bindConfig(IcebergGlueCatalogConfig.class); - binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON); - newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName(); - binder.bind(AWSCredentialsProvider.class).toProvider(GlueCredentialsProvider.class).in(Scopes.SINGLETON); - binder.bind(IcebergTableOperationsProvider.class).to(TestingGlueIcebergTableOperationsProvider.class).in(Scopes.SINGLETON); - binder.bind(TrinoCatalogFactory.class).to(TrinoGlueCatalogFactory.class).in(Scopes.SINGLETON); - newExporter(binder).export(TrinoCatalogFactory.class).withGeneratedName(); - binder.bind(AWSGlueAsyncAdapterProvider.class).toInstance(awsGlueAsyncAdapterProvider); - - // Required to inject HiveMetastoreFactory for migrate procedure - binder.bind(Key.get(boolean.class, HideDeltaLakeTables.class)).toInstance(false); - newOptionalBinder(binder, Key.get(new TypeLiteral>() {}, ForGlueHiveMetastore.class)) - .setBinding().toInstance(table -> true); - install(new GlueMetastoreModule()); - } -} diff --git a/testing/trino-product-tests/pom.xml b/testing/trino-product-tests/pom.xml index 94811b3376d7..eda592db41e3 100644 --- a/testing/trino-product-tests/pom.xml +++ b/testing/trino-product-tests/pom.xml @@ -23,11 +23,6 @@ aws-java-sdk-core - - com.amazonaws - 
aws-java-sdk-glue - - com.amazonaws aws-java-sdk-s3 @@ -227,6 +222,11 @@ testng + + software.amazon.awssdk + glue + + com.clickhouse clickhouse-jdbc diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java index 70fea807582c..42a95b129004 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java @@ -13,19 +13,16 @@ */ package io.trino.tests.product.deltalake; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetDatabaseRequest; -import com.amazonaws.services.glue.model.GetTableRequest; -import com.amazonaws.services.glue.model.Table; import com.google.common.collect.ImmutableSet; import io.airlift.log.Logger; import io.trino.tempto.ProductTest; import io.trino.tempto.query.QueryResult; import io.trino.testng.services.Flaky; import org.testng.annotations.Test; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.EntityNotFoundException; +import software.amazon.awssdk.services.glue.model.Table; import java.time.Instant; import java.time.temporal.ChronoUnit; @@ -34,7 +31,7 @@ import java.util.stream.Collectors; import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static io.trino.plugin.hive.metastore.glue.v1.GlueToTrinoConverter.getTableType; +import static io.trino.plugin.hive.metastore.glue.GlueConverter.getTableType; import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS; import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.DATABRICKS_COMMUNICATION_FAILURE_ISSUE; @@ -57,7 +54,7 @@ public class TestDatabricksWithGlueMetastoreCleanUp @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) public void testCleanUpOldTablesUsingDelta() { - AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.standard().build(); + GlueClient glueClient = GlueClient.create(); long startTime = currentTimeMillis(); List schemas = onTrino().executeQuery("SELECT DISTINCT(table_schema) FROM information_schema.tables") .rows().stream() @@ -70,18 +67,18 @@ public void testCleanUpOldTablesUsingDelta() schemas.forEach(schema -> cleanSchema(schema, startTime, glueClient)); } - private void cleanSchema(String schema, long startTime, AWSGlueAsync glueClient) + private void cleanSchema(String schema, long startTime, GlueClient glueClient) { Database database; try { - database = glueClient.getDatabase(new GetDatabaseRequest().withName(schema)).getDatabase(); + database = glueClient.getDatabase(builder -> builder.name(schema)).database(); } catch (EntityNotFoundException _) { // this may happen when database is being deleted concurrently return; } - if (database.getCreateTime().toInstant().isAfter(SCHEMA_CLEANUP_THRESHOLD)) { + if (database.createTime().isAfter(SCHEMA_CLEANUP_THRESHOLD)) { log.info("Skip dropping recently created 
schema %s", schema); return; } @@ -93,8 +90,8 @@ private void cleanSchema(String schema, long startTime, AWSGlueAsync glueClient) int droppedTablesCount = 0; for (String tableName : allTestTableNames) { try { - Table table = glueClient.getTable(new GetTableRequest().withDatabaseName(schema).withName(tableName)).getTable(); - Instant createTime = table.getCreateTime().toInstant(); + Table table = glueClient.getTable(builder -> builder.databaseName(schema).name(tableName)).table(); + Instant createTime = table.createTime(); if (createTime.isBefore(SCHEMA_CLEANUP_THRESHOLD)) { if (getTableType(table).contains("VIEW")) { onTrino().executeQuery(format("DROP VIEW IF EXISTS %s.%s", schema, tableName)); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeDatabricksCleanUpGlueMetastore.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeDatabricksCleanUpGlueMetastore.java index 01b02d984683..e538cc2a1840 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeDatabricksCleanUpGlueMetastore.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeDatabricksCleanUpGlueMetastore.java @@ -13,22 +13,16 @@ */ package io.trino.tests.product.deltalake; -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetDatabasesResult; import io.airlift.log.Logger; -import io.trino.plugin.hive.metastore.glue.AwsApiCallStats; import io.trino.tempto.ProductTest; import org.testng.annotations.Test; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.EntityNotFoundException; +import software.amazon.awssdk.services.glue.model.GetDatabasesResponse; import java.util.List; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS; import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; import static java.lang.System.currentTimeMillis; @@ -43,27 +37,21 @@ public class TestDeltaLakeDatabricksCleanUpGlueMetastore @Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS}) public void testCleanupOrphanedDatabases() { - AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.defaultClient(); + GlueClient glueClient = GlueClient.create(); long creationTimeMillisThreshold = currentTimeMillis() - DAYS.toMillis(1); - List orphanedDatabases = getPaginatedResults( - glueClient::getDatabases, - new GetDatabasesRequest(), - GetDatabasesRequest::setNextToken, - GetDatabasesResult::getNextToken, - new AwsApiCallStats()) - .map(GetDatabasesResult::getDatabaseList) + List orphanedDatabases = glueClient.getDatabasesPaginator(_ -> {}).stream() + .map(GetDatabasesResponse::databaseList) .flatMap(List::stream) .filter(database -> isOrphanedTestDatabase(database, creationTimeMillisThreshold)) - .map(Database::getName) - .collect(toImmutableList()); + .map(Database::name) + .toList(); if (!orphanedDatabases.isEmpty()) { 
log.info("Found %s %s* databases that look orphaned, removing", orphanedDatabases.size(), TEST_DATABASE_NAME_PREFIX); orphanedDatabases.forEach(database -> { try { log.info("Deleting %s database", database); - glueClient.deleteDatabase(new DeleteDatabaseRequest() - .withName(database)); + glueClient.deleteDatabase(builder -> builder.name(database)); } catch (EntityNotFoundException e) { log.info("Database [%s] not found, could be removed by other cleanup process", database); @@ -77,7 +65,7 @@ public void testCleanupOrphanedDatabases() private static boolean isOrphanedTestDatabase(Database database, long creationTimeMillisThreshold) { - return database.getName().startsWith(TEST_DATABASE_NAME_PREFIX) && - database.getCreateTime().getTime() <= creationTimeMillisThreshold; + return database.name().startsWith(TEST_DATABASE_NAME_PREFIX) && + database.createTime().toEpochMilli() <= creationTimeMillisThreshold; } } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/DeltaLakeTestUtils.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/DeltaLakeTestUtils.java index ea84c144a1d4..1ad6c19a787a 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/DeltaLakeTestUtils.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/DeltaLakeTestUtils.java @@ -13,7 +13,6 @@ */ package io.trino.tests.product.deltalake.util; -import com.amazonaws.services.glue.model.ConcurrentModificationException; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; @@ -24,6 +23,7 @@ import io.airlift.log.Logger; import io.trino.tempto.query.QueryResult; import org.intellij.lang.annotations.Language; +import software.amazon.awssdk.services.glue.model.ConcurrentModificationException; import java.sql.SQLException; import java.time.temporal.ChronoUnit;