Skip to content

Commit

Permalink
Remove Glue v1 metastore
Browse files (browse the repository at this point in the history)
  • Loading branch information
electrum committed Mar 8, 2025
1 parent e437b16 commit 17fcbb7
Show file tree
Hide file tree
Showing 66 changed files with 440 additions and 5,878 deletions.
36 changes: 0 additions & 36 deletions .mvn/modernizer/violations.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,42 +150,6 @@
<comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Table.getStorageDescriptor:()Lcom/amazonaws/services/glue/model/StorageDescriptor;</name>
<version>1.1</version>
<comment>Storage descriptor is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getStorageDescriptor</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Table.getTableType:()Ljava/lang/String;</name>
<version>1.1</version>
<comment>Table type is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getTableType</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Column.getParameters:()Ljava/util/Map;</name>
<version>1.1</version>
<comment>Column parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getColumnParameters</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Table.getParameters:()Ljava/util/Map;</name>
<version>1.1</version>
<comment>Table parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getTableParameters</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Partition.getParameters:()Ljava/util/Map;</name>
<version>1.1</version>
<comment>Partition parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getPartitionParameters</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/SerDeInfo.getParameters:()Ljava/util/Map;</name>
<version>1.1</version>
<comment>SerDeInfo parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getSerDeInfoParameters</comment>
</violation>

<violation>
<name>org/apache/hadoop/fs/FileSystem.close:()V</name>
<version>1.1</version>
Expand Down
5 changes: 0 additions & 5 deletions plugin/trino-delta-lake/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@
<description>Trino - Delta Lake connector</description>

<dependencies>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-glue</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import io.airlift.configuration.AbstractConfigurationAwareModule;
import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreModule;
import io.trino.plugin.deltalake.metastore.glue.DeltaLakeGlueMetastoreModule;
import io.trino.plugin.deltalake.metastore.glue.v1.DeltaLakeGlueV1MetastoreModule;
import io.trino.plugin.deltalake.metastore.thrift.DeltaLakeThriftMetastoreModule;
import io.trino.plugin.hive.metastore.CachingHiveMetastoreModule;
import io.trino.plugin.hive.metastore.MetastoreTypeConfig;
Expand All @@ -32,7 +31,6 @@ protected void setup(Binder binder)
case THRIFT -> new DeltaLakeThriftMetastoreModule();
case FILE -> new DeltaLakeFileMetastoreModule();
case GLUE -> new DeltaLakeGlueMetastoreModule();
case GLUE_V1 -> new DeltaLakeGlueV1MetastoreModule();
});

install(new CachingHiveMetastoreModule());
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -82,30 +82,6 @@ public void testThriftMetastore()
.hasMessageContaining("Error: Configuration property 'delta.hide-non-delta-lake-tables' was not used");
}

@Test
public void testGlueV1Metastore()
{
ConnectorFactory factory = getConnectorFactory();
factory.create(
"test",
ImmutableMap.of(
"hive.metastore", "glue-v1",
"hive.metastore.glue.region", "us-east-2",
"bootstrap.quiet", "true"),
new TestingConnectorContext())
.shutdown();

assertThatThrownBy(() -> factory.create(
"test",
ImmutableMap.of(
"hive.metastore", "glue",
"hive.metastore.uri", "thrift://foo:1234",
"bootstrap.quiet", "true"),
new TestingConnectorContext()))
.isInstanceOf(ApplicationConfigurationException.class)
.hasMessageContaining("Error: Configuration property 'hive.metastore.uri' was not used");
}

@Test
public void testGlueMetastore()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,54 +13,43 @@
*/
package io.trino.plugin.deltalake.metastore.glue;

import com.amazonaws.services.glue.AWSGlueAsync;
import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder;
import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
import com.amazonaws.services.glue.model.EntityNotFoundException;
import com.amazonaws.services.glue.model.GetDatabasesRequest;
import com.amazonaws.services.glue.model.GetDatabasesResult;
import io.airlift.log.Logger;
import io.trino.plugin.hive.metastore.glue.AwsApiCallStats;
import org.junit.jupiter.api.Test;
import software.amazon.awssdk.services.glue.GlueClient;
import software.amazon.awssdk.services.glue.model.Database;
import software.amazon.awssdk.services.glue.model.EntityNotFoundException;
import software.amazon.awssdk.services.glue.model.GetDatabasesResponse;

import java.time.Duration;
import java.time.Instant;
import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults;
import static java.lang.System.currentTimeMillis;
import static java.util.concurrent.TimeUnit.DAYS;

public class TestDeltaLakeCleanUpGlueMetastore
{
private static final Logger log = Logger.get(TestDeltaLakeCleanUpGlueMetastore.class);

private static final String TEST_DATABASE_NAME_PREFIX = "test_";
private static final Duration CLEANUP_THRESHOLD = Duration.ofDays(1);

@Test
public void cleanupOrphanedDatabases()
{
AWSGlueAsync glueClient = AWSGlueAsyncClientBuilder.defaultClient();
long creationTimeMillisThreshold = currentTimeMillis() - DAYS.toMillis(1);
List<String> orphanedDatabases = getPaginatedResults(
glueClient::getDatabases,
new GetDatabasesRequest(),
GetDatabasesRequest::setNextToken,
GetDatabasesResult::getNextToken,
new AwsApiCallStats())
.map(GetDatabasesResult::getDatabaseList)
GlueClient glueClient = GlueClient.create();
Instant creationTimeThreshold = Instant.now().minus(CLEANUP_THRESHOLD);
List<String> orphanedDatabases = glueClient.getDatabasesPaginator(_ -> {}).stream()
.map(GetDatabasesResponse::databaseList)
.flatMap(List::stream)
.filter(glueDatabase -> glueDatabase.getName().startsWith(TEST_DATABASE_NAME_PREFIX) &&
glueDatabase.getCreateTime().getTime() <= creationTimeMillisThreshold)
.map(com.amazonaws.services.glue.model.Database::getName)
.collect(toImmutableList());
.filter(database -> database.name().startsWith(TEST_DATABASE_NAME_PREFIX))
.filter(database -> database.createTime().isBefore(creationTimeThreshold))
.map(Database::name)
.toList();

if (!orphanedDatabases.isEmpty()) {
log.info("Found %s %s* databases that look orphaned, removing", orphanedDatabases.size(), TEST_DATABASE_NAME_PREFIX);
orphanedDatabases.forEach(database -> {
try {
log.info("Deleting %s database", database);
glueClient.deleteDatabase(new DeleteDatabaseRequest()
.withName(database));
glueClient.deleteDatabase(builder -> builder.name(database));
}
catch (EntityNotFoundException e) {
log.info("Database [%s] not found, could be removed by other cleanup process", database);
Expand Down
Loading

0 comments on commit 17fcbb7

Please sign in to comment.