Skip to content

Commit 384e38b

Browse files
felipepessoto authored and vkorukanti committed
Optimize Min/Max using Delta metadata
## Description

Follow-up to #1192, which optimizes COUNT. This PR adds support for MIN/MAX as well.

Fix #2092

Created additional unit tests to cover MIN/MAX.

## Does this PR introduce _any_ user-facing changes?

No, only a performance improvement.

Closes #1525

Signed-off-by: vkorukanti <[email protected]>
GitOrigin-RevId: 9b88f76bf99cc38bd4cf9d3397b7bb8ade822d0b
1 parent 9ef9eb1 commit 384e38b

File tree

5 files changed

+1178
-226
lines changed

5 files changed

+1178
-226
lines changed

spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -528,7 +528,7 @@ object Protocol {
528528
}
529529
if (manifestGenerationEnabled) {
530530
// Only allow enabling this, if there are no DVs present.
531-
if (!DeletionVectorUtils.isTableDVFree(spark, snapshot)) {
531+
if (!DeletionVectorUtils.isTableDVFree(snapshot)) {
532532
throw new DeltaTablePropertyValidationFailedException(
533533
table = tableName,
534534
subClass = ExistingDeletionVectorsWithIncrementalManifestGeneration)

spark/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ trait DeletionVectorUtils {
3333
* Run a query on the delta log to determine if the given snapshot contains no deletion vectors.
3434
* Return `false` if it does contain deletion vectors.
3535
*/
36-
def isTableDVFree(spark: SparkSession, snapshot: Snapshot): Boolean = {
36+
def isTableDVFree(snapshot: Snapshot): Boolean = {
3737
val dvsReadable = deletionVectorsReadable(snapshot)
3838

3939
if (dvsReadable) {

spark/src/main/scala/org/apache/spark/sql/delta/hooks/GenerateSymlinkManifest.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ trait GenerateSymlinkManifestImpl extends PostCommitHook with DeltaLogging with
245245
}
246246

247247
protected def assertTableIsDVFree(spark: SparkSession, snapshot: Snapshot): Unit = {
248-
if (!isTableDVFree(spark, snapshot)) {
248+
if (!isTableDVFree(snapshot)) {
249249
throw DeltaErrors.generateNotSupportedWithDeletionVectors()
250250
}
251251
}

0 commit comments

Comments
 (0)