Skip to content

Commit

Permalink
Refactor merging of partitioned table statistics to occur earlier
Browse files Browse the repository at this point in the history
Previously we merged this data after we completed gathering all output
statistics. Besides this being challenging to make work correctly in
light of locked tables (which mean indexes into the statistics array
need to be looked up carefully via RelationIdx), it also had to resort
to some workarounds to find the relationship of child indexes to their
parent indexes.

Instead, replace the logic with an approach that gathers all declarative
partition child tables and indexes during the input phase, and then
use that to look up the statistics to be added when we first construct
the output statistics data.

Note this replaces the previous index child/parent heuristic (which worked
based on the columns being identical, and only considered unique indexes)
with simply relying on child indexes being tracked in pg_inherits to
belong to their parent.

This means the parent index sizes will now exclude any indexes that were
directly created on the children but shared a name with an empty parent
index, which may be the case when inheritance-based partitioning was
migrated to declarative partitioning.

On the other hand, this will now consistently make most declaratively
partitioned tables have their index sizes correctly added up to the
parent, even if the indexes are not unique, or use expressions.
  • Loading branch information
lfittl committed Feb 16, 2025
1 parent ea6654e commit 6f41c5f
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 76 deletions.
2 changes: 1 addition & 1 deletion input/postgres/relation_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ SELECT c.oid,
JOIN pg_class t ON inhrelid = t.oid
WHERE inhparent = c.oid
-- Only include sizes from child partitions skipped by ignore_schema_regexp.
-- Child partitions tracked by the collector have their sizes added by mergePartitionSizes.
-- Child partitions tracked by the collector have their sizes added later.
AND ($1 != '' AND (n.nspname || '.' || t.relname) ~* $1)
), 0) AS size_bytes,
CASE c.reltoastrelid WHEN NULL THEN 0 ELSE COALESCE(pg_catalog.pg_total_relation_size(c.reltoastrelid), 0) END AS toast_bytes,
Expand Down
19 changes: 15 additions & 4 deletions input/postgres/relations.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ const relationsSQLOidField = "c.relhasoids AS relation_has_oids"
const relationsSQLpg12OidField = "false AS relation_has_oids"

const relationsSQL string = `
WITH locked_relids AS (SELECT DISTINCT relation relid FROM pg_catalog.pg_locks WHERE mode = 'AccessExclusiveLock' AND relation IS NOT NULL AND locktype = 'relation')
WITH locked_relids AS (SELECT DISTINCT relation relid FROM pg_catalog.pg_locks WHERE mode = 'AccessExclusiveLock' AND relation IS NOT NULL AND locktype = 'relation'),
partition_children AS (SELECT inhparent, array_agg(inhrelid) AS relids FROM pg_catalog.pg_inherits JOIN pg_catalog.pg_partitioned_table ON (partrelid = inhparent) GROUP BY 1)
SELECT c.oid,
n.nspname AS schema_name,
c.relname AS table_name,
Expand All @@ -35,6 +36,7 @@ const relationsSQL string = `
COALESCE((SELECT p.partstrat FROM pg_partitioned_table p WHERE p.partrelid = c.oid), '') AS partition_strategy,
(SELECT p.partattrs FROM pg_partitioned_table p WHERE p.partrelid = c.oid) AS partition_columns,
COALESCE(pg_catalog.pg_get_partkeydef(c.oid), '') AS partition_expr,
(SELECT relids FROM partition_children WHERE inhparent = c.oid) AS child_relids,
locked_relids.relid IS NOT NULL AS exclusively_locked,
COALESCE(toast.relname, '') AS toast_table
FROM pg_catalog.pg_class c
Expand Down Expand Up @@ -84,7 +86,8 @@ const columnsSQL string = `
FROM locked_relids`

const indicesSQL string = `
WITH locked_relids AS (SELECT DISTINCT relation relid FROM pg_catalog.pg_locks WHERE mode = 'AccessExclusiveLock' AND relation IS NOT NULL AND locktype = 'relation')
WITH locked_relids AS (SELECT DISTINCT relation relid FROM pg_catalog.pg_locks WHERE mode = 'AccessExclusiveLock' AND relation IS NOT NULL AND locktype = 'relation'),
inheritance_children AS (SELECT inhparent, array_agg(inhrelid) AS relids FROM pg_catalog.pg_inherits GROUP BY 1)
SELECT c.oid,
c2.oid,
i.indkey::text,
Expand All @@ -96,6 +99,7 @@ SELECT c.oid,
pg_catalog.pg_get_constraintdef(con.oid, FALSE),
c2.reloptions,
(SELECT a.amname FROM pg_catalog.pg_am a JOIN pg_catalog.pg_opclass o ON (a.oid = o.opcmethod) WHERE o.oid = i.indclass[0]),
(SELECT relids FROM inheritance_children WHERE inhparent = i.indexrelid) AS child_relids,
false AS exclusively_locked
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON (n.oid = c.relnamespace)
Expand Down Expand Up @@ -123,6 +127,7 @@ SELECT c.oid,
NULL,
NULL,
'',
NULL,
true
FROM locked_relids
`
Expand Down Expand Up @@ -213,18 +218,21 @@ func GetRelations(ctx context.Context, db *sql.DB, postgresVersion state.Postgre
for rows.Next() {
var row state.PostgresRelation
var options null.String
var childRelids null.String
var partCols null.String

err = rows.Scan(&row.Oid, &row.SchemaName, &row.RelationName, &row.RelationType,
&options, &row.HasOids, &row.PersistenceType, &row.HasInheritanceChildren,
&row.HasToast, &row.FrozenXID, &row.MinimumMultixactXID, &row.ParentTableOid,
&row.PartitionBoundary, &row.PartitionStrategy, &partCols, &row.PartitionedBy,
&row.ExclusivelyLocked, &row.ToastName)
&childRelids, &row.ExclusivelyLocked, &row.ToastName)
if err != nil {
err = fmt.Errorf("Relations/Scan: %s", err)
return nil, err
}

row.ChildTableOids = unpackPostgresOidArray(childRelids)

row.Options = make(map[string]string)
if options.Valid {
for _, cstr := range strings.Split(strings.Trim(options.String, "{}"), ",") {
Expand Down Expand Up @@ -303,15 +311,18 @@ func GetRelations(ctx context.Context, db *sql.DB, postgresVersion state.Postgre
var columns string
var options null.String
var exclusivelyLocked bool
var childRelids null.String

err = rows.Scan(&row.RelationOid, &row.IndexOid, &columns, &row.Name, &row.IsPrimary,
&row.IsUnique, &row.IsValid, &row.IndexDef, &row.ConstraintDef, &options, &row.IndexType,
&exclusivelyLocked)
&childRelids, &exclusivelyLocked)
if err != nil {
err = fmt.Errorf("Indices/Scan: %s", err)
return nil, err
}

row.ChildIndexOids = unpackPostgresOidArray(childRelids)

for _, cstr := range strings.Split(columns, " ") {
cint, _ := strconv.ParseInt(cstr, 10, 32)
row.Columns = append(row.Columns, int32(cint))
Expand Down
70 changes: 0 additions & 70 deletions output/transform/merge_partition_sizes.go

This file was deleted.

1 change: 0 additions & 1 deletion output/transform/postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ func transformPostgres(s snapshot.FullSnapshot, newState state.PersistedState, d
s = transformPostgresBackendCounts(s, transientState, roleOidToIdx, databaseOidToIdx)
s = transformPostgresExtensions(s, transientState, databaseOidToIdx)
s = transformPostgresBufferCache(s, transientState, databaseOidToIdx)
s = mergePartitionSizes(s, newState, transientState, databaseOidToIdx)

return s
}
Expand Down
31 changes: 31 additions & 0 deletions output/transform/postgres_relations.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,23 @@ func transformPostgresRelations(s snapshot.FullSnapshot, newState state.Persiste
} else {
statistic.AnalyzedAt = snapshot.NullTimeToNullTimestamp(stats.LastAnalyze)
}

// Add child table statistics to parent table statistics
for _, childTableOid := range relation.ChildTableOids {
stat, ok := diffedSchemaStats.RelationStats[childTableOid]
if ok {
statistic.NTupIns += stat.NTupIns
statistic.NTupUpd += stat.NTupUpd
statistic.NTupDel += stat.NTupDel
statistic.NTupHotUpd += stat.NTupHotUpd
statistic.NLiveTup += stat.NLiveTup
statistic.NDeadTup += stat.NDeadTup
statistic.SizeBytes += stat.SizeBytes
statistic.ToastSizeBytes += stat.ToastSizeBytes
statistic.CachedDataBytes += stat.CachedDataBytes
statistic.CachedToastBytes += stat.CachedToastBytes
}
}
s.RelationStatistics = append(s.RelationStatistics, &statistic)

// Events
Expand Down Expand Up @@ -256,6 +273,20 @@ func transformPostgresRelations(s snapshot.FullSnapshot, newState state.Persiste
if diffedSchemaStatsExist {
indexStats = diffedSchemaStats.IndexStats[index.IndexOid]
}
// Add child index statistics to parent index statistics
for _, childIndexOid := range index.ChildIndexOids {
stat, ok := diffedSchemaStats.IndexStats[childIndexOid]

if ok {
indexStats.SizeBytes += stat.SizeBytes
indexStats.IdxScan += stat.IdxScan
indexStats.IdxTupRead += stat.IdxTupRead
indexStats.IdxTupFetch += stat.IdxTupFetch
indexStats.IdxBlksRead += stat.IdxBlksRead
indexStats.IdxBlksHit += stat.IdxBlksHit
indexStats.CachedBytes += stat.CachedBytes
}
}
s.IndexStatistics = append(s.IndexStatistics, &snapshot.IndexStatistic{
IndexIdx: indexIdx,
SizeBytes: indexStats.SizeBytes,
Expand Down
7 changes: 7 additions & 0 deletions state/postgres_relations.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ type PostgresRelation struct {
PartitionColumns []int32
PartitionedBy string

// OIDs for declaratively partitioned child tables
ChildTableOids []Oid

// True if another process is currently holding an AccessExclusiveLock on this
// relation, this also means we don't collect columns/index/constraints data
ExclusivelyLocked bool
Expand Down Expand Up @@ -58,6 +61,10 @@ type PostgresIndex struct {
IndexDef string
ConstraintDef null.String
Options map[string]string

// OIDs for child indexes. This only includes indexes on declaratively
// partitioned child tables, where the index was declared on the parent.
ChildIndexOids []Oid
}

type PostgresConstraint struct {
Expand Down

0 comments on commit 6f41c5f

Please sign in to comment.