code.vegaprotocol.io/vega@v0.79.0/datanode/networkhistory/snapshot/database_meta_data.go (about) 1 // Copyright (C) 2023 Gobalsky Labs Limited 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <http://www.gnu.org/licenses/>. 15 16 package snapshot 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "regexp" 23 "strings" 24 "time" 25 26 "code.vegaprotocol.io/vega/datanode/sqlstore" 27 28 "github.com/georgysavva/scany/pgxscan" 29 "github.com/jackc/pgx/v4" 30 "github.com/jackc/pgx/v4/pgxpool" 31 "github.com/pressly/goose/v3" 32 "github.com/shopspring/decimal" 33 ) 34 35 type DatabaseMetadata struct { 36 TableNameToMetaData map[string]TableMetadata 37 ContinuousAggregatesMetaData []ContinuousAggregateMetaData 38 DatabaseVersion int64 39 CurrentStateTablesCreateConstraintsSql []string 40 CurrentStateTablesDropConstraintsSql []string 41 HistoryStateTablesCreateConstraintsSql []string 42 HistoryStateTablesDropConstraintsSql []string 43 44 AllTablesEnableAutoVacuumSql []string 45 AllTablesDisableAutoVacuumSql []string 46 } 47 48 type TableMetadata struct { 49 Name string 50 SortOrder string 51 Hypertable bool 52 PartitionColumn string 53 } 54 55 type ContinuousAggregateMetaData struct { 56 ID int 57 Name string 58 BucketInterval time.Duration 59 } 60 61 type IndexInfo struct { 62 Tablename string 63 Indexname string 64 Indexdef string 65 } 66 67 type HypertablePartitionColumns struct { 68 HypertableName string 69 ColumnName string 70 } 71 72 func NewDatabaseMetaData(ctx context.Context, connPool *pgxpool.Pool) (DatabaseMetadata, error) { 73 // Ensure timescale extension is enabled before attempting to get metadata 74 _, err := connPool.Exec(ctx, "create extension if not exists timescaledb") 75 if err != nil { 76 return DatabaseMetadata{}, fmt.Errorf("failed to create timescale extension: %w", err) 77 } 78 79 dbVersion, err := getDBVersion(ctx, connPool) 80 if err != nil { 81 return DatabaseMetadata{}, fmt.Errorf("failed to get database version: %w", err) 82 } 83 84 if dbVersion == 0 { 85 return DatabaseMetadata{}, nil 86 } 87 88 tableNames, err := sqlstore.GetAllTableNames(ctx, connPool) 89 if err != nil { 90 return DatabaseMetadata{}, fmt.Errorf("failed to get names of tables to copy:%w", err) 91 } 92 93 tableNameToSortOrder, err := getTableSortOrders(ctx, connPool) 94 if err != nil { 95 return DatabaseMetadata{}, fmt.Errorf("failed to get table sort orders:%w", err) 96 } 97 98 hyperTableNames, err := getHyperTableNames(ctx, connPool) 99 if err != nil { 100 return DatabaseMetadata{}, fmt.Errorf("failed to get hyper table names:%w", err) 101 } 102 103 hypertablePartitionColumns, err := getHyperTablePartitionColumns(ctx, connPool) 104 if err != nil { 105 return DatabaseMetadata{}, fmt.Errorf("failed to get hyper table partition columns:%w", err) 106 } 107 108 caggsMeta, err := getContinuousAggregatesMetaData(ctx, connPool) 109 if err != nil { 110 return DatabaseMetadata{}, fmt.Errorf("failed to get continuous aggregate view names:%w", err) 111 } 112 113 currentStateCreateConstraintsSql, historyCreateConstraintsSql, err := getCreateConstraintsSql(ctx, connPool, hyperTableNames) 114 if err != nil { 115 return DatabaseMetadata{}, fmt.Errorf("failed to get create constrains sql:%w", err) 116 } 117 118 currentStateDropConstraintsSql, historyDropConstraintsSql, err := getDropConstraintsSql(ctx, connPool, hyperTableNames) 119 if err != nil { 120 return DatabaseMetadata{}, fmt.Errorf("failed to get drop constrains sql:%w", err) 121 } 122 123 allTablesEnableAutoVacuumSql, allTablesDisableAutoVacuumSql := createAutovacuumSql(tableNames) 124 125 result := DatabaseMetadata{ 126 TableNameToMetaData: map[string]TableMetadata{}, 127 DatabaseVersion: dbVersion, 128 ContinuousAggregatesMetaData: caggsMeta, 129 CurrentStateTablesCreateConstraintsSql: currentStateCreateConstraintsSql, 130 CurrentStateTablesDropConstraintsSql: currentStateDropConstraintsSql, 131 HistoryStateTablesCreateConstraintsSql: historyCreateConstraintsSql, 132 HistoryStateTablesDropConstraintsSql: historyDropConstraintsSql, 133 AllTablesEnableAutoVacuumSql: allTablesEnableAutoVacuumSql, 134 AllTablesDisableAutoVacuumSql: allTablesDisableAutoVacuumSql, 135 } 136 for _, tableName := range tableNames { 137 partitionCol := "" 138 ok := false 139 if hyperTableNames[tableName] { 140 partitionCol, ok = hypertablePartitionColumns[tableName] 141 if !ok { 142 return DatabaseMetadata{}, fmt.Errorf("failed to get partition column for hyper table %s", tableName) 143 } 144 } 145 146 result.TableNameToMetaData[tableName] = TableMetadata{ 147 Name: tableName, 148 SortOrder: tableNameToSortOrder[tableName], 149 Hypertable: hyperTableNames[tableName], 150 PartitionColumn: partitionCol, 151 } 152 } 153 154 return result, nil 155 } 156 157 func createAutovacuumSql(tableNames []string) ([]string, []string) { 158 allTablesEnableAutoVacuumSql := make([]string, 0, len(tableNames)) 159 for _, tableName := range tableNames { 160 allTablesEnableAutoVacuumSql = append(allTablesEnableAutoVacuumSql, fmt.Sprintf("ALTER TABLE %s SET (autovacuum_enabled = true)", tableName)) 161 } 162 163 allTablesDisableAutoVacuumSql := make([]string, 0, len(tableNames)) 164 for _, tableName := range tableNames { 165 allTablesDisableAutoVacuumSql = append(allTablesDisableAutoVacuumSql, fmt.Sprintf("ALTER TABLE %s SET (autovacuum_enabled = false)", tableName)) 166 } 167 return allTablesEnableAutoVacuumSql, allTablesDisableAutoVacuumSql 168 } 169 170 func (d DatabaseMetadata) GetHistoryTableNames() []string { 171 var result []string 172 for _, meta := range d.TableNameToMetaData { 173 if meta.Hypertable { 174 result = append(result, meta.Name) 175 } 176 } 177 178 return result 179 } 180 181 func getTableSortOrders(ctx context.Context, conn *pgxpool.Pool) (map[string]string, error) { 182 var primaryKeyIndexes []IndexInfo 183 err := pgxscan.Select(ctx, conn, &primaryKeyIndexes, 184 `select tablename, Indexname, Indexdef from pg_indexes where schemaname ='public' and Indexname like '%_pkey' order by tablename`) 185 if err != nil { 186 return nil, fmt.Errorf("failed to get primary key indexes:%w", err) 187 } 188 189 includeRegexp := regexp.MustCompile(`(?i)include(\s*)\(.*$`) 190 tableNameToSortOrder := map[string]string{} 191 for _, pkIdx := range primaryKeyIndexes { 192 withoutInclude := includeRegexp.ReplaceAllString(pkIdx.Indexdef, "") 193 split := strings.Split(withoutInclude, "(") 194 if len(split) != 2 { 195 return nil, fmt.Errorf("unexpected primary key index definition:%s", pkIdx.Indexdef) 196 } 197 so := strings.Replace(split[1], ")", "", 1) 198 tableNameToSortOrder[pkIdx.Tablename] = so 199 } 200 return tableNameToSortOrder, nil 201 } 202 203 func getHyperTableNames(ctx context.Context, conn *pgxpool.Pool) (map[string]bool, error) { 204 tableNameRows, err := conn.Query(ctx, "SELECT hypertable_name FROM timescaledb_information.hypertables") 205 if err != nil { 206 return nil, fmt.Errorf("failed to query Hypertable names:%w", err) 207 } 208 209 result := map[string]bool{} 210 for tableNameRows.Next() { 211 tableName := "" 212 err = tableNameRows.Scan(&tableName) 213 if err != nil { 214 return nil, fmt.Errorf("failed to scan table Name:%w", err) 215 } 216 result[tableName] = true 217 } 218 return result, nil 219 } 220 221 func getHyperTablePartitionColumns(ctx context.Context, conn *pgxpool.Pool) (map[string]string, error) { 222 var partitionColumns []HypertablePartitionColumns 223 err := pgxscan.Select(ctx, conn, &partitionColumns, 224 `select hypertable_name, column_name from timescaledb_information.dimensions where hypertable_schema='public' and dimension_number=1`) 225 if err != nil { 226 return nil, fmt.Errorf("failed to partition columns:%w", err) 227 } 228 229 tableNameToPartitionColumn := map[string]string{} 230 for _, column := range partitionColumns { 231 tableNameToPartitionColumn[column.HypertableName] = column.ColumnName 232 } 233 return tableNameToPartitionColumn, nil 234 } 235 236 func getContinuousAggregatesMetaData(ctx context.Context, conn *pgxpool.Pool) ([]ContinuousAggregateMetaData, error) { 237 var views []struct { 238 ViewName string 239 ViewDefinition string 240 } 241 err := pgxscan.Select(ctx, conn, &views, "SELECT view_name, view_definition FROM timescaledb_information.continuous_aggregates") 242 if err != nil { 243 return nil, fmt.Errorf("failed to query continuous aggregate definitions:%w", err) 244 } 245 246 metas := make([]ContinuousAggregateMetaData, 0, len(views)) 247 248 for _, view := range views { 249 interval, err := extractIntervalFromViewDefinition(view.ViewDefinition) 250 if err != nil { 251 return nil, fmt.Errorf("failed to get interval for view %s: %w", view.ViewName, err) 252 } 253 254 intervalAsDuration, err := intervalToSeconds(ctx, conn, interval) 255 if err != nil { 256 return nil, fmt.Errorf("failed to convert interval to seconds duration: %w", err) 257 } 258 259 query := fmt.Sprintf(`SELECT id from _timescaledb_catalog.hypertable 260 WHERE table_name=( 261 SELECT materialization_hypertable_name FROM timescaledb_information.continuous_aggregates WHERE view_name='%s');`, view.ViewName) 262 row := conn.QueryRow(ctx, query) 263 264 var caggID int 265 err = row.Scan(&caggID) 266 if err != nil { 267 return nil, fmt.Errorf("failed to get cagg id : %w", err) 268 } 269 270 meta := ContinuousAggregateMetaData{ 271 ID: caggID, 272 Name: view.ViewName, 273 BucketInterval: intervalAsDuration, 274 } 275 276 metas = append(metas, meta) 277 } 278 279 return metas, nil 280 } 281 282 func getCreateConstraintsSql(ctx context.Context, conn *pgxpool.Pool, hyperTableNames map[string]bool) (currentState []string, 283 history []string, err error, 284 ) { 285 var constraints []struct { 286 Tablename string 287 Sql string 288 } 289 290 err = pgxscan.Select(ctx, conn, &constraints, 291 `SELECT relname as tablename, 'ALTER TABLE '||nspname||'.'||relname||' ADD CONSTRAINT '||conname||' '|| pg_get_constraintdef(pg_constraint.oid)||';' as sql 292 FROM pg_constraint 293 INNER JOIN pg_class ON conrelid=pg_class.oid 294 INNER JOIN pg_namespace ON pg_namespace.oid=pg_class.relnamespace where pg_namespace.nspname='public' 295 ORDER BY CASE WHEN contype='f' THEN 0 ELSE 1 END DESC,contype DESC,nspname DESC,relname DESC,conname DESC`) 296 297 if err != nil { 298 return nil, nil, fmt.Errorf("failed to get create constraints sql:%w", err) 299 } 300 301 for _, constraint := range constraints { 302 if hyperTableNames[constraint.Tablename] { 303 history = append(history, constraint.Sql) 304 } else { 305 currentState = append(currentState, constraint.Sql) 306 } 307 } 308 309 return currentState, history, nil 310 } 311 312 func getDropConstraintsSql(ctx context.Context, conn *pgxpool.Pool, hyperTableNames map[string]bool) (currentState []string, 313 history []string, err error, 314 ) { 315 var constraints []struct { 316 Tablename string 317 Sql string 318 } 319 320 err = pgxscan.Select(ctx, conn, &constraints, 321 `SELECT relname as tablename, 'ALTER TABLE '||nspname||'.'||relname||' DROP CONSTRAINT '||conname||';' as sql 322 FROM pg_constraint 323 INNER JOIN pg_class ON conrelid=pg_class.oid 324 INNER JOIN pg_namespace ON pg_namespace.oid=pg_class.relnamespace where pg_namespace.nspname='public' 325 ORDER BY CASE WHEN contype='f' THEN 0 ELSE 1 END,contype,nspname,relname,conname`) 326 327 if err != nil { 328 return nil, nil, fmt.Errorf("failed to get drop constraints sql:%w", err) 329 } 330 331 for _, constraint := range constraints { 332 if hyperTableNames[constraint.Tablename] { 333 history = append(history, constraint.Sql) 334 } else { 335 currentState = append(currentState, constraint.Sql) 336 } 337 } 338 339 return currentState, history, nil 340 } 341 342 func extractIntervalFromViewDefinition(viewDefinition string) (string, error) { 343 re := regexp.MustCompile(`time_bucket\('(.*)'`) 344 match := re.FindStringSubmatch(viewDefinition) 345 if match == nil || len(match) != 2 { 346 return "", errors.New("failed to extract interval from view definition") 347 } 348 349 return match[1], nil 350 } 351 352 func intervalToSeconds(ctx context.Context, conn sqlstore.Connection, interval string) (time.Duration, error) { 353 query := fmt.Sprintf("SELECT EXTRACT(epoch FROM INTERVAL '%s')", interval) 354 row := conn.QueryRow(ctx, query) 355 356 var seconds decimal.Decimal 357 err := row.Scan(&seconds) 358 if err != nil { 359 return 0, fmt.Errorf("failed to get interval in seconds %s: %w", interval, err) 360 } 361 362 return time.Duration(seconds.IntPart()) * time.Second, nil 363 } 364 365 // getDBVersion copied from the goose library and modified to support using a pre-allocated connection. It's worth noting 366 // that this method also has the side effect of creating the goose version table if it does not exist as per the original 367 // goose code. 368 func getDBVersion(ctx context.Context, conn *pgxpool.Pool) (int64, error) { 369 version, err := ensureDBVersion(ctx, conn) 370 if err != nil { 371 return -1, err 372 } 373 374 return version, nil 375 } 376 377 // ensureDBVersion copied from the goose library and modified to support using a pre-allocated connection. 378 func ensureDBVersion(ctx context.Context, conn *pgxpool.Pool) (int64, error) { 379 rows, err := dbVersionQuery(ctx, conn) 380 if err != nil { 381 return 0, createVersionTable(ctx, conn) 382 } 383 defer rows.Close() 384 385 // The most recent record for each migration specifies 386 // whether it has been applied or rolled back. 387 // The first version we find that has been applied is the current version. 388 389 toSkip := make([]int64, 0) 390 391 for rows.Next() { 392 var row goose.MigrationRecord 393 if err = rows.Scan(&row.VersionID, &row.IsApplied); err != nil { 394 return 0, fmt.Errorf("failed to scan row: %w", err) 395 } 396 397 // have we already marked this version to be skipped? 398 skip := false 399 for _, v := range toSkip { 400 if v == row.VersionID { 401 skip = true 402 break 403 } 404 } 405 406 if skip { 407 continue 408 } 409 410 // if version has been applied we're done 411 if row.IsApplied { 412 return row.VersionID, nil 413 } 414 415 // latest version of migration has not been applied. 416 toSkip = append(toSkip, row.VersionID) 417 } 418 if err := rows.Err(); err != nil { 419 return 0, fmt.Errorf("failed to get next row: %w", err) 420 } 421 422 return 0, goose.ErrNoNextVersion 423 } 424 425 // dbVersionQuery copied from the goose library and modified to support using a pre-allocated connection. 426 func dbVersionQuery(ctx context.Context, conn *pgxpool.Pool) (pgx.Rows, error) { 427 rows, err := conn.Query(ctx, fmt.Sprintf("SELECT version_id, is_applied from %s ORDER BY id DESC", goose.TableName())) 428 if err != nil { 429 return nil, err 430 } 431 432 return rows, err 433 } 434 435 // createVersionTable copied from the goose library and modified to support using a pre-allocated connection. 436 func createVersionTable(ctx context.Context, conn *pgxpool.Pool) error { 437 txn, err := conn.Begin(ctx) 438 if err != nil { 439 return err 440 } 441 442 if _, err := txn.Exec(ctx, fmt.Sprintf(`CREATE TABLE %s ( 443 id serial NOT NULL, 444 version_id bigint NOT NULL, 445 is_applied boolean NOT NULL, 446 tstamp timestamp NULL default now(), 447 PRIMARY KEY(id) 448 );`, goose.TableName())); err != nil { 449 txn.Rollback(ctx) 450 return err 451 } 452 453 version := 0 454 applied := true 455 if _, err := txn.Exec(ctx, fmt.Sprintf("INSERT INTO %s (version_id, is_applied) VALUES ($1, $2);", goose.TableName()), version, applied); err != nil { 456 txn.Rollback(ctx) 457 return err 458 } 459 460 return txn.Commit(ctx) 461 }