code.vegaprotocol.io/vega@v0.79.0/datanode/networkhistory/snapshot/database_meta_data.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package snapshot
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"regexp"
    23  	"strings"
    24  	"time"
    25  
    26  	"code.vegaprotocol.io/vega/datanode/sqlstore"
    27  
    28  	"github.com/georgysavva/scany/pgxscan"
    29  	"github.com/jackc/pgx/v4"
    30  	"github.com/jackc/pgx/v4/pgxpool"
    31  	"github.com/pressly/goose/v3"
    32  	"github.com/shopspring/decimal"
    33  )
    34  
// DatabaseMetadata captures everything the snapshot code needs to know about
// the datanode database: per-table metadata, continuous aggregate details,
// the goose migration version, and pre-generated SQL for toggling constraints
// and autovacuum around bulk copy operations.
type DatabaseMetadata struct {
	// TableNameToMetaData maps each public-schema table name to its metadata.
	TableNameToMetaData map[string]TableMetadata
	// ContinuousAggregatesMetaData describes every timescale continuous
	// aggregate view found in the database.
	ContinuousAggregatesMetaData []ContinuousAggregateMetaData
	// DatabaseVersion is the current goose migration version (0 if none).
	DatabaseVersion int64
	// SQL to recreate/drop constraints, split between current-state tables
	// (plain tables) and history tables (hypertables).
	CurrentStateTablesCreateConstraintsSql []string
	CurrentStateTablesDropConstraintsSql   []string
	HistoryStateTablesCreateConstraintsSql []string
	HistoryStateTablesDropConstraintsSql   []string

	// ALTER TABLE statements that enable/disable autovacuum on every table,
	// used to quiesce vacuum activity during bulk loads.
	AllTablesEnableAutoVacuumSql  []string
	AllTablesDisableAutoVacuumSql []string
}
    47  
// TableMetadata describes a single database table.
type TableMetadata struct {
	// Name is the table name in the public schema.
	Name string
	// SortOrder is the column list of the table's primary key index
	// (as extracted from its pg_indexes definition).
	SortOrder string
	// Hypertable is true when the table is a timescale hypertable.
	Hypertable bool
	// PartitionColumn is the first dimension column of the hypertable;
	// empty for plain tables.
	PartitionColumn string
}
    54  
// ContinuousAggregateMetaData describes a timescale continuous aggregate view.
type ContinuousAggregateMetaData struct {
	// ID is the id of the aggregate's materialization hypertable in
	// _timescaledb_catalog.hypertable.
	ID int
	// Name is the continuous aggregate view name.
	Name string
	// BucketInterval is the time_bucket interval used by the view.
	BucketInterval time.Duration
}
    60  
// IndexInfo holds one row of pg_indexes; field names match the queried
// columns (tablename, indexname, indexdef) for pgxscan mapping.
type IndexInfo struct {
	Tablename string
	Indexname string
	Indexdef  string
}
    66  
// HypertablePartitionColumns holds one row of
// timescaledb_information.dimensions: a hypertable and its partition column.
type HypertablePartitionColumns struct {
	HypertableName string
	ColumnName     string
}
    71  
    72  func NewDatabaseMetaData(ctx context.Context, connPool *pgxpool.Pool) (DatabaseMetadata, error) {
    73  	// Ensure timescale extension is enabled before attempting to get metadata
    74  	_, err := connPool.Exec(ctx, "create extension if not exists timescaledb")
    75  	if err != nil {
    76  		return DatabaseMetadata{}, fmt.Errorf("failed to create timescale extension: %w", err)
    77  	}
    78  
    79  	dbVersion, err := getDBVersion(ctx, connPool)
    80  	if err != nil {
    81  		return DatabaseMetadata{}, fmt.Errorf("failed to get database version: %w", err)
    82  	}
    83  
    84  	if dbVersion == 0 {
    85  		return DatabaseMetadata{}, nil
    86  	}
    87  
    88  	tableNames, err := sqlstore.GetAllTableNames(ctx, connPool)
    89  	if err != nil {
    90  		return DatabaseMetadata{}, fmt.Errorf("failed to get names of tables to copy:%w", err)
    91  	}
    92  
    93  	tableNameToSortOrder, err := getTableSortOrders(ctx, connPool)
    94  	if err != nil {
    95  		return DatabaseMetadata{}, fmt.Errorf("failed to get table sort orders:%w", err)
    96  	}
    97  
    98  	hyperTableNames, err := getHyperTableNames(ctx, connPool)
    99  	if err != nil {
   100  		return DatabaseMetadata{}, fmt.Errorf("failed to get hyper table names:%w", err)
   101  	}
   102  
   103  	hypertablePartitionColumns, err := getHyperTablePartitionColumns(ctx, connPool)
   104  	if err != nil {
   105  		return DatabaseMetadata{}, fmt.Errorf("failed to get hyper table partition columns:%w", err)
   106  	}
   107  
   108  	caggsMeta, err := getContinuousAggregatesMetaData(ctx, connPool)
   109  	if err != nil {
   110  		return DatabaseMetadata{}, fmt.Errorf("failed to get continuous aggregate view names:%w", err)
   111  	}
   112  
   113  	currentStateCreateConstraintsSql, historyCreateConstraintsSql, err := getCreateConstraintsSql(ctx, connPool, hyperTableNames)
   114  	if err != nil {
   115  		return DatabaseMetadata{}, fmt.Errorf("failed to get create constrains sql:%w", err)
   116  	}
   117  
   118  	currentStateDropConstraintsSql, historyDropConstraintsSql, err := getDropConstraintsSql(ctx, connPool, hyperTableNames)
   119  	if err != nil {
   120  		return DatabaseMetadata{}, fmt.Errorf("failed to get drop constrains sql:%w", err)
   121  	}
   122  
   123  	allTablesEnableAutoVacuumSql, allTablesDisableAutoVacuumSql := createAutovacuumSql(tableNames)
   124  
   125  	result := DatabaseMetadata{
   126  		TableNameToMetaData:                    map[string]TableMetadata{},
   127  		DatabaseVersion:                        dbVersion,
   128  		ContinuousAggregatesMetaData:           caggsMeta,
   129  		CurrentStateTablesCreateConstraintsSql: currentStateCreateConstraintsSql,
   130  		CurrentStateTablesDropConstraintsSql:   currentStateDropConstraintsSql,
   131  		HistoryStateTablesCreateConstraintsSql: historyCreateConstraintsSql,
   132  		HistoryStateTablesDropConstraintsSql:   historyDropConstraintsSql,
   133  		AllTablesEnableAutoVacuumSql:           allTablesEnableAutoVacuumSql,
   134  		AllTablesDisableAutoVacuumSql:          allTablesDisableAutoVacuumSql,
   135  	}
   136  	for _, tableName := range tableNames {
   137  		partitionCol := ""
   138  		ok := false
   139  		if hyperTableNames[tableName] {
   140  			partitionCol, ok = hypertablePartitionColumns[tableName]
   141  			if !ok {
   142  				return DatabaseMetadata{}, fmt.Errorf("failed to get partition column for hyper table %s", tableName)
   143  			}
   144  		}
   145  
   146  		result.TableNameToMetaData[tableName] = TableMetadata{
   147  			Name:            tableName,
   148  			SortOrder:       tableNameToSortOrder[tableName],
   149  			Hypertable:      hyperTableNames[tableName],
   150  			PartitionColumn: partitionCol,
   151  		}
   152  	}
   153  
   154  	return result, nil
   155  }
   156  
   157  func createAutovacuumSql(tableNames []string) ([]string, []string) {
   158  	allTablesEnableAutoVacuumSql := make([]string, 0, len(tableNames))
   159  	for _, tableName := range tableNames {
   160  		allTablesEnableAutoVacuumSql = append(allTablesEnableAutoVacuumSql, fmt.Sprintf("ALTER TABLE %s SET (autovacuum_enabled = true)", tableName))
   161  	}
   162  
   163  	allTablesDisableAutoVacuumSql := make([]string, 0, len(tableNames))
   164  	for _, tableName := range tableNames {
   165  		allTablesDisableAutoVacuumSql = append(allTablesDisableAutoVacuumSql, fmt.Sprintf("ALTER TABLE %s SET (autovacuum_enabled = false)", tableName))
   166  	}
   167  	return allTablesEnableAutoVacuumSql, allTablesDisableAutoVacuumSql
   168  }
   169  
   170  func (d DatabaseMetadata) GetHistoryTableNames() []string {
   171  	var result []string
   172  	for _, meta := range d.TableNameToMetaData {
   173  		if meta.Hypertable {
   174  			result = append(result, meta.Name)
   175  		}
   176  	}
   177  
   178  	return result
   179  }
   180  
   181  func getTableSortOrders(ctx context.Context, conn *pgxpool.Pool) (map[string]string, error) {
   182  	var primaryKeyIndexes []IndexInfo
   183  	err := pgxscan.Select(ctx, conn, &primaryKeyIndexes,
   184  		`select tablename, Indexname, Indexdef from pg_indexes where schemaname ='public' and Indexname like '%_pkey' order by tablename`)
   185  	if err != nil {
   186  		return nil, fmt.Errorf("failed to get primary key indexes:%w", err)
   187  	}
   188  
   189  	includeRegexp := regexp.MustCompile(`(?i)include(\s*)\(.*$`)
   190  	tableNameToSortOrder := map[string]string{}
   191  	for _, pkIdx := range primaryKeyIndexes {
   192  		withoutInclude := includeRegexp.ReplaceAllString(pkIdx.Indexdef, "")
   193  		split := strings.Split(withoutInclude, "(")
   194  		if len(split) != 2 {
   195  			return nil, fmt.Errorf("unexpected primary key index definition:%s", pkIdx.Indexdef)
   196  		}
   197  		so := strings.Replace(split[1], ")", "", 1)
   198  		tableNameToSortOrder[pkIdx.Tablename] = so
   199  	}
   200  	return tableNameToSortOrder, nil
   201  }
   202  
   203  func getHyperTableNames(ctx context.Context, conn *pgxpool.Pool) (map[string]bool, error) {
   204  	tableNameRows, err := conn.Query(ctx, "SELECT hypertable_name FROM timescaledb_information.hypertables")
   205  	if err != nil {
   206  		return nil, fmt.Errorf("failed to query Hypertable names:%w", err)
   207  	}
   208  
   209  	result := map[string]bool{}
   210  	for tableNameRows.Next() {
   211  		tableName := ""
   212  		err = tableNameRows.Scan(&tableName)
   213  		if err != nil {
   214  			return nil, fmt.Errorf("failed to scan table Name:%w", err)
   215  		}
   216  		result[tableName] = true
   217  	}
   218  	return result, nil
   219  }
   220  
   221  func getHyperTablePartitionColumns(ctx context.Context, conn *pgxpool.Pool) (map[string]string, error) {
   222  	var partitionColumns []HypertablePartitionColumns
   223  	err := pgxscan.Select(ctx, conn, &partitionColumns,
   224  		`select hypertable_name, column_name from timescaledb_information.dimensions where hypertable_schema='public' and dimension_number=1`)
   225  	if err != nil {
   226  		return nil, fmt.Errorf("failed to partition columns:%w", err)
   227  	}
   228  
   229  	tableNameToPartitionColumn := map[string]string{}
   230  	for _, column := range partitionColumns {
   231  		tableNameToPartitionColumn[column.HypertableName] = column.ColumnName
   232  	}
   233  	return tableNameToPartitionColumn, nil
   234  }
   235  
   236  func getContinuousAggregatesMetaData(ctx context.Context, conn *pgxpool.Pool) ([]ContinuousAggregateMetaData, error) {
   237  	var views []struct {
   238  		ViewName       string
   239  		ViewDefinition string
   240  	}
   241  	err := pgxscan.Select(ctx, conn, &views, "SELECT view_name, view_definition FROM timescaledb_information.continuous_aggregates")
   242  	if err != nil {
   243  		return nil, fmt.Errorf("failed to query continuous aggregate definitions:%w", err)
   244  	}
   245  
   246  	metas := make([]ContinuousAggregateMetaData, 0, len(views))
   247  
   248  	for _, view := range views {
   249  		interval, err := extractIntervalFromViewDefinition(view.ViewDefinition)
   250  		if err != nil {
   251  			return nil, fmt.Errorf("failed to get interval for view %s: %w", view.ViewName, err)
   252  		}
   253  
   254  		intervalAsDuration, err := intervalToSeconds(ctx, conn, interval)
   255  		if err != nil {
   256  			return nil, fmt.Errorf("failed to convert interval to seconds duration: %w", err)
   257  		}
   258  
   259  		query := fmt.Sprintf(`SELECT id from _timescaledb_catalog.hypertable
   260      			WHERE table_name=(
   261          		SELECT materialization_hypertable_name FROM timescaledb_information.continuous_aggregates WHERE view_name='%s');`, view.ViewName)
   262  		row := conn.QueryRow(ctx, query)
   263  
   264  		var caggID int
   265  		err = row.Scan(&caggID)
   266  		if err != nil {
   267  			return nil, fmt.Errorf("failed to get cagg id : %w", err)
   268  		}
   269  
   270  		meta := ContinuousAggregateMetaData{
   271  			ID:             caggID,
   272  			Name:           view.ViewName,
   273  			BucketInterval: intervalAsDuration,
   274  		}
   275  
   276  		metas = append(metas, meta)
   277  	}
   278  
   279  	return metas, nil
   280  }
   281  
   282  func getCreateConstraintsSql(ctx context.Context, conn *pgxpool.Pool, hyperTableNames map[string]bool) (currentState []string,
   283  	history []string, err error,
   284  ) {
   285  	var constraints []struct {
   286  		Tablename string
   287  		Sql       string
   288  	}
   289  
   290  	err = pgxscan.Select(ctx, conn, &constraints,
   291  		`SELECT relname as tablename, 'ALTER TABLE '||nspname||'.'||relname||' ADD CONSTRAINT '||conname||' '|| pg_get_constraintdef(pg_constraint.oid)||';' as sql
   292  		FROM pg_constraint
   293  		INNER JOIN pg_class ON conrelid=pg_class.oid
   294  		INNER JOIN pg_namespace ON pg_namespace.oid=pg_class.relnamespace where pg_namespace.nspname='public'
   295  		ORDER BY CASE WHEN contype='f' THEN 0 ELSE 1 END DESC,contype DESC,nspname DESC,relname DESC,conname DESC`)
   296  
   297  	if err != nil {
   298  		return nil, nil, fmt.Errorf("failed to get create constraints sql:%w", err)
   299  	}
   300  
   301  	for _, constraint := range constraints {
   302  		if hyperTableNames[constraint.Tablename] {
   303  			history = append(history, constraint.Sql)
   304  		} else {
   305  			currentState = append(currentState, constraint.Sql)
   306  		}
   307  	}
   308  
   309  	return currentState, history, nil
   310  }
   311  
   312  func getDropConstraintsSql(ctx context.Context, conn *pgxpool.Pool, hyperTableNames map[string]bool) (currentState []string,
   313  	history []string, err error,
   314  ) {
   315  	var constraints []struct {
   316  		Tablename string
   317  		Sql       string
   318  	}
   319  
   320  	err = pgxscan.Select(ctx, conn, &constraints,
   321  		`SELECT relname as tablename, 'ALTER TABLE '||nspname||'.'||relname||' DROP CONSTRAINT '||conname||';' as sql
   322  		FROM pg_constraint
   323  		INNER JOIN pg_class ON conrelid=pg_class.oid
   324  		INNER JOIN pg_namespace ON pg_namespace.oid=pg_class.relnamespace where pg_namespace.nspname='public'
   325  		ORDER BY CASE WHEN contype='f' THEN 0 ELSE 1 END,contype,nspname,relname,conname`)
   326  
   327  	if err != nil {
   328  		return nil, nil, fmt.Errorf("failed to get drop constraints sql:%w", err)
   329  	}
   330  
   331  	for _, constraint := range constraints {
   332  		if hyperTableNames[constraint.Tablename] {
   333  			history = append(history, constraint.Sql)
   334  		} else {
   335  			currentState = append(currentState, constraint.Sql)
   336  		}
   337  	}
   338  
   339  	return currentState, history, nil
   340  }
   341  
   342  func extractIntervalFromViewDefinition(viewDefinition string) (string, error) {
   343  	re := regexp.MustCompile(`time_bucket\('(.*)'`)
   344  	match := re.FindStringSubmatch(viewDefinition)
   345  	if match == nil || len(match) != 2 {
   346  		return "", errors.New("failed to extract interval from view definition")
   347  	}
   348  
   349  	return match[1], nil
   350  }
   351  
   352  func intervalToSeconds(ctx context.Context, conn sqlstore.Connection, interval string) (time.Duration, error) {
   353  	query := fmt.Sprintf("SELECT EXTRACT(epoch FROM INTERVAL '%s')", interval)
   354  	row := conn.QueryRow(ctx, query)
   355  
   356  	var seconds decimal.Decimal
   357  	err := row.Scan(&seconds)
   358  	if err != nil {
   359  		return 0, fmt.Errorf("failed to get interval in seconds %s: %w", interval, err)
   360  	}
   361  
   362  	return time.Duration(seconds.IntPart()) * time.Second, nil
   363  }
   364  
   365  // getDBVersion copied from the goose library and modified to support using a pre-allocated connection. It's worth noting
   366  // that this method also has the side effect of creating the goose version table if it does not exist as per the original
   367  // goose code.
   368  func getDBVersion(ctx context.Context, conn *pgxpool.Pool) (int64, error) {
   369  	version, err := ensureDBVersion(ctx, conn)
   370  	if err != nil {
   371  		return -1, err
   372  	}
   373  
   374  	return version, nil
   375  }
   376  
   377  // ensureDBVersion copied from the goose library and modified to support using a pre-allocated connection.
   378  func ensureDBVersion(ctx context.Context, conn *pgxpool.Pool) (int64, error) {
   379  	rows, err := dbVersionQuery(ctx, conn)
   380  	if err != nil {
   381  		return 0, createVersionTable(ctx, conn)
   382  	}
   383  	defer rows.Close()
   384  
   385  	// The most recent record for each migration specifies
   386  	// whether it has been applied or rolled back.
   387  	// The first version we find that has been applied is the current version.
   388  
   389  	toSkip := make([]int64, 0)
   390  
   391  	for rows.Next() {
   392  		var row goose.MigrationRecord
   393  		if err = rows.Scan(&row.VersionID, &row.IsApplied); err != nil {
   394  			return 0, fmt.Errorf("failed to scan row: %w", err)
   395  		}
   396  
   397  		// have we already marked this version to be skipped?
   398  		skip := false
   399  		for _, v := range toSkip {
   400  			if v == row.VersionID {
   401  				skip = true
   402  				break
   403  			}
   404  		}
   405  
   406  		if skip {
   407  			continue
   408  		}
   409  
   410  		// if version has been applied we're done
   411  		if row.IsApplied {
   412  			return row.VersionID, nil
   413  		}
   414  
   415  		// latest version of migration has not been applied.
   416  		toSkip = append(toSkip, row.VersionID)
   417  	}
   418  	if err := rows.Err(); err != nil {
   419  		return 0, fmt.Errorf("failed to get next row: %w", err)
   420  	}
   421  
   422  	return 0, goose.ErrNoNextVersion
   423  }
   424  
   425  // dbVersionQuery copied from the goose library and modified to support using a pre-allocated connection.
   426  func dbVersionQuery(ctx context.Context, conn *pgxpool.Pool) (pgx.Rows, error) {
   427  	rows, err := conn.Query(ctx, fmt.Sprintf("SELECT version_id, is_applied from %s ORDER BY id DESC", goose.TableName()))
   428  	if err != nil {
   429  		return nil, err
   430  	}
   431  
   432  	return rows, err
   433  }
   434  
   435  // createVersionTable copied from the goose library and modified to support using a pre-allocated connection.
   436  func createVersionTable(ctx context.Context, conn *pgxpool.Pool) error {
   437  	txn, err := conn.Begin(ctx)
   438  	if err != nil {
   439  		return err
   440  	}
   441  
   442  	if _, err := txn.Exec(ctx, fmt.Sprintf(`CREATE TABLE %s (
   443              	id serial NOT NULL,
   444                  version_id bigint NOT NULL,
   445                  is_applied boolean NOT NULL,
   446                  tstamp timestamp NULL default now(),
   447                  PRIMARY KEY(id)
   448              );`, goose.TableName())); err != nil {
   449  		txn.Rollback(ctx)
   450  		return err
   451  	}
   452  
   453  	version := 0
   454  	applied := true
   455  	if _, err := txn.Exec(ctx, fmt.Sprintf("INSERT INTO %s (version_id, is_applied) VALUES ($1, $2);", goose.TableName()), version, applied); err != nil {
   456  		txn.Rollback(ctx)
   457  		return err
   458  	}
   459  
   460  	return txn.Commit(ctx)
   461  }