github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/fulltext_rebuild.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package merge
    16  
    17  import (
    18  	"fmt"
    19  	"io"
    20  	"strings"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  	"github.com/dolthub/go-mysql-server/sql/fulltext"
    24  
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
    29  )
    30  
// rebuildableFulltextTable contains a table and schema that should have its Full-Text indexes rebuilt.
type rebuildableFulltextTable struct {
	// Name is the name of the parent table (not of a pseudo-index table).
	Name string
	// Table is the parent table as loaded from the merged root.
	Table *doltdb.Table
	// Schema is the schema obtained from Table; its Full-Text indexes are the ones that get rebuilt.
	Schema schema.Schema
}
    37  
    38  // rebuildFullTextIndexes scans the mergedRoot and rebuilds all of the pseudo-index tables that were modified by both
    39  // roots (ours and theirs), or had parents that were modified by both roots.
    40  func rebuildFullTextIndexes(ctx *sql.Context, mergedRoot, ourRoot, theirRoot doltdb.RootValue, visitedTables map[string]struct{}) (doltdb.RootValue, error) {
    41  	// Grab a list of all tables on the root
    42  	allTableNames, err := mergedRoot.GetTableNames(ctx, doltdb.DefaultSchemaName)
    43  	if err != nil {
    44  		return nil, err
    45  	}
    46  
    47  	// Contains all of the tables for which we need to rebuild full-text indexes.
    48  	var tablesToRebuild []rebuildableFulltextTable
    49  
    50  	// This loop will create a set of tables and psuedo-index tables which
    51  	// will not be deleted at the end of this loop. Orphaned psuedo-index
    52  	// tables, which no longer have a parent table, will be deleted, for
    53  	// example, because they will not appear in this set.
    54  	doNotDeleteTables := make(map[string]struct{})
    55  
    56  	// The following loop will populate |doNotDeleteTables| and
    57  	// |tablesToRebuild|.
    58  	//
    59  	// For |doNotDeleteTables|, its logic is as follows:
    60  	// 1) Every existing real table in |mergedRoot| should be in it.
    61  	// 2) The psuedo-table for every existing full-text index in every
    62  	// existing table in |mergedRoot| should be in it.
    63  	//
    64  	// For |tablesToRebuild|, its logic is as follows:
    65  	//
    66  	// 1) If the table or any of its full-text index pseudo-tables were
    67  	// visited by the merge--i.e., merger.MergeTable() reported an
    68  	// operation result other than |TableUnmodified|.
    69  	// 2) *And* if the table or any of its full-text index pseudo-tables
    70  	// are different between the merge base and ours.
    71  	// 3) *And* if the table or any of its full-text index pseudo-tables
    72  	// are different between the merge base and theirs.
    73  	//
    74  	// Then the table or its full-text index pseudo-tables were potentially
    75  	// involved in an actual three-way merge and the full-text index
    76  	// pseudo-tables could be out of date.
    77  	for _, tblName := range allTableNames {
    78  		if doltdb.IsFullTextTable(tblName) {
    79  			continue
    80  		}
    81  		// Add this table to the non-deletion set tables, since it's not a pseudo-index table.
    82  		doNotDeleteTables[tblName] = struct{}{}
    83  
    84  		tbl, ok, err := mergedRoot.GetTable(ctx, doltdb.TableName{Name: tblName})
    85  		if err != nil {
    86  			return nil, err
    87  		}
    88  		if !ok {
    89  			return nil, fmt.Errorf("attempted to load `%s` during Full-Text merge but it could not be found", tblName)
    90  		}
    91  		sch, err := tbl.GetSchema(ctx)
    92  		if err != nil {
    93  			return nil, err
    94  		}
    95  		if !sch.Indexes().ContainsFullTextIndex() {
    96  			continue
    97  		}
    98  
    99  		// Also adds items to |doNotDeleteTables|.
   100  		needsRebuild, err := tableNeedsFullTextIndexRebuild(ctx, tblName, tbl, sch, mergedRoot, ourRoot, theirRoot, visitedTables, doNotDeleteTables)
   101  		if err != nil {
   102  			return nil, err
   103  		}
   104  		if needsRebuild {
   105  			tablesToRebuild = append(tablesToRebuild, rebuildableFulltextTable{
   106  				Name:   tblName,
   107  				Table:  tbl,
   108  				Schema: sch,
   109  			})
   110  		}
   111  
   112  	}
   113  
   114  	// Now loop over the tables that we were visited and rebuild only if they were modified in both roots
   115  	for _, tableToRebuild := range tablesToRebuild {
   116  		mergedRoot, err = rebuildFullTextIndexesForTable(ctx, tableToRebuild, mergedRoot)
   117  		if err != nil {
   118  			return nil, err
   119  		}
   120  	}
   121  
   122  	// Our last loop removes any orphaned pseudo-index tables
   123  	for _, tblName := range allTableNames {
   124  		if _, doNotDelete := doNotDeleteTables[tblName]; doNotDelete || !doltdb.IsFullTextTable(tblName) {
   125  			continue
   126  		}
   127  		mergedRoot, err = mergedRoot.RemoveTables(ctx, true, true, tblName)
   128  		if err != nil {
   129  			return nil, err
   130  		}
   131  	}
   132  
   133  	return mergedRoot, nil
   134  }
   135  
   136  func tableNeedsFullTextIndexRebuild(ctx *sql.Context, tblName string, tbl *doltdb.Table, sch schema.Schema,
   137  	mergedRoot, ourRoot, theirRoot doltdb.RootValue,
   138  	visitedTables map[string]struct{}, doNotDeleteTables map[string]struct{}) (bool, error) {
   139  	// Even if the parent table was not visited, we still need to check every pseudo-index table due to potential
   140  	// name overlapping between roots. This also applies to checking whether both ours and theirs have changes.
   141  	_, wasVisited := visitedTables[tblName]
   142  	oursChanged, err := tableChangedFromRoot(ctx, tblName, tbl, ourRoot)
   143  	if err != nil {
   144  		return false, err
   145  	}
   146  	theirsChanged, err := tableChangedFromRoot(ctx, tblName, tbl, theirRoot)
   147  	if err != nil {
   148  		return false, err
   149  	}
   150  	for _, idx := range sch.Indexes().AllIndexes() {
   151  		if !idx.IsFullText() {
   152  			continue
   153  		}
   154  		props := idx.FullTextProperties()
   155  		for _, ftTable := range props.TableNameSlice() {
   156  			// Add all of the pseudo-index tables to the non-deletion set
   157  			doNotDeleteTables[ftTable] = struct{}{}
   158  
   159  			// Check if the pseudo-index table was visited
   160  			if !wasVisited {
   161  				_, wasVisited = visitedTables[ftTable]
   162  			}
   163  
   164  			// Check if the pseudo-index table changed in both our root and their root
   165  			if !oursChanged {
   166  				oursChanged, err = tableChangedBetweenRoots(ctx, tblName, ourRoot, mergedRoot)
   167  				if err != nil {
   168  					return false, err
   169  				}
   170  			}
   171  
   172  			if !theirsChanged {
   173  				theirsChanged, err = tableChangedBetweenRoots(ctx, tblName, theirRoot, mergedRoot)
   174  				if err != nil {
   175  					return false, err
   176  				}
   177  			}
   178  		}
   179  	}
   180  
   181  	// If least one table was visited and something was different in all three roots, we rebuild all the indexes.
   182  	return wasVisited && oursChanged && theirsChanged, nil
   183  }
   184  
   185  func rebuildFullTextIndexesForTable(ctx *sql.Context, tableToRebuild rebuildableFulltextTable, mergedRoot doltdb.RootValue) (doltdb.RootValue, error) {
   186  	parentTable, err := createFulltextTable(ctx, tableToRebuild.Name, mergedRoot)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  
   191  	var configTable *fulltextTable
   192  	var tableSet []fulltext.TableSet
   193  	allFTDoltTables := make(map[string]*fulltextTable)
   194  	for _, idx := range tableToRebuild.Schema.Indexes().AllIndexes() {
   195  		if !idx.IsFullText() {
   196  			continue
   197  		}
   198  		props := idx.FullTextProperties()
   199  		// Purge the existing data in each table
   200  		mergedRoot, err = purgeFulltextTableData(ctx, mergedRoot, props.TableNameSlice()...)
   201  		if err != nil {
   202  			return nil, err
   203  		}
   204  		// The config table is shared, and it's not written to during this process
   205  		if configTable == nil {
   206  			configTable, err = createFulltextTable(ctx, props.ConfigTable, mergedRoot)
   207  			if err != nil {
   208  				return nil, err
   209  			}
   210  			allFTDoltTables[props.ConfigTable] = configTable
   211  		}
   212  		positionTable, err := createFulltextTable(ctx, props.PositionTable, mergedRoot)
   213  		if err != nil {
   214  			return nil, err
   215  		}
   216  		docCountTable, err := createFulltextTable(ctx, props.DocCountTable, mergedRoot)
   217  		if err != nil {
   218  			return nil, err
   219  		}
   220  		globalCountTable, err := createFulltextTable(ctx, props.GlobalCountTable, mergedRoot)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  		rowCountTable, err := createFulltextTable(ctx, props.RowCountTable, mergedRoot)
   225  		if err != nil {
   226  			return nil, err
   227  		}
   228  		allFTDoltTables[props.PositionTable] = positionTable
   229  		allFTDoltTables[props.DocCountTable] = docCountTable
   230  		allFTDoltTables[props.GlobalCountTable] = globalCountTable
   231  		allFTDoltTables[props.RowCountTable] = rowCountTable
   232  		ftIndex, err := index.ConvertFullTextToSql(ctx, "", tableToRebuild.Name, tableToRebuild.Schema, idx)
   233  		if err != nil {
   234  			return nil, err
   235  		}
   236  		tableSet = append(tableSet, fulltext.TableSet{
   237  			Index:       ftIndex.(fulltext.Index),
   238  			Position:    positionTable,
   239  			DocCount:    docCountTable,
   240  			GlobalCount: globalCountTable,
   241  			RowCount:    rowCountTable,
   242  		})
   243  	}
   244  
   245  	// We'll write the entire contents of our table into the Full-Text editor
   246  	ftEditor, err := fulltext.CreateEditor(ctx, parentTable, configTable, tableSet...)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	err = func() error {
   251  		defer ftEditor.Close(ctx)
   252  		ftEditor.StatementBegin(ctx)
   253  		defer ftEditor.StatementComplete(ctx)
   254  
   255  		rowIter, err := createRowIterForTable(ctx, tableToRebuild.Table, tableToRebuild.Schema)
   256  		if err != nil {
   257  			return err
   258  		}
   259  		defer rowIter.Close(ctx)
   260  
   261  		row, err := rowIter.Next(ctx)
   262  		for ; err == nil; row, err = rowIter.Next(ctx) {
   263  			if err = ftEditor.Insert(ctx, row); err != nil {
   264  				return err
   265  			}
   266  		}
   267  		if err != nil && err != io.EOF {
   268  			return err
   269  		}
   270  		return nil
   271  	}()
   272  	if err != nil {
   273  		return nil, err
   274  	}
   275  
   276  	// Update the root with all of the new tables' contents
   277  	for _, ftTable := range allFTDoltTables {
   278  		newTbl, err := ftTable.ApplyToTable(ctx)
   279  		if err != nil {
   280  			return nil, err
   281  		}
   282  		mergedRoot, err = mergedRoot.PutTable(ctx, doltdb.TableName{Name: ftTable.Name()}, newTbl)
   283  		if err != nil {
   284  			return nil, err
   285  		}
   286  	}
   287  
   288  	return mergedRoot, nil
   289  }
   290  
   291  // createRowIterForTable creates a sql.RowIter for the given table.
   292  func createRowIterForTable(ctx *sql.Context, t *doltdb.Table, sch schema.Schema) (sql.RowIter, error) {
   293  	rowData, err := t.GetRowData(ctx)
   294  	if err != nil {
   295  		return nil, err
   296  	}
   297  	rows := durable.ProllyMapFromIndex(rowData)
   298  	rowCount, err := rows.Count()
   299  	if err != nil {
   300  		return nil, err
   301  	}
   302  
   303  	iter, err := rows.FetchOrdinalRange(ctx, 0, uint64(rowCount))
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  
   308  	return index.NewProllyRowIterForMap(sch, rows, iter, nil), nil
   309  }
   310  
   311  // purgeFulltextTableData purges all Full-Text tables with the names given. Ignores any tables that are not Full-Text.
   312  // Also ignores Full-Text config tables. Returns the updated root with the tables purged.
   313  func purgeFulltextTableData(ctx *sql.Context, root doltdb.RootValue, tableNames ...string) (doltdb.RootValue, error) {
   314  	for _, tableName := range tableNames {
   315  		if !doltdb.IsFullTextTable(tableName) {
   316  			continue
   317  		} else if strings.HasSuffix(tableName, "config") {
   318  			// We don't want to purge the config table, we'll just roll with whatever is there for now
   319  			continue
   320  		}
   321  		tbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName})
   322  		if err != nil {
   323  			return nil, err
   324  		}
   325  		if !ok {
   326  			return nil, fmt.Errorf("attempted to purge `%s` during Full-Text merge but it could not be found", tableName)
   327  		}
   328  		sch, err := tbl.GetSchema(ctx)
   329  		if err != nil {
   330  			return nil, err
   331  		}
   332  		rows, err := durable.NewEmptyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch)
   333  		if err != nil {
   334  			return nil, err
   335  		}
   336  		tbl, err = tbl.UpdateRows(ctx, rows)
   337  		if err != nil {
   338  			return nil, err
   339  		}
   340  		root, err = root.PutTable(ctx, doltdb.TableName{Name: tableName}, tbl)
   341  		if err != nil {
   342  			return nil, err
   343  		}
   344  	}
   345  	return root, nil
   346  }
   347  
   348  // tableChangedBetweenRoots returns whether the given table changed between roots.
   349  func tableChangedBetweenRoots(ctx *sql.Context, tblName string, fromRoot, toRoot doltdb.RootValue) (bool, error) {
   350  	tbl, ok, err := toRoot.GetTable(ctx, doltdb.TableName{Name: tblName})
   351  	if err != nil {
   352  		return false, err
   353  	}
   354  	if !ok {
   355  		return tableChangedFromRoot(ctx, tblName, nil, fromRoot)
   356  	}
   357  	return tableChangedFromRoot(ctx, tblName, tbl, fromRoot)
   358  }
   359  
   360  // tableChangedFromRoot returns whether the given table has changed compared to the one found in the given root. If the
   361  // table does not exist in the root, then that counts as a change. A nil `tbl` is valid, which then checks if the table
   362  // exists in the root.
   363  func tableChangedFromRoot(ctx *sql.Context, tblName string, tbl *doltdb.Table, root doltdb.RootValue) (bool, error) {
   364  	// If `tbl` is nil, then we simply check if the table exists in the root
   365  	if tbl == nil {
   366  		return root.HasTable(ctx, tblName)
   367  	}
   368  	fromTbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tblName})
   369  	if err != nil {
   370  		return false, err
   371  	}
   372  	if !ok {
   373  		return true, nil
   374  	}
   375  	// If the tables have different hashes, then something has changed. We don't know exactly what has changed, but
   376  	// we'll be conservative and accept any change.
   377  	tblHash, err := tbl.HashOf()
   378  	if err != nil {
   379  		return false, err
   380  	}
   381  	fromHash, err := fromTbl.HashOf()
   382  	if err != nil {
   383  		return false, err
   384  	}
   385  	return !tblHash.Equal(fromHash), nil
   386  }