github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/fulltext_rebuild.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "fmt" 19 "io" 20 "strings" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 "github.com/dolthub/go-mysql-server/sql/fulltext" 24 25 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 27 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 28 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 29 ) 30 31 // rebuildableFulltextTable contains a table and schema that should have its Full-Text indexes rebuilt. 32 type rebuildableFulltextTable struct { 33 Name string 34 Table *doltdb.Table 35 Schema schema.Schema 36 } 37 38 // rebuildFullTextIndexes scans the mergedRoot and rebuilds all of the pseudo-index tables that were modified by both 39 // roots (ours and theirs), or had parents that were modified by both roots. 40 func rebuildFullTextIndexes(ctx *sql.Context, mergedRoot, ourRoot, theirRoot doltdb.RootValue, visitedTables map[string]struct{}) (doltdb.RootValue, error) { 41 // Grab a list of all tables on the root 42 allTableNames, err := mergedRoot.GetTableNames(ctx, doltdb.DefaultSchemaName) 43 if err != nil { 44 return nil, err 45 } 46 47 // Contains all of the tables for which we need to rebuild full-text indexes. 48 var tablesToRebuild []rebuildableFulltextTable 49 50 // This loop will create a set of tables and psuedo-index tables which 51 // will not be deleted at the end of this loop. Orphaned psuedo-index 52 // tables, which no longer have a parent table, will be deleted, for 53 // example, because they will not appear in this set. 54 doNotDeleteTables := make(map[string]struct{}) 55 56 // The following loop will populate |doNotDeleteTables| and 57 // |tablesToRebuild|. 58 // 59 // For |doNotDeleteTables|, its logic is as follows: 60 // 1) Every existing real table in |mergedRoot| should be in it. 61 // 2) The psuedo-table for every existing full-text index in every 62 // existing table in |mergedRoot| should be in it. 63 // 64 // For |tablesToRebuild|, its logic is as follows: 65 // 66 // 1) If the table or any of its full-text index pseudo-tables were 67 // visited by the merge--i.e., merger.MergeTable() reported an 68 // operation result other than |TableUnmodified|. 69 // 2) *And* if the table or any of its full-text index pseudo-tables 70 // are different between the merge base and ours. 71 // 3) *And* if the table or any of its full-text index pseudo-tables 72 // are different between the merge base and theirs. 73 // 74 // Then the table or its full-text index pseudo-tables were potentially 75 // involved in an actual three-way merge and the full-text index 76 // pseudo-tables could be out of date. 77 for _, tblName := range allTableNames { 78 if doltdb.IsFullTextTable(tblName) { 79 continue 80 } 81 // Add this table to the non-deletion set tables, since it's not a pseudo-index table. 82 doNotDeleteTables[tblName] = struct{}{} 83 84 tbl, ok, err := mergedRoot.GetTable(ctx, doltdb.TableName{Name: tblName}) 85 if err != nil { 86 return nil, err 87 } 88 if !ok { 89 return nil, fmt.Errorf("attempted to load `%s` during Full-Text merge but it could not be found", tblName) 90 } 91 sch, err := tbl.GetSchema(ctx) 92 if err != nil { 93 return nil, err 94 } 95 if !sch.Indexes().ContainsFullTextIndex() { 96 continue 97 } 98 99 // Also adds items to |doNotDeleteTables|. 100 needsRebuild, err := tableNeedsFullTextIndexRebuild(ctx, tblName, tbl, sch, mergedRoot, ourRoot, theirRoot, visitedTables, doNotDeleteTables) 101 if err != nil { 102 return nil, err 103 } 104 if needsRebuild { 105 tablesToRebuild = append(tablesToRebuild, rebuildableFulltextTable{ 106 Name: tblName, 107 Table: tbl, 108 Schema: sch, 109 }) 110 } 111 112 } 113 114 // Now loop over the tables that we were visited and rebuild only if they were modified in both roots 115 for _, tableToRebuild := range tablesToRebuild { 116 mergedRoot, err = rebuildFullTextIndexesForTable(ctx, tableToRebuild, mergedRoot) 117 if err != nil { 118 return nil, err 119 } 120 } 121 122 // Our last loop removes any orphaned pseudo-index tables 123 for _, tblName := range allTableNames { 124 if _, doNotDelete := doNotDeleteTables[tblName]; doNotDelete || !doltdb.IsFullTextTable(tblName) { 125 continue 126 } 127 mergedRoot, err = mergedRoot.RemoveTables(ctx, true, true, tblName) 128 if err != nil { 129 return nil, err 130 } 131 } 132 133 return mergedRoot, nil 134 } 135 136 func tableNeedsFullTextIndexRebuild(ctx *sql.Context, tblName string, tbl *doltdb.Table, sch schema.Schema, 137 mergedRoot, ourRoot, theirRoot doltdb.RootValue, 138 visitedTables map[string]struct{}, doNotDeleteTables map[string]struct{}) (bool, error) { 139 // Even if the parent table was not visited, we still need to check every pseudo-index table due to potential 140 // name overlapping between roots. This also applies to checking whether both ours and theirs have changes. 141 _, wasVisited := visitedTables[tblName] 142 oursChanged, err := tableChangedFromRoot(ctx, tblName, tbl, ourRoot) 143 if err != nil { 144 return false, err 145 } 146 theirsChanged, err := tableChangedFromRoot(ctx, tblName, tbl, theirRoot) 147 if err != nil { 148 return false, err 149 } 150 for _, idx := range sch.Indexes().AllIndexes() { 151 if !idx.IsFullText() { 152 continue 153 } 154 props := idx.FullTextProperties() 155 for _, ftTable := range props.TableNameSlice() { 156 // Add all of the pseudo-index tables to the non-deletion set 157 doNotDeleteTables[ftTable] = struct{}{} 158 159 // Check if the pseudo-index table was visited 160 if !wasVisited { 161 _, wasVisited = visitedTables[ftTable] 162 } 163 164 // Check if the pseudo-index table changed in both our root and their root 165 if !oursChanged { 166 oursChanged, err = tableChangedBetweenRoots(ctx, tblName, ourRoot, mergedRoot) 167 if err != nil { 168 return false, err 169 } 170 } 171 172 if !theirsChanged { 173 theirsChanged, err = tableChangedBetweenRoots(ctx, tblName, theirRoot, mergedRoot) 174 if err != nil { 175 return false, err 176 } 177 } 178 } 179 } 180 181 // If least one table was visited and something was different in all three roots, we rebuild all the indexes. 182 return wasVisited && oursChanged && theirsChanged, nil 183 } 184 185 func rebuildFullTextIndexesForTable(ctx *sql.Context, tableToRebuild rebuildableFulltextTable, mergedRoot doltdb.RootValue) (doltdb.RootValue, error) { 186 parentTable, err := createFulltextTable(ctx, tableToRebuild.Name, mergedRoot) 187 if err != nil { 188 return nil, err 189 } 190 191 var configTable *fulltextTable 192 var tableSet []fulltext.TableSet 193 allFTDoltTables := make(map[string]*fulltextTable) 194 for _, idx := range tableToRebuild.Schema.Indexes().AllIndexes() { 195 if !idx.IsFullText() { 196 continue 197 } 198 props := idx.FullTextProperties() 199 // Purge the existing data in each table 200 mergedRoot, err = purgeFulltextTableData(ctx, mergedRoot, props.TableNameSlice()...) 201 if err != nil { 202 return nil, err 203 } 204 // The config table is shared, and it's not written to during this process 205 if configTable == nil { 206 configTable, err = createFulltextTable(ctx, props.ConfigTable, mergedRoot) 207 if err != nil { 208 return nil, err 209 } 210 allFTDoltTables[props.ConfigTable] = configTable 211 } 212 positionTable, err := createFulltextTable(ctx, props.PositionTable, mergedRoot) 213 if err != nil { 214 return nil, err 215 } 216 docCountTable, err := createFulltextTable(ctx, props.DocCountTable, mergedRoot) 217 if err != nil { 218 return nil, err 219 } 220 globalCountTable, err := createFulltextTable(ctx, props.GlobalCountTable, mergedRoot) 221 if err != nil { 222 return nil, err 223 } 224 rowCountTable, err := createFulltextTable(ctx, props.RowCountTable, mergedRoot) 225 if err != nil { 226 return nil, err 227 } 228 allFTDoltTables[props.PositionTable] = positionTable 229 allFTDoltTables[props.DocCountTable] = docCountTable 230 allFTDoltTables[props.GlobalCountTable] = globalCountTable 231 allFTDoltTables[props.RowCountTable] = rowCountTable 232 ftIndex, err := index.ConvertFullTextToSql(ctx, "", tableToRebuild.Name, tableToRebuild.Schema, idx) 233 if err != nil { 234 return nil, err 235 } 236 tableSet = append(tableSet, fulltext.TableSet{ 237 Index: ftIndex.(fulltext.Index), 238 Position: positionTable, 239 DocCount: docCountTable, 240 GlobalCount: globalCountTable, 241 RowCount: rowCountTable, 242 }) 243 } 244 245 // We'll write the entire contents of our table into the Full-Text editor 246 ftEditor, err := fulltext.CreateEditor(ctx, parentTable, configTable, tableSet...) 247 if err != nil { 248 return nil, err 249 } 250 err = func() error { 251 defer ftEditor.Close(ctx) 252 ftEditor.StatementBegin(ctx) 253 defer ftEditor.StatementComplete(ctx) 254 255 rowIter, err := createRowIterForTable(ctx, tableToRebuild.Table, tableToRebuild.Schema) 256 if err != nil { 257 return err 258 } 259 defer rowIter.Close(ctx) 260 261 row, err := rowIter.Next(ctx) 262 for ; err == nil; row, err = rowIter.Next(ctx) { 263 if err = ftEditor.Insert(ctx, row); err != nil { 264 return err 265 } 266 } 267 if err != nil && err != io.EOF { 268 return err 269 } 270 return nil 271 }() 272 if err != nil { 273 return nil, err 274 } 275 276 // Update the root with all of the new tables' contents 277 for _, ftTable := range allFTDoltTables { 278 newTbl, err := ftTable.ApplyToTable(ctx) 279 if err != nil { 280 return nil, err 281 } 282 mergedRoot, err = mergedRoot.PutTable(ctx, doltdb.TableName{Name: ftTable.Name()}, newTbl) 283 if err != nil { 284 return nil, err 285 } 286 } 287 288 return mergedRoot, nil 289 } 290 291 // createRowIterForTable creates a sql.RowIter for the given table. 292 func createRowIterForTable(ctx *sql.Context, t *doltdb.Table, sch schema.Schema) (sql.RowIter, error) { 293 rowData, err := t.GetRowData(ctx) 294 if err != nil { 295 return nil, err 296 } 297 rows := durable.ProllyMapFromIndex(rowData) 298 rowCount, err := rows.Count() 299 if err != nil { 300 return nil, err 301 } 302 303 iter, err := rows.FetchOrdinalRange(ctx, 0, uint64(rowCount)) 304 if err != nil { 305 return nil, err 306 } 307 308 return index.NewProllyRowIterForMap(sch, rows, iter, nil), nil 309 } 310 311 // purgeFulltextTableData purges all Full-Text tables with the names given. Ignores any tables that are not Full-Text. 312 // Also ignores Full-Text config tables. Returns the updated root with the tables purged. 313 func purgeFulltextTableData(ctx *sql.Context, root doltdb.RootValue, tableNames ...string) (doltdb.RootValue, error) { 314 for _, tableName := range tableNames { 315 if !doltdb.IsFullTextTable(tableName) { 316 continue 317 } else if strings.HasSuffix(tableName, "config") { 318 // We don't want to purge the config table, we'll just roll with whatever is there for now 319 continue 320 } 321 tbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName}) 322 if err != nil { 323 return nil, err 324 } 325 if !ok { 326 return nil, fmt.Errorf("attempted to purge `%s` during Full-Text merge but it could not be found", tableName) 327 } 328 sch, err := tbl.GetSchema(ctx) 329 if err != nil { 330 return nil, err 331 } 332 rows, err := durable.NewEmptyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch) 333 if err != nil { 334 return nil, err 335 } 336 tbl, err = tbl.UpdateRows(ctx, rows) 337 if err != nil { 338 return nil, err 339 } 340 root, err = root.PutTable(ctx, doltdb.TableName{Name: tableName}, tbl) 341 if err != nil { 342 return nil, err 343 } 344 } 345 return root, nil 346 } 347 348 // tableChangedBetweenRoots returns whether the given table changed between roots. 349 func tableChangedBetweenRoots(ctx *sql.Context, tblName string, fromRoot, toRoot doltdb.RootValue) (bool, error) { 350 tbl, ok, err := toRoot.GetTable(ctx, doltdb.TableName{Name: tblName}) 351 if err != nil { 352 return false, err 353 } 354 if !ok { 355 return tableChangedFromRoot(ctx, tblName, nil, fromRoot) 356 } 357 return tableChangedFromRoot(ctx, tblName, tbl, fromRoot) 358 } 359 360 // tableChangedFromRoot returns whether the given table has changed compared to the one found in the given root. If the 361 // table does not exist in the root, then that counts as a change. A nil `tbl` is valid, which then checks if the table 362 // exists in the root. 363 func tableChangedFromRoot(ctx *sql.Context, tblName string, tbl *doltdb.Table, root doltdb.RootValue) (bool, error) { 364 // If `tbl` is nil, then we simply check if the table exists in the root 365 if tbl == nil { 366 return root.HasTable(ctx, tblName) 367 } 368 fromTbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tblName}) 369 if err != nil { 370 return false, err 371 } 372 if !ok { 373 return true, nil 374 } 375 // If the tables have different hashes, then something has changed. We don't know exactly what has changed, but 376 // we'll be conservative and accept any change. 377 tblHash, err := tbl.HashOf() 378 if err != nil { 379 return false, err 380 } 381 fromHash, err := fromTbl.HashOf() 382 if err != nil { 383 return false, err 384 } 385 return !tblHash.Equal(fromHash), nil 386 }