github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/enginetest/validation.go (about) 1 // Copyright 2020 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package enginetest 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 "github.com/dolthub/go-mysql-server/sql/mysql_db" 24 sqltypes "github.com/dolthub/go-mysql-server/sql/types" 25 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 27 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 28 "github.com/dolthub/dolt/go/libraries/doltcore/ref" 29 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 30 "github.com/dolthub/dolt/go/libraries/doltcore/sqle" 31 "github.com/dolthub/dolt/go/store/prolly" 32 "github.com/dolthub/dolt/go/store/prolly/tree" 33 "github.com/dolthub/dolt/go/store/types" 34 "github.com/dolthub/dolt/go/store/val" 35 ) 36 37 func ValidateDatabase(ctx context.Context, db sql.Database) (err error) { 38 switch tdb := db.(type) { 39 case sqle.Database: 40 return ValidateDoltDatabase(ctx, tdb) 41 case mysql_db.PrivilegedDatabase: 42 return ValidateDatabase(ctx, tdb.Unwrap()) 43 default: 44 return nil 45 } 46 } 47 48 func ValidateDoltDatabase(ctx context.Context, db sqle.Database) (err error) { 49 if !types.IsFormat_DOLT(db.GetDoltDB().Format()) { 50 return nil 51 } 52 for _, stage := range validationStages { 53 if err = stage(ctx, db); err != nil { 54 return err 55 } 56 } 57 return 58 } 59 60 type validator func(ctx context.Context, db sqle.Database) error 61 62 var validationStages = []validator{ 63 validateChunkReferences, 64 validateSecondaryIndexes, 65 } 66 67 // validateChunkReferences checks for dangling chunks. 68 func validateChunkReferences(ctx context.Context, db sqle.Database) error { 69 validateIndex := func(ctx context.Context, idx durable.Index) error { 70 pm := durable.ProllyMapFromIndex(idx) 71 return pm.WalkNodes(ctx, func(ctx context.Context, nd tree.Node) error { 72 if nd.Size() <= 0 { 73 return fmt.Errorf("encountered nil tree.Node") 74 } 75 return nil 76 }) 77 } 78 79 cb := func(n string, t *doltdb.Table, sch schema.Schema) (stop bool, err error) { 80 if sch == nil { 81 return true, fmt.Errorf("expected non-nil schema: %v", sch) 82 } 83 84 rows, err := t.GetRowData(ctx) 85 if err != nil { 86 return true, err 87 } 88 if err = validateIndex(ctx, rows); err != nil { 89 return true, err 90 } 91 92 indexes, err := t.GetIndexSet(ctx) 93 if err != nil { 94 return true, err 95 } 96 err = durable.IterAllIndexes(ctx, sch, indexes, func(_ string, idx durable.Index) error { 97 return validateIndex(ctx, idx) 98 }) 99 if err != nil { 100 return true, err 101 } 102 return 103 } 104 105 return iterDatabaseTables(ctx, db, cb) 106 } 107 108 // validateSecondaryIndexes checks that secondary index contents are consistent 109 // with primary index contents. 110 func validateSecondaryIndexes(ctx context.Context, db sqle.Database) error { 111 cb := func(n string, t *doltdb.Table, sch schema.Schema) (stop bool, err error) { 112 rows, err := t.GetRowData(ctx) 113 if err != nil { 114 return false, err 115 } 116 primary := durable.ProllyMapFromIndex(rows) 117 118 for _, def := range sch.Indexes().AllIndexes() { 119 set, err := t.GetIndexSet(ctx) 120 if err != nil { 121 return true, err 122 } 123 idx, err := set.GetIndex(ctx, sch, def.Name()) 124 if err != nil { 125 return true, err 126 } 127 secondary := durable.ProllyMapFromIndex(idx) 128 129 err = validateIndexConsistency(ctx, sch, def, primary, secondary) 130 if err != nil { 131 return true, err 132 } 133 } 134 return false, nil 135 } 136 return iterDatabaseTables(ctx, db, cb) 137 } 138 139 func validateIndexConsistency( 140 ctx context.Context, 141 sch schema.Schema, 142 def schema.Index, 143 primary, secondary prolly.Map, 144 ) error { 145 if schema.IsKeyless(sch) { 146 return validateKeylessIndex(ctx, sch, def, primary, secondary) 147 } else { 148 return validatePkIndex(ctx, sch, def, primary, secondary) 149 } 150 } 151 152 // printIndexContents prints the contents of |prollyMap| to stdout. Intended for use debugging 153 // index consistency issues. 154 func printIndexContents(ctx context.Context, prollyMap prolly.Map) { 155 fmt.Printf("Secondary index contents:\n") 156 iterAll, _ := prollyMap.IterAll(ctx) 157 for { 158 k, _, err := iterAll.Next(ctx) 159 if err == io.EOF { 160 break 161 } 162 fmt.Printf(" - k: %v \n", k) 163 } 164 } 165 166 func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Index, primary, secondary prolly.Map) error { 167 // Full-Text indexes do not make use of their internal map, so we may safely skip this check 168 if def.IsFullText() { 169 return nil 170 } 171 172 // Indexes on virtual columns cannot be rebuilt via the method below 173 if isVirtualIndex(def, sch) { 174 return nil 175 } 176 177 secondary = prolly.ConvertToSecondaryKeylessIndex(secondary) 178 idxDesc, _ := secondary.Descriptors() 179 builder := val.NewTupleBuilder(idxDesc) 180 mapping := ordinalMappingsForSecondaryIndex(sch, def) 181 _, vd := primary.Descriptors() 182 183 iter, err := primary.IterAll(ctx) 184 if err != nil { 185 return err 186 } 187 188 for { 189 hashId, value, err := iter.Next(ctx) 190 if err == io.EOF { 191 return nil 192 } 193 if err != nil { 194 return err 195 } 196 197 // make secondary index key 198 for i := range mapping { 199 j := mapping.MapOrdinal(i) 200 // first field in |value| is cardinality 201 field := value.GetField(j + 1) 202 203 if shouldDereferenceContent(j+1, vd, i, idxDesc) { 204 field, err = dereferenceContent(ctx, vd, j+1, value, secondary.NodeStore()) 205 if err != nil { 206 return err 207 } 208 } else if def.IsSpatial() { 209 geom, err := dereferenceGeometry(ctx, vd, j+1, value, secondary.NodeStore()) 210 if err != nil { 211 return err 212 } 213 geom, _, err = sqltypes.GeometryType{}.Convert(geom) 214 if err != nil { 215 return err 216 } 217 cell := tree.ZCell(geom.(sqltypes.GeometryValue)) 218 field = cell[:] 219 } 220 221 // Apply prefix lengths if they are configured 222 if len(def.PrefixLengths()) > i { 223 field = trimValueToPrefixLength(field, def.PrefixLengths()[i], vd.Types[j+1].Enc) 224 } 225 226 builder.PutRaw(i, field) 227 } 228 builder.PutRaw(idxDesc.Count()-1, hashId.GetField(0)) 229 k := builder.Build(primary.Pool()) 230 231 ok, err := secondary.Has(ctx, k) 232 if err != nil { 233 return err 234 } 235 if !ok { 236 printIndexContents(ctx, secondary) 237 return fmt.Errorf("index key %s not found in index %s", builder.Desc.Format(k), def.Name()) 238 } 239 } 240 } 241 242 func validatePkIndex(ctx context.Context, sch schema.Schema, def schema.Index, primary, secondary prolly.Map) error { 243 // Full-Text indexes do not make use of their internal map, so we may safely skip this check 244 if def.IsFullText() { 245 return nil 246 } 247 248 // Indexes on virtual columns cannot be rebuilt via the method below 249 if isVirtualIndex(def, sch) { 250 return nil 251 } 252 253 // secondary indexes have empty values 254 idxDesc, _ := secondary.Descriptors() 255 builder := val.NewTupleBuilder(idxDesc) 256 mapping := ordinalMappingsForSecondaryIndex(sch, def) 257 kd, vd := primary.Descriptors() 258 259 // Before we walk through the primary index data and validate that every row in the primary index exists in the 260 // secondary index, we also check that the primary index and secondary index have the same number of rows. 261 // Otherwise, we won't catch if the secondary index has extra, bogus data in it. 262 totalSecondaryCount, err := secondary.Count() 263 if err != nil { 264 return err 265 } 266 totalPrimaryCount, err := primary.Count() 267 if err != nil { 268 return err 269 } 270 if totalSecondaryCount != totalPrimaryCount { 271 return fmt.Errorf("primary index row count (%d) does not match secondary index row count (%d)", 272 totalPrimaryCount, totalSecondaryCount) 273 } 274 275 pkSize := kd.Count() 276 iter, err := primary.IterAll(ctx) 277 if err != nil { 278 return err 279 } 280 281 for { 282 key, value, err := iter.Next(ctx) 283 if err == io.EOF { 284 return nil 285 } 286 if err != nil { 287 return err 288 } 289 290 // make secondary index key 291 for i := range mapping { 292 j := mapping.MapOrdinal(i) 293 if j < pkSize { 294 builder.PutRaw(i, key.GetField(j)) 295 } else { 296 field := value.GetField(j - pkSize) 297 298 if shouldDereferenceContent(j-pkSize, vd, i, idxDesc) { 299 field, err = dereferenceContent(ctx, vd, j-pkSize, value, secondary.NodeStore()) 300 if err != nil { 301 return err 302 } 303 } else if def.IsSpatial() { 304 geom, err := dereferenceGeometry(ctx, vd, j-pkSize, value, secondary.NodeStore()) 305 if err != nil { 306 return err 307 } 308 geom, _, err = sqltypes.GeometryType{}.Convert(geom) 309 if err != nil { 310 return err 311 } 312 cell := tree.ZCell(geom.(sqltypes.GeometryValue)) 313 field = cell[:] 314 } 315 316 // Apply prefix lengths if they are configured 317 if len(def.PrefixLengths()) > i { 318 field = trimValueToPrefixLength(field, def.PrefixLengths()[i], vd.Types[j-pkSize].Enc) 319 } 320 321 builder.PutRaw(i, field) 322 } 323 } 324 k := builder.Build(primary.Pool()) 325 326 ok, err := secondary.Has(ctx, k) 327 if err != nil { 328 return err 329 } 330 if !ok { 331 printIndexContents(ctx, secondary) 332 return fmt.Errorf("index key %v not found in index %s", builder.Desc.Format(k), def.Name()) 333 } 334 } 335 } 336 337 func isVirtualIndex(def schema.Index, sch schema.Schema) bool { 338 for _, colName := range def.ColumnNames() { 339 col, ok := sch.GetAllCols().GetByName(colName) 340 if !ok { 341 panic(fmt.Sprintf("column not found: %s", colName)) 342 } 343 if col.Virtual { 344 return true 345 } 346 } 347 return false 348 } 349 350 // shouldDereferenceContent returns true if address encoded content should be dereferenced when 351 // building a key for a secondary index. This is determined by looking at the encoding of the field 352 // in the main table (|tablePos| and |tableValueDescriptor|) and the encoding of the field in the index 353 // (|indexPos| and |indexKeyDescriptor|) and seeing if one is an address encoding and the other is not. 354 func shouldDereferenceContent(tablePos int, tableValueDescriptor val.TupleDesc, indexPos int, indexKeyDescriptor val.TupleDesc) bool { 355 if tableValueDescriptor.Types[tablePos].Enc == val.StringAddrEnc && indexKeyDescriptor.Types[indexPos].Enc != val.StringAddrEnc { 356 return true 357 } 358 359 if tableValueDescriptor.Types[tablePos].Enc == val.BytesAddrEnc && indexKeyDescriptor.Types[indexPos].Enc != val.BytesAddrEnc { 360 return true 361 } 362 363 return false 364 } 365 366 // dereferenceContent dereferences an address encoded field (e.g. TEXT, BLOB) to load the content 367 // and return a []byte. |tableValueDescriptor| is the tuple descriptor for the value tuple of the main 368 // table, |tablePos| is the field index into the value tuple, and |tuple| is the value tuple from the 369 // main table. 370 func dereferenceContent(ctx context.Context, tableValueDescriptor val.TupleDesc, tablePos int, tuple val.Tuple, ns tree.NodeStore) ([]byte, error) { 371 v, err := tree.GetField(ctx, tableValueDescriptor, tablePos, tuple, ns) 372 if err != nil { 373 return nil, err 374 } 375 if v == nil { 376 return nil, nil 377 } 378 379 switch x := v.(type) { 380 case string: 381 return []byte(x), nil 382 case []byte: 383 return x, nil 384 default: 385 return nil, fmt.Errorf("unexpected type for address encoded content: %T", v) 386 } 387 } 388 389 // dereferenceGeometry dereferences an address encoded geometry field to load the content 390 // and return a GeometryType. |tableValueDescriptor| is the tuple descriptor for the value tuple of the main 391 // table, |tablePos| is the field index into the value tuple, and |tuple| is the value tuple from the 392 // main table. 393 func dereferenceGeometry(ctx context.Context, tableValueDescriptor val.TupleDesc, tablePos int, tuple val.Tuple, ns tree.NodeStore) (interface{}, error) { 394 v, err := tree.GetField(ctx, tableValueDescriptor, tablePos, tuple, ns) 395 if err != nil { 396 return nil, err 397 } 398 if v == nil { 399 return nil, nil 400 } 401 402 switch x := v.(type) { 403 case string: 404 return []byte(x), nil 405 case []byte: 406 return x, nil 407 case sqltypes.Point, sqltypes.LineString, sqltypes.Polygon, sqltypes.MultiPoint, sqltypes.MultiLineString, sqltypes.MultiPolygon, sqltypes.GeometryType, sqltypes.GeomColl: 408 return x, nil 409 default: 410 return nil, fmt.Errorf("unexpected type for address encoded content: %T", v) 411 } 412 } 413 414 // trimValueToPrefixLength trims |value| by truncating the bytes after |prefixLength|. If |prefixLength| 415 // is zero or if |value| is nil, then no trimming is done and |value| is directly returned. The 416 // |encoding| param indicates the original encoding of |value| in the source table. 417 func trimValueToPrefixLength(value []byte, prefixLength uint16, encoding val.Encoding) []byte { 418 if value == nil || prefixLength == 0 { 419 return value 420 } 421 422 if uint16(len(value)) < prefixLength { 423 prefixLength = uint16(len(value)) 424 } 425 426 addTerminatingNullByte := false 427 if encoding == val.BytesAddrEnc || encoding == val.StringAddrEnc { 428 // If the original encoding was for a BLOB or TEXT field, then we need to add 429 // a null byte at the end of the prefix to get it into StringEnc format. 430 addTerminatingNullByte = true 431 } else if prefixLength < uint16(len(value)) { 432 // Otherwise, if we're trimming a StringEnc value, we also need to re-add the 433 // null terminating byte. 434 addTerminatingNullByte = true 435 } 436 437 newValue := make([]byte, prefixLength) 438 copy(newValue, value[:prefixLength]) 439 if addTerminatingNullByte { 440 newValue = append(newValue, byte(0)) 441 } 442 443 return newValue 444 } 445 446 func ordinalMappingsForSecondaryIndex(sch schema.Schema, def schema.Index) (ord val.OrdinalMapping) { 447 // assert empty values for secondary indexes 448 if def.Schema().GetNonPKCols().Size() > 0 { 449 panic("expected empty secondary index values") 450 } 451 452 secondary := def.Schema().GetPKCols() 453 ord = make(val.OrdinalMapping, secondary.Size()) 454 455 for i := range ord { 456 name := secondary.GetByIndex(i).Name 457 ord[i] = -1 458 459 pks := sch.GetPKCols().GetColumns() 460 for j, col := range pks { 461 if col.Name == name { 462 ord[i] = j 463 } 464 } 465 vals := sch.GetNonPKCols().GetColumns() 466 for _, col := range vals { 467 if col.Name == name { 468 storedIdx, ok := sch.GetNonPKCols().StoredIndexByTag(col.Tag) 469 if !ok { 470 panic("column " + name + " not found") 471 } 472 ord[i] = storedIdx + len(pks) 473 } 474 } 475 if ord[i] < 0 { 476 panic("column " + name + " not found") 477 } 478 } 479 return 480 } 481 482 // iterDatabaseTables is a utility to factor out common validation access patterns. 483 func iterDatabaseTables( 484 ctx context.Context, 485 db sqle.Database, 486 cb func(name string, t *doltdb.Table, sch schema.Schema) (bool, error), 487 ) error { 488 ddb := db.GetDoltDB() 489 branches, err := ddb.GetBranches(ctx) 490 if err != nil { 491 return err 492 } 493 494 for _, branchRef := range branches { 495 wsRef, err := ref.WorkingSetRefForHead(branchRef) 496 if err != nil { 497 return err 498 } 499 ws, err := ddb.ResolveWorkingSet(ctx, wsRef) 500 if err != nil { 501 return err 502 } 503 504 r := ws.WorkingRoot() 505 506 if err = r.IterTables(ctx, cb); err != nil { 507 return err 508 } 509 } 510 return nil 511 }