// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package embedded

import (
	"math"
	"sort"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/causet/property"
	"github.com/whtcorpsinc/milevadb/causet/soliton"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

// StatsCount returns the estimated row count recorded in the plan's stats.
func (p *basePhysicalCauset) StatsCount() float64 {
	return p.stats.RowCount
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *LogicalBlockDual) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	// Stats are memoized; derive them only once.
	if p.stats != nil {
		return p.stats, nil
	}
	profile := &property.StatsInfo{
		RowCount:    float64(p.RowCount),
		Cardinality: make(map[int64]float64, selfSchema.Len()),
	}
	// Each output column can have at most RowCount distinct values.
	for _, col := range selfSchema.DeferredCausets {
		profile.Cardinality[col.UniqueID] = float64(p.RowCount)
	}
	p.stats = profile
	return p.stats, nil
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *LogicalMemBlock) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	// Stats are memoized; derive them only once.
	if p.stats != nil {
		return p.stats, nil
	}
	// Memory blocks have no collected statistics, so fall back to pseudo stats.
	statsBlock := statistics.PseudoBlock(p.BlockInfo)
	stats := &property.StatsInfo{
		RowCount:     float64(statsBlock.Count),
		Cardinality:  make(map[int64]float64, len(p.BlockInfo.DeferredCausets)),
		HistDefCausl: statsBlock.GenerateHistDefCauslFromDeferredCausetInfo(p.BlockInfo.DeferredCausets, p.schemaReplicant.DeferredCausets),
		StatsVersion: statistics.PseudoVersion,
	}
	// Without histograms, assume every column is fully distinct.
	for _, col := range selfSchema.DeferredCausets {
		stats.Cardinality[col.UniqueID] = float64(statsBlock.Count)
	}
	p.stats = stats
	return p.stats, nil
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *LogicalShow) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if p.stats != nil {
		return p.stats, nil
	}
	// A fake count, just to avoid panic now.
	p.stats = getFakeStats(selfSchema)
	return p.stats, nil
}

// getFakeStats builds a placeholder StatsInfo with RowCount 1 and
// cardinality 1 for every column of the given schema.
func getFakeStats(schemaReplicant *memex.Schema) *property.StatsInfo {
	profile := &property.StatsInfo{
		RowCount:    1,
		Cardinality: make(map[int64]float64, schemaReplicant.Len()),
	}
	for _, col := range schemaReplicant.DeferredCausets {
		profile.Cardinality[col.UniqueID] = 1
	}
	return profile
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *LogicalShowDBSJobs) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if p.stats != nil {
		return p.stats, nil
	}
	// A fake count, just to avoid panic now.
	p.stats = getFakeStats(selfSchema)
	return p.stats, nil
}

// RecursiveDeriveStats4Test is an exporter just for test.
func RecursiveDeriveStats4Test(p LogicalCauset) (*property.StatsInfo, error) {
	return p.recursiveDeriveStats(nil)
}

// GetStats4Test is an exporter just for test.
func GetStats4Test(p LogicalCauset) *property.StatsInfo {
	return p.statsInfo()
}

// recursiveDeriveStats derives stats for the whole sub-tree rooted at p,
// bottom-up: children are derived first, then the plan itself.
func (p *baseLogicalCauset) recursiveDeriveStats(colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	childStats := make([]*property.StatsInfo, len(p.children))
	childSchema := make([]*memex.Schema, len(p.children))
	// Push the interesting column groups down so children can maintain GroupNDVs.
	cumDefCausGroups := p.self.ExtractDefCausGroups(colGroups)
	for i, child := range p.children {
		childProfile, err := child.recursiveDeriveStats(cumDefCausGroups)
		if err != nil {
			return nil, err
		}
		childStats[i] = childProfile
		childSchema[i] = child.Schema()
	}
	return p.self.DeriveStats(childStats, p.self.Schema(), childSchema, colGroups)
}

// ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
// The base implementation maintains no column groups.
func (p *baseLogicalCauset) ExtractDefCausGroups(_ [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
	return nil
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *baseLogicalCauset) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	// A single-child operator inherits its child's stats unchanged.
	if len(childStats) == 1 {
		p.stats = childStats[0]
		return p.stats, nil
	}
	if len(childStats) > 1 {
		err := ErrInternal.GenWithStack("LogicalCausets with more than one child should implement their own DeriveStats().")
		return nil, err
	}
	if p.stats != nil {
		return p.stats, nil
	}
	// Leaf operator without its own DeriveStats: assume a single row with
	// cardinality 1 for every output column.
	profile := &property.StatsInfo{
		RowCount:    float64(1),
		Cardinality: make(map[int64]float64, selfSchema.Len()),
	}
	for _, col := range selfSchema.DeferredCausets {
		profile.Cardinality[col.UniqueID] = 1
	}
	p.stats = profile
	return profile, nil
}

// getDeferredCausetNDV computes estimated NDV of specified column using the original
// histogram of `DataSource` which is retrieved from storage(not the derived one).
func (ds *DataSource) getDeferredCausetNDV(colID int64) (ndv float64) {
	hist, ok := ds.statisticBlock.DeferredCausets[colID]
	if ok && hist.Count > 0 {
		// Scale the histogram's NDV up to the full causet row count.
		factor := float64(ds.statisticBlock.Count) / float64(hist.Count)
		ndv = float64(hist.NDV) * factor
	} else {
		// No usable histogram: fall back to a fixed fraction of the row count.
		ndv = float64(ds.statisticBlock.Count) * distinctFactor
	}
	return ndv
}

// getGroupNDVs returns GroupNDVs of those indexes whose (sorted) column ID set
// exactly matches one of the requested column groups.
func (ds *DataSource) getGroupNDVs(colGroups [][]*memex.DeferredCauset) []property.GroupNDV {
	if colGroups == nil {
		return nil
	}
	tbl := ds.blockStats.HistDefCausl
	ndvs := make([]property.GroupNDV, 0, len(colGroups))
	for idxID, idx := range tbl.Indices {
		idxDefCauss := make([]int64, len(tbl.Idx2DeferredCausetIDs[idxID]))
		copy(idxDefCauss, tbl.Idx2DeferredCausetIDs[idxID])
		sort.Slice(idxDefCauss, func(i, j int) bool {
			return idxDefCauss[i] < idxDefCauss[j]
		})
		for _, g := range colGroups {
			// We only want those exact matches.
			if len(g) != len(idxDefCauss) {
				continue
			}
			match := true
			for i, col := range g {
				// Both slices are sorted according to UniqueID.
				if col.UniqueID != idxDefCauss[i] {
					match = false
					break
				}
			}
			if match {
				ndv := property.GroupNDV{
					DefCauss: idxDefCauss,
					NDV:      float64(idx.NDV),
				}
				ndvs = append(ndvs, ndv)
				break
			}
		}
	}
	return ndvs
}

// initStats lazily initializes the base causet stats (ds.blockStats),
// loading the statistic causet from storage if it is not cached yet.
func (ds *DataSource) initStats(colGroups [][]*memex.DeferredCauset) {
	if ds.blockStats != nil {
		// Reload GroupNDVs since colGroups may have changed.
		ds.blockStats.GroupNDVs = ds.getGroupNDVs(colGroups)
		return
	}
	if ds.statisticBlock == nil {
		ds.statisticBlock = getStatsBlock(ds.ctx, ds.blockInfo, ds.causet.Meta().ID)
	}
	blockStats := &property.StatsInfo{
		RowCount:     float64(ds.statisticBlock.Count),
		Cardinality:  make(map[int64]float64, ds.schemaReplicant.Len()),
		HistDefCausl: ds.statisticBlock.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.schemaReplicant.DeferredCausets),
		StatsVersion: ds.statisticBlock.Version,
	}
	if ds.statisticBlock.Pseudo {
		blockStats.StatsVersion = statistics.PseudoVersion
	}
	for _, col := range ds.schemaReplicant.DeferredCausets {
		blockStats.Cardinality[col.UniqueID] = ds.getDeferredCausetNDV(col.ID)
	}
	ds.blockStats = blockStats
	ds.blockStats.GroupNDVs = ds.getGroupNDVs(colGroups)
	ds.TblDefCausHists = ds.statisticBlock.ID2UniqueID(ds.TblDefCauss)
}

// deriveStatsByFilter estimates stats after applying `conds` to the base causet
// stats, using the pre-filled access paths for selectivity estimation.
func (ds *DataSource) deriveStatsByFilter(conds memex.CNFExprs, filledPaths []*soliton.AccessPath) *property.StatsInfo {
	selectivity, nodes, err := ds.blockStats.HistDefCausl.Selectivity(ds.ctx, conds, filledPaths)
	if err != nil {
		logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err))
		selectivity = SelectionFactor
	}
	stats := ds.blockStats.Scale(selectivity)
	if ds.ctx.GetStochastikVars().OptimizerSelectivityLevel >= 1 {
		stats.HistDefCausl = stats.HistDefCausl.NewHistDefCauslBySelectivity(ds.ctx.GetStochastikVars().StmtCtx, nodes)
	}
	return stats
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if ds.stats != nil && len(colGroups) == 0 {
		return ds.stats, nil
	}
	ds.initStats(colGroups)
	if ds.stats != nil {
		// Just reload the GroupNDVs.
		selectivity := ds.stats.RowCount / ds.blockStats.RowCount
		ds.stats = ds.blockStats.Scale(selectivity)
		return ds.stats, nil
	}
	// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
	for i, expr := range ds.pushedDownConds {
		ds.pushedDownConds[i] = memex.PushDownNot(ds.ctx, expr)
	}
	// Fill the index paths first so their ranges feed selectivity estimation.
	for _, path := range ds.possibleAccessPaths {
		if path.IsBlockPath() {
			continue
		}
		err := ds.fillIndexPath(path, ds.pushedDownConds)
		if err != nil {
			return nil, err
		}
	}
	ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths)
	for _, path := range ds.possibleAccessPaths {
		if path.IsBlockPath() {
			noIntervalRanges, err := ds.deriveBlockPathStats(path, ds.pushedDownConds, false)
			if err != nil {
				return nil, err
			}
			// If we have point or empty range, just remove other possible paths.
			if noIntervalRanges || len(path.Ranges) == 0 {
				ds.possibleAccessPaths[0] = path
				ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
				break
			}
			continue
		}
		noIntervalRanges := ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
		// If we have empty range, or point range on unique index, just remove other possible paths.
		if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 {
			ds.possibleAccessPaths[0] = path
			ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
			break
		}
	}

	// TODO: implement UnionScan + IndexMerge
	isReadOnlyTxn := true
	txn, err := ds.ctx.Txn(false)
	if err != nil {
		return nil, err
	}
	if txn.Valid() && !txn.IsReadOnly() {
		isReadOnlyTxn = false
	}
	// Consider the IndexMergePath. Now, we just generate `IndexMergePath` in DNF case.
	isPossibleIdxMerge := len(ds.pushedDownConds) > 0 && len(ds.possibleAccessPaths) > 1
	stochastikAndStmtPermission := (ds.ctx.GetStochastikVars().GetEnableIndexMerge() || len(ds.indexMergeHints) > 0) && !ds.ctx.GetStochastikVars().StmtCtx.NoIndexMergeHint
	// If there is an index path, we current do not consider `IndexMergePath`.
	needConsiderIndexMerge := true
	for i := 1; i < len(ds.possibleAccessPaths); i++ {
		if len(ds.possibleAccessPaths[i].AccessConds) != 0 {
			needConsiderIndexMerge = false
			break
		}
	}
	if isPossibleIdxMerge && stochastikAndStmtPermission && needConsiderIndexMerge && isReadOnlyTxn {
		ds.generateAndPruneIndexMergePath(ds.indexMergeHints != nil)
	} else if len(ds.indexMergeHints) > 0 {
		// IndexMerge was hinted but cannot be used; warn instead of failing.
		ds.indexMergeHints = nil
		ds.ctx.GetStochastikVars().StmtCtx.AppendWarning(errors.Errorf("IndexMerge is inapplicable or disabled"))
	}
	return ds.stats, nil
}

// generateAndPruneIndexMergePath generates IndexMerge OR-paths and, when
// explicitly hinted (needPrune), drops the regular paths in favor of them.
func (ds *DataSource) generateAndPruneIndexMergePath(needPrune bool) {
	regularPathCount := len(ds.possibleAccessPaths)
	ds.generateIndexMergeOrPaths()
	// If without hints, it means that `enableIndexMerge` is true
	if len(ds.indexMergeHints) == 0 {
		return
	}
	// With hints and without generated IndexMerge paths
	if regularPathCount == len(ds.possibleAccessPaths) {
		ds.indexMergeHints = nil
		ds.ctx.GetStochastikVars().StmtCtx.AppendWarning(errors.Errorf("IndexMerge is inapplicable or disabled"))
		return
	}
	// Do not need to consider the regular paths in find_best_task().
	if needPrune {
		ds.possibleAccessPaths = ds.possibleAccessPaths[regularPathCount:]
	}
}

// DeriveStats implements LogicalCauset DeriveStats interface.
func (ts *LogicalBlockScan) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (_ *property.StatsInfo, err error) {
	ts.Source.initStats(nil)
	// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
	for i, expr := range ts.AccessConds {
		// TODO The memexs may be shared by BlockScan and several IndexScans, there would be redundant
		// `PushDownNot` function call in multiple `DeriveStats` then.
		ts.AccessConds[i] = memex.PushDownNot(ts.ctx, expr)
	}
	ts.stats = ts.Source.deriveStatsByFilter(ts.AccessConds, nil)
	sc := ts.SCtx().GetStochastikVars().StmtCtx
	// ts.Handle could be nil if PK is Handle, and PK column has been pruned.
	// TODO: support clustered index.
	if ts.HandleDefCauss != nil {
		ts.Ranges, err = ranger.BuildBlockRange(ts.AccessConds, sc, ts.HandleDefCauss.GetDefCaus(0).RetType)
	} else {
		// Without handle columns, scan the full integer range; signedness
		// follows the PK-is-handle column's flag when available.
		isUnsigned := false
		if ts.Source.blockInfo.PKIsHandle {
			if pkDefCausInfo := ts.Source.blockInfo.GetPkDefCausInfo(); pkDefCausInfo != nil {
				isUnsigned = allegrosql.HasUnsignedFlag(pkDefCausInfo.Flag)
			}
		}
		ts.Ranges = ranger.FullIntRange(isUnsigned)
	}
	if err != nil {
		return nil, err
	}
	return ts.stats, nil
}

// DeriveStats implements LogicalCauset DeriveStats interface.
375 func (is *LogicalIndexScan) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 376 is.Source.initStats(nil) 377 for i, expr := range is.AccessConds { 378 is.AccessConds[i] = memex.PushDownNot(is.ctx, expr) 379 } 380 is.stats = is.Source.deriveStatsByFilter(is.AccessConds, nil) 381 if len(is.AccessConds) == 0 { 382 is.Ranges = ranger.FullRange() 383 } 384 is.IdxDefCauss, is.IdxDefCausLens = memex.IndexInfo2PrefixDefCauss(is.DeferredCausets, selfSchema.DeferredCausets, is.Index) 385 is.FullIdxDefCauss, is.FullIdxDefCausLens = memex.IndexInfo2DefCauss(is.DeferredCausets, selfSchema.DeferredCausets, is.Index) 386 if !is.Index.Unique && !is.Index.Primary && len(is.Index.DeferredCausets) == len(is.IdxDefCauss) { 387 handleDefCaus := is.getPKIsHandleDefCaus(selfSchema) 388 if handleDefCaus != nil && !allegrosql.HasUnsignedFlag(handleDefCaus.RetType.Flag) { 389 is.IdxDefCauss = append(is.IdxDefCauss, handleDefCaus) 390 is.IdxDefCausLens = append(is.IdxDefCausLens, types.UnspecifiedLength) 391 } 392 } 393 return is.stats, nil 394 } 395 396 // getIndexMergeOrPath generates all possible IndexMergeOrPaths. 
397 func (ds *DataSource) generateIndexMergeOrPaths() { 398 usedIndexCount := len(ds.possibleAccessPaths) 399 for i, cond := range ds.pushedDownConds { 400 sf, ok := cond.(*memex.ScalarFunction) 401 if !ok || sf.FuncName.L != ast.LogicOr { 402 continue 403 } 404 var partialPaths = make([]*soliton.AccessPath, 0, usedIndexCount) 405 dnfItems := memex.FlattenDNFConditions(sf) 406 for _, item := range dnfItems { 407 cnfItems := memex.SplitCNFItems(item) 408 itemPaths := ds.accessPathsForConds(cnfItems, usedIndexCount) 409 if len(itemPaths) == 0 { 410 partialPaths = nil 411 break 412 } 413 partialPath := ds.buildIndexMergePartialPath(itemPaths) 414 if partialPath == nil { 415 partialPaths = nil 416 break 417 } 418 partialPaths = append(partialPaths, partialPath) 419 } 420 if len(partialPaths) > 1 { 421 possiblePath := ds.buildIndexMergeOrPath(partialPaths, i) 422 if possiblePath != nil { 423 ds.possibleAccessPaths = append(ds.possibleAccessPaths, possiblePath) 424 } 425 } 426 } 427 } 428 429 // isInIndexMergeHints checks whether current index or primary key is in IndexMerge hints. 430 func (ds *DataSource) isInIndexMergeHints(name string) bool { 431 if len(ds.indexMergeHints) == 0 { 432 return true 433 } 434 for _, hint := range ds.indexMergeHints { 435 if hint.indexHint == nil || len(hint.indexHint.IndexNames) == 0 { 436 return true 437 } 438 for _, hintName := range hint.indexHint.IndexNames { 439 if name == hintName.String() { 440 return true 441 } 442 } 443 } 444 return false 445 } 446 447 // accessPathsForConds generates all possible index paths for conditions. 
func (ds *DataSource) accessPathsForConds(conditions []memex.Expression, usedIndexCount int) []*soliton.AccessPath {
	var results = make([]*soliton.AccessPath, 0, usedIndexCount)
	for i := 0; i < usedIndexCount; i++ {
		path := &soliton.AccessPath{}
		if ds.possibleAccessPaths[i].IsBlockPath() {
			// The primary key path; it must be permitted by the hints.
			if !ds.isInIndexMergeHints("primary") {
				continue
			}
			if ds.blockInfo.IsCommonHandle {
				path.IsCommonHandlePath = true
				path.Index = ds.possibleAccessPaths[i].Index
			} else {
				path.IsIntHandlePath = true
			}
			noIntervalRanges, err := ds.deriveBlockPathStats(path, conditions, true)
			if err != nil {
				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
				continue
			}
			if len(path.BlockFilters) > 0 || len(path.AccessConds) == 0 {
				// If AccessConds is empty or blockFilter is not empty, we ignore the access path.
				// Now these conditions are too strict.
				// For example, a allegrosql `select * from t where a > 1 or (b < 2 and c > 3)` and causet `t` with indexes
				// on a and b separately. we can generate a `IndexMergePath` with causet filter `a > 1 or (b < 2 and c > 3)`.
				// TODO: solve the above case
				continue
			}
			// If we have point or empty range, just remove other possible paths.
			// Note that `break` skips the trailing append below, so `results`
			// ends up holding only this dominating path.
			if noIntervalRanges || len(path.Ranges) == 0 {
				if len(results) == 0 {
					results = append(results, path)
				} else {
					results[0] = path
					results = results[:1]
				}
				break
			}
		} else {
			path.Index = ds.possibleAccessPaths[i].Index
			if !ds.isInIndexMergeHints(path.Index.Name.L) {
				continue
			}
			err := ds.fillIndexPath(path, conditions)
			if err != nil {
				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
				continue
			}
			noIntervalRanges := ds.deriveIndexPathStats(path, conditions, true)
			if len(path.BlockFilters) > 0 || len(path.AccessConds) == 0 {
				// If AccessConds is empty or blockFilter is not empty, we ignore the access path.
				// Now these conditions are too strict.
				// For example, a allegrosql `select * from t where a > 1 or (b < 2 and c > 3)` and causet `t` with indexes
				// on a and b separately. we can generate a `IndexMergePath` with causet filter `a > 1 or (b < 2 and c > 3)`.
				// TODO: solve the above case
				continue
			}
			// If we have empty range, or point range on unique index, just remove other possible paths.
			// As above, `break` skips the trailing append.
			if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 {
				if len(results) == 0 {
					results = append(results, path)
				} else {
					results[0] = path
					results = results[:1]
				}
				break
			}
		}
		results = append(results, path)
	}
	return results
}

// buildIndexMergePartialPath chooses the best index path from all possible paths.
// Now we just choose the index with most columns.
// We should improve this strategy, because it is not always better to choose index
// with most columns, e.g, filter is c > 1 and the input indexes are c and c_d_e,
// the former one is enough, and it is less expensive in execution compared with the latter one.
525 // TODO: improve strategy of the partial path selection 526 func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*soliton.AccessPath) *soliton.AccessPath { 527 if len(indexAccessPaths) == 1 { 528 return indexAccessPaths[0] 529 } 530 531 maxDefCaussIndex := 0 532 maxDefCauss := len(indexAccessPaths[0].IdxDefCauss) 533 for i := 1; i < len(indexAccessPaths); i++ { 534 current := len(indexAccessPaths[i].IdxDefCauss) 535 if current > maxDefCauss { 536 maxDefCaussIndex = i 537 maxDefCauss = current 538 } 539 } 540 return indexAccessPaths[maxDefCaussIndex] 541 } 542 543 // buildIndexMergeOrPath generates one possible IndexMergePath. 544 func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*soliton.AccessPath, current int) *soliton.AccessPath { 545 indexMergePath := &soliton.AccessPath{PartialIndexPaths: partialPaths} 546 indexMergePath.BlockFilters = append(indexMergePath.BlockFilters, ds.pushedDownConds[:current]...) 547 indexMergePath.BlockFilters = append(indexMergePath.BlockFilters, ds.pushedDownConds[current+1:]...) 548 return indexMergePath 549 } 550 551 // DeriveStats implement LogicalCauset DeriveStats interface. 552 func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 553 if p.stats != nil { 554 return p.stats, nil 555 } 556 p.stats = childStats[0].Scale(SelectionFactor) 557 p.stats.GroupNDVs = nil 558 return p.stats, nil 559 } 560 561 // DeriveStats implement LogicalCauset DeriveStats interface. 
562 func (p *LogicalUnionAll) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 563 if p.stats != nil { 564 return p.stats, nil 565 } 566 p.stats = &property.StatsInfo{ 567 Cardinality: make(map[int64]float64, selfSchema.Len()), 568 } 569 for _, childProfile := range childStats { 570 p.stats.RowCount += childProfile.RowCount 571 for _, col := range selfSchema.DeferredCausets { 572 p.stats.Cardinality[col.UniqueID] += childProfile.Cardinality[col.UniqueID] 573 } 574 } 575 return p.stats, nil 576 } 577 578 func deriveLimitStats(childProfile *property.StatsInfo, limitCount float64) *property.StatsInfo { 579 stats := &property.StatsInfo{ 580 RowCount: math.Min(limitCount, childProfile.RowCount), 581 Cardinality: make(map[int64]float64, len(childProfile.Cardinality)), 582 } 583 for id, c := range childProfile.Cardinality { 584 stats.Cardinality[id] = math.Min(c, stats.RowCount) 585 } 586 return stats 587 } 588 589 // DeriveStats implement LogicalCauset DeriveStats interface. 590 func (p *LogicalLimit) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 591 if p.stats != nil { 592 return p.stats, nil 593 } 594 p.stats = deriveLimitStats(childStats[0], float64(p.Count)) 595 return p.stats, nil 596 } 597 598 // DeriveStats implement LogicalCauset DeriveStats interface. 599 func (lt *LogicalTopN) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 600 if lt.stats != nil { 601 return lt.stats, nil 602 } 603 lt.stats = deriveLimitStats(childStats[0], float64(lt.Count)) 604 return lt.stats, nil 605 } 606 607 // getCardinality will return the Cardinality of a couple of columns. 
We simply return the max one, because we cannot know 608 // the Cardinality for multi-dimension attributes properly. This is a simple and naive scheme of Cardinality estimation. 609 func getCardinality(defcaus []*memex.DeferredCauset, schemaReplicant *memex.Schema, profile *property.StatsInfo) float64 { 610 cardinality := 1.0 611 indices := schemaReplicant.DeferredCausetsIndices(defcaus) 612 if indices == nil { 613 logutil.BgLogger().Error("column not found in schemaReplicant", zap.Any("columns", defcaus), zap.String("schemaReplicant", schemaReplicant.String())) 614 return cardinality 615 } 616 for _, idx := range indices { 617 // It is a very elementary estimation. 618 col := schemaReplicant.DeferredCausets[idx] 619 cardinality = math.Max(cardinality, profile.Cardinality[col.UniqueID]) 620 } 621 return cardinality 622 } 623 624 func (p *LogicalProjection) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childProfile *property.StatsInfo, selfSchema *memex.Schema) []property.GroupNDV { 625 if len(colGroups) == 0 || len(childProfile.GroupNDVs) == 0 { 626 return nil 627 } 628 exprDefCaus2ProjDefCaus := make(map[int64]int64) 629 for i, expr := range p.Exprs { 630 exprDefCaus, ok := expr.(*memex.DeferredCauset) 631 if !ok { 632 continue 633 } 634 exprDefCaus2ProjDefCaus[exprDefCaus.UniqueID] = selfSchema.DeferredCausets[i].UniqueID 635 } 636 ndvs := make([]property.GroupNDV, 0, len(childProfile.GroupNDVs)) 637 for _, childGroupNDV := range childProfile.GroupNDVs { 638 projDefCauss := make([]int64, len(childGroupNDV.DefCauss)) 639 for i, col := range childGroupNDV.DefCauss { 640 projDefCaus, ok := exprDefCaus2ProjDefCaus[col] 641 if !ok { 642 projDefCauss = nil 643 break 644 } 645 projDefCauss[i] = projDefCaus 646 } 647 if projDefCauss == nil { 648 continue 649 } 650 sort.Slice(projDefCauss, func(i, j int) bool { 651 return projDefCauss[i] < projDefCauss[j] 652 }) 653 groupNDV := property.GroupNDV{ 654 DefCauss: projDefCauss, 655 NDV: childGroupNDV.NDV, 656 } 657 ndvs = 
append(ndvs, groupNDV) 658 } 659 return ndvs 660 } 661 662 // DeriveStats implement LogicalCauset DeriveStats interface. 663 func (p *LogicalProjection) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 664 childProfile := childStats[0] 665 if p.stats != nil { 666 // Reload GroupNDVs since colGroups may have changed. 667 p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childProfile, selfSchema) 668 return p.stats, nil 669 } 670 p.stats = &property.StatsInfo{ 671 RowCount: childProfile.RowCount, 672 Cardinality: make(map[int64]float64, len(p.Exprs)), 673 } 674 for i, expr := range p.Exprs { 675 defcaus := memex.ExtractDeferredCausets(expr) 676 p.stats.Cardinality[selfSchema.DeferredCausets[i].UniqueID] = getCardinality(defcaus, childSchema[0], childProfile) 677 } 678 p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childProfile, selfSchema) 679 return p.stats, nil 680 } 681 682 // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface. 683 func (p *LogicalProjection) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset { 684 if len(colGroups) == 0 { 685 return nil 686 } 687 extDefCausGroups, _ := p.Schema().ExtractDefCausGroups(colGroups) 688 if len(extDefCausGroups) == 0 { 689 return nil 690 } 691 extracted := make([][]*memex.DeferredCauset, 0, len(extDefCausGroups)) 692 for _, defcaus := range extDefCausGroups { 693 exprs := make([]*memex.DeferredCauset, len(defcaus)) 694 allDefCauss := true 695 for i, offset := range defcaus { 696 col, ok := p.Exprs[offset].(*memex.DeferredCauset) 697 // TODO: for functional dependent projections like `col1 + 1` -> `col2`, we can maintain GroupNDVs actually. 
698 if !ok { 699 allDefCauss = false 700 break 701 } 702 exprs[i] = col 703 } 704 if allDefCauss { 705 extracted = append(extracted, memex.SortDeferredCausets(exprs)) 706 } 707 } 708 return extracted 709 } 710 711 func (la *LogicalAggregation) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childProfile *property.StatsInfo, selfSchema *memex.Schema, gbyDefCauss []*memex.DeferredCauset) []property.GroupNDV { 712 if len(colGroups) == 0 || len(childProfile.GroupNDVs) == 0 { 713 return nil 714 } 715 // Check if the child profile provides GroupNDV for the GROUP BY columns. 716 // Note that gbyDefCauss may not be the exact GROUP BY columns, e.g, GROUP BY a+b, 717 // but we have no other approaches for the cardinality estimation of these cases 718 // except for using the independent assumption, unless we can use stats of memex index. 719 gbyDefCauss = memex.SortDeferredCausets(gbyDefCauss) 720 for _, groupNDV := range childProfile.GroupNDVs { 721 if len(gbyDefCauss) != len(groupNDV.DefCauss) { 722 continue 723 } 724 match := true 725 for i, col := range groupNDV.DefCauss { 726 if col != gbyDefCauss[i].UniqueID { 727 match = false 728 break 729 } 730 } 731 if match { 732 return []property.GroupNDV{groupNDV} 733 } 734 } 735 return nil 736 } 737 738 // DeriveStats implement LogicalCauset DeriveStats interface. 739 func (la *LogicalAggregation) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 740 childProfile := childStats[0] 741 gbyDefCauss := make([]*memex.DeferredCauset, 0, len(la.GroupByItems)) 742 for _, gbyExpr := range la.GroupByItems { 743 defcaus := memex.ExtractDeferredCausets(gbyExpr) 744 gbyDefCauss = append(gbyDefCauss, defcaus...) 745 } 746 if la.stats != nil { 747 // Reload GroupNDVs since colGroups may have changed. 
748 la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childProfile, selfSchema, gbyDefCauss) 749 return la.stats, nil 750 } 751 cardinality := getCardinality(gbyDefCauss, childSchema[0], childProfile) 752 la.stats = &property.StatsInfo{ 753 RowCount: cardinality, 754 Cardinality: make(map[int64]float64, selfSchema.Len()), 755 } 756 // We cannot estimate the Cardinality for every output, so we use a conservative strategy. 757 for _, col := range selfSchema.DeferredCausets { 758 la.stats.Cardinality[col.UniqueID] = cardinality 759 } 760 la.inputCount = childProfile.RowCount 761 la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childProfile, selfSchema, gbyDefCauss) 762 return la.stats, nil 763 } 764 765 // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface. 766 func (la *LogicalAggregation) ExtractDefCausGroups(_ [][]*memex.DeferredCauset) [][]*memex.DeferredCauset { 767 // Parent colGroups would be dicarded, because aggregation would make NDV of colGroups 768 // which does not match GroupByItems invalid. 769 // Note that gbyDefCauss may not be the exact GROUP BY columns, e.g, GROUP BY a+b, 770 // but we have no other approaches for the cardinality estimation of these cases 771 // except for using the independent assumption, unless we can use stats of memex index. 772 gbyDefCauss := make([]*memex.DeferredCauset, 0, len(la.GroupByItems)) 773 for _, gbyExpr := range la.GroupByItems { 774 defcaus := memex.ExtractDeferredCausets(gbyExpr) 775 gbyDefCauss = append(gbyDefCauss, defcaus...) 
776 } 777 if len(gbyDefCauss) > 0 { 778 return [][]*memex.DeferredCauset{memex.SortDeferredCausets(gbyDefCauss)} 779 } 780 return nil 781 } 782 783 func (p *LogicalJoin) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childStats []*property.StatsInfo) []property.GroupNDV { 784 outerIdx := int(-1) 785 if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin { 786 outerIdx = 0 787 } else if p.JoinType == RightOuterJoin { 788 outerIdx = 1 789 } 790 if outerIdx >= 0 && len(colGroups) > 0 { 791 return childStats[outerIdx].GroupNDVs 792 } 793 return nil 794 } 795 796 // DeriveStats implement LogicalCauset DeriveStats interface. 797 // If the type of join is SemiJoin, the selectivity of it will be same as selection's. 798 // If the type of join is LeftOuterSemiJoin, it will not add or remove any event. The last column is a boolean value, whose Cardinality should be two. 799 // If the type of join is inner/outer join, the output of join(s, t) should be N(s) * N(t) / (V(s.key) * V(t.key)) * Min(s.key, t.key). 800 // N(s) stands for the number of rows in relation s. V(s.key) means the Cardinality of join key in s. 801 // This is a quite simple strategy: We assume every bucket of relation which will participate join has the same number of rows, and apply cross join for 802 // every matched bucket. 803 func (p *LogicalJoin) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) { 804 if p.stats != nil { 805 // Reload GroupNDVs since colGroups may have changed. 
		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
		return p.stats, nil
	}
	leftProfile, rightProfile := childStats[0], childStats[1]
	leftJoinKeys, rightJoinKeys, _, _ := p.GetJoinKeys()
	helper := &fullJoinRowCountHelper{
		cartesian:     0 == len(p.EqualConditions),
		leftProfile:   leftProfile,
		rightProfile:  rightProfile,
		leftJoinKeys:  leftJoinKeys,
		rightJoinKeys: rightJoinKeys,
		leftSchema:    childSchema[0],
		rightSchema:   childSchema[1],
	}
	p.equalCondOutCnt = helper.estimate()
	if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin {
		// Semi joins only filter the left side, so scale the left child's
		// stats by the default selection factor.
		p.stats = &property.StatsInfo{
			RowCount:    leftProfile.RowCount * SelectionFactor,
			Cardinality: make(map[int64]float64, len(leftProfile.Cardinality)),
		}
		for id, c := range leftProfile.Cardinality {
			p.stats.Cardinality[id] = c * SelectionFactor
		}
		return p.stats, nil
	}
	if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
		// These joins keep every left event and append one boolean column,
		// whose Cardinality is therefore 2.
		p.stats = &property.StatsInfo{
			RowCount:    leftProfile.RowCount,
			Cardinality: make(map[int64]float64, selfSchema.Len()),
		}
		for id, c := range leftProfile.Cardinality {
			p.stats.Cardinality[id] = c
		}
		p.stats.Cardinality[selfSchema.DeferredCausets[selfSchema.Len()-1].UniqueID] = 2.0
		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
		return p.stats, nil
	}
	count := p.equalCondOutCnt
	// An outer join emits at least every event of its outer side.
	if p.JoinType == LeftOuterJoin {
		count = math.Max(count, leftProfile.RowCount)
	} else if p.JoinType == RightOuterJoin {
		count = math.Max(count, rightProfile.RowCount)
	}
	cardinality := make(map[int64]float64, selfSchema.Len())
	// A column's Cardinality can never exceed the estimated output event count.
	for id, c := range leftProfile.Cardinality {
		cardinality[id] = math.Min(c, count)
	}
	for id, c := range rightProfile.Cardinality {
		cardinality[id] = math.Min(c, count)
	}
	p.stats = &property.StatsInfo{
		RowCount:    count,
		Cardinality: cardinality,
	}
	p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
	return p.stats, nil
}

// ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
func (p *LogicalJoin) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
	leftJoinKeys, rightJoinKeys, _, _ := p.GetJoinKeys()
	extracted := make([][]*memex.DeferredCauset, 0, 2+len(colGroups))
	// For multi-column equi-join keys on inner/outer joins, request the keys'
	// group NDVs from both children (single-column NDV is already covered by
	// the per-column Cardinality map).
	if len(leftJoinKeys) > 1 && (p.JoinType == InnerJoin || p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin) {
		extracted = append(extracted, memex.SortDeferredCausets(leftJoinKeys), memex.SortDeferredCausets(rightJoinKeys))
	}
	var outerSchema *memex.Schema
	if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
		outerSchema = p.Children()[0].Schema()
	} else if p.JoinType == RightOuterJoin {
		outerSchema = p.Children()[1].Schema()
	}
	if len(colGroups) == 0 || outerSchema == nil {
		return extracted
	}
	// Pass down only the parent col groups fully covered by the outer side.
	_, offsets := outerSchema.ExtractDefCausGroups(colGroups)
	if len(offsets) == 0 {
		return extracted
	}
	for _, offset := range offsets {
		extracted = append(extracted, colGroups[offset])
	}
	return extracted
}

// fullJoinRowCountHelper bundles the child stats, join keys, and schemas
// needed to estimate the event count of a join before any outer-join or
// semi-join adjustment is applied.
type fullJoinRowCountHelper struct {
	cartesian     bool // true when there are no equal conditions
	leftProfile   *property.StatsInfo
	rightProfile  *property.StatsInfo
	leftJoinKeys  []*memex.DeferredCauset
	rightJoinKeys []*memex.DeferredCauset
	leftSchema    *memex.Schema
	rightSchema   *memex.Schema
}

// estimate returns the estimated event count of the full join:
// N(s)*N(t) for a cartesian product, otherwise
// N(s)*N(t)/max(V(s.key), V(t.key)), per the strategy documented on
// LogicalJoin.DeriveStats.
func (h *fullJoinRowCountHelper) estimate() float64 {
	if h.cartesian {
		return h.leftProfile.RowCount * h.rightProfile.RowCount
	}
	leftKeyCardinality := getCardinality(h.leftJoinKeys, h.leftSchema, h.leftProfile)
	rightKeyCardinality := getCardinality(h.rightJoinKeys, h.rightSchema, h.rightProfile)
	count := h.leftProfile.RowCount * h.rightProfile.RowCount / math.Max(leftKeyCardinality, rightKeyCardinality)
	return count
}

// getGroupNDVs propagates the left (outer) child's GroupNDVs for the join
// types under which every left event is preserved; otherwise returns nil.
func (la *LogicalApply) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childStats []*property.StatsInfo) []property.GroupNDV {
	if len(colGroups) > 0 && (la.JoinType == LeftOuterSemiJoin || la.JoinType == AntiLeftOuterSemiJoin || la.JoinType == LeftOuterJoin) {
		return childStats[0].GroupNDVs
	}
	return nil
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (la *LogicalApply) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if la.stats != nil {
		// Reload GroupNDVs since colGroups may have changed.
		la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childStats)
		return la.stats, nil
	}
	// Apply's output event count is estimated as the left child's event count;
	// left columns keep their Cardinality.
	leftProfile := childStats[0]
	la.stats = &property.StatsInfo{
		RowCount:    leftProfile.RowCount,
		Cardinality: make(map[int64]float64, selfSchema.Len()),
	}
	for id, c := range leftProfile.Cardinality {
		la.stats.Cardinality[id] = c
	}
	if la.JoinType == LeftOuterSemiJoin || la.JoinType == AntiLeftOuterSemiJoin {
		// The appended last column is a boolean, so its Cardinality is 2.
		la.stats.Cardinality[selfSchema.DeferredCausets[selfSchema.Len()-1].UniqueID] = 2.0
	} else {
		// Columns appended beyond the left child's schema get the conservative
		// upper bound: the output event count.
		for i := childSchema[0].Len(); i < selfSchema.Len(); i++ {
			la.stats.Cardinality[selfSchema.DeferredCausets[i].UniqueID] = leftProfile.RowCount
		}
	}
	la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childStats)
	return la.stats, nil
}

// ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
func (la *LogicalApply) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
	var outerSchema *memex.Schema
	// Apply doesn't have RightOuterJoin.
	if la.JoinType == LeftOuterJoin || la.JoinType == LeftOuterSemiJoin || la.JoinType == AntiLeftOuterSemiJoin {
		outerSchema = la.Children()[0].Schema()
	}
	if len(colGroups) == 0 || outerSchema == nil {
		return nil
	}
	// Pass down only the parent col groups fully covered by the outer side.
	_, offsets := outerSchema.ExtractDefCausGroups(colGroups)
	if len(offsets) == 0 {
		return nil
	}
	extracted := make([][]*memex.DeferredCauset, len(offsets))
	for i, offset := range offsets {
		extracted[i] = colGroups[offset]
	}
	return extracted
}

// getSingletonStats builds a StatsInfo for operators that produce at most one
// event (Exists and MaxOneRow): RowCount is 1 and so is every column's
// Cardinality.
func getSingletonStats(schemaReplicant *memex.Schema) *property.StatsInfo {
	ret := &property.StatsInfo{
		RowCount:    1.0,
		Cardinality: make(map[int64]float64, schemaReplicant.Len()),
	}
	for _, col := range schemaReplicant.DeferredCausets {
		ret.Cardinality[col.UniqueID] = 1
	}
	return ret
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *LogicalMaxOneRow) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if p.stats != nil {
		return p.stats, nil
	}
	p.stats = getSingletonStats(selfSchema)
	return p.stats, nil
}

// getGroupNDVs propagates the child's GroupNDVs unchanged: a window operator
// neither adds nor removes events, so the child's group NDVs stay valid.
func (p *LogicalWindow) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childStats []*property.StatsInfo) []property.GroupNDV {
	if len(colGroups) > 0 {
		return childStats[0].GroupNDVs
	}
	return nil
}

// DeriveStats implement LogicalCauset DeriveStats interface.
func (p *LogicalWindow) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if p.stats != nil {
		// Reload GroupNDVs since colGroups may have changed.
		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
		return p.stats, nil
	}
	childProfile := childStats[0]
	// Window functions keep the child's event count unchanged.
	p.stats = &property.StatsInfo{
		RowCount:    childProfile.RowCount,
		Cardinality: make(map[int64]float64, selfSchema.Len()),
	}
	// The leading columns come from the child; copy their Cardinality.
	childLen := selfSchema.Len() - len(p.WindowFuncDescs)
	for i := 0; i < childLen; i++ {
		id := selfSchema.DeferredCausets[i].UniqueID
		p.stats.Cardinality[id] = childProfile.Cardinality[id]
	}
	// Window-function output columns get the conservative upper bound: the
	// event count.
	for i := childLen; i < selfSchema.Len(); i++ {
		p.stats.Cardinality[selfSchema.DeferredCausets[i].UniqueID] = childProfile.RowCount
	}
	p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
	return p.stats, nil
}

// ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
func (p *LogicalWindow) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
	if len(colGroups) == 0 {
		return nil
	}
	// Pass down only the parent col groups fully covered by the child schema.
	childSchema := p.Children()[0].Schema()
	_, offsets := childSchema.ExtractDefCausGroups(colGroups)
	if len(offsets) == 0 {
		return nil
	}
	extracted := make([][]*memex.DeferredCauset, len(offsets))
	for i, offset := range offsets {
		extracted[i] = colGroups[offset]
	}
	return extracted
}