github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/soliton/ranger/ranger.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package ranger 15 16 import ( 17 "bytes" 18 "math" 19 "sort" 20 "unicode/utf8" 21 22 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 23 "github.com/whtcorpsinc/BerolinaSQL/ast" 24 "github.com/whtcorpsinc/BerolinaSQL/charset" 25 "github.com/whtcorpsinc/errors" 26 "github.com/whtcorpsinc/milevadb/ekv" 27 "github.com/whtcorpsinc/milevadb/memex" 28 "github.com/whtcorpsinc/milevadb/soliton/codec" 29 "github.com/whtcorpsinc/milevadb/stochastikctx" 30 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 31 "github.com/whtcorpsinc/milevadb/types" 32 ) 33 34 func validInterval(sc *stmtctx.StatementContext, low, high point) (bool, error) { 35 l, err := codec.EncodeKey(sc, nil, low.value) 36 if err != nil { 37 return false, errors.Trace(err) 38 } 39 if low.excl { 40 l = ekv.Key(l).PrefixNext() 41 } 42 r, err := codec.EncodeKey(sc, nil, high.value) 43 if err != nil { 44 return false, errors.Trace(err) 45 } 46 if !high.excl { 47 r = ekv.Key(r).PrefixNext() 48 } 49 return bytes.Compare(l, r) < 0, nil 50 } 51 52 // points2Ranges build index ranges from range points. 53 // Only one defCausumn is built there. If there're multiple defCausumns, use appendPoints2Ranges. 
func points2Ranges(sc *stmtctx.StatementContext, rangePoints []point, tp *types.FieldType) ([]*Range, error) {
	// Each consecutive pair in rangePoints describes one candidate interval:
	// rangePoints[i] is its start point and rangePoints[i+1] its end point.
	ranges := make([]*Range, 0, len(rangePoints)/2)
	for i := 0; i < len(rangePoints); i += 2 {
		startPoint, err := convertPoint(sc, rangePoints[i], tp)
		if err != nil {
			return nil, errors.Trace(err)
		}
		endPoint, err := convertPoint(sc, rangePoints[i+1], tp)
		if err != nil {
			return nil, errors.Trace(err)
		}
		// Drop intervals that became empty after conversion to the index type.
		less, err := validInterval(sc, startPoint, endPoint)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if !less {
			continue
		}
		// If defCausumn has not null flag, [null, null] should be removed.
		if allegrosql.HasNotNullFlag(tp.Flag) && endPoint.value.HoTT() == types.HoTTNull {
			continue
		}

		ran := &Range{
			LowVal:      []types.Causet{startPoint.value},
			LowExclude:  startPoint.excl,
			HighVal:     []types.Causet{endPoint.value},
			HighExclude: endPoint.excl,
		}
		ranges = append(ranges, ran)
	}
	return ranges, nil
}

// convertPoint casts the point's value to the target field type and, when the
// conversion is lossy, flips the point's exclude flag so that the converted
// point still bounds exactly the same set of defCausumn values.
func convertPoint(sc *stmtctx.StatementContext, point point, tp *types.FieldType) (point, error) {
	switch point.value.HoTT() {
	// Unbounded sentinel values need no conversion.
	case types.HoTTMaxValue, types.HoTTMinNotNull:
		return point, nil
	}
	casted, err := point.value.ConvertTo(sc, tp)
	if err != nil {
		return point, errors.Trace(err)
	}
	valCmpCasted, err := point.value.CompareCauset(sc, &casted)
	if err != nil {
		return point, errors.Trace(err)
	}
	point.value = casted
	if valCmpCasted == 0 {
		// Lossless conversion: the exclude flag stays valid as-is.
		return point, nil
	}
	// The cast changed the value (e.g. a decimal truncated to an integer),
	// so the exclude flag may need to flip to keep the bound equivalent.
	if point.start {
		if point.excl {
			if valCmpCasted < 0 {
				// e.g. "a > 1.9" convert to "a >= 2".
				point.excl = false
			}
		} else {
			if valCmpCasted > 0 {
				// e.g. "a >= 1.1" convert to "a > 1".
				point.excl = true
			}
		}
	} else {
		if point.excl {
			if valCmpCasted > 0 {
				// e.g. "a < 1.1" convert to "a <= 1".
				point.excl = false
			}
		} else {
			if valCmpCasted < 0 {
				// e.g. "a <= 1.9" convert to "a < 2".
				point.excl = true
			}
		}
	}
	return point, nil
}

// appendPoints2Ranges appends additional defCausumn ranges for multi-defCausumn index.
// The additional defCausumn ranges can only be appended to point ranges.
// for example we have an index (a, b), if the condition is (a > 1 and b = 2)
// then we can not build a conjunctive ranges for this index.
func appendPoints2Ranges(sc *stmtctx.StatementContext, origin []*Range, rangePoints []point,
	ft *types.FieldType) ([]*Range, error) {
	var newIndexRanges []*Range
	for i := 0; i < len(origin); i++ {
		oRange := origin[i]
		// Non-point ranges are kept as-is; appending another defCausumn to
		// them would not narrow the scanned key space correctly.
		if !oRange.IsPoint(sc) {
			newIndexRanges = append(newIndexRanges, oRange)
		} else {
			newRanges, err := appendPoints2IndexRange(sc, oRange, rangePoints, ft)
			if err != nil {
				return nil, errors.Trace(err)
			}
			newIndexRanges = append(newIndexRanges, newRanges...)
		}
	}
	return newIndexRanges, nil
}

// appendPoints2IndexRange extends a single point range with one interval per
// point pair, producing len(rangePoints)/2 ranges at most.
func appendPoints2IndexRange(sc *stmtctx.StatementContext, origin *Range, rangePoints []point,
	ft *types.FieldType) ([]*Range, error) {
	newRanges := make([]*Range, 0, len(rangePoints)/2)
	for i := 0; i < len(rangePoints); i += 2 {
		startPoint, err := convertPoint(sc, rangePoints[i], ft)
		if err != nil {
			return nil, errors.Trace(err)
		}
		endPoint, err := convertPoint(sc, rangePoints[i+1], ft)
		if err != nil {
			return nil, errors.Trace(err)
		}
		less, err := validInterval(sc, startPoint, endPoint)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if !less {
			continue
		}

		// Copy the original bounds into fresh slices before appending the new
		// defCausumn's bound, so the origin range is never mutated.
		lowVal := make([]types.Causet, len(origin.LowVal)+1)
		copy(lowVal, origin.LowVal)
		lowVal[len(origin.LowVal)] = startPoint.value

		highVal := make([]types.Causet, len(origin.HighVal)+1)
		copy(highVal, origin.HighVal)
		highVal[len(origin.HighVal)] = endPoint.value

		ir := &Range{
			LowVal:      lowVal,
			LowExclude:  startPoint.excl,
			HighVal:     highVal,
			HighExclude: endPoint.excl,
		}
		newRanges = append(newRanges, ir)
	}
	return newRanges, nil
}

// appendRanges2PointRanges builds the cross product of pointRanges and
// ranges, concatenating each point range's bounds with each range's bounds.
func appendRanges2PointRanges(pointRanges []*Range, ranges []*Range) []*Range {
	if len(ranges) == 0 {
		return pointRanges
	}
	newRanges := make([]*Range, 0, len(pointRanges)*len(ranges))
	for _, pointRange := range pointRanges {
		for _, r := range ranges {
			// NOTE(review): these appends alias pointRange's backing arrays;
			// this is only safe if cap == len for LowVal/HighVal (true for
			// ranges built in this file) — confirm before reusing elsewhere.
			lowVal := append(pointRange.LowVal, r.LowVal...)
			highVal := append(pointRange.HighVal, r.HighVal...)
			newRange := &Range{
				LowVal:      lowVal,
				LowExclude:  r.LowExclude,
				HighVal:     highVal,
				HighExclude: r.HighExclude,
			}
			newRanges = append(newRanges, newRange)
		}
	}
	return newRanges
}

// points2BlockRanges build ranges for causet scan from range points.
// It will remove the nil and convert MinNotNull and MaxValue to MinInt64 or MinUint64 and MaxInt64 or MaxUint64.
func points2BlockRanges(sc *stmtctx.StatementContext, rangePoints []point, tp *types.FieldType) ([]*Range, error) {
	ranges := make([]*Range, 0, len(rangePoints)/2)
	var minValueCauset, maxValueCauset types.Causet
	// Currently, causet's ekv range cannot accept encoded value of MaxValueCauset. we need to convert it.
	if allegrosql.HasUnsignedFlag(tp.Flag) {
		minValueCauset.SetUint64(0)
		maxValueCauset.SetUint64(math.MaxUint64)
	} else {
		minValueCauset.SetInt64(math.MinInt64)
		maxValueCauset.SetInt64(math.MaxInt64)
	}
	for i := 0; i < len(rangePoints); i += 2 {
		startPoint, err := convertPoint(sc, rangePoints[i], tp)
		if err != nil {
			return nil, errors.Trace(err)
		}
		// Null and MinNotNull both collapse to the concrete minimum of the
		// handle's integer domain; null additionally becomes inclusive.
		if startPoint.value.HoTT() == types.HoTTNull {
			startPoint.value = minValueCauset
			startPoint.excl = false
		} else if startPoint.value.HoTT() == types.HoTTMinNotNull {
			startPoint.value = minValueCauset
		}
		endPoint, err := convertPoint(sc, rangePoints[i+1], tp)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if endPoint.value.HoTT() == types.HoTTMaxValue {
			endPoint.value = maxValueCauset
		} else if endPoint.value.HoTT() == types.HoTTNull {
			// An interval ending at null matches no handle values.
			continue
		}
		less, err := validInterval(sc, startPoint, endPoint)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if !less {
			continue
		}
		ran := &Range{
			LowVal:      []types.Causet{startPoint.value},
			LowExclude:  startPoint.excl,
			HighVal:     []types.Causet{endPoint.value},
			HighExclude: endPoint.excl,
		}
		ranges = append(ranges, ran)
	}
	return ranges, nil
}

// buildDeferredCausetRange builds range from CNF conditions.
func buildDeferredCausetRange(accessConditions []memex.Expression, sc *stmtctx.StatementContext, tp *types.FieldType, blockRange bool, defCausLen int) (ranges []*Range, err error) {
	rb := builder{sc: sc}
	rangePoints := fullRange
	// Intersect the point set of every CNF item to get the final point set.
	for _, cond := range accessConditions {
		rangePoints = rb.intersection(rangePoints, rb.build(cond))
		if rb.err != nil {
			return nil, errors.Trace(rb.err)
		}
	}
	// Use a widened field type to avoid overflow/truncation during conversion.
	newTp := newFieldType(tp)
	if blockRange {
		ranges, err = points2BlockRanges(sc, rangePoints, newTp)
	} else {
		ranges, err = points2Ranges(sc, rangePoints, newTp)
	}
	if err != nil {
		return nil, errors.Trace(err)
	}
	// For a prefix defCausumn, cut each bound to the prefix length. A cut
	// bound becomes inclusive, which can make ranges overlap, so union them.
	if defCausLen != types.UnspecifiedLength {
		for _, ran := range ranges {
			if CutCausetByPrefixLen(&ran.LowVal[0], defCausLen, tp) {
				ran.LowExclude = false
			}
			if CutCausetByPrefixLen(&ran.HighVal[0], defCausLen, tp) {
				ran.HighExclude = false
			}
		}
		ranges, err = UnionRanges(sc, ranges, true)
		if err != nil {
			return nil, err
		}
	}
	return ranges, nil
}

// BuildBlockRange builds range of PK defCausumn for PhysicalBlockScan.
func BuildBlockRange(accessConditions []memex.Expression, sc *stmtctx.StatementContext, tp *types.FieldType) ([]*Range, error) {
	return buildDeferredCausetRange(accessConditions, sc, tp, true, types.UnspecifiedLength)
}

// BuildDeferredCausetRange builds range from access conditions for general defCausumns.
func BuildDeferredCausetRange(conds []memex.Expression, sc *stmtctx.StatementContext, tp *types.FieldType, defCausLen int) ([]*Range, error) {
	// No conditions: the full range (null, +inf].
	if len(conds) == 0 {
		return []*Range{{LowVal: []types.Causet{{}}, HighVal: []types.Causet{types.MaxValueCauset()}}}, nil
	}
	return buildDeferredCausetRange(conds, sc, tp, false, defCausLen)
}

// buildCNFIndexRange builds the range for index where the top layer is CNF.
func (d *rangeDetacher) buildCNFIndexRange(newTp []*types.FieldType,
	eqAndInCount int, accessCondition []memex.Expression) ([]*Range, error) {
	sc := d.sctx.GetStochastikVars().StmtCtx
	rb := builder{sc: sc}
	var (
		ranges []*Range
		err    error
	)
	// Collect the widened field type of every index defCausumn.
	for _, defCaus := range d.defcaus {
		newTp = append(newTp, newFieldType(defCaus.RetType))
	}
	// The first eqAndInCount conditions are equal/in conditions, one per
	// defCausumn; each one extends the point ranges built so far.
	for i := 0; i < eqAndInCount; i++ {
		// Build ranges for equal or in access conditions.
		point := rb.build(accessCondition[i])
		if rb.err != nil {
			return nil, errors.Trace(rb.err)
		}
		if i == 0 {
			ranges, err = points2Ranges(sc, point, newTp[i])
		} else {
			ranges, err = appendPoints2Ranges(sc, ranges, point, newTp[i])
		}
		if err != nil {
			return nil, errors.Trace(err)
		}
	}
	rangePoints := fullRange
	// Build rangePoints for non-equal access conditions. They all constrain
	// the same (next) defCausumn, so their point sets are intersected.
	for i := eqAndInCount; i < len(accessCondition); i++ {
		rangePoints = rb.intersection(rangePoints, rb.build(accessCondition[i]))
		if rb.err != nil {
			return nil, errors.Trace(rb.err)
		}
	}
	if eqAndInCount == 0 {
		ranges, err = points2Ranges(sc, rangePoints, newTp[0])
	} else if eqAndInCount < len(accessCondition) {
		ranges, err = appendPoints2Ranges(sc, ranges, rangePoints, newTp[eqAndInCount])
	}
	if err != nil {
		return nil, errors.Trace(err)
	}

	// Take prefix index into consideration.
	if hasPrefix(d.lengths) {
		if fixPrefixDefCausRange(ranges, d.lengths, newTp) {
			// Cutting to a prefix may create overlapping ranges; merge them.
			ranges, err = UnionRanges(sc, ranges, d.mergeConsecutive)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
	}

	return ranges, nil
}

// sortRange pairs a range with its encoded endpoints so ranges can be
// sorted and merged by key-space order.
type sortRange struct {
	originalValue *Range
	encodedStart  []byte
	encodedEnd    []byte
}

// UnionRanges sorts `ranges`, union adjacent ones if possible.
// For two intervals [a, b], [c, d], we have guaranteed that a <= c. If b >= c. Then two intervals are overlapped.
380 // And this two can be merged as [a, max(b, d)]. 381 // Otherwise they aren't overlapped. 382 func UnionRanges(sc *stmtctx.StatementContext, ranges []*Range, mergeConsecutive bool) ([]*Range, error) { 383 if len(ranges) == 0 { 384 return nil, nil 385 } 386 objects := make([]*sortRange, 0, len(ranges)) 387 for _, ran := range ranges { 388 left, err := codec.EncodeKey(sc, nil, ran.LowVal...) 389 if err != nil { 390 return nil, errors.Trace(err) 391 } 392 if ran.LowExclude { 393 left = ekv.Key(left).PrefixNext() 394 } 395 right, err := codec.EncodeKey(sc, nil, ran.HighVal...) 396 if err != nil { 397 return nil, errors.Trace(err) 398 } 399 if !ran.HighExclude { 400 right = ekv.Key(right).PrefixNext() 401 } 402 objects = append(objects, &sortRange{originalValue: ran, encodedStart: left, encodedEnd: right}) 403 } 404 sort.Slice(objects, func(i, j int) bool { 405 return bytes.Compare(objects[i].encodedStart, objects[j].encodedStart) < 0 406 }) 407 ranges = ranges[:0] 408 lastRange := objects[0] 409 for i := 1; i < len(objects); i++ { 410 if (mergeConsecutive && bytes.Compare(lastRange.encodedEnd, objects[i].encodedStart) >= 0) || 411 (!mergeConsecutive && bytes.Compare(lastRange.encodedEnd, objects[i].encodedStart) > 0) { 412 if bytes.Compare(lastRange.encodedEnd, objects[i].encodedEnd) < 0 { 413 lastRange.encodedEnd = objects[i].encodedEnd 414 lastRange.originalValue.HighVal = objects[i].originalValue.HighVal 415 lastRange.originalValue.HighExclude = objects[i].originalValue.HighExclude 416 } 417 } else { 418 ranges = append(ranges, lastRange.originalValue) 419 lastRange = objects[i] 420 } 421 } 422 ranges = append(ranges, lastRange.originalValue) 423 return ranges, nil 424 } 425 426 func hasPrefix(lengths []int) bool { 427 for _, l := range lengths { 428 if l != types.UnspecifiedLength { 429 return true 430 } 431 } 432 return false 433 } 434 435 // fixPrefixDefCausRange checks whether the range of one defCausumn exceeds the length and needs to be cut. 
436 // It specially handles the last defCausumn of each range point. If the last one need to be cut, it will 437 // change the exclude status of that point and return `true` to tell 438 // that we need do a range merging since that interval may have intersection. 439 // e.g. if the interval is (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last defCausumn is 3, 440 // then we'll change it to (-inf -inf, a xxx], [a xxx, +inf +inf). You can see that this two interval intersect, 441 // so we need a merge operation. 442 // Q: only checking the last defCausumn to decide whether the endpoint's exclude status needs to be reset is enough? 443 // A: Yes, suppose that the interval is (-inf -inf, a xxxxx b) and only the second defCausumn needs to be cut. 444 // The result would be (-inf -inf, a xxx b) if the length of it is 3. Obviously we only need to care about the data 445 // whose the first two key is `a` and `xxx`. It read all data whose index value begins with `a` and `xxx` and the third 446 // value less than `b`, covering the values begin with `a` and `xxxxx` and the third value less than `b` perfectly. 447 // So in this case we don't need to reset its exclude status. The right endpoint case can be proved in the same way. 
448 func fixPrefixDefCausRange(ranges []*Range, lengths []int, tp []*types.FieldType) bool { 449 var hasCut bool 450 for _, ran := range ranges { 451 lowTail := len(ran.LowVal) - 1 452 for i := 0; i < lowTail; i++ { 453 CutCausetByPrefixLen(&ran.LowVal[i], lengths[i], tp[i]) 454 } 455 lowCut := CutCausetByPrefixLen(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail]) 456 if lowCut { 457 ran.LowExclude = false 458 } 459 highTail := len(ran.HighVal) - 1 460 for i := 0; i < highTail; i++ { 461 CutCausetByPrefixLen(&ran.HighVal[i], lengths[i], tp[i]) 462 } 463 highCut := CutCausetByPrefixLen(&ran.HighVal[highTail], lengths[highTail], tp[highTail]) 464 if highCut { 465 ran.HighExclude = false 466 } 467 hasCut = lowCut || highCut 468 } 469 return hasCut 470 } 471 472 // CutCausetByPrefixLen cuts the causet according to the prefix length. 473 // If it's UTF8 encoded, we will cut it by characters rather than bytes. 474 func CutCausetByPrefixLen(v *types.Causet, length int, tp *types.FieldType) bool { 475 if v.HoTT() == types.HoTTString || v.HoTT() == types.HoTTBytes { 476 defCausCharset := tp.Charset 477 defCausValue := v.GetBytes() 478 isUTF8Charset := defCausCharset == charset.CharsetUTF8 || defCausCharset == charset.CharsetUTF8MB4 479 if isUTF8Charset { 480 if length != types.UnspecifiedLength && utf8.RuneCount(defCausValue) > length { 481 rs := bytes.Runes(defCausValue) 482 truncateStr := string(rs[:length]) 483 // truncate value and limit its length 484 v.SetString(truncateStr, tp.DefCauslate) 485 return true 486 } 487 } else if length != types.UnspecifiedLength && len(defCausValue) > length { 488 // truncate value and limit its length 489 v.SetBytes(defCausValue[:length]) 490 if v.HoTT() == types.HoTTString { 491 v.SetString(v.GetString(), tp.DefCauslate) 492 } 493 return true 494 } 495 } 496 return false 497 } 498 499 // We cannot use the FieldType of defCausumn directly. e.g. the defCausumn a is int32 and we have a > 1111111111111111111. 
500 // Obviously the constant is bigger than MaxInt32, so we will get overflow error if we use the FieldType of defCausumn a. 501 func newFieldType(tp *types.FieldType) *types.FieldType { 502 switch tp.Tp { 503 // To avoid overflow error. 504 case allegrosql.TypeTiny, allegrosql.TypeShort, allegrosql.TypeInt24, allegrosql.TypeLong, allegrosql.TypeLonglong: 505 newTp := types.NewFieldType(allegrosql.TypeLonglong) 506 newTp.Flag = tp.Flag 507 newTp.Charset = tp.Charset 508 return newTp 509 // To avoid data truncate error. 510 case allegrosql.TypeFloat, allegrosql.TypeDouble, allegrosql.TypeBlob, allegrosql.TypeTinyBlob, allegrosql.TypeMediumBlob, allegrosql.TypeLongBlob, 511 allegrosql.TypeString, allegrosql.TypeVarchar, allegrosql.TypeVarString: 512 newTp := types.NewFieldTypeWithDefCauslation(tp.Tp, tp.DefCauslate, types.UnspecifiedLength) 513 newTp.Charset = tp.Charset 514 return newTp 515 default: 516 return tp 517 } 518 } 519 520 // points2EqOrInCond constructs a 'EQUAL' or 'IN' scalar function based on the 521 // 'points'. The target defCausumn is extracted from the 'expr'. 522 // NOTE: 523 // 1. 'expr' must be either 'EQUAL' or 'IN' function. 524 // 2. 'points' should not be empty. 
525 func points2EqOrInCond(ctx stochastikctx.Context, points []point, expr memex.Expression) memex.Expression { 526 // len(points) cannot be 0 here, since we impose early termination in ExtractEqAndInCondition 527 sf, _ := expr.(*memex.ScalarFunction) 528 // Constant and DeferredCauset args should have same RetType, simply get from first arg 529 retType := sf.GetArgs()[0].GetType() 530 args := make([]memex.Expression, 0, len(points)/2) 531 if sf.FuncName.L == ast.EQ { 532 if c, ok := sf.GetArgs()[0].(*memex.DeferredCauset); ok { 533 args = append(args, c) 534 } else if c, ok := sf.GetArgs()[1].(*memex.DeferredCauset); ok { 535 args = append(args, c) 536 } 537 } else { 538 args = append(args, sf.GetArgs()[0]) 539 } 540 for i := 0; i < len(points); i = i + 2 { 541 value := &memex.Constant{ 542 Value: points[i].value, 543 RetType: retType, 544 } 545 args = append(args, value) 546 } 547 funcName := ast.EQ 548 if len(args) > 2 { 549 funcName = ast.In 550 } 551 return memex.NewFunctionInternal(ctx, funcName, sf.GetType(), args...) 552 } 553 554 // DetachCondAndBuildRangeForPartition will detach the index filters from causet filters. 555 // The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation. 556 func DetachCondAndBuildRangeForPartition(sctx stochastikctx.Context, conditions []memex.Expression, defcaus []*memex.DeferredCauset, 557 lengths []int) (*DetachRangeResult, error) { 558 d := &rangeDetacher{ 559 sctx: sctx, 560 allConds: conditions, 561 defcaus: defcaus, 562 lengths: lengths, 563 mergeConsecutive: false, 564 } 565 return d.detachCondAndBuildRangeForDefCauss() 566 }