github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/index/zonemap.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package index 16 17 import ( 18 "fmt" 19 20 "github.com/RoaringBitmap/roaring" 21 "github.com/matrixorigin/matrixone/pkg/common/moerr" 22 "github.com/matrixorigin/matrixone/pkg/container/types" 23 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 24 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/compute" 25 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 26 ) 27 28 // A zonemap with 64-byte serialized data. 29 // 30 // If the data type is string, only a part of prefix of minimum and maximum will be written to disk 31 // Rule of thumb: false positive is allowed but false negative is not 32 // That means the searialized min-max range should cover the original min-max range. 33 // 34 // Therefore, we must record minv length, because filling zero for minv makes it bigger, which is not acceptable. 35 // For maxv, we have to construct a bigger value in 32 bytes by plus one if needed. 36 // What if the leading 32 bytes are all 0xff? That is means +inf, we should 37 // compare specifically, refer to the comments on isInf field 38 // 39 // Layout for string: 40 // [B0,...B30,B31,B32,...B62,B63] 41 // --------- - -------------- 42 // minv | maxv 43 // | 44 // [b7=init,b6~b5 unused,b4~b0=len(minv)] 45 46 const ( 47 constZMInited uint8 = 0x80 48 constMaxU64 uint64 = ^uint64(0) 49 ) 50 51 func is32BytesMax(bs []byte) bool { 52 isMax := true 53 // iter u64 is about 8x faster than iter byte 54 for i := 0; i < 32; i += 8 { 55 if types.DecodeFixed[uint64](bs[i:i+8]) != constMaxU64 { 56 isMax = false 57 break 58 } 59 } 60 return isMax 61 } 62 63 type ZoneMap struct { 64 typ types.Type 65 min, max any 66 inited bool 67 // only in a deserialized zonemap, this field is possibile to be True. 68 // isInf is true means we can't find a 32-byte upper bound for original maximum when serializing, 69 // and after deserializing, we have to infer that the original maximum is positive infinite. 70 isInf bool 71 } 72 73 func NewZoneMap(typ types.Type) *ZoneMap { 74 zm := &ZoneMap{typ: typ} 75 return zm 76 } 77 78 func (zm *ZoneMap) GetType() types.Type { 79 return zm.typ 80 } 81 82 func (zm *ZoneMap) String() string { 83 return fmt.Sprintf( 84 "ZM<init-%v,isInf-%v, %v-%v>", 85 zm.inited, zm.isInf, 86 common.TypeStringValue(zm.typ, zm.min), 87 common.TypeStringValue(zm.typ, zm.max), 88 ) 89 } 90 91 func (zm *ZoneMap) init(v any) { 92 // We cannot just shallow copy v. 93 // If v is of type []byte, zm.min or zm.max will point to part of a 94 // memory buffer, which may be released later. 95 if src, ok := v.([]byte); ok { 96 dst := make([]byte, len(src)) 97 copy(dst, src) 98 zm.min = dst 99 zm.max = dst 100 } else { 101 zm.min = v 102 zm.max = v 103 } 104 zm.inited = true 105 } 106 107 func (zm *ZoneMap) Update(v any) (err error) { 108 if types.IsNull(v) { 109 return 110 } 111 if !zm.inited { 112 zm.init(v) 113 return 114 } 115 if compute.CompareGeneric(v, zm.max, zm.typ) > 0 { 116 if src, ok := v.([]byte); ok { 117 dst := make([]byte, len(src)) 118 copy(dst, src) 119 zm.max = dst 120 } else { 121 zm.max = v 122 } 123 } else if compute.CompareGeneric(v, zm.min, zm.typ) < 0 { 124 if src, ok := v.([]byte); ok { 125 dst := make([]byte, len(src)) 126 copy(dst, src) 127 zm.min = dst 128 } else { 129 zm.min = v 130 } 131 } 132 return 133 } 134 135 func (zm *ZoneMap) BatchUpdate(KeysCtx *KeysCtx) error { 136 if !zm.typ.Eq(KeysCtx.Keys.GetType()) { 137 return ErrWrongType 138 } 139 update := func(v any, _ int) error { 140 return zm.Update(v) 141 } 142 if err := KeysCtx.Keys.ForeachWindow(KeysCtx.Start, KeysCtx.Count, update, nil); err != nil { 143 return err 144 } 145 return nil 146 } 147 148 func (zm *ZoneMap) Contains(key any) (ok bool) { 149 if types.IsNull(key) { 150 return true 151 } 152 if !zm.inited { 153 return 154 } 155 if (zm.isInf || compute.CompareGeneric(key, zm.max, zm.typ) <= 0) && compute.CompareGeneric(key, zm.min, zm.typ) >= 0 { 156 ok = true 157 } 158 return 159 } 160 161 func (zm *ZoneMap) FastContainsAny(keys containers.Vector) (ok bool) { 162 if !zm.inited { 163 return 164 } 165 op := func(key any, _ int) (err error) { 166 if types.IsNull(key) || 167 ((zm.isInf || compute.CompareGeneric(key, zm.max, zm.typ) <= 0) && 168 compute.CompareGeneric(key, zm.min, zm.typ) >= 0) { 169 err = moerr.GetOkExpectedEOB() 170 ok = true 171 } 172 return 173 } 174 keys.Foreach(op, nil) 175 return 176 } 177 178 func (zm *ZoneMap) ContainsAny(keys containers.Vector) (visibility *roaring.Bitmap, ok bool) { 179 if !zm.inited { 180 return 181 } 182 visibility = roaring.NewBitmap() 183 row := uint32(0) 184 op := func(key any, _ int) (err error) { 185 // exist if key is null or (<= maxv && >= minv) 186 if types.IsNull(key) || 187 ((zm.isInf || compute.CompareGeneric(key, zm.max, zm.typ) <= 0) && 188 compute.CompareGeneric(key, zm.min, zm.typ) >= 0) { 189 visibility.Add(row) 190 } 191 row++ 192 return 193 } 194 if err := keys.Foreach(op, nil); err != nil { 195 panic(err) 196 } 197 if visibility.GetCardinality() != 0 { 198 ok = true 199 } 200 return 201 } 202 203 func (zm *ZoneMap) SetMax(v any) { 204 if types.IsNull(v) { 205 return 206 } 207 if !zm.inited { 208 zm.init(v) 209 return 210 } 211 if compute.CompareGeneric(v, zm.max, zm.typ) > 0 { 212 zm.max = v 213 } 214 } 215 216 func (zm *ZoneMap) GetMax() any { 217 return zm.max 218 } 219 220 func (zm *ZoneMap) SetMin(v any) { 221 if types.IsNull(v) { 222 return 223 } 224 if !zm.inited { 225 zm.init(v) 226 return 227 } 228 if compute.CompareGeneric(v, zm.min, zm.typ) < 0 { 229 zm.min = v 230 } 231 } 232 233 func (zm *ZoneMap) GetMin() any { 234 return zm.min 235 } 236 237 // func (zm *ZoneMap) Print() string { 238 // // default int32 239 // s := "<ZM>\n[" 240 // s += strconv.Itoa(int(zm.min.(int32))) 241 // s += "," 242 // s += strconv.Itoa(int(zm.max.(int32))) 243 // s += "]\n" 244 // s += "</ZM>" 245 // return s 246 // } 247 248 func (zm *ZoneMap) Marshal() (buf []byte, err error) { 249 buf = make([]byte, 64) 250 if !zm.inited { 251 return 252 } 253 buf[31] |= constZMInited 254 switch zm.typ.Oid { 255 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 256 minv, maxv := zm.min.([]byte), zm.max.([]byte) 257 // write 31-byte prefix of minv 258 copy(buf[0:31], minv) 259 minLen := uint8(31) 260 if len(minv) < 31 { 261 minLen = uint8(len(minv)) 262 } 263 buf[31] |= minLen 264 265 // write 32-byte prefix of maxv 266 copy(buf[32:64], maxv) 267 // no truncation, get a bigger value by filling tail zeros 268 if len(maxv) > 32 && !is32BytesMax(buf[32:64]) { 269 // truncation happens, get a bigger one by plus one 270 for i := 63; i >= 32; i-- { 271 buf[i] += 1 272 if buf[i] != 0 { 273 break 274 } 275 } 276 } 277 default: 278 minv := types.EncodeValue(zm.min, zm.typ) 279 maxv := types.EncodeValue(zm.max, zm.typ) 280 if len(maxv) > 32 || len(minv) > 32 { 281 panic("zonemap: large fixed length type, check again") 282 } 283 copy(buf[0:], minv) 284 copy(buf[32:], maxv) 285 } 286 return 287 } 288 289 func (zm *ZoneMap) Unmarshal(buf []byte) error { 290 init := buf[31] & constZMInited 291 if init == 0 { 292 zm.inited = false 293 return nil 294 } 295 zm.inited = true 296 switch zm.typ.Oid { 297 case types.T_bool: 298 zm.min = types.DecodeFixed[bool](buf[:1]) 299 buf = buf[32:] 300 zm.max = types.DecodeFixed[bool](buf[:1]) 301 return nil 302 case types.T_int8: 303 zm.min = types.DecodeFixed[int8](buf[:1]) 304 buf = buf[32:] 305 zm.max = types.DecodeFixed[int8](buf[:1]) 306 return nil 307 case types.T_int16: 308 zm.min = types.DecodeFixed[int16](buf[:2]) 309 buf = buf[32:] 310 zm.max = types.DecodeFixed[int16](buf[:2]) 311 return nil 312 case types.T_int32: 313 zm.min = types.DecodeFixed[int32](buf[:4]) 314 buf = buf[32:] 315 zm.max = types.DecodeFixed[int32](buf[:4]) 316 return nil 317 case types.T_int64: 318 zm.min = types.DecodeFixed[int64](buf[:8]) 319 buf = buf[32:] 320 zm.max = types.DecodeFixed[int64](buf[:8]) 321 return nil 322 case types.T_uint8: 323 zm.min = types.DecodeFixed[uint8](buf[:1]) 324 buf = buf[32:] 325 zm.max = types.DecodeFixed[uint8](buf[:1]) 326 return nil 327 case types.T_uint16: 328 zm.min = types.DecodeFixed[uint16](buf[:2]) 329 buf = buf[32:] 330 zm.max = types.DecodeFixed[uint16](buf[:2]) 331 return nil 332 case types.T_uint32: 333 zm.min = types.DecodeFixed[uint32](buf[:4]) 334 buf = buf[32:] 335 zm.max = types.DecodeFixed[uint32](buf[:4]) 336 return nil 337 case types.T_uint64: 338 zm.min = types.DecodeFixed[uint64](buf[:8]) 339 buf = buf[32:] 340 zm.max = types.DecodeFixed[uint64](buf[:8]) 341 return nil 342 case types.T_float32: 343 zm.min = types.DecodeFixed[float32](buf[:4]) 344 buf = buf[32:] 345 zm.max = types.DecodeFixed[float32](buf[:4]) 346 return nil 347 case types.T_float64: 348 zm.min = types.DecodeFixed[float64](buf[:8]) 349 buf = buf[32:] 350 zm.max = types.DecodeFixed[float64](buf[:8]) 351 return nil 352 case types.T_date: 353 zm.min = types.DecodeFixed[types.Date](buf[:4]) 354 buf = buf[32:] 355 zm.max = types.DecodeFixed[types.Date](buf[:4]) 356 return nil 357 case types.T_time: 358 zm.min = types.DecodeFixed[types.Time](buf[:8]) 359 buf = buf[32:] 360 zm.max = types.DecodeFixed[types.Time](buf[:8]) 361 return nil 362 case types.T_datetime: 363 zm.min = types.DecodeFixed[types.Datetime](buf[:8]) 364 buf = buf[32:] 365 zm.max = types.DecodeFixed[types.Datetime](buf[:8]) 366 return nil 367 case types.T_timestamp: 368 zm.min = types.DecodeFixed[types.Timestamp](buf[:8]) 369 buf = buf[32:] 370 zm.max = types.DecodeFixed[types.Timestamp](buf[:8]) 371 return nil 372 case types.T_decimal64: 373 zm.min = types.DecodeFixed[types.Decimal64](buf[:8]) 374 buf = buf[32:] 375 zm.max = types.DecodeFixed[types.Decimal64](buf[:8]) 376 return nil 377 case types.T_decimal128: 378 zm.min = types.DecodeFixed[types.Decimal128](buf[:16]) 379 buf = buf[32:] 380 zm.max = types.DecodeFixed[types.Decimal128](buf[:16]) 381 return nil 382 case types.T_uuid: 383 zm.min = types.DecodeFixed[types.Uuid](buf[:16]) 384 buf = buf[32:] 385 zm.max = types.DecodeFixed[types.Uuid](buf[:16]) 386 return nil 387 case types.T_TS: 388 zm.min = buf[:types.TxnTsSize] 389 buf = buf[32:] 390 zm.max = buf[:types.TxnTsSize] 391 return nil 392 case types.T_Rowid: 393 zm.min = buf[:types.RowidSize] 394 buf = buf[32:] 395 zm.max = buf[:types.RowidSize] 396 return nil 397 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 398 minBuf := make([]byte, buf[31]&0x7f) 399 copy(minBuf, buf[0:32]) 400 maxBuf := make([]byte, 32) 401 copy(maxBuf, buf[32:64]) 402 zm.min = minBuf 403 zm.max = maxBuf 404 405 zm.isInf = is32BytesMax(maxBuf) 406 return nil 407 408 default: 409 panic("unsupported type") 410 } 411 } 412 413 /*func (zm *ZoneMap) Unmarshal(min, max []byte) error { 414 init := min[31] & constZMInited 415 if init == 0 { 416 zm.inited = false 417 return nil 418 } 419 zm.inited = true 420 switch zm.typ.Oid { 421 case types.T_bool: 422 zm.min = types.DecodeFixed[bool](min[:1]) 423 zm.max = types.DecodeFixed[bool](max[:1]) 424 return nil 425 case types.T_int8: 426 zm.min = types.DecodeFixed[int8](min[:1]) 427 zm.max = types.DecodeFixed[int8](max[:1]) 428 return nil 429 case types.T_int16: 430 zm.min = types.DecodeFixed[int16](min[:2]) 431 zm.max = types.DecodeFixed[int16](max[:2]) 432 return nil 433 case types.T_int32: 434 zm.min = types.DecodeFixed[int32](min[:4]) 435 zm.max = types.DecodeFixed[int32](max[:4]) 436 return nil 437 case types.T_int64: 438 zm.min = types.DecodeFixed[int64](min[:8]) 439 zm.max = types.DecodeFixed[int64](max[:8]) 440 return nil 441 case types.T_uint8: 442 zm.min = types.DecodeFixed[uint8](min[:1]) 443 zm.max = types.DecodeFixed[uint8](max[:1]) 444 return nil 445 case types.T_uint16: 446 zm.min = types.DecodeFixed[uint16](min[:2]) 447 zm.max = types.DecodeFixed[uint16](max[:2]) 448 return nil 449 case types.T_uint32: 450 zm.min = types.DecodeFixed[uint32](min[:4]) 451 //buf = buf[32:] 452 zm.max = types.DecodeFixed[uint32](max[:4]) 453 return nil 454 case types.T_uint64: 455 zm.min = types.DecodeFixed[uint64](min[:8]) 456 zm.max = types.DecodeFixed[uint64](max[:8]) 457 return nil 458 case types.T_float32: 459 zm.min = types.DecodeFixed[float32](min[:4]) 460 zm.max = types.DecodeFixed[float32](max[:4]) 461 return nil 462 case types.T_float64: 463 zm.min = types.DecodeFixed[float64](min[:8]) 464 zm.max = types.DecodeFixed[float64](max[:8]) 465 return nil 466 case types.T_date: 467 zm.min = types.DecodeFixed[types.Date](min[:4]) 468 zm.max = types.DecodeFixed[types.Date](max[:4]) 469 return nil 470 case types.T_datetime: 471 zm.min = types.DecodeFixed[types.Datetime](min[:8]) 472 zm.max = types.DecodeFixed[types.Datetime](max[:8]) 473 return nil 474 case types.T_timestamp: 475 zm.min = types.DecodeFixed[types.Timestamp](min[:8]) 476 zm.max = types.DecodeFixed[types.Timestamp](max[:8]) 477 return nil 478 case types.T_decimal64: 479 zm.min = types.DecodeFixed[types.Decimal64](min[:8]) 480 zm.max = types.DecodeFixed[types.Decimal64](max[:8]) 481 return nil 482 case types.T_decimal128: 483 zm.min = types.DecodeFixed[types.Decimal128](min[:16]) 484 zm.max = types.DecodeFixed[types.Decimal128](max[:16]) 485 return nil 486 case types.T_uuid: 487 zm.min = types.DecodeFixed[types.Uuid](min[:16]) 488 zm.max = types.DecodeFixed[types.Uuid](max[:16]) 489 return nil 490 case types.T_TS: 491 zm.min = min[:types.TxnTsSize] 492 zm.max = max[:types.TxnTsSize] 493 return nil 494 case types.T_Rowid: 495 zm.min = min[:types.RowidSize] 496 zm.max = max[:types.RowidSize] 497 return nil 498 case types.T_char, types.T_varchar, types.T_json, types.T_blob: 499 minBuf := make([]byte, min[31]&0x7f) 500 copy(minBuf, min) 501 maxBuf := make([]byte, 32) 502 copy(maxBuf, max) 503 zm.min = minBuf 504 zm.max = maxBuf 505 506 zm.isInf = is32BytesMax(maxBuf) 507 return nil 508 509 default: 510 panic("unsupported type") 511 } 512 } 513 */ 514 515 func (zm *ZoneMap) GetMemoryUsage() uint64 { 516 return 64 517 }