github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/index/zonemap.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package index
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/RoaringBitmap/roaring"
    21  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    24  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/compute"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    26  )
    27  
    28  // A zonemap with 64-byte serialized data.
    29  //
    30  // If the data type is string, only a part of prefix of minimum and maximum will be written to disk
    31  // Rule of thumb: false positive is allowed but false negative is not
    32  // That means the serialized min-max range should cover the original min-max range.
    33  //
    34  // Therefore, we must record minv length, because filling zero for minv makes it bigger, which is not acceptable.
    35  // For maxv, we have to construct a bigger value in 32 bytes by plus one if needed.
    36  // What if the leading 32 bytes are all 0xff? That means +inf, and we should
    37  // compare specially; refer to the comments on the isInf field.
    38  //
    39  // Layout for string:
    40  // [B0,...B30,B31,B32,...B62,B63]
    41  //  ---------  -  --------------
    42  //     minv    |       maxv
    43  //             |
    44  //         [b7=init,b6~b5 unused,b4~b0=len(minv)]
    45  
    46  const (
    47  	constZMInited uint8  = 0x80
    48  	constMaxU64   uint64 = ^uint64(0)
    49  )
    50  
    51  func is32BytesMax(bs []byte) bool {
    52  	isMax := true
    53  	// iter u64 is about 8x faster than iter byte
    54  	for i := 0; i < 32; i += 8 {
    55  		if types.DecodeFixed[uint64](bs[i:i+8]) != constMaxU64 {
    56  			isMax = false
    57  			break
    58  		}
    59  	}
    60  	return isMax
    61  }
    62  
    63  type ZoneMap struct {
    64  	typ      types.Type
    65  	min, max any
    66  	inited   bool
    67  	// only in a deserialized zonemap, this field is possibile to be True.
    68  	// isInf is true means we can't find a 32-byte upper bound for original maximum when serializing,
    69  	// and after deserializing, we have to infer that the original maximum is positive infinite.
    70  	isInf bool
    71  }
    72  
    73  func NewZoneMap(typ types.Type) *ZoneMap {
    74  	zm := &ZoneMap{typ: typ}
    75  	return zm
    76  }
    77  
    78  func (zm *ZoneMap) GetType() types.Type {
    79  	return zm.typ
    80  }
    81  
    82  func (zm *ZoneMap) String() string {
    83  	return fmt.Sprintf(
    84  		"ZM<init-%v,isInf-%v, %v-%v>",
    85  		zm.inited, zm.isInf,
    86  		common.TypeStringValue(zm.typ, zm.min),
    87  		common.TypeStringValue(zm.typ, zm.max),
    88  	)
    89  }
    90  
    91  func (zm *ZoneMap) init(v any) {
    92  	// We cannot just shallow copy v.
    93  	// If v is of type []byte, zm.min or zm.max will point to part of a
    94  	// memory buffer, which may be released later.
    95  	if src, ok := v.([]byte); ok {
    96  		dst := make([]byte, len(src))
    97  		copy(dst, src)
    98  		zm.min = dst
    99  		zm.max = dst
   100  	} else {
   101  		zm.min = v
   102  		zm.max = v
   103  	}
   104  	zm.inited = true
   105  }
   106  
   107  func (zm *ZoneMap) Update(v any) (err error) {
   108  	if types.IsNull(v) {
   109  		return
   110  	}
   111  	if !zm.inited {
   112  		zm.init(v)
   113  		return
   114  	}
   115  	if compute.CompareGeneric(v, zm.max, zm.typ) > 0 {
   116  		if src, ok := v.([]byte); ok {
   117  			dst := make([]byte, len(src))
   118  			copy(dst, src)
   119  			zm.max = dst
   120  		} else {
   121  			zm.max = v
   122  		}
   123  	} else if compute.CompareGeneric(v, zm.min, zm.typ) < 0 {
   124  		if src, ok := v.([]byte); ok {
   125  			dst := make([]byte, len(src))
   126  			copy(dst, src)
   127  			zm.min = dst
   128  		} else {
   129  			zm.min = v
   130  		}
   131  	}
   132  	return
   133  }
   134  
   135  func (zm *ZoneMap) BatchUpdate(KeysCtx *KeysCtx) error {
   136  	if !zm.typ.Eq(KeysCtx.Keys.GetType()) {
   137  		return ErrWrongType
   138  	}
   139  	update := func(v any, _ int) error {
   140  		return zm.Update(v)
   141  	}
   142  	if err := KeysCtx.Keys.ForeachWindow(KeysCtx.Start, KeysCtx.Count, update, nil); err != nil {
   143  		return err
   144  	}
   145  	return nil
   146  }
   147  
   148  func (zm *ZoneMap) Contains(key any) (ok bool) {
   149  	if types.IsNull(key) {
   150  		return true
   151  	}
   152  	if !zm.inited {
   153  		return
   154  	}
   155  	if (zm.isInf || compute.CompareGeneric(key, zm.max, zm.typ) <= 0) && compute.CompareGeneric(key, zm.min, zm.typ) >= 0 {
   156  		ok = true
   157  	}
   158  	return
   159  }
   160  
   161  func (zm *ZoneMap) FastContainsAny(keys containers.Vector) (ok bool) {
   162  	if !zm.inited {
   163  		return
   164  	}
   165  	op := func(key any, _ int) (err error) {
   166  		if types.IsNull(key) ||
   167  			((zm.isInf || compute.CompareGeneric(key, zm.max, zm.typ) <= 0) &&
   168  				compute.CompareGeneric(key, zm.min, zm.typ) >= 0) {
   169  			err = moerr.GetOkExpectedEOB()
   170  			ok = true
   171  		}
   172  		return
   173  	}
   174  	keys.Foreach(op, nil)
   175  	return
   176  }
   177  
   178  func (zm *ZoneMap) ContainsAny(keys containers.Vector) (visibility *roaring.Bitmap, ok bool) {
   179  	if !zm.inited {
   180  		return
   181  	}
   182  	visibility = roaring.NewBitmap()
   183  	row := uint32(0)
   184  	op := func(key any, _ int) (err error) {
   185  		// exist if key is null or (<= maxv && >= minv)
   186  		if types.IsNull(key) ||
   187  			((zm.isInf || compute.CompareGeneric(key, zm.max, zm.typ) <= 0) &&
   188  				compute.CompareGeneric(key, zm.min, zm.typ) >= 0) {
   189  			visibility.Add(row)
   190  		}
   191  		row++
   192  		return
   193  	}
   194  	if err := keys.Foreach(op, nil); err != nil {
   195  		panic(err)
   196  	}
   197  	if visibility.GetCardinality() != 0 {
   198  		ok = true
   199  	}
   200  	return
   201  }
   202  
   203  func (zm *ZoneMap) SetMax(v any) {
   204  	if types.IsNull(v) {
   205  		return
   206  	}
   207  	if !zm.inited {
   208  		zm.init(v)
   209  		return
   210  	}
   211  	if compute.CompareGeneric(v, zm.max, zm.typ) > 0 {
   212  		zm.max = v
   213  	}
   214  }
   215  
   216  func (zm *ZoneMap) GetMax() any {
   217  	return zm.max
   218  }
   219  
   220  func (zm *ZoneMap) SetMin(v any) {
   221  	if types.IsNull(v) {
   222  		return
   223  	}
   224  	if !zm.inited {
   225  		zm.init(v)
   226  		return
   227  	}
   228  	if compute.CompareGeneric(v, zm.min, zm.typ) < 0 {
   229  		zm.min = v
   230  	}
   231  }
   232  
   233  func (zm *ZoneMap) GetMin() any {
   234  	return zm.min
   235  }
   236  
   237  // func (zm *ZoneMap) Print() string {
   238  // 	// default int32
   239  // 	s := "<ZM>\n["
   240  // 	s += strconv.Itoa(int(zm.min.(int32)))
   241  // 	s += ","
   242  // 	s += strconv.Itoa(int(zm.max.(int32)))
   243  // 	s += "]\n"
   244  // 	s += "</ZM>"
   245  // 	return s
   246  // }
   247  
   248  func (zm *ZoneMap) Marshal() (buf []byte, err error) {
   249  	buf = make([]byte, 64)
   250  	if !zm.inited {
   251  		return
   252  	}
   253  	buf[31] |= constZMInited
   254  	switch zm.typ.Oid {
   255  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   256  		minv, maxv := zm.min.([]byte), zm.max.([]byte)
   257  		// write 31-byte prefix of minv
   258  		copy(buf[0:31], minv)
   259  		minLen := uint8(31)
   260  		if len(minv) < 31 {
   261  			minLen = uint8(len(minv))
   262  		}
   263  		buf[31] |= minLen
   264  
   265  		// write 32-byte prefix of maxv
   266  		copy(buf[32:64], maxv)
   267  		// no truncation, get a bigger value by filling tail zeros
   268  		if len(maxv) > 32 && !is32BytesMax(buf[32:64]) {
   269  			// truncation happens, get a bigger one by plus one
   270  			for i := 63; i >= 32; i-- {
   271  				buf[i] += 1
   272  				if buf[i] != 0 {
   273  					break
   274  				}
   275  			}
   276  		}
   277  	default:
   278  		minv := types.EncodeValue(zm.min, zm.typ)
   279  		maxv := types.EncodeValue(zm.max, zm.typ)
   280  		if len(maxv) > 32 || len(minv) > 32 {
   281  			panic("zonemap: large fixed length type, check again")
   282  		}
   283  		copy(buf[0:], minv)
   284  		copy(buf[32:], maxv)
   285  	}
   286  	return
   287  }
   288  
   289  func (zm *ZoneMap) Unmarshal(buf []byte) error {
   290  	init := buf[31] & constZMInited
   291  	if init == 0 {
   292  		zm.inited = false
   293  		return nil
   294  	}
   295  	zm.inited = true
   296  	switch zm.typ.Oid {
   297  	case types.T_bool:
   298  		zm.min = types.DecodeFixed[bool](buf[:1])
   299  		buf = buf[32:]
   300  		zm.max = types.DecodeFixed[bool](buf[:1])
   301  		return nil
   302  	case types.T_int8:
   303  		zm.min = types.DecodeFixed[int8](buf[:1])
   304  		buf = buf[32:]
   305  		zm.max = types.DecodeFixed[int8](buf[:1])
   306  		return nil
   307  	case types.T_int16:
   308  		zm.min = types.DecodeFixed[int16](buf[:2])
   309  		buf = buf[32:]
   310  		zm.max = types.DecodeFixed[int16](buf[:2])
   311  		return nil
   312  	case types.T_int32:
   313  		zm.min = types.DecodeFixed[int32](buf[:4])
   314  		buf = buf[32:]
   315  		zm.max = types.DecodeFixed[int32](buf[:4])
   316  		return nil
   317  	case types.T_int64:
   318  		zm.min = types.DecodeFixed[int64](buf[:8])
   319  		buf = buf[32:]
   320  		zm.max = types.DecodeFixed[int64](buf[:8])
   321  		return nil
   322  	case types.T_uint8:
   323  		zm.min = types.DecodeFixed[uint8](buf[:1])
   324  		buf = buf[32:]
   325  		zm.max = types.DecodeFixed[uint8](buf[:1])
   326  		return nil
   327  	case types.T_uint16:
   328  		zm.min = types.DecodeFixed[uint16](buf[:2])
   329  		buf = buf[32:]
   330  		zm.max = types.DecodeFixed[uint16](buf[:2])
   331  		return nil
   332  	case types.T_uint32:
   333  		zm.min = types.DecodeFixed[uint32](buf[:4])
   334  		buf = buf[32:]
   335  		zm.max = types.DecodeFixed[uint32](buf[:4])
   336  		return nil
   337  	case types.T_uint64:
   338  		zm.min = types.DecodeFixed[uint64](buf[:8])
   339  		buf = buf[32:]
   340  		zm.max = types.DecodeFixed[uint64](buf[:8])
   341  		return nil
   342  	case types.T_float32:
   343  		zm.min = types.DecodeFixed[float32](buf[:4])
   344  		buf = buf[32:]
   345  		zm.max = types.DecodeFixed[float32](buf[:4])
   346  		return nil
   347  	case types.T_float64:
   348  		zm.min = types.DecodeFixed[float64](buf[:8])
   349  		buf = buf[32:]
   350  		zm.max = types.DecodeFixed[float64](buf[:8])
   351  		return nil
   352  	case types.T_date:
   353  		zm.min = types.DecodeFixed[types.Date](buf[:4])
   354  		buf = buf[32:]
   355  		zm.max = types.DecodeFixed[types.Date](buf[:4])
   356  		return nil
   357  	case types.T_time:
   358  		zm.min = types.DecodeFixed[types.Time](buf[:8])
   359  		buf = buf[32:]
   360  		zm.max = types.DecodeFixed[types.Time](buf[:8])
   361  		return nil
   362  	case types.T_datetime:
   363  		zm.min = types.DecodeFixed[types.Datetime](buf[:8])
   364  		buf = buf[32:]
   365  		zm.max = types.DecodeFixed[types.Datetime](buf[:8])
   366  		return nil
   367  	case types.T_timestamp:
   368  		zm.min = types.DecodeFixed[types.Timestamp](buf[:8])
   369  		buf = buf[32:]
   370  		zm.max = types.DecodeFixed[types.Timestamp](buf[:8])
   371  		return nil
   372  	case types.T_decimal64:
   373  		zm.min = types.DecodeFixed[types.Decimal64](buf[:8])
   374  		buf = buf[32:]
   375  		zm.max = types.DecodeFixed[types.Decimal64](buf[:8])
   376  		return nil
   377  	case types.T_decimal128:
   378  		zm.min = types.DecodeFixed[types.Decimal128](buf[:16])
   379  		buf = buf[32:]
   380  		zm.max = types.DecodeFixed[types.Decimal128](buf[:16])
   381  		return nil
   382  	case types.T_uuid:
   383  		zm.min = types.DecodeFixed[types.Uuid](buf[:16])
   384  		buf = buf[32:]
   385  		zm.max = types.DecodeFixed[types.Uuid](buf[:16])
   386  		return nil
   387  	case types.T_TS:
   388  		zm.min = buf[:types.TxnTsSize]
   389  		buf = buf[32:]
   390  		zm.max = buf[:types.TxnTsSize]
   391  		return nil
   392  	case types.T_Rowid:
   393  		zm.min = buf[:types.RowidSize]
   394  		buf = buf[32:]
   395  		zm.max = buf[:types.RowidSize]
   396  		return nil
   397  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   398  		minBuf := make([]byte, buf[31]&0x7f)
   399  		copy(minBuf, buf[0:32])
   400  		maxBuf := make([]byte, 32)
   401  		copy(maxBuf, buf[32:64])
   402  		zm.min = minBuf
   403  		zm.max = maxBuf
   404  
   405  		zm.isInf = is32BytesMax(maxBuf)
   406  		return nil
   407  
   408  	default:
   409  		panic("unsupported type")
   410  	}
   411  }
   412  
   413  /*func (zm *ZoneMap) Unmarshal(min, max []byte) error {
   414  	init := min[31] & constZMInited
   415  	if init == 0 {
   416  		zm.inited = false
   417  		return nil
   418  	}
   419  	zm.inited = true
   420  	switch zm.typ.Oid {
   421  	case types.T_bool:
   422  		zm.min = types.DecodeFixed[bool](min[:1])
   423  		zm.max = types.DecodeFixed[bool](max[:1])
   424  		return nil
   425  	case types.T_int8:
   426  		zm.min = types.DecodeFixed[int8](min[:1])
   427  		zm.max = types.DecodeFixed[int8](max[:1])
   428  		return nil
   429  	case types.T_int16:
   430  		zm.min = types.DecodeFixed[int16](min[:2])
   431  		zm.max = types.DecodeFixed[int16](max[:2])
   432  		return nil
   433  	case types.T_int32:
   434  		zm.min = types.DecodeFixed[int32](min[:4])
   435  		zm.max = types.DecodeFixed[int32](max[:4])
   436  		return nil
   437  	case types.T_int64:
   438  		zm.min = types.DecodeFixed[int64](min[:8])
   439  		zm.max = types.DecodeFixed[int64](max[:8])
   440  		return nil
   441  	case types.T_uint8:
   442  		zm.min = types.DecodeFixed[uint8](min[:1])
   443  		zm.max = types.DecodeFixed[uint8](max[:1])
   444  		return nil
   445  	case types.T_uint16:
   446  		zm.min = types.DecodeFixed[uint16](min[:2])
   447  		zm.max = types.DecodeFixed[uint16](max[:2])
   448  		return nil
   449  	case types.T_uint32:
   450  		zm.min = types.DecodeFixed[uint32](min[:4])
   451  		//buf = buf[32:]
   452  		zm.max = types.DecodeFixed[uint32](max[:4])
   453  		return nil
   454  	case types.T_uint64:
   455  		zm.min = types.DecodeFixed[uint64](min[:8])
   456  		zm.max = types.DecodeFixed[uint64](max[:8])
   457  		return nil
   458  	case types.T_float32:
   459  		zm.min = types.DecodeFixed[float32](min[:4])
   460  		zm.max = types.DecodeFixed[float32](max[:4])
   461  		return nil
   462  	case types.T_float64:
   463  		zm.min = types.DecodeFixed[float64](min[:8])
   464  		zm.max = types.DecodeFixed[float64](max[:8])
   465  		return nil
   466  	case types.T_date:
   467  		zm.min = types.DecodeFixed[types.Date](min[:4])
   468  		zm.max = types.DecodeFixed[types.Date](max[:4])
   469  		return nil
   470  	case types.T_datetime:
   471  		zm.min = types.DecodeFixed[types.Datetime](min[:8])
   472  		zm.max = types.DecodeFixed[types.Datetime](max[:8])
   473  		return nil
   474  	case types.T_timestamp:
   475  		zm.min = types.DecodeFixed[types.Timestamp](min[:8])
   476  		zm.max = types.DecodeFixed[types.Timestamp](max[:8])
   477  		return nil
   478  	case types.T_decimal64:
   479  		zm.min = types.DecodeFixed[types.Decimal64](min[:8])
   480  		zm.max = types.DecodeFixed[types.Decimal64](max[:8])
   481  		return nil
   482  	case types.T_decimal128:
   483  		zm.min = types.DecodeFixed[types.Decimal128](min[:16])
   484  		zm.max = types.DecodeFixed[types.Decimal128](max[:16])
   485  		return nil
   486  	case types.T_uuid:
   487  		zm.min = types.DecodeFixed[types.Uuid](min[:16])
   488  		zm.max = types.DecodeFixed[types.Uuid](max[:16])
   489  		return nil
   490  	case types.T_TS:
   491  		zm.min = min[:types.TxnTsSize]
   492  		zm.max = max[:types.TxnTsSize]
   493  		return nil
   494  	case types.T_Rowid:
   495  		zm.min = min[:types.RowidSize]
   496  		zm.max = max[:types.RowidSize]
   497  		return nil
   498  	case types.T_char, types.T_varchar, types.T_json, types.T_blob:
   499  		minBuf := make([]byte, min[31]&0x7f)
   500  		copy(minBuf, min)
   501  		maxBuf := make([]byte, 32)
   502  		copy(maxBuf, max)
   503  		zm.min = minBuf
   504  		zm.max = maxBuf
   505  
   506  		zm.isInf = is32BytesMax(maxBuf)
   507  		return nil
   508  
   509  	default:
   510  		panic("unsupported type")
   511  	}
   512  }
   513  */
   514  
   515  func (zm *ZoneMap) GetMemoryUsage() uint64 {
   516  	return 64
   517  }