github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/z_encoding.go

github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/z_encoding.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"encoding/binary"
    19  	"math"
    20  	"math/bits"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql/expression/function/spatial"
    23  	"github.com/dolthub/go-mysql-server/sql/types"
    24  
    25  	"github.com/dolthub/dolt/go/store/val"
    26  )
    27  
    28  // LexFloat maps the float64 into an uint64 representation in lexicographical order
    29  // For negative floats, we flip all the bits
    30  // For non-negative floats, we flip the signed bit
    31  func LexFloat(f float64) uint64 {
    32  	b := math.Float64bits(f)
    33  	if b>>63 == 1 {
    34  		return ^b
    35  	}
    36  	return b ^ (1 << 63)
    37  }
    38  
    39  // UnLexFloat maps the lexicographic uint64 representation of a float64 back into a float64
    40  // For negative int64s, we flip all the bits
    41  // For non-negative int64s, we flip the signed bit
    42  func UnLexFloat(b uint64) float64 {
    43  	if b>>63 == 1 {
    44  		b = b ^ (1 << 63)
    45  	} else {
    46  		b = ^b
    47  	}
    48  	return math.Float64frombits(b)
    49  }
    50  
    51  // InterleaveUInt64 interleaves the bits of the uint64s x and y.
    52  // The first 32 bits of x and y must be 0.
    53  // Example:
    54  // 0000 0000 0000 0000 0000 0000 0000 0000 abcd efgh ijkl mnop abcd efgh ijkl mnop
    55  // 0000 0000 0000 0000 abcd efgh ijkl mnop 0000 0000 0000 0000 abcd efgh ijkl mnop
    56  // 0000 0000 abcd efgh 0000 0000 ijkl mnop 0000 0000 abcd efgh 0000 0000 ijkl mnop
    57  // 0000 abcd 0000 efgh 0000 ijkl 0000 mnop 0000 abcd 0000 efgh 0000 ijkl 0000 mnop
    58  // 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op
    59  // 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p
    60  // Alternatively, just precompute all the results from 0 to 0x0000FFFFF
    61  func InterleaveUInt64(x, y uint64) uint64 {
    62  	x = (x | (x << 16)) & 0x0000FFFF0000FFFF
    63  	y = (y | (y << 16)) & 0x0000FFFF0000FFFF
    64  
    65  	x = (x | (x << 8)) & 0x00FF00FF00FF00FF
    66  	y = (y | (y << 8)) & 0x00FF00FF00FF00FF
    67  
    68  	x = (x | (x << 4)) & 0x0F0F0F0F0F0F0F0F
    69  	y = (y | (y << 4)) & 0x0F0F0F0F0F0F0F0F
    70  
    71  	x = (x | (x << 2)) & 0x3333333333333333
    72  	y = (y | (y << 2)) & 0x3333333333333333
    73  
    74  	x = (x | (x << 1)) & 0x5555555555555555
    75  	y = (y | (y << 1)) & 0x5555555555555555
    76  
    77  	return x | (y << 1)
    78  }
    79  
    80  // UnInterleaveUint64 splits up the bits of the uint64 z into two uint64s
    81  // The first 32 bits of x and y must be 0.
    82  // Example:
    83  // abcd efgh ijkl mnop abcd efgh ijkl mnop abcd efgh ijkl mnop abcd efgh ijkl mnop 0x5555555555555555
    84  // 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p x | x >> 1
    85  // 0bbd dffh hjjl lnnp pbbd dffh hjjl lnnp pbbd dffh hjjl lnnp pnbd dffh hjjl lnnp 0x3333333333333333
    86  // 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np x | x >> 2
    87  // 0000 bdfh fhjl jlnp npbd bdfh fhjl jlnp npdb bdfh fhjl jlnp npdb bdfh fhjl jlnp 0x0F0F0F0F0F0F0F0F
    88  // 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp x | x >> 4
    89  // 0000 bdfh bdfh jlnp jlnp bdfh bdfh jlnp jlnp bdfh bdfh jlnp jlnp bdfh bdfh jlnp 0x00FF00FF00FF00FF
    90  // 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp x | x >> 8
    91  // 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp 0x0000FFFF0000FFFF
    92  // 0000 0000 0000 0000 bdfh jlnp bdfh jlnp 0000 0000 0000 0000 bdfh jlnp bdfh jlnp x | x >> 16
    93  // 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp 0x00000000FFFFFFFF
    94  // 0000 0000 0000 0000 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp
    95  func UnInterleaveUint64(z uint64) (x, y uint64) {
    96  	x, y = z, z>>1
    97  
    98  	x &= 0x5555555555555555
    99  	x |= x >> 1
   100  	y &= 0x5555555555555555
   101  	y |= y >> 1
   102  
   103  	x &= 0x3333333333333333
   104  	x |= x >> 2
   105  	y &= 0x3333333333333333
   106  	y |= y >> 2
   107  
   108  	x &= 0x0F0F0F0F0F0F0F0F
   109  	x |= x >> 4
   110  	y &= 0x0F0F0F0F0F0F0F0F
   111  	y |= y >> 4
   112  
   113  	x &= 0x00FF00FF00FF00FF
   114  	x |= x >> 8
   115  	y &= 0x00FF00FF00FF00FF
   116  	y |= y >> 8
   117  
   118  	x &= 0x0000FFFF0000FFFF
   119  	x |= x >> 16
   120  	y &= 0x0000FFFF0000FFFF
   121  	y |= y >> 16
   122  
   123  	x &= 0xFFFFFFFF
   124  	y &= 0xFFFFFFFF
   125  	return
   126  }
   127  
// ZVal is a 128-bit z-value: the bits of two uint64s x and y, interleaved.
// ZVal[0] holds the upper 64 bits (the upper 32 bits of x and y interleaved).
// ZVal[1] holds the lower 64 bits (the lower 32 bits of x and y interleaved).
type ZVal = [2]uint64
   132  
   133  // ZValue takes a Point, Lexes the x and y values, and interleaves the bits into a [2]uint64
   134  // It will put the bits in this order: x_0, y_0, x_1, y_1 ... x_63, Y_63
   135  func ZValue(p types.Point) (z ZVal) {
   136  	xLex, yLex := LexFloat(p.X), LexFloat(p.Y)
   137  	z[0], z[1] = InterleaveUInt64(xLex>>32, yLex>>32), InterleaveUInt64(xLex&0xFFFFFFFF, yLex&0xFFFFFFFF)
   138  	return
   139  }
   140  
   141  // UnZValue takes a ZVal and converts it back to a sql.Point
   142  func UnZValue(z [2]uint64) types.Point {
   143  	xl, yl := UnInterleaveUint64(z[0])
   144  	xr, yr := UnInterleaveUint64(z[1])
   145  	xf := UnLexFloat((xl << 32) | xr)
   146  	yf := UnLexFloat((yl << 32) | yr)
   147  	return types.Point{X: xf, Y: yf}
   148  }
   149  
   150  // ZMask masks in pairs by shifting based off of level (shift amount)
   151  func ZMask(level byte, zVal ZVal) val.Cell {
   152  	cell := val.Cell{}
   153  	cell[0] = level
   154  	if level < 32 {
   155  		shamt := level << 1
   156  		binary.BigEndian.PutUint64(cell[1:], zVal[0])
   157  		binary.BigEndian.PutUint64(cell[9:], (zVal[1]>>shamt)<<shamt)
   158  	} else {
   159  		shamt := (level - 32) << 1
   160  		binary.BigEndian.PutUint64(cell[1:], (zVal[0]>>shamt)<<shamt)
   161  	}
   162  	return cell
   163  }
   164  
   165  // ZCell converts the GeometryValue into a Cell
   166  // Note: there is an inefficiency here where small polygons may be placed into a level that's significantly larger
   167  func ZCell(v types.GeometryValue) val.Cell {
   168  	bbox := spatial.FindBBox(v)
   169  	zMin := ZValue(types.Point{X: bbox[0], Y: bbox[1]})
   170  	zMax := ZValue(types.Point{X: bbox[2], Y: bbox[3]})
   171  
   172  	// Level rounds up by adding 1 and dividing by two (same as a left shift by 1)
   173  	var level byte
   174  	if zMin[0] != zMax[0] {
   175  		level = byte((bits.Len64(zMin[0]^zMax[0])+1)>>1) + 32
   176  	} else {
   177  		level = byte((bits.Len64(zMin[1]^zMax[1]) + 1) >> 1)
   178  	}
   179  	return ZMask(level, zMin)
   180  }
   181  
// ZRange is a pair of ZVals describing a range of z-values.
// ZRange[0] is the lower bound (z-min)
// ZRange[1] is the upper bound (z-max)
type ZRange = [2]ZVal
   186  
   187  // mergeZRanges combines the z-ranges in acc with zRange by either
   188  // 1. combining the last ZRange in acc with zRange if the ranges are next to each other or
   189  // 2. appending zRange to acc
   190  func mergeZRanges(acc []ZRange, zRange ZRange) []ZRange {
   191  	n := len(acc) - 1
   192  	if n >= 0 && acc[n][1][0] == zRange[0][0] && zRange[0][1]-acc[n][1][1] == 1 {
   193  		acc[n][1] = zRange[1]
   194  		return acc
   195  	}
   196  	return append(acc, zRange)
   197  }
   198  
   199  // zRangeSize retrieves the approximate size of the zRange
   200  // it only takes the top 64 bits of the difference
   201  // it accepts and returns a shift-amount so that comparison between two zRangeSizes are consistent
   202  func zRangeSize(zRange ZRange, shamt int) (uint64, int) {
   203  	zVal := ZVal{}
   204  	zVal[0] = zRange[1][0] - zRange[0][0]
   205  	if zRange[1][1] < zRange[0][1] {
   206  		zVal[0] -= 1
   207  		zVal[1] = ^zRange[1][1] - zRange[0][1]
   208  	} else {
   209  		zVal[1] = zRange[1][1] - zRange[0][1]
   210  	}
   211  	if shamt == -1 {
   212  		shamt = bits.LeadingZeros64(zVal[0])
   213  	}
   214  	zVal[0] = zVal[0] << shamt
   215  	zVal[1] = zVal[1] >> (64 - shamt)
   216  	return zVal[0] | zVal[1], shamt
   217  }
   218  
   219  // Thresholds to stop splitting ZRanges
   220  const cutThresh = 0.02
   221  const depthThresh = 4
   222  
   223  // Masks for every other bit to avoid un-interleaving
   224  // Depending on prefixLength these will be shifted to either fill x or y values with 0s or 1s
   225  // while not altering the bits of their counterparts
   226  const xMask = 0x5555555555555555
   227  const yMask = 0xAAAAAAAAAAAAAAAA
   228  
   229  // shouldCut checks if the size of the removed ZRange divided by the size of the whole ZRange is smaller than cutThresh
   230  // This is used to get splitZRanges to stop recursing
   231  func shouldCut(cutRange ZRange, size float64, shamt int) bool {
   232  	cut, _ := zRangeSize(cutRange, shamt)
   233  	return (float64(cut) / size) >= cutThresh
   234  }
   235  
   236  // isContinuous checks if the provided zRange is entirely within the bounding box
   237  func isContinuous(zl, zh uint64, prefixLength int) bool {
   238  	mask := uint64(math.MaxUint64 >> prefixLength)
   239  	return (zl&mask) == 0 && (zh&mask) == mask
   240  }
   241  
   242  // splitZRanges is a helper function to SplitZRanges
   243  func splitZRanges(zRange ZRange, zSize float64, zShamt, depth int, acc []ZRange) []ZRange {
   244  	// prevent too much splitting and point lookup is continuous
   245  	if depth == 0 || zRange[0] == zRange[1] {
   246  		return mergeZRanges(acc, zRange)
   247  	}
   248  
   249  	zl, zh := zRange[0], zRange[1]
   250  	zRangeL, zRangeR := zRange, zRange
   251  	if zl[0] != zh[0] {
   252  		prefixLength := bits.LeadingZeros64(zl[0] ^ zh[0])
   253  		if zl[1] == 0 && zh[1] == math.MaxUint64 && isContinuous(zl[0], zh[0], prefixLength) {
   254  			return mergeZRanges(acc, zRange)
   255  		}
   256  
   257  		// upper bound for left range; set 0 fill with 1s
   258  		suffixLength := 64 - prefixLength
   259  		zRangeL[1][0] |= yMask >> prefixLength       // set suffix to all 1s
   260  		zRangeL[1][0] &= ^(1 << (suffixLength - 1))  // set first suffix bit to 0
   261  		zRangeL[1][1] |= yMask >> (prefixLength % 2) // set suffix to all 1s
   262  
   263  		// lower bound for right range; set 1 fill with 0s
   264  		suffixMask := uint64(math.MaxUint64<<suffixLength) | (xMask >> prefixLength)
   265  		zRangeR[0][0] &= suffixMask                  // set suffix to all 0s
   266  		zRangeR[0][0] |= 1 << (suffixLength - 1)     // set first suffix bit to 1
   267  		zRangeR[0][1] &= xMask << (prefixLength % 2) // set suffix to all 0s
   268  	} else {
   269  		prefixLength := bits.LeadingZeros64(zl[1] ^ zh[1])
   270  		if isContinuous(zl[1], zh[1], prefixLength) {
   271  			return mergeZRanges(acc, zRange)
   272  		}
   273  
   274  		// upper bound for left range; set 0 fill with 1s
   275  		suffixLength := 64 - prefixLength
   276  		zRangeL[1][1] |= yMask >> prefixLength      // set suffix to all 1s
   277  		zRangeL[1][1] &= ^(1 << (suffixLength - 1)) // set at prefix to 0
   278  
   279  		// lower bound for right range; set 1 fill with 0s
   280  		suffixMask := uint64(math.MaxUint64<<suffixLength) | (xMask >> prefixLength)
   281  		zRangeR[0][1] &= suffixMask              // set suffix to all 0s
   282  		zRangeR[0][1] |= 1 << (suffixLength - 1) // set at prefix to 1
   283  	}
   284  
   285  	if !shouldCut(ZRange{zRangeL[1], zRangeR[0]}, zSize, zShamt) {
   286  		return mergeZRanges(acc, zRange)
   287  	}
   288  
   289  	// recurse on left and right ranges
   290  	acc = splitZRanges(zRangeL, zSize, zShamt, depth-1, acc)
   291  	acc = splitZRanges(zRangeR, zSize, zShamt, depth-1, acc)
   292  
   293  	return acc
   294  }
   295  
   296  // SplitZRanges takes a ZRange and splits it into continuous ZRanges within the bounding box
   297  // A ZRange is continuous if
   298  // 1. it is a point (the lower and upper bounds are equal)
   299  // 2. the ranges are within a cell (the suffixes of the bounds range from 00...0 to 11...1)
   300  func SplitZRanges(zRange ZRange) []ZRange {
   301  	zSize, zShamt := zRangeSize(zRange, -1)
   302  	return splitZRanges(zRange, float64(zSize), zShamt, depthThresh, make([]ZRange, 0, 128))
   303  }