github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/z_encoding.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "encoding/binary" 19 "math" 20 "math/bits" 21 22 "github.com/dolthub/go-mysql-server/sql/expression/function/spatial" 23 "github.com/dolthub/go-mysql-server/sql/types" 24 25 "github.com/dolthub/dolt/go/store/val" 26 ) 27 28 // LexFloat maps the float64 into an uint64 representation in lexicographical order 29 // For negative floats, we flip all the bits 30 // For non-negative floats, we flip the signed bit 31 func LexFloat(f float64) uint64 { 32 b := math.Float64bits(f) 33 if b>>63 == 1 { 34 return ^b 35 } 36 return b ^ (1 << 63) 37 } 38 39 // UnLexFloat maps the lexicographic uint64 representation of a float64 back into a float64 40 // For negative int64s, we flip all the bits 41 // For non-negative int64s, we flip the signed bit 42 func UnLexFloat(b uint64) float64 { 43 if b>>63 == 1 { 44 b = b ^ (1 << 63) 45 } else { 46 b = ^b 47 } 48 return math.Float64frombits(b) 49 } 50 51 // InterleaveUInt64 interleaves the bits of the uint64s x and y. 52 // The first 32 bits of x and y must be 0. 
// Example (each row shows one input word after the next spreading step):
// 0000 0000 0000 0000 0000 0000 0000 0000 abcd efgh ijkl mnop abcd efgh ijkl mnop
// 0000 0000 0000 0000 abcd efgh ijkl mnop 0000 0000 0000 0000 abcd efgh ijkl mnop
// 0000 0000 abcd efgh 0000 0000 ijkl mnop 0000 0000 abcd efgh 0000 0000 ijkl mnop
// 0000 abcd 0000 efgh 0000 ijkl 0000 mnop 0000 abcd 0000 efgh 0000 ijkl 0000 mnop
// 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op
// 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p
// Alternatively, just precompute all the results from 0 to 0x0000FFFFF
//
// In the result the bits of x occupy the even bit positions and the bits of y the odd ones.
func InterleaveUInt64(x, y uint64) uint64 {
	// spread moves the low 32 bits of v apart so that a zero bit sits between each pair of
	// neighbouring input bits; every step doubles the gap between groups of bits
	spread := func(v uint64) uint64 {
		v = (v | v<<16) & 0x0000FFFF0000FFFF
		v = (v | v<<8) & 0x00FF00FF00FF00FF
		v = (v | v<<4) & 0x0F0F0F0F0F0F0F0F
		v = (v | v<<2) & 0x3333333333333333
		v = (v | v<<1) & 0x5555555555555555
		return v
	}

	evenBits := spread(x)
	oddBits := spread(y) << 1
	return evenBits | oddBits
}

// UnInterleaveUint64 splits up the bits of the uint64 z into two uint64s
// The upper (most significant) 32 bits of the returned x and y are always 0.
// Example (each row is annotated with the mask or shift that appears to be applied next):
// abcd efgh ijkl mnop abcd efgh ijkl mnop abcd efgh ijkl mnop abcd efgh ijkl mnop 0x5555555555555555
// 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p x | x >> 1
// 0bbd dffh hjjl lnnp pbbd dffh hjjl lnnp pbbd dffh hjjl lnnp pnbd dffh hjjl lnnp 0x3333333333333333
// 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np x | x >> 2
// 0000 bdfh fhjl jlnp npbd bdfh fhjl jlnp npdb bdfh fhjl jlnp npdb bdfh fhjl jlnp 0x0F0F0F0F0F0F0F0F
// 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp x | x >> 4
// 0000 bdfh bdfh jlnp jlnp bdfh bdfh jlnp jlnp bdfh bdfh jlnp jlnp bdfh bdfh jlnp 0x00FF00FF00FF00FF
// 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp x | x >> 8
// 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp 0x0000FFFF0000FFFF
// 0000 0000 0000 0000 bdfh jlnp bdfh jlnp 0000 0000 0000 0000 bdfh jlnp bdfh jlnp x | x >> 16
// 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp 0x00000000FFFFFFFF
// 0000 0000 0000 0000 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp
//
// x is gathered from the even bit positions of z and y from the odd bit positions.
func UnInterleaveUint64(z uint64) (x, y uint64) {
	// compact keeps every other bit of v (starting with bit 0) and squeezes the kept bits
	// together into the low 32 bits; every step halves the gap between groups of bits
	compact := func(v uint64) uint64 {
		v &= 0x5555555555555555
		v = (v | v>>1) & 0x3333333333333333
		v = (v | v>>2) & 0x0F0F0F0F0F0F0F0F
		v = (v | v>>4) & 0x00FF00FF00FF00FF
		v = (v | v>>8) & 0x0000FFFF0000FFFF
		v = (v | v>>16) & 0x00000000FFFFFFFF
		return v
	}

	// shifting z right by one moves the odd (y) bits onto the even positions
	return compact(z), compact(z >> 1)
}

// ZVal consists of uint64 x and y with their bits interleaved
// ZVal[0] contains the upper 64 bits of x and y interleaved
// ZVal[1] contains the lower 64 bits of x and y
interleaved 131 type ZVal = [2]uint64 132 133 // ZValue takes a Point, Lexes the x and y values, and interleaves the bits into a [2]uint64 134 // It will put the bits in this order: x_0, y_0, x_1, y_1 ... x_63, Y_63 135 func ZValue(p types.Point) (z ZVal) { 136 xLex, yLex := LexFloat(p.X), LexFloat(p.Y) 137 z[0], z[1] = InterleaveUInt64(xLex>>32, yLex>>32), InterleaveUInt64(xLex&0xFFFFFFFF, yLex&0xFFFFFFFF) 138 return 139 } 140 141 // UnZValue takes a ZVal and converts it back to a sql.Point 142 func UnZValue(z [2]uint64) types.Point { 143 xl, yl := UnInterleaveUint64(z[0]) 144 xr, yr := UnInterleaveUint64(z[1]) 145 xf := UnLexFloat((xl << 32) | xr) 146 yf := UnLexFloat((yl << 32) | yr) 147 return types.Point{X: xf, Y: yf} 148 } 149 150 // ZMask masks in pairs by shifting based off of level (shift amount) 151 func ZMask(level byte, zVal ZVal) val.Cell { 152 cell := val.Cell{} 153 cell[0] = level 154 if level < 32 { 155 shamt := level << 1 156 binary.BigEndian.PutUint64(cell[1:], zVal[0]) 157 binary.BigEndian.PutUint64(cell[9:], (zVal[1]>>shamt)<<shamt) 158 } else { 159 shamt := (level - 32) << 1 160 binary.BigEndian.PutUint64(cell[1:], (zVal[0]>>shamt)<<shamt) 161 } 162 return cell 163 } 164 165 // ZCell converts the GeometryValue into a Cell 166 // Note: there is an inefficiency here where small polygons may be placed into a level that's significantly larger 167 func ZCell(v types.GeometryValue) val.Cell { 168 bbox := spatial.FindBBox(v) 169 zMin := ZValue(types.Point{X: bbox[0], Y: bbox[1]}) 170 zMax := ZValue(types.Point{X: bbox[2], Y: bbox[3]}) 171 172 // Level rounds up by adding 1 and dividing by two (same as a left shift by 1) 173 var level byte 174 if zMin[0] != zMax[0] { 175 level = byte((bits.Len64(zMin[0]^zMax[0])+1)>>1) + 32 176 } else { 177 level = byte((bits.Len64(zMin[1]^zMax[1]) + 1) >> 1) 178 } 179 return ZMask(level, zMin) 180 } 181 182 // ZRange is a pair of two ZVals 183 // ZRange[0] is the lower bound (z-min) 184 // ZRange[1] is the upper bound 
(z-max) 185 type ZRange = [2]ZVal 186 187 // mergeZRanges combines the z-ranges in acc with zRange by either 188 // 1. combining the last ZRange in acc with zRange if the ranges are next to each other or 189 // 2. appending zRange to acc 190 func mergeZRanges(acc []ZRange, zRange ZRange) []ZRange { 191 n := len(acc) - 1 192 if n >= 0 && acc[n][1][0] == zRange[0][0] && zRange[0][1]-acc[n][1][1] == 1 { 193 acc[n][1] = zRange[1] 194 return acc 195 } 196 return append(acc, zRange) 197 } 198 199 // zRangeSize retrieves the approximate size of the zRange 200 // it only takes the top 64 bits of the difference 201 // it accepts and returns a shift-amount so that comparison between two zRangeSizes are consistent 202 func zRangeSize(zRange ZRange, shamt int) (uint64, int) { 203 zVal := ZVal{} 204 zVal[0] = zRange[1][0] - zRange[0][0] 205 if zRange[1][1] < zRange[0][1] { 206 zVal[0] -= 1 207 zVal[1] = ^zRange[1][1] - zRange[0][1] 208 } else { 209 zVal[1] = zRange[1][1] - zRange[0][1] 210 } 211 if shamt == -1 { 212 shamt = bits.LeadingZeros64(zVal[0]) 213 } 214 zVal[0] = zVal[0] << shamt 215 zVal[1] = zVal[1] >> (64 - shamt) 216 return zVal[0] | zVal[1], shamt 217 } 218 219 // Thresholds to stop splitting ZRanges 220 const cutThresh = 0.02 221 const depthThresh = 4 222 223 // Masks for every other bit to avoid un-interleaving 224 // Depending on prefixLength these will be shifted to either fill x or y values with 0s or 1s 225 // while not altering the bits of their counterparts 226 const xMask = 0x5555555555555555 227 const yMask = 0xAAAAAAAAAAAAAAAA 228 229 // shouldCut checks if the size of the removed ZRange divided by the size of the whole ZRange is smaller than cutThresh 230 // This is used to get splitZRanges to stop recursing 231 func shouldCut(cutRange ZRange, size float64, shamt int) bool { 232 cut, _ := zRangeSize(cutRange, shamt) 233 return (float64(cut) / size) >= cutThresh 234 } 235 236 // isContinuous checks if the provided zRange is entirely within the bounding 
box 237 func isContinuous(zl, zh uint64, prefixLength int) bool { 238 mask := uint64(math.MaxUint64 >> prefixLength) 239 return (zl&mask) == 0 && (zh&mask) == mask 240 } 241 242 // splitZRanges is a helper function to SplitZRanges 243 func splitZRanges(zRange ZRange, zSize float64, zShamt, depth int, acc []ZRange) []ZRange { 244 // prevent too much splitting and point lookup is continuous 245 if depth == 0 || zRange[0] == zRange[1] { 246 return mergeZRanges(acc, zRange) 247 } 248 249 zl, zh := zRange[0], zRange[1] 250 zRangeL, zRangeR := zRange, zRange 251 if zl[0] != zh[0] { 252 prefixLength := bits.LeadingZeros64(zl[0] ^ zh[0]) 253 if zl[1] == 0 && zh[1] == math.MaxUint64 && isContinuous(zl[0], zh[0], prefixLength) { 254 return mergeZRanges(acc, zRange) 255 } 256 257 // upper bound for left range; set 0 fill with 1s 258 suffixLength := 64 - prefixLength 259 zRangeL[1][0] |= yMask >> prefixLength // set suffix to all 1s 260 zRangeL[1][0] &= ^(1 << (suffixLength - 1)) // set first suffix bit to 0 261 zRangeL[1][1] |= yMask >> (prefixLength % 2) // set suffix to all 1s 262 263 // lower bound for right range; set 1 fill with 0s 264 suffixMask := uint64(math.MaxUint64<<suffixLength) | (xMask >> prefixLength) 265 zRangeR[0][0] &= suffixMask // set suffix to all 0s 266 zRangeR[0][0] |= 1 << (suffixLength - 1) // set first suffix bit to 1 267 zRangeR[0][1] &= xMask << (prefixLength % 2) // set suffix to all 0s 268 } else { 269 prefixLength := bits.LeadingZeros64(zl[1] ^ zh[1]) 270 if isContinuous(zl[1], zh[1], prefixLength) { 271 return mergeZRanges(acc, zRange) 272 } 273 274 // upper bound for left range; set 0 fill with 1s 275 suffixLength := 64 - prefixLength 276 zRangeL[1][1] |= yMask >> prefixLength // set suffix to all 1s 277 zRangeL[1][1] &= ^(1 << (suffixLength - 1)) // set at prefix to 0 278 279 // lower bound for right range; set 1 fill with 0s 280 suffixMask := uint64(math.MaxUint64<<suffixLength) | (xMask >> prefixLength) 281 zRangeR[0][1] &= suffixMask // 
set suffix to all 0s 282 zRangeR[0][1] |= 1 << (suffixLength - 1) // set at prefix to 1 283 } 284 285 if !shouldCut(ZRange{zRangeL[1], zRangeR[0]}, zSize, zShamt) { 286 return mergeZRanges(acc, zRange) 287 } 288 289 // recurse on left and right ranges 290 acc = splitZRanges(zRangeL, zSize, zShamt, depth-1, acc) 291 acc = splitZRanges(zRangeR, zSize, zShamt, depth-1, acc) 292 293 return acc 294 } 295 296 // SplitZRanges takes a ZRange and splits it into continuous ZRanges within the bounding box 297 // A ZRange is continuous if 298 // 1. it is a point (the lower and upper bounds are equal) 299 // 2. the ranges are within a cell (the suffixes of the bounds range from 00...0 to 11...1) 300 func SplitZRanges(zRange ZRange) []ZRange { 301 zSize, zShamt := zRangeSize(zRange, -1) 302 return splitZRanges(zRange, float64(zSize), zShamt, depthThresh, make([]ZRange, 0, 128)) 303 }