github.com/matrixorigin/matrixone@v1.2.0/pkg/pb/statsinfo/shuffle.go (about) 1 // Copyright 2021 - 2024 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package statsinfo 16 17 import "math" 18 19 const DefaultEvalSize = 1024 20 21 type ShuffleList struct { 22 Size int64 23 Value float64 24 Next *ShuffleList 25 Tree *ShuffleHeap 26 } 27 28 func (t *ShuffleHeap) Merge(s *ShuffleHeap) *ShuffleHeap { 29 if t.Key > s.Key != t.Reverse { 30 if s.Right == nil { 31 s.Right = t 32 } else { 33 s.Right = t.Merge(s.Right) 34 } 35 if s.Left == nil || s.Left.Height < s.Right.Height { 36 tmp := s.Left 37 s.Left = s.Right 38 s.Right = tmp 39 } 40 s.Height = s.Left.Height + 1 41 return s 42 } else { 43 if t.Right == nil { 44 t.Right = s 45 } else { 46 t.Right = t.Right.Merge(s) 47 } 48 if t.Left == nil || t.Left.Height < t.Right.Height { 49 tmp := t.Left 50 t.Left = t.Right 51 t.Right = tmp 52 } 53 t.Height = t.Left.Height + 1 54 return t 55 } 56 } 57 58 func (t *ShuffleHeap) Pop() (*ShuffleHeap, *ShuffleHeap) { 59 if t.Left == nil { 60 return nil, t 61 } 62 if t.Right == nil { 63 return t.Left, t 64 } 65 return t.Left.Merge(t.Right), t 66 } 67 68 func (s *ShuffleRange) UpdateString(zmmin []byte, zmmax []byte, rowCount int64, nullCount int64) { 69 if len(zmmin) > 8 { 70 zmmin = zmmin[:8] 71 } 72 if len(zmmax) > 8 { 73 zmmax = zmmax[:8] 74 } 75 if s.Sz == 0 { 76 s.Sz = rowCount 77 s.Flags = make([]bool, 256) 78 s.Mins = make([][]byte, 0) 79 s.Maxs = make([][]byte, 0) 80 s.Mins = append(s.Mins, zmmin) 81 s.Maxs = append(s.Maxs, zmmax) 82 s.Rows = make([]int64, 0) 83 s.Rows = append(s.Rows, rowCount) 84 s.Nulls = make([]int64, 0) 85 s.Nulls = append(s.Nulls, nullCount) 86 } else { 87 s.Sz += rowCount 88 s.Mins = append(s.Mins, zmmin) 89 s.Maxs = append(s.Maxs, zmmax) 90 s.Rows = append(s.Rows, rowCount) 91 s.Nulls = append(s.Nulls, nullCount) 92 } 93 if s.MaxLen < int64(len(zmmin)) { 94 s.MaxLen = int64(len(zmmin)) 95 } 96 for _, c := range zmmin { 97 s.Flags[int(c)] = true 98 } 99 if s.MaxLen < int64(len(zmmax)) { 100 s.MaxLen = int64(len(zmmax)) 101 } 102 for _, c := range zmmax { 103 s.Flags[int(c)] = true 104 } 105 } 106 107 func (s *ShuffleRange) Update(zmmin float64, zmmax float64, rowCount int64, nullCount int64) { 108 s.Sz += rowCount 109 if s.Tree == nil { 110 s.Tree = &ShuffleHeap{ 111 Height: 1, 112 Key: zmmax, 113 Value: zmmin, 114 Sz: rowCount, 115 Nulls: nullCount, 116 } 117 s.Min = zmmin 118 s.Max = zmmax 119 } else { 120 s.Tree = s.Tree.Merge(&ShuffleHeap{ 121 Height: 1, 122 Key: zmmax, 123 Value: zmmin, 124 Sz: rowCount, 125 Nulls: nullCount, 126 }) 127 if s.Min > zmmin { 128 s.Min = zmmin 129 } 130 if s.Max < zmmax { 131 s.Max = zmmax 132 } 133 } 134 } 135 136 func (s *ShuffleRange) Eval() { 137 k := DefaultEvalSize 138 if s.Sz == 0 { 139 return 140 } 141 bytetoint := make(map[byte]int) 142 inttobyte := make([]byte, 0) 143 var lens float64 144 if s.IsStrType { 145 for i := 0; i < 256; i++ { 146 if s.Flags[i] { 147 bytetoint[byte(i)] = len(inttobyte) 148 inttobyte = append(inttobyte, byte(i)) 149 } 150 } 151 if len(inttobyte) == 0 { 152 return 153 } 154 lens = float64(len(inttobyte)) 155 for i := range s.Mins { 156 node := &ShuffleHeap{ 157 Height: 1, 158 Key: 0, 159 Value: 0, 160 Sz: s.Rows[i], 161 Nulls: s.Nulls[i], 162 } 163 for _, c := range s.Maxs[i] { 164 node.Key = node.Key*lens + float64(bytetoint[c]) 165 } 166 for j := int64(len(s.Maxs[i])); j < s.MaxLen; j++ { 167 node.Key = node.Key * lens 168 } 169 for _, c := range s.Mins[i] { 170 node.Value = node.Value*lens + float64(bytetoint[c]) 171 } 172 for j := int64(len(s.Mins[i])); j < s.MaxLen; j++ { 173 node.Value = node.Value * lens 174 } 175 if s.Tree == nil { 176 s.Tree = node 177 } else { 178 s.Tree = s.Tree.Merge(node) 179 } 180 } 181 } 182 var head *ShuffleList 183 var node *ShuffleHeap 184 var nulls int64 185 s.Result = make([]float64, k-1) 186 for s.Tree != nil { 187 s.Tree, node = s.Tree.Pop() 188 node.Left = nil 189 node.Right = nil 190 node.Height = 1 191 node.Sz -= node.Nulls 192 nulls += node.Nulls 193 node.Reverse = true 194 head = &ShuffleList{ 195 Next: head, 196 Tree: node, 197 Size: node.Sz, 198 Value: node.Value, 199 } 200 if head.Next != nil { 201 for head.Next != nil { 202 next := head.Next 203 if head.Tree.Value >= next.Tree.Key { 204 break 205 } 206 if head.Tree.Key != head.Value { 207 if head.Value <= next.Value { 208 s.Overlap += float64(head.Size) * float64(next.Size) * (next.Tree.Key - next.Value) / (head.Tree.Key - head.Value) 209 } else { 210 s.Overlap += float64(head.Size) * float64(next.Size) * (next.Tree.Key - head.Value) * (next.Tree.Key - head.Value) / (head.Tree.Key - head.Value) / (next.Tree.Key - next.Value) 211 head.Value = next.Value 212 } 213 } 214 head.Tree = head.Tree.Merge(next.Tree) 215 head.Size += next.Size 216 head.Next = next.Next 217 } 218 219 } 220 } 221 s.Overlap /= float64(s.Sz) * float64(s.Sz) 222 223 step := float64(s.Sz) / float64(k) 224 if float64(nulls) >= step { 225 step = float64(s.Sz-nulls) / float64(k-1) 226 } 227 last := step 228 k -= 2 229 s.Uniform = float64(s.Sz) / (s.Max - s.Min) 230 for { 231 if head == nil { 232 for i := 0; i <= k; i++ { 233 s.Result[k-i] = s.Min 234 } 235 break 236 } 237 Sz := float64(head.Size) 238 var valueTree *ShuffleHeap 239 var speed float64 240 now := head.Tree.Key 241 for { 242 if valueTree == nil || (head.Tree != nil && valueTree.Key < head.Tree.Key) { 243 if head.Tree == nil { 244 break 245 } 246 head.Tree, node = head.Tree.Pop() 247 delta := speed * (now - node.Key) 248 last -= delta 249 Sz -= delta 250 for last <= 0 { 251 s.Result[k] = node.Key - (last/delta)*(now-node.Key) 252 if s.Result[k] != s.Result[k] { 253 s.Result[k] = node.Key 254 } 255 last += step 256 k-- 257 if k < 0 || last > Sz { 258 break 259 } 260 261 } 262 if k < 0 { 263 break 264 } 265 now = node.Key 266 if node.Key-node.Value < 0.1 { 267 last -= float64(node.Sz) 268 Sz -= float64(node.Sz) 269 if last <= 0 { 270 if -last <= last+float64(node.Sz) { 271 s.Result[k] = now 272 last = step 273 k-- 274 if k < 0 { 275 break 276 } 277 } else { 278 s.Result[k] = now + 1 279 last = step - float64(node.Sz) 280 k-- 281 if k < 0 { 282 break 283 } 284 if last <= 0 { 285 s.Result[k] = now 286 last = step 287 k-- 288 if k < 0 { 289 break 290 } 291 } 292 } 293 294 } 295 continue 296 } 297 speed += float64(node.Sz) / (node.Key - node.Value) 298 if s.Uniform < speed { 299 s.Uniform = speed 300 } 301 node.Left = nil 302 node.Right = nil 303 node.Height = 1 304 node.Key += node.Value 305 node.Value = node.Key - node.Value 306 node.Key -= node.Value 307 if valueTree == nil { 308 valueTree = node 309 } else { 310 valueTree = valueTree.Merge(node) 311 } 312 } else { 313 valueTree, node = valueTree.Pop() 314 delta := speed * (now - node.Key) 315 last -= delta 316 Sz -= delta 317 for last < 0 { 318 s.Result[k] = node.Key - (last/delta)*(now-node.Key) 319 if s.Result[k] != s.Result[k] { 320 s.Result[k] = node.Key 321 } 322 last += step 323 k-- 324 if k < 0 || last > Sz { 325 break 326 } 327 328 } 329 if k < 0 { 330 break 331 } 332 now = node.Key 333 speed -= float64(node.Sz) / (node.Value - node.Key) 334 } 335 } 336 if k < 0 { 337 break 338 } 339 head = head.Next 340 } 341 s.Uniform = float64(s.Sz) / (s.Max - s.Min) / s.Uniform 342 for i := range s.Result { 343 if s.Result[i] != s.Result[i] { 344 s.Result = nil 345 return 346 } 347 } 348 if s.IsStrType { 349 for i := range s.Result { 350 var frac float64 351 str := make([]byte, s.MaxLen) 352 s.Result[i], _ = math.Modf(s.Result[i]) 353 for j := int64(0); j < s.MaxLen; j++ { 354 s.Result[i], frac = math.Modf(s.Result[i] / lens) 355 k := int(frac*lens + 0.01) 356 if k < 0 { 357 s.Result = nil 358 return 359 } 360 str[j] = inttobyte[k] 361 } 362 s.Result[i] = 0 363 for j := len(str) - 1; j >= 0; j-- { 364 s.Result[i] = s.Result[i]*256 + float64(str[j]) 365 } 366 for j := 8 - len(str); j > 0; j-- { 367 s.Result[i] = s.Result[i] * 256 368 } 369 } 370 } 371 for i := 1; i < len(s.Result); i++ { 372 if s.Result[i] == s.Result[i-1] { 373 s.Result = nil 374 return 375 } 376 } 377 }