github.com/matrixorigin/matrixone@v1.2.0/pkg/pb/statsinfo/shuffle.go (about)

     1  // Copyright 2021 - 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statsinfo
    16  
    17  import "math"
    18  
// DefaultEvalSize is the partition count used by ShuffleRange.Eval: Eval
// divides the total row count by it to obtain the per-partition step and
// allocates DefaultEvalSize-1 boundary slots in ShuffleRange.Result.
const DefaultEvalSize = 1024
    20  
// ShuffleList is one element of the singly linked list that
// ShuffleRange.Eval builds while draining its heap. Each element stands for
// a group of ranges that Eval decided to merge together.
//
//   - Size starts as the Sz of the first node placed in the group (after Eval
//     has subtracted that node's Nulls) and grows by the Size of every group
//     merged into it.
//   - Value starts as the Value of the first node; one of Eval's merge
//     branches lowers it to the merged neighbour's Value. It appears to track
//     the lower bound of the group (NOTE(review): not every merge branch
//     updates it, confirm the intended invariant).
//   - Next links to the element that was pushed before this one.
//   - Tree is the heap holding every node that belongs to the group.
type ShuffleList struct {
	Size  int64
	Value float64
	Next  *ShuffleList
	Tree  *ShuffleHeap
}
    27  
    28  func (t *ShuffleHeap) Merge(s *ShuffleHeap) *ShuffleHeap {
    29  	if t.Key > s.Key != t.Reverse {
    30  		if s.Right == nil {
    31  			s.Right = t
    32  		} else {
    33  			s.Right = t.Merge(s.Right)
    34  		}
    35  		if s.Left == nil || s.Left.Height < s.Right.Height {
    36  			tmp := s.Left
    37  			s.Left = s.Right
    38  			s.Right = tmp
    39  		}
    40  		s.Height = s.Left.Height + 1
    41  		return s
    42  	} else {
    43  		if t.Right == nil {
    44  			t.Right = s
    45  		} else {
    46  			t.Right = t.Right.Merge(s)
    47  		}
    48  		if t.Left == nil || t.Left.Height < t.Right.Height {
    49  			tmp := t.Left
    50  			t.Left = t.Right
    51  			t.Right = tmp
    52  		}
    53  		t.Height = t.Left.Height + 1
    54  		return t
    55  	}
    56  }
    57  
    58  func (t *ShuffleHeap) Pop() (*ShuffleHeap, *ShuffleHeap) {
    59  	if t.Left == nil {
    60  		return nil, t
    61  	}
    62  	if t.Right == nil {
    63  		return t.Left, t
    64  	}
    65  	return t.Left.Merge(t.Right), t
    66  }
    67  
    68  func (s *ShuffleRange) UpdateString(zmmin []byte, zmmax []byte, rowCount int64, nullCount int64) {
    69  	if len(zmmin) > 8 {
    70  		zmmin = zmmin[:8]
    71  	}
    72  	if len(zmmax) > 8 {
    73  		zmmax = zmmax[:8]
    74  	}
    75  	if s.Sz == 0 {
    76  		s.Sz = rowCount
    77  		s.Flags = make([]bool, 256)
    78  		s.Mins = make([][]byte, 0)
    79  		s.Maxs = make([][]byte, 0)
    80  		s.Mins = append(s.Mins, zmmin)
    81  		s.Maxs = append(s.Maxs, zmmax)
    82  		s.Rows = make([]int64, 0)
    83  		s.Rows = append(s.Rows, rowCount)
    84  		s.Nulls = make([]int64, 0)
    85  		s.Nulls = append(s.Nulls, nullCount)
    86  	} else {
    87  		s.Sz += rowCount
    88  		s.Mins = append(s.Mins, zmmin)
    89  		s.Maxs = append(s.Maxs, zmmax)
    90  		s.Rows = append(s.Rows, rowCount)
    91  		s.Nulls = append(s.Nulls, nullCount)
    92  	}
    93  	if s.MaxLen < int64(len(zmmin)) {
    94  		s.MaxLen = int64(len(zmmin))
    95  	}
    96  	for _, c := range zmmin {
    97  		s.Flags[int(c)] = true
    98  	}
    99  	if s.MaxLen < int64(len(zmmax)) {
   100  		s.MaxLen = int64(len(zmmax))
   101  	}
   102  	for _, c := range zmmax {
   103  		s.Flags[int(c)] = true
   104  	}
   105  }
   106  
   107  func (s *ShuffleRange) Update(zmmin float64, zmmax float64, rowCount int64, nullCount int64) {
   108  	s.Sz += rowCount
   109  	if s.Tree == nil {
   110  		s.Tree = &ShuffleHeap{
   111  			Height: 1,
   112  			Key:    zmmax,
   113  			Value:  zmmin,
   114  			Sz:     rowCount,
   115  			Nulls:  nullCount,
   116  		}
   117  		s.Min = zmmin
   118  		s.Max = zmmax
   119  	} else {
   120  		s.Tree = s.Tree.Merge(&ShuffleHeap{
   121  			Height: 1,
   122  			Key:    zmmax,
   123  			Value:  zmmin,
   124  			Sz:     rowCount,
   125  			Nulls:  nullCount,
   126  		})
   127  		if s.Min > zmmin {
   128  			s.Min = zmmin
   129  		}
   130  		if s.Max < zmmax {
   131  			s.Max = zmmax
   132  		}
   133  	}
   134  }
   135  
   136  func (s *ShuffleRange) Eval() {
   137  	k := DefaultEvalSize
   138  	if s.Sz == 0 {
   139  		return
   140  	}
   141  	bytetoint := make(map[byte]int)
   142  	inttobyte := make([]byte, 0)
   143  	var lens float64
   144  	if s.IsStrType {
   145  		for i := 0; i < 256; i++ {
   146  			if s.Flags[i] {
   147  				bytetoint[byte(i)] = len(inttobyte)
   148  				inttobyte = append(inttobyte, byte(i))
   149  			}
   150  		}
   151  		if len(inttobyte) == 0 {
   152  			return
   153  		}
   154  		lens = float64(len(inttobyte))
   155  		for i := range s.Mins {
   156  			node := &ShuffleHeap{
   157  				Height: 1,
   158  				Key:    0,
   159  				Value:  0,
   160  				Sz:     s.Rows[i],
   161  				Nulls:  s.Nulls[i],
   162  			}
   163  			for _, c := range s.Maxs[i] {
   164  				node.Key = node.Key*lens + float64(bytetoint[c])
   165  			}
   166  			for j := int64(len(s.Maxs[i])); j < s.MaxLen; j++ {
   167  				node.Key = node.Key * lens
   168  			}
   169  			for _, c := range s.Mins[i] {
   170  				node.Value = node.Value*lens + float64(bytetoint[c])
   171  			}
   172  			for j := int64(len(s.Mins[i])); j < s.MaxLen; j++ {
   173  				node.Value = node.Value * lens
   174  			}
   175  			if s.Tree == nil {
   176  				s.Tree = node
   177  			} else {
   178  				s.Tree = s.Tree.Merge(node)
   179  			}
   180  		}
   181  	}
   182  	var head *ShuffleList
   183  	var node *ShuffleHeap
   184  	var nulls int64
   185  	s.Result = make([]float64, k-1)
   186  	for s.Tree != nil {
   187  		s.Tree, node = s.Tree.Pop()
   188  		node.Left = nil
   189  		node.Right = nil
   190  		node.Height = 1
   191  		node.Sz -= node.Nulls
   192  		nulls += node.Nulls
   193  		node.Reverse = true
   194  		head = &ShuffleList{
   195  			Next:  head,
   196  			Tree:  node,
   197  			Size:  node.Sz,
   198  			Value: node.Value,
   199  		}
   200  		if head.Next != nil {
   201  			for head.Next != nil {
   202  				next := head.Next
   203  				if head.Tree.Value >= next.Tree.Key {
   204  					break
   205  				}
   206  				if head.Tree.Key != head.Value {
   207  					if head.Value <= next.Value {
   208  						s.Overlap += float64(head.Size) * float64(next.Size) * (next.Tree.Key - next.Value) / (head.Tree.Key - head.Value)
   209  					} else {
   210  						s.Overlap += float64(head.Size) * float64(next.Size) * (next.Tree.Key - head.Value) * (next.Tree.Key - head.Value) / (head.Tree.Key - head.Value) / (next.Tree.Key - next.Value)
   211  						head.Value = next.Value
   212  					}
   213  				}
   214  				head.Tree = head.Tree.Merge(next.Tree)
   215  				head.Size += next.Size
   216  				head.Next = next.Next
   217  			}
   218  
   219  		}
   220  	}
   221  	s.Overlap /= float64(s.Sz) * float64(s.Sz)
   222  
   223  	step := float64(s.Sz) / float64(k)
   224  	if float64(nulls) >= step {
   225  		step = float64(s.Sz-nulls) / float64(k-1)
   226  	}
   227  	last := step
   228  	k -= 2
   229  	s.Uniform = float64(s.Sz) / (s.Max - s.Min)
   230  	for {
   231  		if head == nil {
   232  			for i := 0; i <= k; i++ {
   233  				s.Result[k-i] = s.Min
   234  			}
   235  			break
   236  		}
   237  		Sz := float64(head.Size)
   238  		var valueTree *ShuffleHeap
   239  		var speed float64
   240  		now := head.Tree.Key
   241  		for {
   242  			if valueTree == nil || (head.Tree != nil && valueTree.Key < head.Tree.Key) {
   243  				if head.Tree == nil {
   244  					break
   245  				}
   246  				head.Tree, node = head.Tree.Pop()
   247  				delta := speed * (now - node.Key)
   248  				last -= delta
   249  				Sz -= delta
   250  				for last <= 0 {
   251  					s.Result[k] = node.Key - (last/delta)*(now-node.Key)
   252  					if s.Result[k] != s.Result[k] {
   253  						s.Result[k] = node.Key
   254  					}
   255  					last += step
   256  					k--
   257  					if k < 0 || last > Sz {
   258  						break
   259  					}
   260  
   261  				}
   262  				if k < 0 {
   263  					break
   264  				}
   265  				now = node.Key
   266  				if node.Key-node.Value < 0.1 {
   267  					last -= float64(node.Sz)
   268  					Sz -= float64(node.Sz)
   269  					if last <= 0 {
   270  						if -last <= last+float64(node.Sz) {
   271  							s.Result[k] = now
   272  							last = step
   273  							k--
   274  							if k < 0 {
   275  								break
   276  							}
   277  						} else {
   278  							s.Result[k] = now + 1
   279  							last = step - float64(node.Sz)
   280  							k--
   281  							if k < 0 {
   282  								break
   283  							}
   284  							if last <= 0 {
   285  								s.Result[k] = now
   286  								last = step
   287  								k--
   288  								if k < 0 {
   289  									break
   290  								}
   291  							}
   292  						}
   293  
   294  					}
   295  					continue
   296  				}
   297  				speed += float64(node.Sz) / (node.Key - node.Value)
   298  				if s.Uniform < speed {
   299  					s.Uniform = speed
   300  				}
   301  				node.Left = nil
   302  				node.Right = nil
   303  				node.Height = 1
   304  				node.Key += node.Value
   305  				node.Value = node.Key - node.Value
   306  				node.Key -= node.Value
   307  				if valueTree == nil {
   308  					valueTree = node
   309  				} else {
   310  					valueTree = valueTree.Merge(node)
   311  				}
   312  			} else {
   313  				valueTree, node = valueTree.Pop()
   314  				delta := speed * (now - node.Key)
   315  				last -= delta
   316  				Sz -= delta
   317  				for last < 0 {
   318  					s.Result[k] = node.Key - (last/delta)*(now-node.Key)
   319  					if s.Result[k] != s.Result[k] {
   320  						s.Result[k] = node.Key
   321  					}
   322  					last += step
   323  					k--
   324  					if k < 0 || last > Sz {
   325  						break
   326  					}
   327  
   328  				}
   329  				if k < 0 {
   330  					break
   331  				}
   332  				now = node.Key
   333  				speed -= float64(node.Sz) / (node.Value - node.Key)
   334  			}
   335  		}
   336  		if k < 0 {
   337  			break
   338  		}
   339  		head = head.Next
   340  	}
   341  	s.Uniform = float64(s.Sz) / (s.Max - s.Min) / s.Uniform
   342  	for i := range s.Result {
   343  		if s.Result[i] != s.Result[i] {
   344  			s.Result = nil
   345  			return
   346  		}
   347  	}
   348  	if s.IsStrType {
   349  		for i := range s.Result {
   350  			var frac float64
   351  			str := make([]byte, s.MaxLen)
   352  			s.Result[i], _ = math.Modf(s.Result[i])
   353  			for j := int64(0); j < s.MaxLen; j++ {
   354  				s.Result[i], frac = math.Modf(s.Result[i] / lens)
   355  				k := int(frac*lens + 0.01)
   356  				if k < 0 {
   357  					s.Result = nil
   358  					return
   359  				}
   360  				str[j] = inttobyte[k]
   361  			}
   362  			s.Result[i] = 0
   363  			for j := len(str) - 1; j >= 0; j-- {
   364  				s.Result[i] = s.Result[i]*256 + float64(str[j])
   365  			}
   366  			for j := 8 - len(str); j > 0; j-- {
   367  				s.Result[i] = s.Result[i] * 256
   368  			}
   369  		}
   370  	}
   371  	for i := 1; i < len(s.Result); i++ {
   372  		if s.Result[i] == s.Result[i-1] {
   373  			s.Result = nil
   374  			return
   375  		}
   376  	}
   377  }