vitess.io/vitess@v0.16.2/go/mysql/collations/internal/uca/layout.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package uca
    18  
    19  import (
    20  	"reflect"
    21  	"sync"
    22  	"unsafe"
    23  )
    24  
    25  const MaxCodepoint = 0x10FFFF + 1
    26  const CodepointsPerPage = 256
    27  const MaxCollationElementsPerCodepoint = 8
    28  
    29  func PageOffset(cp rune) (int, int) {
    30  	return int(cp) >> 8, int(cp) & 0xFF
    31  }
    32  
// Weights is a paged collation weight table. It is indexed by the page number
// returned by PageOffset; each entry points to the page's weight data, and a
// nil entry means the table has no weights for that page. The internal
// arrangement of a page is defined by the Layout in use.
type Weights []*[]uint16
    34  
// Patch describes a replacement of the weights stored for a single codepoint.
// ApplyTailoring groups patches by page and hands them to a Layout to apply.
type Patch struct {
	// Codepoint is the codepoint whose weights are replaced.
	Codepoint rune
	// Patch holds the new weights, in the flat form expected by the Layout's
	// applyPatches (Layout_uca900 requires a multiple of 3 values).
	Patch []uint16
}
    39  
// Layout abstracts how weights are arranged inside a page of a Weights table,
// so that tailoring and debugging code can work with any arrangement.
type Layout interface {
	// MaxCodepoint returns the highest codepoint covered by this layout.
	MaxCodepoint() rune
	// DebugWeights returns the weights stored in table for codepoint.
	DebugWeights(table Weights, codepoint rune) []uint16

	// allocPage returns a newly allocated page, seeded from original when it
	// is non-nil, sized to hold the given patches.
	allocPage(original *[]uint16, patches []Patch) []uint16
	// applyPatches stores weights in page for the codepoint at the given
	// in-page offset.
	applyPatches(page []uint16, offset int, weights []uint16)
}
    47  
// Layout_uca900 is the Layout in which each page starts with
// CodepointsPerPage collation-element counts (one per codepoint), followed by
// the weights stored as planes of 256 values: three planes per collation
// element, each plane indexed by the codepoint's in-page offset.
type Layout_uca900 struct{}

// MaxCodepoint returns the last codepoint of the full range, MaxCodepoint - 1.
func (Layout_uca900) MaxCodepoint() rune {
	return MaxCodepoint - 1
}
    53  
    54  func equalWeights900(table Weights, levels int, A, B rune) bool {
    55  	pA, offsetA := PageOffset(A)
    56  	pageA := table[pA]
    57  
    58  	pB, offsetB := PageOffset(B)
    59  	pageB := table[pB]
    60  
    61  	if pageA == nil || pageB == nil {
    62  		return false
    63  	}
    64  
    65  	ppA := (*pageA)[256+offsetA:]
    66  	ppB := (*pageB)[256+offsetB:]
    67  
    68  	if ppA[0] != 0x0 && ppB[0] != 0x0 && ppA[0] != ppB[0] {
    69  		return false
    70  	}
    71  
    72  	cA := int((*pageA)[offsetA])
    73  	cB := int((*pageB)[offsetB])
    74  
    75  	for l := 0; l < levels; l++ {
    76  		wA, wB := l*256, l*256
    77  		wA1, wB1 := wA+(cA*256*3), wB+(cB*256*3)
    78  
    79  		for wA < wA1 && wB < wB1 {
    80  			for wA < wA1 && ppA[wA] == 0x0 {
    81  				wA += 256 * 3
    82  			}
    83  			if wA == wA1 {
    84  				break
    85  			}
    86  			for wB < wB1 && ppB[wB] == 0x0 {
    87  				wB += 256 * 3
    88  			}
    89  			if wB == wB1 {
    90  				break
    91  			}
    92  			if ppA[wA] != ppB[wB] {
    93  				return false
    94  			}
    95  			wA += 256 * 3
    96  			wB += 256 * 3
    97  		}
    98  		for wA < wA1 {
    99  			if ppA[wA] != 0x0 {
   100  				return false
   101  			}
   102  			wA += 256 * 3
   103  		}
   104  		for wB < wB1 {
   105  			if ppB[wB] != 0x0 {
   106  				return false
   107  			}
   108  			wB += 256 * 3
   109  		}
   110  	}
   111  	return true
   112  }
   113  
   114  func (Layout_uca900) DebugWeights(table Weights, codepoint rune) (result []uint16) {
   115  	p, offset := PageOffset(codepoint)
   116  	page := table[p]
   117  	if page == nil {
   118  		return nil
   119  	}
   120  
   121  	ceCount := int((*page)[offset])
   122  	for ce := 0; ce < ceCount; ce++ {
   123  		result = append(result,
   124  			(*page)[256+(ce*3+0)*256+offset],
   125  			(*page)[256+(ce*3+1)*256+offset],
   126  			(*page)[256+(ce*3+2)*256+offset],
   127  		)
   128  	}
   129  	return
   130  }
   131  
   132  func (Layout_uca900) allocPage(original *[]uint16, patches []Patch) []uint16 {
   133  	var maxWeights int
   134  	for _, p := range patches {
   135  		if len(p.Patch)%3 != 0 {
   136  			panic("len(p.Patch)%3")
   137  		}
   138  		if len(p.Patch) > maxWeights {
   139  			maxWeights = len(p.Patch)
   140  		}
   141  	}
   142  
   143  	minLenForPage := maxWeights*CodepointsPerPage + CodepointsPerPage
   144  	if original == nil {
   145  		return make([]uint16, minLenForPage)
   146  	}
   147  	if len(*original) > minLenForPage {
   148  		minLenForPage = len(*original)
   149  	}
   150  	newPage := make([]uint16, minLenForPage)
   151  	copy(newPage, *original)
   152  	return newPage
   153  }
   154  
   155  func (Layout_uca900) applyPatches(page []uint16, offset int, weights []uint16) {
   156  	var weightcount = len(weights) / 3
   157  	var ce int
   158  	for ce < weightcount {
   159  		page[(ce*3+1)*CodepointsPerPage+offset] = weights[ce*3+0]
   160  		page[(ce*3+2)*CodepointsPerPage+offset] = weights[ce*3+1]
   161  		page[(ce*3+3)*CodepointsPerPage+offset] = weights[ce*3+2]
   162  		ce++
   163  	}
   164  
   165  	for ce < int(page[offset]) {
   166  		page[(ce*3+1)*CodepointsPerPage+offset] = 0x0
   167  		page[(ce*3+2)*CodepointsPerPage+offset] = 0x0
   168  		page[(ce*3+3)*CodepointsPerPage+offset] = 0x0
   169  		ce++
   170  	}
   171  
   172  	page[offset] = uint16(weightcount)
   173  }
   174  
// Layout_uca_legacy is the Layout in which the first entry of each page is the
// page's stride (the number of weight slots per codepoint), followed by one
// stride-sized group of weights per codepoint. Unused slots hold zero.
type Layout_uca_legacy struct {
	// Max is the highest codepoint covered by the table.
	Max rune
}

// MaxCodepoint returns the configured highest codepoint, l.Max.
func (l Layout_uca_legacy) MaxCodepoint() rune {
	return l.Max
}
   182  
   183  func equalWeightsLegacy(table Weights, A, B rune) bool {
   184  	pA, offsetA := PageOffset(A)
   185  	pageA := table[pA]
   186  
   187  	pB, offsetB := PageOffset(B)
   188  	pageB := table[pB]
   189  
   190  	if pageA == nil || pageB == nil {
   191  		return false
   192  	}
   193  
   194  	sA := int((*pageA)[0])
   195  	sB := int((*pageB)[0])
   196  	iA := 1 + sA*offsetA
   197  	iB := 1 + sB*offsetB
   198  
   199  	var shrt, long []uint16
   200  	if sA < sB {
   201  		shrt = (*pageA)[iA : iA+sA]
   202  		long = (*pageB)[iB : iB+sB]
   203  	} else {
   204  		shrt = (*pageB)[iB : iB+sB]
   205  		long = (*pageA)[iA : iA+sA]
   206  	}
   207  
   208  	for i, wA := range shrt {
   209  		wB := long[i]
   210  		if wA != wB {
   211  			return false
   212  		}
   213  	}
   214  	if len(long) > len(shrt) && long[len(shrt)] != 0x0 {
   215  		return false
   216  	}
   217  	return true
   218  }
   219  
   220  func (l Layout_uca_legacy) DebugWeights(table Weights, codepoint rune) (result []uint16) {
   221  	if codepoint > l.Max {
   222  		return nil
   223  	}
   224  	p, offset := PageOffset(codepoint)
   225  	page := table[p]
   226  	if page == nil {
   227  		return nil
   228  	}
   229  
   230  	stride := int((*page)[0])
   231  	position := 1 + stride*offset
   232  	weights := (*page)[position : position+stride]
   233  
   234  	for i, w := range weights {
   235  		if w == 0x0 {
   236  			weights = weights[:i]
   237  			break
   238  		}
   239  	}
   240  	return weights
   241  }
   242  
   243  func (Layout_uca_legacy) allocPage(original *[]uint16, patches []Patch) []uint16 {
   244  	var newStride int
   245  	for _, p := range patches {
   246  		if len(p.Patch) > newStride {
   247  			newStride = len(p.Patch)
   248  		}
   249  	}
   250  
   251  	minLenForPage := 1 + newStride*CodepointsPerPage
   252  	if original == nil {
   253  		return make([]uint16, minLenForPage)
   254  	}
   255  
   256  	if len(*original) >= minLenForPage {
   257  		newPage := make([]uint16, len(*original))
   258  		copy(newPage, *original)
   259  		return newPage
   260  	}
   261  
   262  	originalStride := int((*original)[0])
   263  	if originalStride >= newStride {
   264  		panic("mismatch in originalStride calculation?")
   265  	}
   266  
   267  	newPage := make([]uint16, minLenForPage)
   268  	for i := 0; i < CodepointsPerPage; i++ {
   269  		for j := 0; j < originalStride; j++ {
   270  			newPage[1+i*newStride] = (*original)[1+i*originalStride]
   271  		}
   272  	}
   273  	return newPage
   274  }
   275  
   276  func (Layout_uca_legacy) applyPatches(page []uint16, offset int, weights []uint16) {
   277  	stride := int(page[0])
   278  	var ce int
   279  	for ce < len(weights) {
   280  		page[1+offset*stride+ce] = weights[ce]
   281  		ce++
   282  	}
   283  	for ce < stride {
   284  		page[1+offset*stride+ce] = 0x0
   285  		ce++
   286  	}
   287  }
   288  
// tableWithPatch is the key of the tailoring cache: the addresses of the
// backing arrays of a base Weights table and of the patch slice applied to it.
// Only the addresses are stored, not the slices themselves.
type tableWithPatch struct {
	tableptr uintptr
	patchptr uintptr
}

// cachedTables maps a (base table, patches) pair to the tailored table that
// was built from it.
var cachedTables = make(map[tableWithPatch]Weights)

// cachedTablesMu guards every access to cachedTables.
var cachedTablesMu sync.Mutex
   296  
   297  func lookupCachedTable(table Weights, patch []Patch) (Weights, bool) {
   298  	hdr1 := (*reflect.SliceHeader)(unsafe.Pointer(&table))
   299  	hdr2 := (*reflect.SliceHeader)(unsafe.Pointer(&patch))
   300  
   301  	cachedTablesMu.Lock()
   302  	defer cachedTablesMu.Unlock()
   303  	tbl, ok := cachedTables[tableWithPatch{hdr1.Data, hdr2.Data}]
   304  	return tbl, ok
   305  }
   306  
   307  func storeCachedTable(table Weights, patch []Patch, result Weights) {
   308  	hdr1 := (*reflect.SliceHeader)(unsafe.Pointer(&table))
   309  	hdr2 := (*reflect.SliceHeader)(unsafe.Pointer(&patch))
   310  
   311  	cachedTablesMu.Lock()
   312  	cachedTables[tableWithPatch{hdr1.Data, hdr2.Data}] = result
   313  	cachedTablesMu.Unlock()
   314  }
   315  
   316  func ApplyTailoring(layout Layout, base Weights, patches []Patch) Weights {
   317  	if len(patches) == 0 {
   318  		return base
   319  	}
   320  	if result, ok := lookupCachedTable(base, patches); ok {
   321  		return result
   322  	}
   323  
   324  	result := make(Weights, len(base))
   325  	copy(result, base)
   326  
   327  	groups := make(map[int][]Patch)
   328  	for _, patch := range patches {
   329  		p, _ := PageOffset(patch.Codepoint)
   330  		groups[p] = append(groups[p], patch)
   331  	}
   332  
   333  	for p, pps := range groups {
   334  		page := layout.allocPage(result[p], pps)
   335  
   336  		for _, patch := range pps {
   337  			_, off := PageOffset(patch.Codepoint)
   338  			layout.applyPatches(page, off, patch.Patch)
   339  		}
   340  
   341  		result[p] = &page
   342  	}
   343  
   344  	storeCachedTable(base, patches, result)
   345  	return result
   346  }