github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/lexbase/normalize.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package lexbase
    12  
    13  import (
    14  	"strings"
    15  	"unicode"
    16  
    17  	"golang.org/x/text/unicode/norm"
    18  )
    19  
    20  // Special case normalization rules for Turkish/Azeri lowercase dotless-i and
    21  // uppercase dotted-i. Fold both dotted and dotless 'i' into the ascii i/I, so
    22  // our case-insensitive comparison functions can be locale-invariant. This
    23  // mapping implements case-insensitivity for Turkish and other latin-derived
    24  // languages simultaneously, with the additional quirk that it is also
    25  // insensitive to the dottedness of the i's
    26  var normalize = unicode.SpecialCase{
    27  	unicode.CaseRange{
    28  		Lo: 0x0130,
    29  		Hi: 0x0130,
    30  		Delta: [unicode.MaxCase]rune{
    31  			0x49 - 0x130, // Upper
    32  			0x69 - 0x130, // Lower
    33  			0x49 - 0x130, // Title
    34  		},
    35  	},
    36  	unicode.CaseRange{
    37  		Lo: 0x0131,
    38  		Hi: 0x0131,
    39  		Delta: [unicode.MaxCase]rune{
    40  			0x49 - 0x131, // Upper
    41  			0x69 - 0x131, // Lower
    42  			0x49 - 0x131, // Title
    43  		},
    44  	},
    45  }
    46  
    47  // NormalizeName normalizes to lowercase and Unicode Normalization
    48  // Form C (NFC).
    49  func NormalizeName(n string) string {
    50  	lower := strings.Map(normalize.ToLower, n)
    51  	if isASCII(lower) {
    52  		return lower
    53  	}
    54  	return norm.NFC.String(lower)
    55  }
    56  
    57  // NormalizeString normalizes to Unicode Normalization Form C (NFC).
    58  // This function is specifically for double quoted identifiers.
    59  func NormalizeString(s string) string {
    60  	if isASCII(s) {
    61  		return s
    62  	}
    63  	return norm.NFC.String(s)
    64  }