github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/lexbase/normalize.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package lexbase 12 13 import ( 14 "strings" 15 "unicode" 16 17 "golang.org/x/text/unicode/norm" 18 ) 19 20 // Special case normalization rules for Turkish/Azeri lowercase dotless-i and 21 // uppercase dotted-i. Fold both dotted and dotless 'i' into the ascii i/I, so 22 // our case-insensitive comparison functions can be locale-invariant. This 23 // mapping implements case-insensitivity for Turkish and other latin-derived 24 // languages simultaneously, with the additional quirk that it is also 25 // insensitive to the dottedness of the i's 26 var normalize = unicode.SpecialCase{ 27 unicode.CaseRange{ 28 Lo: 0x0130, 29 Hi: 0x0130, 30 Delta: [unicode.MaxCase]rune{ 31 0x49 - 0x130, // Upper 32 0x69 - 0x130, // Lower 33 0x49 - 0x130, // Title 34 }, 35 }, 36 unicode.CaseRange{ 37 Lo: 0x0131, 38 Hi: 0x0131, 39 Delta: [unicode.MaxCase]rune{ 40 0x49 - 0x131, // Upper 41 0x69 - 0x131, // Lower 42 0x49 - 0x131, // Title 43 }, 44 }, 45 } 46 47 // NormalizeName normalizes to lowercase and Unicode Normalization 48 // Form C (NFC). 49 func NormalizeName(n string) string { 50 lower := strings.Map(normalize.ToLower, n) 51 if isASCII(lower) { 52 return lower 53 } 54 return norm.NFC.String(lower) 55 } 56 57 // NormalizeString normalizes to Unicode Normalization Form C (NFC). 58 // This function is specifically for double quoted identifiers. 59 func NormalizeString(s string) string { 60 if isASCII(s) { 61 return s 62 } 63 return norm.NFC.String(s) 64 }