github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/unicode/unicode.go (about) 1 // Package unicode provides an alternative to the SQLite ICU extension. 2 // 3 // Like the [ICU extension], it provides Unicode aware: 4 // - upper() and lower() functions, 5 // - LIKE and REGEXP operators, 6 // - collation sequences. 7 // 8 // The implementation is not 100% compatible with the [ICU extension]: 9 // - upper() and lower() use [strings.ToUpper], [strings.ToLower] and [cases]; 10 // - the LIKE operator follows [strings.EqualFold] rules; 11 // - the REGEXP operator uses Go [regexp/syntax]; 12 // - collation sequences use [collate]. 13 // 14 // Expect subtle differences (e.g.) in the handling of Turkish case folding. 15 // 16 // [ICU extension]: https://sqlite.org/src/dir/ext/icu 17 package unicode 18 19 import ( 20 "bytes" 21 "regexp" 22 "strings" 23 "unicode/utf8" 24 25 "github.com/ncruces/go-sqlite3" 26 "github.com/ncruces/go-sqlite3/internal/util" 27 "golang.org/x/text/cases" 28 "golang.org/x/text/collate" 29 "golang.org/x/text/language" 30 ) 31 32 // Register registers Unicode aware functions for a database connection. 33 func Register(db *sqlite3.Conn) { 34 flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS 35 36 db.CreateFunction("like", 2, flags, like) 37 db.CreateFunction("like", 3, flags, like) 38 db.CreateFunction("upper", 1, flags, upper) 39 db.CreateFunction("upper", 2, flags, upper) 40 db.CreateFunction("lower", 1, flags, lower) 41 db.CreateFunction("lower", 2, flags, lower) 42 db.CreateFunction("regexp", 2, flags, regex) 43 db.CreateFunction("icu_load_collation", 2, sqlite3.DIRECTONLY, 44 func(ctx sqlite3.Context, arg ...sqlite3.Value) { 45 name := arg[1].Text() 46 if name == "" { 47 return 48 } 49 50 err := RegisterCollation(db, arg[0].Text(), name) 51 if err != nil { 52 ctx.ResultError(err) 53 return 54 } 55 }) 56 } 57 58 // RegisterCollation registers a Unicode collation sequence for a database connection. 59 func RegisterCollation(db *sqlite3.Conn, locale, name string) error { 60 tag, err := language.Parse(locale) 61 if err != nil { 62 return err 63 } 64 return db.CreateCollation(name, collate.New(tag).Compare) 65 } 66 67 func upper(ctx sqlite3.Context, arg ...sqlite3.Value) { 68 if len(arg) == 1 { 69 ctx.ResultRawText(bytes.ToUpper(arg[0].RawText())) 70 return 71 } 72 cs, ok := ctx.GetAuxData(1).(cases.Caser) 73 if !ok { 74 t, err := language.Parse(arg[1].Text()) 75 if err != nil { 76 ctx.ResultError(err) 77 return 78 } 79 c := cases.Upper(t) 80 ctx.SetAuxData(1, c) 81 cs = c 82 } 83 ctx.ResultRawText(cs.Bytes(arg[0].RawText())) 84 } 85 86 func lower(ctx sqlite3.Context, arg ...sqlite3.Value) { 87 if len(arg) == 1 { 88 ctx.ResultRawText(bytes.ToLower(arg[0].RawText())) 89 return 90 } 91 cs, ok := ctx.GetAuxData(1).(cases.Caser) 92 if !ok { 93 t, err := language.Parse(arg[1].Text()) 94 if err != nil { 95 ctx.ResultError(err) 96 return 97 } 98 c := cases.Lower(t) 99 ctx.SetAuxData(1, c) 100 cs = c 101 } 102 ctx.ResultRawText(cs.Bytes(arg[0].RawText())) 103 } 104 105 func regex(ctx sqlite3.Context, arg ...sqlite3.Value) { 106 re, ok := ctx.GetAuxData(0).(*regexp.Regexp) 107 if !ok { 108 r, err := regexp.Compile(arg[0].Text()) 109 if err != nil { 110 ctx.ResultError(err) 111 return 112 } 113 re = r 114 ctx.SetAuxData(0, re) 115 } 116 ctx.ResultBool(re.Match(arg[1].RawText())) 117 } 118 119 func like(ctx sqlite3.Context, arg ...sqlite3.Value) { 120 escape := rune(-1) 121 if len(arg) == 3 { 122 var size int 123 b := arg[2].RawText() 124 escape, size = utf8.DecodeRune(b) 125 if size != len(b) { 126 ctx.ResultError(util.ErrorString("ESCAPE expression must be a single character")) 127 return 128 } 129 } 130 131 type likeData struct { 132 *regexp.Regexp 133 escape rune 134 } 135 136 re, ok := ctx.GetAuxData(0).(likeData) 137 if !ok || re.escape != escape { 138 re = likeData{ 139 regexp.MustCompile(like2regex(arg[0].Text(), escape)), 140 escape, 141 } 142 ctx.SetAuxData(0, re) 143 } 144 ctx.ResultBool(re.Match(arg[1].RawText())) 145 } 146 147 func like2regex(pattern string, escape rune) string { 148 var re strings.Builder 149 start := 0 150 literal := false 151 re.Grow(len(pattern) + 10) 152 re.WriteString(`(?is)\A`) // case insensitive, . matches any character 153 for i, r := range pattern { 154 if start < 0 { 155 start = i 156 } 157 if literal { 158 literal = false 159 continue 160 } 161 var symbol string 162 switch r { 163 case '_': 164 symbol = `.` 165 case '%': 166 symbol = `.*` 167 case escape: 168 literal = true 169 default: 170 continue 171 } 172 re.WriteString(regexp.QuoteMeta(pattern[start:i])) 173 re.WriteString(symbol) 174 start = -1 175 } 176 if start >= 0 { 177 re.WriteString(regexp.QuoteMeta(pattern[start:])) 178 } 179 re.WriteString(`\z`) 180 return re.String() 181 }