github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/unicode/unicode.go (about)

     1  // Package unicode provides an alternative to the SQLite ICU extension.
     2  //
     3  // Like the [ICU extension], it provides Unicode aware:
     4  //   - upper() and lower() functions,
     5  //   - LIKE and REGEXP operators,
     6  //   - collation sequences.
     7  //
     8  // The implementation is not 100% compatible with the [ICU extension]:
     9  //   - upper() and lower() use [strings.ToUpper], [strings.ToLower] and [cases];
    10  //   - the LIKE operator follows [strings.EqualFold] rules;
    11  //   - the REGEXP operator uses Go [regexp/syntax];
    12  //   - collation sequences use [collate].
    13  //
// Expect subtle differences, e.g. in the handling of Turkish case folding.
    15  //
    16  // [ICU extension]: https://sqlite.org/src/dir/ext/icu
    17  package unicode
    18  
    19  import (
    20  	"bytes"
    21  	"regexp"
    22  	"strings"
    23  	"unicode/utf8"
    24  
    25  	"github.com/ncruces/go-sqlite3"
    26  	"github.com/ncruces/go-sqlite3/internal/util"
    27  	"golang.org/x/text/cases"
    28  	"golang.org/x/text/collate"
    29  	"golang.org/x/text/language"
    30  )
    31  
    32  // Register registers Unicode aware functions for a database connection.
    33  func Register(db *sqlite3.Conn) {
    34  	flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS
    35  
    36  	db.CreateFunction("like", 2, flags, like)
    37  	db.CreateFunction("like", 3, flags, like)
    38  	db.CreateFunction("upper", 1, flags, upper)
    39  	db.CreateFunction("upper", 2, flags, upper)
    40  	db.CreateFunction("lower", 1, flags, lower)
    41  	db.CreateFunction("lower", 2, flags, lower)
    42  	db.CreateFunction("regexp", 2, flags, regex)
    43  	db.CreateFunction("icu_load_collation", 2, sqlite3.DIRECTONLY,
    44  		func(ctx sqlite3.Context, arg ...sqlite3.Value) {
    45  			name := arg[1].Text()
    46  			if name == "" {
    47  				return
    48  			}
    49  
    50  			err := RegisterCollation(db, arg[0].Text(), name)
    51  			if err != nil {
    52  				ctx.ResultError(err)
    53  				return
    54  			}
    55  		})
    56  }
    57  
    58  // RegisterCollation registers a Unicode collation sequence for a database connection.
    59  func RegisterCollation(db *sqlite3.Conn, locale, name string) error {
    60  	tag, err := language.Parse(locale)
    61  	if err != nil {
    62  		return err
    63  	}
    64  	return db.CreateCollation(name, collate.New(tag).Compare)
    65  }
    66  
    67  func upper(ctx sqlite3.Context, arg ...sqlite3.Value) {
    68  	if len(arg) == 1 {
    69  		ctx.ResultRawText(bytes.ToUpper(arg[0].RawText()))
    70  		return
    71  	}
    72  	cs, ok := ctx.GetAuxData(1).(cases.Caser)
    73  	if !ok {
    74  		t, err := language.Parse(arg[1].Text())
    75  		if err != nil {
    76  			ctx.ResultError(err)
    77  			return
    78  		}
    79  		c := cases.Upper(t)
    80  		ctx.SetAuxData(1, c)
    81  		cs = c
    82  	}
    83  	ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
    84  }
    85  
    86  func lower(ctx sqlite3.Context, arg ...sqlite3.Value) {
    87  	if len(arg) == 1 {
    88  		ctx.ResultRawText(bytes.ToLower(arg[0].RawText()))
    89  		return
    90  	}
    91  	cs, ok := ctx.GetAuxData(1).(cases.Caser)
    92  	if !ok {
    93  		t, err := language.Parse(arg[1].Text())
    94  		if err != nil {
    95  			ctx.ResultError(err)
    96  			return
    97  		}
    98  		c := cases.Lower(t)
    99  		ctx.SetAuxData(1, c)
   100  		cs = c
   101  	}
   102  	ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
   103  }
   104  
   105  func regex(ctx sqlite3.Context, arg ...sqlite3.Value) {
   106  	re, ok := ctx.GetAuxData(0).(*regexp.Regexp)
   107  	if !ok {
   108  		r, err := regexp.Compile(arg[0].Text())
   109  		if err != nil {
   110  			ctx.ResultError(err)
   111  			return
   112  		}
   113  		re = r
   114  		ctx.SetAuxData(0, re)
   115  	}
   116  	ctx.ResultBool(re.Match(arg[1].RawText()))
   117  }
   118  
   119  func like(ctx sqlite3.Context, arg ...sqlite3.Value) {
   120  	escape := rune(-1)
   121  	if len(arg) == 3 {
   122  		var size int
   123  		b := arg[2].RawText()
   124  		escape, size = utf8.DecodeRune(b)
   125  		if size != len(b) {
   126  			ctx.ResultError(util.ErrorString("ESCAPE expression must be a single character"))
   127  			return
   128  		}
   129  	}
   130  
   131  	type likeData struct {
   132  		*regexp.Regexp
   133  		escape rune
   134  	}
   135  
   136  	re, ok := ctx.GetAuxData(0).(likeData)
   137  	if !ok || re.escape != escape {
   138  		re = likeData{
   139  			regexp.MustCompile(like2regex(arg[0].Text(), escape)),
   140  			escape,
   141  		}
   142  		ctx.SetAuxData(0, re)
   143  	}
   144  	ctx.ResultBool(re.Match(arg[1].RawText()))
   145  }
   146  
   147  func like2regex(pattern string, escape rune) string {
   148  	var re strings.Builder
   149  	start := 0
   150  	literal := false
   151  	re.Grow(len(pattern) + 10)
   152  	re.WriteString(`(?is)\A`) // case insensitive, . matches any character
   153  	for i, r := range pattern {
   154  		if start < 0 {
   155  			start = i
   156  		}
   157  		if literal {
   158  			literal = false
   159  			continue
   160  		}
   161  		var symbol string
   162  		switch r {
   163  		case '_':
   164  			symbol = `.`
   165  		case '%':
   166  			symbol = `.*`
   167  		case escape:
   168  			literal = true
   169  		default:
   170  			continue
   171  		}
   172  		re.WriteString(regexp.QuoteMeta(pattern[start:i]))
   173  		re.WriteString(symbol)
   174  		start = -1
   175  	}
   176  	if start >= 0 {
   177  		re.WriteString(regexp.QuoteMeta(pattern[start:]))
   178  	}
   179  	re.WriteString(`\z`)
   180  	return re.String()
   181  }