modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/icu/icu.c (about) 1 /* 2 ** 2007 May 6 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ 13 ** 14 ** This file implements an integration between the ICU library 15 ** ("International Components for Unicode", an open-source library 16 ** for handling unicode data) and SQLite. The integration uses 17 ** ICU to provide the following to SQLite: 18 ** 19 ** * An implementation of the SQL regexp() function (and hence REGEXP 20 ** operator) using the ICU uregex_XX() APIs. 21 ** 22 ** * Implementations of the SQL scalar upper() and lower() functions 23 ** for case mapping. 24 ** 25 ** * Integration of ICU and SQLite collation sequences. 26 ** 27 ** * An implementation of the LIKE operator that uses ICU to 28 ** provide case-independent matching. 29 */ 30 31 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) 32 33 /* Include ICU headers */ 34 #include <unicode/utypes.h> 35 #include <unicode/uregex.h> 36 #include <unicode/ustring.h> 37 #include <unicode/ucol.h> 38 39 #include <assert.h> 40 41 #ifndef SQLITE_CORE 42 #include "sqlite3ext.h" 43 SQLITE_EXTENSION_INIT1 44 #else 45 #include "sqlite3.h" 46 #endif 47 48 /* 49 ** Maximum length (in bytes) of the pattern in a LIKE or GLOB 50 ** operator. 51 */ 52 #ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH 53 # define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 54 #endif 55 56 /* 57 ** Version of sqlite3_free() that is always a function, never a macro. 58 */ 59 static void xFree(void *p){ 60 sqlite3_free(p); 61 } 62 63 /* 64 ** This lookup table is used to help decode the first byte of 65 ** a multi-byte UTF8 character. It is copied here from SQLite source 66 ** code file utf8.c. 67 */ 68 static const unsigned char icuUtf8Trans1[] = { 69 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 70 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 71 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 72 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 73 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 74 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 75 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 76 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, 77 }; 78 79 #define SQLITE_ICU_READ_UTF8(zIn, c) \ 80 c = *(zIn++); \ 81 if( c>=0xc0 ){ \ 82 c = icuUtf8Trans1[c-0xc0]; \ 83 while( (*zIn & 0xc0)==0x80 ){ \ 84 c = (c<<6) + (0x3f & *(zIn++)); \ 85 } \ 86 } 87 88 #define SQLITE_ICU_SKIP_UTF8(zIn) \ 89 assert( *zIn ); \ 90 if( *(zIn++)>=0xc0 ){ \ 91 while( (*zIn & 0xc0)==0x80 ){zIn++;} \ 92 } 93 94 95 /* 96 ** Compare two UTF-8 strings for equality where the first string is 97 ** a "LIKE" expression. Return true (1) if they are the same and 98 ** false (0) if they are different. 99 */ 100 static int icuLikeCompare( 101 const uint8_t *zPattern, /* LIKE pattern */ 102 const uint8_t *zString, /* The UTF-8 string to compare against */ 103 const UChar32 uEsc /* The escape character */ 104 ){ 105 static const uint32_t MATCH_ONE = (uint32_t)'_'; 106 static const uint32_t MATCH_ALL = (uint32_t)'%'; 107 108 int prevEscape = 0; /* True if the previous character was uEsc */ 109 110 while( 1 ){ 111 112 /* Read (and consume) the next character from the input pattern. */ 113 uint32_t uPattern; 114 SQLITE_ICU_READ_UTF8(zPattern, uPattern); 115 if( uPattern==0 ) break; 116 117 /* There are now 4 possibilities: 118 ** 119 ** 1. uPattern is an unescaped match-all character "%", 120 ** 2. uPattern is an unescaped match-one character "_", 121 ** 3. uPattern is an unescaped escape character, or 122 ** 4. uPattern is to be handled as an ordinary character 123 */ 124 if( !prevEscape && uPattern==MATCH_ALL ){ 125 /* Case 1. */ 126 uint8_t c; 127 128 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a 129 ** MATCH_ALL. For each MATCH_ONE, skip one character in the 130 ** test string. 131 */ 132 while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){ 133 if( c==MATCH_ONE ){ 134 if( *zString==0 ) return 0; 135 SQLITE_ICU_SKIP_UTF8(zString); 136 } 137 zPattern++; 138 } 139 140 if( *zPattern==0 ) return 1; 141 142 while( *zString ){ 143 if( icuLikeCompare(zPattern, zString, uEsc) ){ 144 return 1; 145 } 146 SQLITE_ICU_SKIP_UTF8(zString); 147 } 148 return 0; 149 150 }else if( !prevEscape && uPattern==MATCH_ONE ){ 151 /* Case 2. */ 152 if( *zString==0 ) return 0; 153 SQLITE_ICU_SKIP_UTF8(zString); 154 155 }else if( !prevEscape && uPattern==(uint32_t)uEsc){ 156 /* Case 3. */ 157 prevEscape = 1; 158 159 }else{ 160 /* Case 4. */ 161 uint32_t uString; 162 SQLITE_ICU_READ_UTF8(zString, uString); 163 uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT); 164 uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT); 165 if( uString!=uPattern ){ 166 return 0; 167 } 168 prevEscape = 0; 169 } 170 } 171 172 return *zString==0; 173 } 174 175 /* 176 ** Implementation of the like() SQL function. This function implements 177 ** the build-in LIKE operator. The first argument to the function is the 178 ** pattern and the second argument is the string. So, the SQL statements: 179 ** 180 ** A LIKE B 181 ** 182 ** is implemented as like(B, A). If there is an escape character E, 183 ** 184 ** A LIKE B ESCAPE E 185 ** 186 ** is mapped to like(B, A, E). 187 */ 188 static void icuLikeFunc( 189 sqlite3_context *context, 190 int argc, 191 sqlite3_value **argv 192 ){ 193 const unsigned char *zA = sqlite3_value_text(argv[0]); 194 const unsigned char *zB = sqlite3_value_text(argv[1]); 195 UChar32 uEsc = 0; 196 197 /* Limit the length of the LIKE or GLOB pattern to avoid problems 198 ** of deep recursion and N*N behavior in patternCompare(). 199 */ 200 if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ 201 sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); 202 return; 203 } 204 205 206 if( argc==3 ){ 207 /* The escape character string must consist of a single UTF-8 character. 208 ** Otherwise, return an error. 209 */ 210 int nE= sqlite3_value_bytes(argv[2]); 211 const unsigned char *zE = sqlite3_value_text(argv[2]); 212 int i = 0; 213 if( zE==0 ) return; 214 U8_NEXT(zE, i, nE, uEsc); 215 if( i!=nE){ 216 sqlite3_result_error(context, 217 "ESCAPE expression must be a single character", -1); 218 return; 219 } 220 } 221 222 if( zA && zB ){ 223 sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); 224 } 225 } 226 227 /* 228 ** This function is called when an ICU function called from within 229 ** the implementation of an SQL scalar function returns an error. 230 ** 231 ** The scalar function context passed as the first argument is 232 ** loaded with an error message based on the following two args. 233 */ 234 static void icuFunctionError( 235 sqlite3_context *pCtx, /* SQLite scalar function context */ 236 const char *zName, /* Name of ICU function that failed */ 237 UErrorCode e /* Error code returned by ICU function */ 238 ){ 239 char zBuf[128]; 240 sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); 241 zBuf[127] = '\0'; 242 sqlite3_result_error(pCtx, zBuf, -1); 243 } 244 245 /* 246 ** Function to delete compiled regexp objects. Registered as 247 ** a destructor function with sqlite3_set_auxdata(). 248 */ 249 static void icuRegexpDelete(void *p){ 250 URegularExpression *pExpr = (URegularExpression *)p; 251 uregex_close(pExpr); 252 } 253 254 /* 255 ** Implementation of SQLite REGEXP operator. This scalar function takes 256 ** two arguments. The first is a regular expression pattern to compile 257 ** the second is a string to match against that pattern. If either 258 ** argument is an SQL NULL, then NULL Is returned. Otherwise, the result 259 ** is 1 if the string matches the pattern, or 0 otherwise. 260 ** 261 ** SQLite maps the regexp() function to the regexp() operator such 262 ** that the following two are equivalent: 263 ** 264 ** zString REGEXP zPattern 265 ** regexp(zPattern, zString) 266 ** 267 ** Uses the following ICU regexp APIs: 268 ** 269 ** uregex_open() 270 ** uregex_matches() 271 ** uregex_close() 272 */ 273 static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ 274 UErrorCode status = U_ZERO_ERROR; 275 URegularExpression *pExpr; 276 UBool res; 277 const UChar *zString = sqlite3_value_text16(apArg[1]); 278 279 (void)nArg; /* Unused parameter */ 280 281 /* If the left hand side of the regexp operator is NULL, 282 ** then the result is also NULL. 283 */ 284 if( !zString ){ 285 return; 286 } 287 288 pExpr = sqlite3_get_auxdata(p, 0); 289 if( !pExpr ){ 290 const UChar *zPattern = sqlite3_value_text16(apArg[0]); 291 if( !zPattern ){ 292 return; 293 } 294 pExpr = uregex_open(zPattern, -1, 0, 0, &status); 295 296 if( U_SUCCESS(status) ){ 297 sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); 298 }else{ 299 assert(!pExpr); 300 icuFunctionError(p, "uregex_open", status); 301 return; 302 } 303 } 304 305 /* Configure the text that the regular expression operates on. */ 306 uregex_setText(pExpr, zString, -1, &status); 307 if( !U_SUCCESS(status) ){ 308 icuFunctionError(p, "uregex_setText", status); 309 return; 310 } 311 312 /* Attempt the match */ 313 res = uregex_matches(pExpr, 0, &status); 314 if( !U_SUCCESS(status) ){ 315 icuFunctionError(p, "uregex_matches", status); 316 return; 317 } 318 319 /* Set the text that the regular expression operates on to a NULL 320 ** pointer. This is not really necessary, but it is tidier than 321 ** leaving the regular expression object configured with an invalid 322 ** pointer after this function returns. 323 */ 324 uregex_setText(pExpr, 0, 0, &status); 325 326 /* Return 1 or 0. */ 327 sqlite3_result_int(p, res ? 1 : 0); 328 } 329 330 /* 331 ** Implementations of scalar functions for case mapping - upper() and 332 ** lower(). Function upper() converts its input to upper-case (ABC). 333 ** Function lower() converts to lower-case (abc). 334 ** 335 ** ICU provides two types of case mapping, "general" case mapping and 336 ** "language specific". Refer to ICU documentation for the differences 337 ** between the two. 338 ** 339 ** To utilise "general" case mapping, the upper() or lower() scalar 340 ** functions are invoked with one argument: 341 ** 342 ** upper('ABC') -> 'abc' 343 ** lower('abc') -> 'ABC' 344 ** 345 ** To access ICU "language specific" case mapping, upper() or lower() 346 ** should be invoked with two arguments. The second argument is the name 347 ** of the locale to use. Passing an empty string ("") or SQL NULL value 348 ** as the second argument is the same as invoking the 1 argument version 349 ** of upper() or lower(). 350 ** 351 ** lower('I', 'en_us') -> 'i' 352 ** lower('I', 'tr_tr') -> '\u131' (small dotless i) 353 ** 354 ** http://www.icu-project.org/userguide/posix.html#case_mappings 355 */ 356 static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ 357 const UChar *zInput; /* Pointer to input string */ 358 UChar *zOutput = 0; /* Pointer to output buffer */ 359 int nInput; /* Size of utf-16 input string in bytes */ 360 int nOut; /* Size of output buffer in bytes */ 361 int cnt; 362 int bToUpper; /* True for toupper(), false for tolower() */ 363 UErrorCode status; 364 const char *zLocale = 0; 365 366 assert(nArg==1 || nArg==2); 367 bToUpper = (sqlite3_user_data(p)!=0); 368 if( nArg==2 ){ 369 zLocale = (const char *)sqlite3_value_text(apArg[1]); 370 } 371 372 zInput = sqlite3_value_text16(apArg[0]); 373 if( !zInput ){ 374 return; 375 } 376 nOut = nInput = sqlite3_value_bytes16(apArg[0]); 377 if( nOut==0 ){ 378 sqlite3_result_text16(p, "", 0, SQLITE_STATIC); 379 return; 380 } 381 382 for(cnt=0; cnt<2; cnt++){ 383 UChar *zNew = sqlite3_realloc(zOutput, nOut); 384 if( zNew==0 ){ 385 sqlite3_free(zOutput); 386 sqlite3_result_error_nomem(p); 387 return; 388 } 389 zOutput = zNew; 390 status = U_ZERO_ERROR; 391 if( bToUpper ){ 392 nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status); 393 }else{ 394 nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status); 395 } 396 397 if( U_SUCCESS(status) ){ 398 sqlite3_result_text16(p, zOutput, nOut, xFree); 399 }else if( status==U_BUFFER_OVERFLOW_ERROR ){ 400 assert( cnt==0 ); 401 continue; 402 }else{ 403 icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status); 404 } 405 return; 406 } 407 assert( 0 ); /* Unreachable */ 408 } 409 410 /* 411 ** Collation sequence destructor function. The pCtx argument points to 412 ** a UCollator structure previously allocated using ucol_open(). 413 */ 414 static void icuCollationDel(void *pCtx){ 415 UCollator *p = (UCollator *)pCtx; 416 ucol_close(p); 417 } 418 419 /* 420 ** Collation sequence comparison function. The pCtx argument points to 421 ** a UCollator structure previously allocated using ucol_open(). 422 */ 423 static int icuCollationColl( 424 void *pCtx, 425 int nLeft, 426 const void *zLeft, 427 int nRight, 428 const void *zRight 429 ){ 430 UCollationResult res; 431 UCollator *p = (UCollator *)pCtx; 432 res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); 433 switch( res ){ 434 case UCOL_LESS: return -1; 435 case UCOL_GREATER: return +1; 436 case UCOL_EQUAL: return 0; 437 } 438 assert(!"Unexpected return value from ucol_strcoll()"); 439 return 0; 440 } 441 442 /* 443 ** Implementation of the scalar function icu_load_collation(). 444 ** 445 ** This scalar function is used to add ICU collation based collation 446 ** types to an SQLite database connection. It is intended to be called 447 ** as follows: 448 ** 449 ** SELECT icu_load_collation(<locale>, <collation-name>); 450 ** 451 ** Where <locale> is a string containing an ICU locale identifier (i.e. 452 ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the 453 ** collation sequence to create. 454 */ 455 static void icuLoadCollation( 456 sqlite3_context *p, 457 int nArg, 458 sqlite3_value **apArg 459 ){ 460 sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); 461 UErrorCode status = U_ZERO_ERROR; 462 const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ 463 const char *zName; /* SQL Collation sequence name (eg. "japanese") */ 464 UCollator *pUCollator; /* ICU library collation object */ 465 int rc; /* Return code from sqlite3_create_collation_x() */ 466 467 assert(nArg==2); 468 (void)nArg; /* Unused parameter */ 469 zLocale = (const char *)sqlite3_value_text(apArg[0]); 470 zName = (const char *)sqlite3_value_text(apArg[1]); 471 472 if( !zLocale || !zName ){ 473 return; 474 } 475 476 pUCollator = ucol_open(zLocale, &status); 477 if( !U_SUCCESS(status) ){ 478 icuFunctionError(p, "ucol_open", status); 479 return; 480 } 481 assert(p); 482 483 rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, 484 icuCollationColl, icuCollationDel 485 ); 486 if( rc!=SQLITE_OK ){ 487 ucol_close(pUCollator); 488 sqlite3_result_error(p, "Error registering collation function", -1); 489 } 490 } 491 492 /* 493 ** Register the ICU extension functions with database db. 494 */ 495 int sqlite3IcuInit(sqlite3 *db){ 496 static const struct IcuScalar { 497 const char *zName; /* Function name */ 498 unsigned char nArg; /* Number of arguments */ 499 unsigned short enc; /* Optimal text encoding */ 500 unsigned char iContext; /* sqlite3_user_data() context */ 501 void (*xFunc)(sqlite3_context*,int,sqlite3_value**); 502 } scalars[] = { 503 {"icu_load_collation", 2, SQLITE_UTF8, 1, icuLoadCollation}, 504 {"regexp", 2, SQLITE_ANY|SQLITE_DETERMINISTIC, 0, icuRegexpFunc}, 505 {"lower", 1, SQLITE_UTF16|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, 506 {"lower", 2, SQLITE_UTF16|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, 507 {"upper", 1, SQLITE_UTF16|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, 508 {"upper", 2, SQLITE_UTF16|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, 509 {"lower", 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, 510 {"lower", 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, 511 {"upper", 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, 512 {"upper", 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, 513 {"like", 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuLikeFunc}, 514 {"like", 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuLikeFunc}, 515 }; 516 int rc = SQLITE_OK; 517 int i; 518 519 520 for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ 521 const struct IcuScalar *p = &scalars[i]; 522 rc = sqlite3_create_function( 523 db, p->zName, p->nArg, p->enc, 524 p->iContext ? (void*)db : (void*)0, 525 p->xFunc, 0, 0 526 ); 527 } 528 529 return rc; 530 } 531 532 #if !SQLITE_CORE 533 #ifdef _WIN32 534 __declspec(dllexport) 535 #endif 536 int sqlite3_icu_init( 537 sqlite3 *db, 538 char **pzErrMsg, 539 const sqlite3_api_routines *pApi 540 ){ 541 SQLITE_EXTENSION_INIT2(pApi) 542 return sqlite3IcuInit(db); 543 } 544 #endif 545 546 #endif