modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts3/fts3_unicode2.c (about)

     1  /*
     2  ** 2012 May 25
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  ******************************************************************************
    12  */
    13  
    14  /*
    15  ** DO NOT EDIT THIS MACHINE GENERATED FILE.
    16  */
    17  
    18  #ifndef SQLITE_DISABLE_FTS3_UNICODE
    19  #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
    20  
    21  #include <assert.h>
    22  
    23  /*
    24  ** Return true if the argument corresponds to a unicode codepoint
    25  ** classified as either a letter or a number. Otherwise false.
    26  **
    27  ** The results are undefined if the value passed to this function
    28  ** is less than zero.
    29  */
    30  int sqlite3FtsUnicodeIsalnum(int c){
    31    /* Each unsigned integer in the following array corresponds to a contiguous
    32    ** range of unicode codepoints that are not either letters or numbers (i.e.
    33    ** codepoints for which this function should return 0).
    34    **
    35    ** The most significant 22 bits in each 32-bit value contain the first 
    36    ** codepoint in the range. The least significant 10 bits are used to store
    37    ** the size of the range (always at least 1). In other words, the value 
    38    ** ((C<<22) + N) represents a range of N codepoints starting with codepoint 
    39    ** C. It is not possible to represent a range larger than 1023 codepoints 
    40    ** using this format.
    41    */
    42    static const unsigned int aEntry[] = {
    43      0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    44      0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    45      0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    46      0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    47      0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
    48      0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
    49      0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
    50      0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
    51      0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
    52      0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
    53      0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
    54      0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
    55      0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
    56      0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
    57      0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
    58      0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    59      0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
    60      0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
    61      0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
    62      0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
    63      0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
    64      0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    65      0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
    66      0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
    67      0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
    68      0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
    69      0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
    70      0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
    71      0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
    72      0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
    73      0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
    74      0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
    75      0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
    76      0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
    77      0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
    78      0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
    79      0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
    80      0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
    81      0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
    82      0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
    83      0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
    84      0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
    85      0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
    86      0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
    87      0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
    88      0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
    89      0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    90      0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
    91      0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    92      0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    93      0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    94      0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    95      0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    96      0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    97      0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    98      0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
    99      0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
   100      0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
   101      0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
   102      0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
   103      0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
   104      0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
   105      0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
   106      0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
   107      0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
   108      0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
   109      0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
   110      0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
   111      0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
   112      0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
   113      0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
   114      0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
   115      0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
   116      0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
   117      0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
   118      0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
   119      0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
   120      0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
   121      0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
   122      0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
   123      0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
   124      0x380400F0,
   125    };
   126    static const unsigned int aAscii[4] = {
   127      0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
   128    };
   129  
   130    if( (unsigned int)c<128 ){
   131      return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
   132    }else if( (unsigned int)c<(1<<22) ){
   133      unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
   134      int iRes = 0;
   135      int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   136      int iLo = 0;
   137      while( iHi>=iLo ){
   138        int iTest = (iHi + iLo) / 2;
   139        if( key >= aEntry[iTest] ){
   140          iRes = iTest;
   141          iLo = iTest+1;
   142        }else{
   143          iHi = iTest-1;
   144        }
   145      }
   146      assert( aEntry[0]<key );
   147      assert( key>=aEntry[iRes] );
   148      return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
   149    }
   150    return 1;
   151  }
   152  
   153  
   154  /*
   155  ** If the argument is a codepoint corresponding to a lowercase letter
   156  ** in the ASCII range with a diacritic added, return the codepoint
   157  ** of the ASCII letter only. For example, if passed 235 - "LATIN
   158  ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
   159  ** E"). The resuls of passing a codepoint that corresponds to an
   160  ** uppercase letter are undefined.
   161  */
   162  static int remove_diacritic(int c){
   163    unsigned short aDia[] = {
   164          0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
   165       2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
   166       2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
   167       2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
   168       3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
   169       3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
   170       4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 
   171       6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
   172      61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 
   173      61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
   174      62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
   175      62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
   176      62924, 63050, 63082, 63274, 63390, 
   177    };
   178    char aChar[] = {
   179      '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',  
   180      'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
   181      's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',  
   182      'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
   183      'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0', 
   184      '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
   185      'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',  
   186      'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
   187      'e',  'i',  'o',  'u',  'y',  
   188    };
   189  
   190    unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
   191    int iRes = 0;
   192    int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
   193    int iLo = 0;
   194    while( iHi>=iLo ){
   195      int iTest = (iHi + iLo) / 2;
   196      if( key >= aDia[iTest] ){
   197        iRes = iTest;
   198        iLo = iTest+1;
   199      }else{
   200        iHi = iTest-1;
   201      }
   202    }
   203    assert( key>=aDia[iRes] );
   204    return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
   205  }
   206  
   207  
   208  /*
   209  ** Return true if the argument interpreted as a unicode codepoint
   210  ** is a diacritical modifier character.
   211  */
   212  int sqlite3FtsUnicodeIsdiacritic(int c){
   213    unsigned int mask0 = 0x08029FDF;
   214    unsigned int mask1 = 0x000361F8;
   215    if( c<768 || c>817 ) return 0;
   216    return (c < 768+32) ?
   217        (mask0 & (1 << (c-768))) :
   218        (mask1 & (1 << (c-768-32)));
   219  }
   220  
   221  
   222  /*
   223  ** Interpret the argument as a unicode codepoint. If the codepoint
   224  ** is an upper case character that has a lower case equivalent,
   225  ** return the codepoint corresponding to the lower case version.
   226  ** Otherwise, return a copy of the argument.
   227  **
   228  ** The results are undefined if the value passed to this function
   229  ** is less than zero.
   230  */
   231  int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
   232    /* Each entry in the following array defines a rule for folding a range
   233    ** of codepoints to lower case. The rule applies to a range of nRange
   234    ** codepoints starting at codepoint iCode.
   235    **
   236    ** If the least significant bit in flags is clear, then the rule applies
   237    ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
   238    ** need to be folded). Or, if it is set, then the rule only applies to
   239    ** every second codepoint in the range, starting with codepoint C.
   240    **
   241    ** The 7 most significant bits in flags are an index into the aiOff[]
   242    ** array. If a specific codepoint C does require folding, then its lower
   243    ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
   244    **
   245    ** The contents of this array are generated by parsing the CaseFolding.txt
   246    ** file distributed as part of the "Unicode Character Database". See
   247    ** http://www.unicode.org for details.
   248    */
   249    static const struct TableEntry {
   250      unsigned short iCode;
   251      unsigned char flags;
   252      unsigned char nRange;
   253    } aEntry[] = {
   254      {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
   255      {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
   256      {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
   257      {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
   258      {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
   259      {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
   260      {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
   261      {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
   262      {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
   263      {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
   264      {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
   265      {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
   266      {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
   267      {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
   268      {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
   269      {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
   270      {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
   271      {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
   272      {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
   273      {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
   274      {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
   275      {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
   276      {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
   277      {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
   278      {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
   279      {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
   280      {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
   281      {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
   282      {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
   283      {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
   284      {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
   285      {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
   286      {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
   287      {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
   288      {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
   289      {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
   290      {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
   291      {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
   292      {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
   293      {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
   294      {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
   295      {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
   296      {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
   297      {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
   298      {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
   299      {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
   300      {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
   301      {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
   302      {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
   303      {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
   304      {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
   305      {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
   306      {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
   307      {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
   308      {65313, 14, 26},       
   309    };
   310    static const unsigned short aiOff[] = {
   311     1,     2,     8,     15,    16,    26,    28,    32,    
   312     37,    38,    40,    48,    63,    64,    69,    71,    
   313     79,    80,    116,   202,   203,   205,   206,   207,   
   314     209,   210,   211,   213,   214,   217,   218,   219,   
   315     775,   7264,  10792, 10795, 23228, 23256, 30204, 54721, 
   316     54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 
   317     57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 
   318     65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
   319     65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
   320     65514, 65521, 65527, 65528, 65529, 
   321    };
   322  
   323    int ret = c;
   324  
   325    assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
   326  
   327    if( c<128 ){
   328      if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
   329    }else if( c<65536 ){
   330      const struct TableEntry *p;
   331      int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   332      int iLo = 0;
   333      int iRes = -1;
   334  
   335      assert( c>aEntry[0].iCode );
   336      while( iHi>=iLo ){
   337        int iTest = (iHi + iLo) / 2;
   338        int cmp = (c - aEntry[iTest].iCode);
   339        if( cmp>=0 ){
   340          iRes = iTest;
   341          iLo = iTest+1;
   342        }else{
   343          iHi = iTest-1;
   344        }
   345      }
   346  
   347      assert( iRes>=0 && c>=aEntry[iRes].iCode );
   348      p = &aEntry[iRes];
   349      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
   350        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
   351        assert( ret>0 );
   352      }
   353  
   354      if( bRemoveDiacritic ) ret = remove_diacritic(ret);
   355    }
   356    
   357    else if( c>=66560 && c<66600 ){
   358      ret = c + 40;
   359    }
   360  
   361    return ret;
   362  }
   363  #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
   364  #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */