github.com/jdgcs/sqlite3@v1.12.1-0.20210908114423-bc5f96e4dd51/testdata/tcl/enc.test (about)

     1  # 2002 May 24
     2  #
     3  # The author disclaims copyright to this source code.  In place of
     4  # a legal notice, here is a blessing:
     5  #
     6  #    May you do good and not evil.
     7  #    May you find forgiveness for yourself and forgive others.
     8  #    May you share freely, never taking more than you give.
     9  #
    10  #***********************************************************************
    11  # This file implements regression tests for SQLite library.  The focus of
    12  # this file is testing the SQLite routines used for converting between the
    13  # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
    14  # UTF-16be).
    15  #
    16  # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
    17  
    18  set testdir [file dirname $argv0]
        # Pull in tester.tcl from the directory that holds this script.  Commands
        # such as do_test, ifcapable and finish_test are not defined in this file;
        # presumably they come from tester.tcl.
    19  source $testdir/tester.tcl
    20  
    21  # Skip this test if the build does not support multiple encodings.
    22  #
    23  ifcapable {!utf16} {
    24    finish_test
    25    return
    26  }
    27  
    28  proc do_bincmp_test {testname got expect} {
          # Decode both binary strings into lists of 8-bit integer values so that
          # they can be handed to do_test as plain Tcl lists.
    29    binary scan $got c* actual_bytes
    30    binary scan $expect c* wanted_bytes
          # The test script just yields the decoded actual bytes; do_test is given
          # the decoded expected bytes as the required result.
    31    do_test $testname [list set dummy $actual_bytes] $wanted_bytes
    32  }
    33  
    34  # $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
    35  # to change the byte-order of the string.
        #
        # Returns the byte-swapped string.  An empty input yields an empty result.
        # Bytes are consumed two at a time, so the input is expected to contain an
        # even number of bytes.
    36  proc swap_byte_order {utf16} {
    37    binary scan $utf16 \c* ints
    38  
          # Start from an empty list.  Without this an empty input never enters
          # the loop and the final command fails on an unset variable.
          set ints2 {}
    39    foreach {a b} $ints {
    40      lappend ints2 $b
    41      lappend ints2 $a
    42    }
    43  
    44    return [binary format \c* $ints2]
    45  }
    46  
    47  #
    48  # Test that the SQLite routines for converting between UTF encodings
    49  # produce the same results as their TCL counterparts.
    50  #
    51  # $testname is the prefix to be used for the test names.
    52  # $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
    53  #
    54  # The test procedure is:
    55  # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
    56  #    SQLite routines produce the same results.
    57  #
    58  # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
    59  #    SQLite routines produce the same results.
    60  #
    61  # 3. Use the SQLite routines to convert the native machine order UTF-16
    62  #    representation back to the original UTF-8. Check that the result
    63  #    matches the original representation.
    64  #
    65  # 4. Add a byte-order mark to each of the UTF-16 representations and
    66  #    check that the SQLite routines can convert them back to UTF-8.  For
    67  #    byte-order mark info, refer to section 3.10 of the unicode standard.
    68  #
    69  # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
    70  #    that SQLite can convert them both to native byte order UTF-16 
    71  #    strings, sans BOM.
    72  #
    73  # Coverage:
    74  #
    75  # sqlite_utf8to16be (step 2)
    76  # sqlite_utf8to16le (step 1)
    77  # sqlite_utf16to8 (steps 3, 4)
    78  # sqlite_utf16to16le (step 5)
    79  # sqlite_utf16to16be (step 5)
    80  #
    81  proc test_conversion {testname str} {
    82   
    83    # Step 1.
    84    set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
    85    set utf16le_tcl [encoding convertto unicode $str]
          # Two zero bytes are appended to the TCL result before comparing.
          # NOTE(review): presumably test_translate returns its output with a
          # UTF-16 NUL terminator - confirm against test_translate, which is
          # defined outside this file.
    86    append utf16le_tcl "\x00\x00"
          # The TCL result is swapped only on hosts that are not little-endian,
          # i.e. this assumes the TCL conversion produced native byte order.
    87    if { $::tcl_platform(byteOrder)!="littleEndian" } {
    88      set utf16le_tcl [swap_byte_order $utf16le_tcl]
    89    }
    90    do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
          # Later steps use the TCL-produced value as the reference UTF-16le string.
    91    set utf16le $utf16le_tcl
    92  
    93    # Step 2.
    94    set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
    95    set utf16be_tcl [encoding convertto unicode $str]
    96    append utf16be_tcl "\x00\x00"
          # Mirror image of step 1: swap on little-endian hosts to get big-endian.
    97    if { $::tcl_platform(byteOrder)=="littleEndian" } {
    98      set utf16be_tcl [swap_byte_order $utf16be_tcl]
    99    }
   100    do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
   101    set utf16be $utf16be_tcl
   102   
   103    # Step 3.
          # Pick whichever reference string matches the host byte order.
   104    if { $::tcl_platform(byteOrder)=="littleEndian" } {
   105      set utf16 $utf16le
   106    } else {
   107      set utf16 $utf16be
   108    }
   109    set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
          # binarize is defined outside this file; its result is the expected value.
   110    do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]
   111  
   112    # Step 4 (little endian).
          # utf16le_bom does not exist yet, so append creates it as the bytes
          # FF FE followed by the little-endian reference string.
   113    append utf16le_bom "\xFF\xFE" $utf16le
          # NOTE(review): this is the only call that passes a fourth argument (1)
          # to test_translate; its meaning is not visible in this file.
   114    set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
   115    do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]
   116  
   117    # Step 4 (big endian).
          # Likewise created here: the bytes FE FF followed by the big-endian string.
   118    append utf16be_bom "\xFE\xFF" $utf16be
   119    set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
   120    do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]
   121  
   122    # Step 5 (little endian to little endian).
          # Unlike the other step 5 cases, the source encoding is named explicitly
          # as UTF16LE here rather than UTF16.
   123    set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
   124    do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le
   125  
   126    # Step 5 (big endian to big endian).
   127    set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
   128    do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be
   129  
   130    # Step 5 (big endian to little endian).
   131    set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
   132    do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le
   133  
   134    # Step 5 (little endian to big endian).
   135    set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
   136    do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
   137  }
   138  
   139  translate_selftest
   140  
        # translate_selftest above is defined outside this file.
        #
        # The cases below feed test_conversion plain ASCII, the empty string,
        # single and mixed non-ASCII characters, and long repeated strings.
        # enc-8 and enc-9 use code points on either side of U+0080 and U+0800,
        # the points at which the UTF-8 encoded length of a character changes.
        # Each call produces tests named <prefix>.1 through <prefix>.5.*.
   141  test_conversion enc-1 "hello world"
   142  test_conversion enc-2 "sqlite"
   143  test_conversion enc-3 ""
   144  test_conversion enc-X "\u0100"
   145  test_conversion enc-4 "\u1234"
   146  test_conversion enc-5 "\u4321abc"
   147  test_conversion enc-6 "\u4321\u1234"
   148  test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
   149  test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
   150  test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
   151  test_conversion enc-10 [string repeat "\uE000" 100]
   152  
   153  proc test_collate {enc zLeft zRight} {
          # The enc argument is accepted but not used; ordering is decided by a
          # plain Tcl string comparison of the two operands.
          set order [string compare $zLeft $zRight]
   154    return $order
   155  }
   156  add_test_collate $::DB 0 0 1
        # add_test_collate is defined outside this file.
        # NOTE(review): presumably it registers the test_collate proc above as the
        # collation named in the CREATE TABLE below - confirm.
   157  do_test enc-11.1 {
   158    execsql {
   159      CREATE TABLE ab(a COLLATE test_collate, b);
   160      INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
   161      INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
   162      CREATE INDEX ab_i ON ab(a, b);
   163    }
   164  } {}
        # C3 88 is the UTF-8 encoding of U+00C8.  The second row stores a much
        # longer byte sequence that starts with C0 and ends with 83 88.  The
        # expected count of 2 means both rows must compare equal to U+00C8.
   165  do_test enc-11.2 {
   166    set cp200 "\u00C8"
   167    execsql {
   168      SELECT count(*) FROM ab WHERE a = $::cp200;
   169    }
   170  } {2}
   171  
   172  #-------------------------------------------------------------------------
   173  reset_db
   174  forcedelete test.db2
   175  forcedelete test.db3
   176  
        # enc-12.0: create the main database as UTF-8, attach test.db3 and create
        # a table in it.
   177  do_execsql_test enc-12.0 {
   178    PRAGMA encoding = 'utf-8';
   179    CREATE TABLE t1(a, b, c);
   180    INSERT INTO t1 VALUES('a', 'b', 'c');
   181    ATTACH 'test.db3' AS aux;
   182    CREATE TABLE aux.t3(x, y, z);
   183    INSERT INTO t3 VALUES('xxx', 'yyy', 'zzz');
   184    PRAGMA encoding;
   185  } {UTF-8}
   186  
        # enc-12.1: build test.db2 as a UTF-16le database through a second
        # connection named db2.
   187  do_test enc-12.1 {
   188    sqlite3 db2 test.db2
   189    db2 eval {
   190      PRAGMA encoding = 'UTF-16le';
   191      CREATE TABLE t2(d, e, f);
   192      INSERT INTO t2 VALUES('d', 'e', 'f');
   193      PRAGMA encoding;
   194    }
   195  } {UTF-16le}
   196  
        # enc-12.2: back up db2 into test.db while the db connection still has
        # test.db3 attached.
        # NOTE(review): presumably test.db is the file behind db and is replaced
        # by the UTF-16le content - confirm against reset_db and the backup method.
   197  do_test enc-12.2 {
   198    db2 backup test.db
   199    db2 close
   200  } {}
   201  
        # enc-12.3: reading t2 through db is expected to fail with the encoding
        # mismatch error below.
   202  do_catchsql_test enc-12.3 {
   203    SELECT * FROM t2;
   204  } {1 {attached databases must use the same text encoding as main database}}
   205  
   206  db close
   207  sqlite3 db test.db3
        # enc-12.4: with test.db3 opened directly, t3 must return the same row
        # before and after the PRAGMA encoding statement.
   208  do_execsql_test enc-12.4 {
   209    SELECT * FROM t3;
   210    PRAGMA encoding = 'UTF-16le';
   211    SELECT * FROM t3;
   212  } {xxx yyy zzz xxx yyy zzz}
   213  
   214  db close
   215  sqlite3 db test.db3
   216  # enc-12.5: test.db3 was created as a UTF-8 database in enc-12.0.  Asking
        # for UTF-16le on a freshly opened connection must leave the reported
        # encoding at UTF-8.
   217  do_execsql_test enc-12.5 {
   218    PRAGMA encoding = 'UTF-16le';
   219    PRAGMA encoding;
   220  } {UTF-8}
   221  
   222  reset_db
        # enc-12.6: fresh connection set to UTF-8, with data only in a TEMP table.
   223  do_execsql_test enc-12.6 {
   224    PRAGMA encoding = 'UTF-8';
   225    CREATE TEMP TABLE t1(a, b, c);
   226    INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
   227  }
        # enc-12.7: back up test.db2 into test.db while db is open.  The temp
        # table must still be readable through db afterwards.
        # NOTE(review): test.db2 is presumably still the UTF-16le database built
        # in enc-12.1; the d e f row expected by enc-12.10 is consistent with that.
   228  do_test enc-12.7 {
   229    sqlite3 db2 test.db2
   230    db2 backup test.db
   231    db2 close
   232    db eval {
   233      SELECT * FROM t1;
   234    }
   235  } {xxx yyy zzz}
        # enc-12.8: the batch is expected to fail with the encoding mismatch error
        # and return no rows.
   236  do_catchsql_test enc-12.8 {
   237    SELECT * FROM t2;
   238    SELECT * FROM t1;
   239  } {1 {attached databases must use the same text encoding as main database}}
   240  
   241  db close
   242  sqlite3 db test.db
        # enc-12.9 and enc-12.10: on a new connection to test.db, a newly created
        # temp table and the backed-up table t2 must both be readable.
   243  do_execsql_test enc-12.9 {
   244    CREATE TEMP TABLE t1(a, b, c);
   245    INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
   246  }
   247  do_execsql_test enc-12.10 {
   248    SELECT * FROM t2;
   249    SELECT * FROM t1;
   250  } {d e f xxx yyy zzz}
   251  
   252  finish_test