gitlab.com/CoiaPrant/sqlite3@v1.19.1/testdata/tcl/enc2.test (about)

     1  # 2002 May 24
     2  #
     3  # The author disclaims copyright to this source code.  In place of
     4  # a legal notice, here is a blessing:
     5  #
     6  #    May you do good and not evil.
     7  #    May you find forgiveness for yourself and forgive others.
     8  #    May you share freely, never taking more than you give.
     9  #
    10  #***********************************************************************
    11  # This file implements regression tests for SQLite library.  The focus of
    12  # this file is testing the SQLite routines used for converting between the
    13  # various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
    14  # UTF-16be).
    15  #
    16  
    17  set testdir [file dirname $argv0]
    18  source $testdir/tester.tcl
    19  
    20  # If UTF16 support is disabled, ignore the tests in this file
    21  #
    22  ifcapable {!utf16} {
    23    finish_test
    24    return
    25  }
    26  
    27  # The rough organisation of tests in this file is:
    28  #
    29  # enc2.1.*: Simple tests with a UTF-8 db.
    30  # enc2.2.*: Simple tests with a UTF-16LE db.
    31  # enc2.3.*: Simple tests with a UTF-16BE db.
    32  # enc2.4.*: Test that attached databases must have the same text encoding
    33  #           as the main database.
    34  # enc2.5.*: Test the behavior of the library when a collation sequence is
    35  #           not available for the most desirable text encoding.
    36  # enc2.6.*: Similar test for user functions.
    37  # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
    38  #           wrong text encoding for the database.
    39  # enc2.8.*: Test sqlite3_complete16()
    40  #
    41  
    42  db close
    43  
    44  # Return the UTF-8 representation of the supplied UTF-16 string $str. 
    45  proc utf8 {str} {
    46    # If $str ends in two 0x00 0x00 bytes, knock these off before
    47    # converting to UTF-8 using TCL.
    48    binary scan $str \c* vals
    49    if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
    50      set str [binary format \c* [lrange $vals 0 end-2]]
    51    }
    52  
    53    set r [encoding convertfrom unicode $str]
    54    return $r
    55  }
    56  
    57  #
    58  # This proc contains all the tests in this file. It is run
    59  # three times. Each time the file 'test.db' contains a database
    60  # with the following contents:
    61  set dbcontents {
    62    CREATE TABLE t1(a PRIMARY KEY, b, c);
    63    INSERT INTO t1 VALUES('one', 'I', 1);
    64  }
    65  # This proc tests that we can open and manipulate the test.db 
    66  # database, and that it is possible to retreive values in
    67  # various text encodings.
    68  #
    69  proc run_test_script {t enc} {
    70  
    71  # Open the database and pull out a (the) row.
    72  do_test $t.1 {
    73    sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
    74    execsql {SELECT * FROM t1}
    75  } {one I 1}
    76  
    77  # Insert some data
    78  do_test $t.2 {
    79    execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
    80    execsql {SELECT * FROM t1}
    81  } {one I 1 two II 2}
    82  
    83  # Insert some data 
    84  do_test $t.3 {
    85    execsql {
    86      INSERT INTO t1 VALUES('three','III',3);
    87      INSERT INTO t1 VALUES('four','IV',4);
    88      INSERT INTO t1 VALUES('five','V',5);
    89    }
    90    execsql {SELECT * FROM t1}
    91  } {one I 1 two II 2 three III 3 four IV 4 five V 5}
    92  
    93  # Use the index
    94  do_test $t.4 {
    95    execsql {
    96      SELECT * FROM t1 WHERE a = 'one';
    97    }
    98  } {one I 1}
    99  do_test $t.5 {
   100    execsql {
   101      SELECT * FROM t1 WHERE a = 'four';
   102    }
   103  } {four IV 4}
   104  ifcapable subquery {
   105    do_test $t.6 {
   106      execsql {
   107        SELECT * FROM t1 WHERE a IN ('one', 'two');
   108      }
   109    } {one I 1 two II 2}
   110  }
   111  
   112  # Now check that we can retrieve data in both UTF-16 and UTF-8
   113  do_test $t.7 {
   114    set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
   115    sqlite3_step $STMT
   116    sqlite3_column_text $STMT 0
   117  } {four}
   118  
   119  do_test $t.8 {
   120    sqlite3_step $STMT
   121    utf8 [sqlite3_column_text16 $STMT 0]
   122  } {five}
   123  
   124  do_test $t.9 {
   125    sqlite3_finalize $STMT
   126  } SQLITE_OK
   127  
   128  ifcapable vacuum {
   129    execsql VACUUM
   130  }
   131  
   132  do_test $t.10 {
   133    db eval {PRAGMA encoding}
   134  } $enc
   135  
   136  }
   137  
   138  # The three unicode encodings understood by SQLite.
   139  set encodings [list UTF-8 UTF-16le UTF-16be]
   140  
   141  set sqlite_os_trace 0
   142  set i 1
   143  foreach enc $encodings {
   144    forcedelete test.db
   145    sqlite3 db test.db
   146    db eval "PRAGMA encoding = \"$enc\""
   147    execsql $dbcontents
   148    do_test enc2-$i.0.1 {
   149      db eval {PRAGMA encoding}
   150    } $enc
   151    do_test enc2-$i.0.2 {
   152      db eval {PRAGMA encoding=UTF8}
   153      db eval {PRAGMA encoding}
   154    } $enc
   155    do_test enc2-$i.0.3 {
   156      db eval {PRAGMA encoding=UTF16le}
   157      db eval {PRAGMA encoding}
   158    } $enc
   159    do_test enc2-$i.0.4 {
   160      db eval {PRAGMA encoding=UTF16be}
   161      db eval {PRAGMA encoding}
   162    } $enc
   163  
   164    db close
   165    run_test_script enc2-$i $enc
   166    db close
   167    incr i
   168  }
   169  
   170  # Test that it is an error to try to attach a database with a different
   171  # encoding to the main database.
   172  ifcapable attach {
   173    do_test enc2-4.1 {
   174      forcedelete test.db
   175      sqlite3 db test.db
   176      db eval "PRAGMA encoding = 'UTF-8'"
   177      db eval "CREATE TABLE abc(a, b, c);"
   178    } {}
   179    do_test enc2-4.2 {
   180      forcedelete test2.db
   181      sqlite3 db2 test2.db
   182      db2 eval "PRAGMA encoding = 'UTF-16'"
   183      db2 eval "CREATE TABLE abc(a, b, c);"
   184    } {}
   185    do_test enc2-4.3 {
   186      catchsql {
   187        ATTACH 'test2.db' as aux;
   188      }
   189    } {1 {attached databases must use the same text encoding as main database}}
   190    db2 close
   191    db close
   192  }
   193  
   194  # The following tests - enc2-5.* - test that SQLite selects the correct
   195  # collation sequence when more than one is available.
   196  
   197  set ::values [list one two three four five]
   198  set ::test_collate_enc INVALID
   199  proc test_collate {enc lhs rhs} {
   200    set ::test_collate_enc $enc
   201    set l [lsearch -exact $::values $lhs]
   202    set r [lsearch -exact $::values $rhs]
   203    set res [expr $l - $r]
   204    # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
   205    return $res
   206  }
   207  
   208  forcedelete test.db
   209  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   210  do_test enc2-5.0 {
   211    execsql {
   212      CREATE TABLE t5(a);
   213      INSERT INTO t5 VALUES('one');
   214      INSERT INTO t5 VALUES('two');
   215      INSERT INTO t5 VALUES('five');
   216      INSERT INTO t5 VALUES('three');
   217      INSERT INTO t5 VALUES('four');
   218    }
   219  } {}
   220  do_test enc2-5.1 {
   221    add_test_collate $DB 1 1 1
   222    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
   223    lappend res $::test_collate_enc
   224  } {one two three four five UTF-8}
   225  do_test enc2-5.2 {
   226    add_test_collate $DB 0 1 0
   227    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   228    lappend res $::test_collate_enc
   229  } {one two three four five UTF-16LE}
   230  do_test enc2-5.3 {
   231    add_test_collate $DB 0 0 1
   232    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   233    lappend res $::test_collate_enc
   234  } {one two three four five UTF-16BE}
   235  
   236  db close
   237  forcedelete test.db
   238  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   239  execsql {pragma encoding = 'UTF-16LE'}
   240  do_test enc2-5.4 {
   241    execsql {
   242      CREATE TABLE t5(a);
   243      INSERT INTO t5 VALUES('one');
   244      INSERT INTO t5 VALUES('two');
   245      INSERT INTO t5 VALUES('five');
   246      INSERT INTO t5 VALUES('three');
   247      INSERT INTO t5 VALUES('four');
   248    }
   249  } {}
   250  do_test enc2-5.5 {
   251    add_test_collate $DB 1 1 1
   252    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   253    lappend res $::test_collate_enc
   254  } {one two three four five UTF-16LE}
   255  do_test enc2-5.6 {
   256    add_test_collate $DB 1 0 1
   257    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   258    lappend res $::test_collate_enc
   259  } {one two three four five UTF-16BE}
   260  do_test enc2-5.7 {
   261    add_test_collate $DB 1 0 0
   262    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   263    lappend res $::test_collate_enc
   264  } {one two three four five UTF-8}
   265  
   266  db close
   267  forcedelete test.db
   268  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   269  execsql {pragma encoding = 'UTF-16BE'}
   270  do_test enc2-5.8 {
   271    execsql {
   272      CREATE TABLE t5(a);
   273      INSERT INTO t5 VALUES('one');
   274      INSERT INTO t5 VALUES('two');
   275      INSERT INTO t5 VALUES('five');
   276      INSERT INTO t5 VALUES('three');
   277      INSERT INTO t5 VALUES('four');
   278    }
   279  } {}
   280  do_test enc2-5.9 {
   281    add_test_collate $DB 1 1 1
   282    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   283    lappend res $::test_collate_enc
   284  } {one two three four five UTF-16BE}
   285  do_test enc2-5.10 {
   286    add_test_collate $DB 1 1 0
   287    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   288    lappend res $::test_collate_enc
   289  } {one two three four five UTF-16LE}
   290  do_test enc2-5.11 {
   291    add_test_collate $DB 1 0 0
   292    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
   293    lappend res $::test_collate_enc
   294  } {one two three four five UTF-8}
   295  
   296  # Also test that a UTF-16 collation factory works.
   297  do_test enc2-5-12 {
   298    add_test_collate $DB 0 0 0
   299    catchsql {
   300      SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
   301    }
   302  } {1 {no such collation sequence: test_collate}}
   303  do_test enc2-5.13 {
   304    add_test_collate_needed $DB 
   305    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
   306    lappend res $::test_collate_enc
   307  } {one two three four five UTF-16BE}
   308  do_test enc2-5.14 {
   309    set ::sqlite_last_needed_collation
   310  } test_collate
   311  
   312  db close
   313  forcedelete test.db
   314  
   315  do_test enc2-5.15 {
   316    sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
   317    add_test_collate_needed $::DB
   318    set ::sqlite_last_needed_collation
   319  } {}
   320  do_test enc2-5.16 {
   321    execsql {CREATE TABLE t1(a varchar collate test_collate);}
   322  } {}
   323  do_test enc2-5.17 {
   324    set ::sqlite_last_needed_collation
   325  } {test_collate}
   326  
   327  # The following tests - enc2-6.* - test that SQLite selects the correct
   328  # user function when more than one is available.
   329  
   330  proc test_function {enc arg} {
   331    return "$enc $arg"
   332  }
   333  
   334  db close
   335  forcedelete test.db
   336  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   337  execsql {pragma encoding = 'UTF-8'}
   338  do_test enc2-6.0 {
   339    execsql {
   340      CREATE TABLE t5(a);
   341      INSERT INTO t5 VALUES('one');
   342    }
   343  } {}
   344  do_test enc2-6.1 {
   345    add_test_function $DB 1 1 1
   346    execsql {
   347      SELECT test_function('sqlite')
   348    }
   349  } {{UTF-8 sqlite}}
   350  db close
   351  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   352  do_test enc2-6.2 {
   353    add_test_function $DB 0 1 0
   354    execsql {
   355      SELECT test_function('sqlite')
   356    }
   357  } {{UTF-16LE sqlite}}
   358  db close
   359  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   360  do_test enc2-6.3 {
   361    add_test_function $DB 0 0 1
   362    execsql {
   363      SELECT test_function('sqlite')
   364    }
   365  } {{UTF-16BE sqlite}}
   366  
   367  db close
   368  forcedelete test.db
   369  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   370  execsql {pragma encoding = 'UTF-16LE'}
   371  do_test enc2-6.3 {
   372    execsql {
   373      CREATE TABLE t5(a);
   374      INSERT INTO t5 VALUES('sqlite');
   375    }
   376  } {}
   377  do_test enc2-6.4 {
   378    add_test_function $DB 1 1 1
   379    execsql {
   380      SELECT test_function('sqlite')
   381    }
   382  } {{UTF-16LE sqlite}}
   383  db close
   384  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   385  do_test enc2-6.5 {
   386    add_test_function $DB 0 1 0
   387    execsql {
   388      SELECT test_function('sqlite')
   389    }
   390  } {{UTF-16LE sqlite}}
   391  db close
   392  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   393  do_test enc2-6.6 {
   394    add_test_function $DB 0 0 1
   395    execsql {
   396      SELECT test_function('sqlite')
   397    }
   398  } {{UTF-16BE sqlite}}
   399  
   400  db close
   401  forcedelete test.db
   402  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   403  execsql {pragma encoding = 'UTF-16BE'}
   404  do_test enc2-6.7 {
   405    execsql {
   406      CREATE TABLE t5(a);
   407      INSERT INTO t5 VALUES('sqlite');
   408    }
   409  } {}
   410  do_test enc2-6.8 {
   411    add_test_function $DB 1 1 1
   412    execsql {
   413      SELECT test_function('sqlite')
   414    }
   415  } {{UTF-16BE sqlite}}
   416  db close
   417  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   418  do_test enc2-6.9 {
   419    add_test_function $DB 0 1 0
   420    execsql {
   421      SELECT test_function('sqlite')
   422    }
   423  } {{UTF-16LE sqlite}}
   424  db close
   425  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
   426  do_test enc2-6.10 {
   427    add_test_function $DB 0 0 1
   428    execsql {
   429      SELECT test_function('sqlite')
   430    }
   431  } {{UTF-16BE sqlite}}
   432  
   433  
   434  db close
   435  forcedelete test.db
   436  
   437  # The following tests - enc2-7.* - function as follows:
   438  #
   439  # 1: Open an empty database file assuming UTF-16 encoding.
   440  # 2: Open the same database with a different handle assuming UTF-8. Create
   441  #    a table using this handle.
   442  # 3: Read the sqlite_master table from the first handle. 
   443  # 4: Ensure the first handle recognises the database encoding is UTF-8.
   444  #
   445  do_test enc2-7.1 {
   446    sqlite3 db test.db
   447    execsql {
   448      PRAGMA encoding = 'UTF-16';
   449      SELECT * FROM sqlite_master;
   450    }
   451  } {}
   452  do_test enc2-7.2 {
   453    set enc [execsql {
   454      PRAGMA encoding;
   455    }]
   456    string range $enc 0 end-2 ;# Chop off the "le" or "be"
   457  } {UTF-16}
   458  do_test enc2-7.3 {
   459    sqlite3 db2 test.db
   460    execsql {
   461      PRAGMA encoding = 'UTF-8';
   462      CREATE TABLE abc(a, b, c);
   463    } db2
   464  } {}
   465  do_test enc2-7.4 {
   466    execsql {
   467      SELECT * FROM sqlite_master;
   468    }
   469  } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
   470  do_test enc2-7.5 {
   471    execsql {
   472      PRAGMA encoding;
   473    }
   474  } {UTF-8}
   475  
   476  db close
   477  db2 close
   478  
   479  proc utf16 {utf8} {
   480    set utf16 [encoding convertto unicode $utf8]
   481    append utf16 "\x00\x00"
   482    return $utf16
   483  }
   484  ifcapable {complete} {
   485    do_test enc2-8.1 {
   486      sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
   487    } {1}
   488    do_test enc2-8.2 {
   489      sqlite3_complete16 [utf16 "SELECT * FROM"]
   490    } {0}
   491  }
   492  
   493  # Test that the encoding of an empty database may still be set after the
   494  # (empty) schema has been initialized.
   495  forcedelete test.db
   496  do_test enc2-9.1 {
   497    sqlite3 db test.db
   498    execsql {
   499      PRAGMA encoding = 'UTF-8';
   500      PRAGMA encoding;
   501    }
   502  } {UTF-8}
   503  do_test enc2-9.2 {
   504    sqlite3 db test.db
   505    execsql {
   506      PRAGMA encoding = 'UTF-16le';
   507      PRAGMA encoding;
   508    }
   509  } {UTF-16le}
   510  do_test enc2-9.3 {
   511    sqlite3 db test.db
   512    execsql {
   513      SELECT * FROM sqlite_master;
   514      PRAGMA encoding = 'UTF-8';
   515      PRAGMA encoding;
   516    }
   517  } {UTF-8}
   518  do_test enc2-9.4 {
   519    sqlite3 db test.db
   520    execsql {
   521      PRAGMA encoding = 'UTF-16le';
   522      CREATE TABLE abc(a, b, c);
   523      PRAGMA encoding;
   524    }
   525  } {UTF-16le}
   526  do_test enc2-9.5 {
   527    sqlite3 db test.db
   528    execsql {
   529      PRAGMA encoding = 'UTF-8';
   530      PRAGMA encoding;
   531    }
   532  } {UTF-16le}
   533  
   534  # Ticket #1987.
   535  # Disallow encoding changes once the encoding has been set.
   536  #
   537  do_test enc2-10.1 {
   538    db close
   539    forcedelete test.db test.db-journal
   540    sqlite3 db test.db
   541    db eval {
   542      PRAGMA encoding=UTF16;
   543      CREATE TABLE t1(a);
   544      PRAGMA encoding=UTF8;
   545      CREATE TABLE t2(b);
   546    }
   547    db close
   548    sqlite3 db test.db
   549    db eval {
   550      SELECT name FROM sqlite_master
   551    }
   552  } {t1 t2}
   553  
   554  # 2020-01-15 ticket a08879a4a476eea9
   555  # Do not allow a database connection encoding change unless *all*
   556  # attached databases are empty.
   557  #
   558  reset_db
   559  do_execsql_test enc2-11.10 {
   560    PRAGMA encoding=UTF8;
   561    CREATE TEMP TABLE t1(x);
   562    INSERT INTO t1 VALUES('this is a test');
   563    PRAGMA encoding=UTF16;
   564    SELECT * FROM t1;
   565  } {{this is a test}}
   566  
   567  finish_test