modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/tool/fts5txt2db.tcl (about)

     1  ##########################################################################
     2  # 2016 Jan 27
     3  #
     4  # The author disclaims copyright to this source code.  In place of
     5  # a legal notice, here is a blessing:
     6  #
     7  #    May you do good and not evil.
     8  #    May you find forgiveness for yourself and forgive others.
     9  #    May you share freely, never taking more than you give.
    10  #
    11  proc process_cmdline {} { 
    12    cmdline::process ::A $::argv {
    13      {fts5                 "use fts5 (this is the default)"}
    14      {fts4                 "use fts4"}
    15      {colsize   "10 10 10" "list of column sizes"}
    16      {tblname   "t1"       "table name to create"}
    17      {detail    "full"     "Fts5 detail mode to use"}
    18      {repeat    1          "Load each file this many times"}
    19      {prefix    ""         "Fts prefix= option"}
    20      {trans     1          "True to use a transaction"}
    21      database
    22      file...
    23    } {
    24    This script is designed to create fts4/5 tables with more than one column.
    25    The -colsize option should be set to a Tcl list of integer values, one for
    26    each column in the table. Each value is the number of tokens that will be
    27    inserted into the column value for each row. For example, setting the -colsize
    28    option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
    29    tokens per row in each, respectively.
    30    
    31    Each "FILE" argument should be a text file. The contents of these text files
    32    is split on whitespace characters to form a list of tokens. The first N1
    33    tokens are used for the first column of the first row, where N1 is the first
    34    element of the -colsize list. The next N2 are used for the second column of
    35    the first row, and so on. Rows are added to the table until the entire list
    36    of tokens is exhausted.
    37    }
    38  }
    39  
    40  ###########################################################################
    41  ###########################################################################
    42  # Command line options processor. This is generic code that can be copied
    43  # between scripts.
    44  #
    45  namespace eval cmdline {
    46    proc cmdline_error {O E {msg ""}} {
    47      if {$msg != ""} {
    48        puts stderr "Error: $msg"
    49        puts stderr ""
    50      }
    51    
    52      set L [list]
    53      foreach o $O {
    54        if {[llength $o]==1} {
    55          lappend L [string toupper $o]
    56        }
    57      }
    58    
    59      puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    60      puts stderr ""
    61      puts stderr "Switches are:"
    62      foreach o $O {
    63        if {[llength $o]==3} {
    64          foreach {a b c} $o {}
    65          puts stderr [format "    -%-15s %s (default \"%s\")" "$a VAL" $c $b]
    66        } elseif {[llength $o]==2} {
    67          foreach {a b} $o {}
    68          puts stderr [format "    -%-15s %s" $a $b]
    69        }
    70      }
    71      puts stderr ""
    72      puts stderr $E
    73      exit -1
    74    }
    75    
    76    proc process {avar lArgs O E} {
    77      upvar $avar A
    78      set zTrailing ""       ;# True if ... is present in $O
    79      set lPosargs [list]
    80    
    81      # Populate A() with default values. Also, for each switch in the command
    82      # line spec, set an entry in the idx() array as follows:
    83      #
    84      #  {tblname t1 "table name to use"}  
    85      #      -> [set idx(-tblname) {tblname t1 "table name to use"}  
    86      #
    87      # For each position parameter, append its name to $lPosargs. If the ...
    88      # specifier is present, set $zTrailing to the name of the prefix.
    89      #
    90      foreach o $O {
    91        set nm [lindex $o 0]
    92        set nArg [llength $o]
    93        switch -- $nArg {
    94          1 {
    95            if {[string range $nm end-2 end]=="..."} {
    96              set zTrailing [string range $nm 0 end-3]
    97            } else {
    98              lappend lPosargs $nm
    99            }
   100          }
   101          2 {
   102            set A($nm) 0
   103            set idx(-$nm) $o
   104          }
   105          3 {
   106            set A($nm) [lindex $o 1]
   107            set idx(-$nm) $o
   108          }
   109          default {
   110            error "Error in command line specification"
   111          }
   112        }
   113      }
   114    
   115      # Set explicitly specified option values
   116      #
   117      set nArg [llength $lArgs]
   118      for {set i 0} {$i < $nArg} {incr i} {
   119        set opt [lindex $lArgs $i]
   120        if {[string range $opt 0 0]!="-" || $opt=="--"} break
   121        set c [array names idx "${opt}*"]
   122        if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
   123        if {[llength $c]>1}  { cmdline_error $O $E "Ambiguous option: $opt"}
   124    
   125        if {[llength $idx($c)]==3} {
   126          if {$i==[llength $lArgs]-1} {
   127            cmdline_error $O $E "Option requires argument: $c" 
   128          }
   129          incr i
   130          set A([lindex $idx($c) 0]) [lindex $lArgs $i]
   131        } else {
   132          set A([lindex $idx($c) 0]) 1
   133        }
   134      }
   135    
   136      # Deal with position arguments.
   137      #
   138      set nPosarg [llength $lPosargs]
   139      set nRem [expr $nArg - $i]
   140      if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
   141        cmdline_error $O $E
   142      }
   143      for {set j 0} {$j < $nPosarg} {incr j} {
   144        set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
   145      }
   146      if {$zTrailing!=""} {
   147        set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
   148      }
   149    }
   150  } ;# namespace eval cmdline
   151  # End of command line options processor.
   152  ###########################################################################
   153  ###########################################################################
   154  
   155  process_cmdline
   156  
   157  # If -fts4 was specified, use fts4. Otherwise, fts5.
   158  if {$A(fts4)} {
   159    set A(fts) fts4
   160  } else {
   161    set A(fts) fts5
   162  }
   163  
   164  sqlite3 db $A(database)
   165  
   166  # Create the FTS table in the db. Return a list of the table columns.
   167  #
   168  proc create_table {} {
   169    global A
   170    set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
   171  
   172    set nCol [llength $A(colsize)]
   173    set cols [lrange $cols 0 [expr $nCol-1]]
   174  
   175    set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
   176    append sql [join $cols ,]
   177    if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
   178    append sql ", prefix='$A(prefix)');"
   179  
   180    db eval $sql
   181    return $cols
   182  }
   183  
   184  # Return a list of tokens from the named file.
   185  #
   186  proc readfile {file} {
   187    set fd [open $file]
   188    set data [read $fd]
   189    close $fd
   190    split $data
   191  }
   192  
   193  proc repeat {L n} {
   194    set res [list]
   195    for {set i 0} {$i < $n} {incr i} {
   196      set res [concat $res $L]
   197    }
   198    set res
   199  }
   200  
   201  
   202  # Load all the data into a big list of tokens.
   203  #
   204  set tokens [list]
   205  foreach f $A(file) {
   206    set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]]
   207  }
   208  
   209  set N [llength $tokens]
   210  set i 0
   211  set cols [create_table]
   212  set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])"
   213  foreach c [lrange $cols 1 end] {
   214    append sql ", \$R($c)"
   215  }
   216  append sql ")"
   217  
   218  if {$A(trans)} { db eval BEGIN }
   219    while {$i < $N} {
   220      foreach c $cols s $A(colsize) {
   221        set R($c) [lrange $tokens $i [expr $i+$s-1]]
   222        incr i $s
   223      }
   224      db eval $sql
   225    }
   226  if {$A(trans)} { db eval COMMIT }
   227  
   228  
   229