github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/lines/lines.go (about) 1 // Package lines provides a virtual table to read data line-by-line. 2 // 3 // It is particularly useful for line-oriented datasets, 4 // like [ndjson] or [JSON Lines], 5 // when paired with SQLite's JSON support. 6 // 7 // https://github.com/asg017/sqlite-lines 8 // 9 // [ndjson]: https://ndjson.org/ 10 // [JSON Lines]: https://jsonlines.org/ 11 package lines 12 13 import ( 14 "bufio" 15 "bytes" 16 "fmt" 17 "io" 18 "io/fs" 19 20 "github.com/ncruces/go-sqlite3" 21 "github.com/ncruces/go-sqlite3/util/osutil" 22 ) 23 24 // Register registers the lines and lines_read table-valued functions. 25 // The lines function reads from a database blob or text. 26 // The lines_read function reads from a file or an [io.Reader]. 27 // If a filename is specified, [os.Open] is used to open the file. 28 func Register(db *sqlite3.Conn) { 29 RegisterFS(db, osutil.FS{}) 30 } 31 32 // RegisterFS registers the lines and lines_read table-valued functions. 33 // The lines function reads from a database blob or text. 34 // The lines_read function reads from a file or an [io.Reader]. 35 // If a filename is specified, fsys is used to open the file. 36 func RegisterFS(db *sqlite3.Conn, fsys fs.FS) { 37 sqlite3.CreateModule[lines](db, "lines", nil, 38 func(db *sqlite3.Conn, _, _, _ string, _ ...string) (lines, error) { 39 err := db.DeclareVTab(`CREATE TABLE x(line TEXT, data HIDDEN)`) 40 db.VTabConfig(sqlite3.VTAB_INNOCUOUS) 41 return lines{}, err 42 }) 43 sqlite3.CreateModule[lines](db, "lines_read", nil, 44 func(db *sqlite3.Conn, _, _, _ string, _ ...string) (lines, error) { 45 err := db.DeclareVTab(`CREATE TABLE x(line TEXT, data HIDDEN)`) 46 db.VTabConfig(sqlite3.VTAB_DIRECTONLY) 47 return lines{fsys}, err 48 }) 49 } 50 51 type lines struct { 52 fsys fs.FS 53 } 54 55 func (l lines) BestIndex(idx *sqlite3.IndexInfo) error { 56 for i, cst := range idx.Constraint { 57 if cst.Column == 1 && cst.Op == sqlite3.INDEX_CONSTRAINT_EQ && cst.Usable { 58 idx.ConstraintUsage[i] = sqlite3.IndexConstraintUsage{ 59 Omit: true, 60 ArgvIndex: 1, 61 } 62 idx.EstimatedCost = 1e6 63 idx.EstimatedRows = 100 64 return nil 65 } 66 } 67 return sqlite3.CONSTRAINT 68 } 69 70 func (l lines) Open() (sqlite3.VTabCursor, error) { 71 if l.fsys != nil { 72 return &reader{fsys: l.fsys}, nil 73 } else { 74 return &buffer{}, nil 75 } 76 } 77 78 type cursor struct { 79 line []byte 80 rowID int64 81 eof bool 82 } 83 84 func (c *cursor) EOF() bool { 85 return c.eof 86 } 87 88 func (c *cursor) RowID() (int64, error) { 89 return c.rowID, nil 90 } 91 92 func (c *cursor) Column(ctx *sqlite3.Context, n int) error { 93 if n == 0 { 94 ctx.ResultRawText(c.line) 95 } 96 return nil 97 } 98 99 type reader struct { 100 fsys fs.FS 101 reader *bufio.Reader 102 closer io.Closer 103 cursor 104 } 105 106 func (c *reader) Close() (err error) { 107 if c.closer != nil { 108 err = c.closer.Close() 109 c.closer = nil 110 } 111 return err 112 } 113 114 func (c *reader) Filter(idxNum int, idxStr string, arg ...sqlite3.Value) error { 115 if err := c.Close(); err != nil { 116 return err 117 } 118 119 var r io.Reader 120 typ := arg[0].Type() 121 switch typ { 122 case sqlite3.NULL: 123 if p, ok := arg[0].Pointer().(io.Reader); ok { 124 r = p 125 } 126 case sqlite3.TEXT: 127 f, err := c.fsys.Open(arg[0].Text()) 128 if err != nil { 129 return err 130 } 131 r = f 132 } 133 if r == nil { 134 return fmt.Errorf("lines: unsupported argument:%.0w %v", sqlite3.MISMATCH, typ) 135 } 136 137 c.reader = bufio.NewReader(r) 138 c.closer, _ = r.(io.Closer) 139 c.rowID = 0 140 return c.Next() 141 } 142 143 func (c *reader) Next() (err error) { 144 c.line = c.line[:0] 145 for more := true; more; { 146 var line []byte 147 line, more, err = c.reader.ReadLine() 148 c.line = append(c.line, line...) 149 } 150 if err == io.EOF { 151 c.eof = true 152 err = nil 153 } 154 c.rowID++ 155 return err 156 } 157 158 type buffer struct { 159 data []byte 160 cursor 161 } 162 163 func (c *buffer) Filter(idxNum int, idxStr string, arg ...sqlite3.Value) error { 164 typ := arg[0].Type() 165 switch typ { 166 case sqlite3.TEXT: 167 c.data = arg[0].RawText() 168 case sqlite3.BLOB: 169 c.data = arg[0].RawBlob() 170 default: 171 return fmt.Errorf("lines: unsupported argument:%.0w %v", sqlite3.MISMATCH, typ) 172 } 173 174 c.rowID = 0 175 return c.Next() 176 } 177 178 func (c *buffer) Next() error { 179 i := bytes.IndexByte(c.data, '\n') 180 j := i + 1 181 switch { 182 case i < 0: 183 i = len(c.data) 184 j = i 185 case i > 0 && c.data[i-1] == '\r': 186 i-- 187 } 188 c.eof = len(c.data) == 0 189 c.line = c.data[:i] 190 c.data = c.data[j:] 191 c.rowID++ 192 return nil 193 }