github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/lines/lines.go (about)

     1  // Package lines provides a virtual table to read data line-by-line.
     2  //
     3  // It is particularly useful for line-oriented datasets,
     4  // like [ndjson] or [JSON Lines],
     5  // when paired with SQLite's JSON support.
     6  //
     7  // https://github.com/asg017/sqlite-lines
     8  //
     9  // [ndjson]: https://ndjson.org/
    10  // [JSON Lines]: https://jsonlines.org/
    11  package lines
    12  
    13  import (
    14  	"bufio"
    15  	"bytes"
    16  	"fmt"
    17  	"io"
    18  	"io/fs"
    19  
    20  	"github.com/ncruces/go-sqlite3"
    21  	"github.com/ncruces/go-sqlite3/util/osutil"
    22  )
    23  
    24  // Register registers the lines and lines_read table-valued functions.
    25  // The lines function reads from a database blob or text.
    26  // The lines_read function reads from a file or an [io.Reader].
    27  // If a filename is specified, [os.Open] is used to open the file.
    28  func Register(db *sqlite3.Conn) {
    29  	RegisterFS(db, osutil.FS{})
    30  }
    31  
    32  // RegisterFS registers the lines and lines_read table-valued functions.
    33  // The lines function reads from a database blob or text.
    34  // The lines_read function reads from a file or an [io.Reader].
    35  // If a filename is specified, fsys is used to open the file.
    36  func RegisterFS(db *sqlite3.Conn, fsys fs.FS) {
    37  	sqlite3.CreateModule[lines](db, "lines", nil,
    38  		func(db *sqlite3.Conn, _, _, _ string, _ ...string) (lines, error) {
    39  			err := db.DeclareVTab(`CREATE TABLE x(line TEXT, data HIDDEN)`)
    40  			db.VTabConfig(sqlite3.VTAB_INNOCUOUS)
    41  			return lines{}, err
    42  		})
    43  	sqlite3.CreateModule[lines](db, "lines_read", nil,
    44  		func(db *sqlite3.Conn, _, _, _ string, _ ...string) (lines, error) {
    45  			err := db.DeclareVTab(`CREATE TABLE x(line TEXT, data HIDDEN)`)
    46  			db.VTabConfig(sqlite3.VTAB_DIRECTONLY)
    47  			return lines{fsys}, err
    48  		})
    49  }
    50  
// lines is the virtual table type shared by the lines and lines_read modules.
type lines struct {
	// fsys is the file system used to open files by name.
	// Open returns a reader cursor when fsys is non-nil,
	// and an in-memory buffer cursor when it is nil.
	fsys fs.FS
}
    54  
    55  func (l lines) BestIndex(idx *sqlite3.IndexInfo) error {
    56  	for i, cst := range idx.Constraint {
    57  		if cst.Column == 1 && cst.Op == sqlite3.INDEX_CONSTRAINT_EQ && cst.Usable {
    58  			idx.ConstraintUsage[i] = sqlite3.IndexConstraintUsage{
    59  				Omit:      true,
    60  				ArgvIndex: 1,
    61  			}
    62  			idx.EstimatedCost = 1e6
    63  			idx.EstimatedRows = 100
    64  			return nil
    65  		}
    66  	}
    67  	return sqlite3.CONSTRAINT
    68  }
    69  
    70  func (l lines) Open() (sqlite3.VTabCursor, error) {
    71  	if l.fsys != nil {
    72  		return &reader{fsys: l.fsys}, nil
    73  	} else {
    74  		return &buffer{}, nil
    75  	}
    76  }
    77  
// cursor holds the state common to the reader and buffer cursors,
// which embed it to share the EOF, RowID and Column methods.
type cursor struct {
	line  []byte // current line, returned by Column for column 0
	rowID int64  // value returned by RowID; reset by Filter, incremented by Next
	eof   bool   // value returned by EOF
}
    83  
    84  func (c *cursor) EOF() bool {
    85  	return c.eof
    86  }
    87  
    88  func (c *cursor) RowID() (int64, error) {
    89  	return c.rowID, nil
    90  }
    91  
    92  func (c *cursor) Column(ctx *sqlite3.Context, n int) error {
    93  	if n == 0 {
    94  		ctx.ResultRawText(c.line)
    95  	}
    96  	return nil
    97  }
    98  
// reader is the cursor returned by Open when the table has a file system.
// It reads lines from a file or from a caller-supplied io.Reader.
type reader struct {
	fsys   fs.FS         // used by Filter to open files by name
	reader *bufio.Reader // buffered view of the current source; set by Filter
	closer io.Closer     // the current source, if it implements io.Closer; released by Close
	cursor
}
   105  
   106  func (c *reader) Close() (err error) {
   107  	if c.closer != nil {
   108  		err = c.closer.Close()
   109  		c.closer = nil
   110  	}
   111  	return err
   112  }
   113  
   114  func (c *reader) Filter(idxNum int, idxStr string, arg ...sqlite3.Value) error {
   115  	if err := c.Close(); err != nil {
   116  		return err
   117  	}
   118  
   119  	var r io.Reader
   120  	typ := arg[0].Type()
   121  	switch typ {
   122  	case sqlite3.NULL:
   123  		if p, ok := arg[0].Pointer().(io.Reader); ok {
   124  			r = p
   125  		}
   126  	case sqlite3.TEXT:
   127  		f, err := c.fsys.Open(arg[0].Text())
   128  		if err != nil {
   129  			return err
   130  		}
   131  		r = f
   132  	}
   133  	if r == nil {
   134  		return fmt.Errorf("lines: unsupported argument:%.0w %v", sqlite3.MISMATCH, typ)
   135  	}
   136  
   137  	c.reader = bufio.NewReader(r)
   138  	c.closer, _ = r.(io.Closer)
   139  	c.rowID = 0
   140  	return c.Next()
   141  }
   142  
   143  func (c *reader) Next() (err error) {
   144  	c.line = c.line[:0]
   145  	for more := true; more; {
   146  		var line []byte
   147  		line, more, err = c.reader.ReadLine()
   148  		c.line = append(c.line, line...)
   149  	}
   150  	if err == io.EOF {
   151  		c.eof = true
   152  		err = nil
   153  	}
   154  	c.rowID++
   155  	return err
   156  }
   157  
// buffer is the cursor returned by Open when the table has no file system.
// It splits an in-memory text or blob value into lines.
type buffer struct {
	data []byte // input not yet consumed; set by Filter, shortened by Next
	cursor
}
   162  
   163  func (c *buffer) Filter(idxNum int, idxStr string, arg ...sqlite3.Value) error {
   164  	typ := arg[0].Type()
   165  	switch typ {
   166  	case sqlite3.TEXT:
   167  		c.data = arg[0].RawText()
   168  	case sqlite3.BLOB:
   169  		c.data = arg[0].RawBlob()
   170  	default:
   171  		return fmt.Errorf("lines: unsupported argument:%.0w %v", sqlite3.MISMATCH, typ)
   172  	}
   173  
   174  	c.rowID = 0
   175  	return c.Next()
   176  }
   177  
   178  func (c *buffer) Next() error {
   179  	i := bytes.IndexByte(c.data, '\n')
   180  	j := i + 1
   181  	switch {
   182  	case i < 0:
   183  		i = len(c.data)
   184  		j = i
   185  	case i > 0 && c.data[i-1] == '\r':
   186  		i--
   187  	}
   188  	c.eof = len(c.data) == 0
   189  	c.line = c.data[:i]
   190  	c.data = c.data[j:]
   191  	c.rowID++
   192  	return nil
   193  }