github.com/supabase/cli@v1.168.1/internal/utils/parser/token.go

package parser

import (
	"bufio"
	"io"
	"strings"
	"unicode/utf8"

	"github.com/go-errors/errors"
	"github.com/spf13/viper"
)

// Equal to `startBufSize` from `bufio/scan.go`
const startBufSize = 4096

// MaxScannerCapacity overrides the default bufio.Scanner limit of 64 * 1024
// bytes, which is not enough for certain lines containing e.g. geographical
// data. 256K ought to be enough for anybody...
var MaxScannerCapacity = 256 * 1024
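
// A hedged configuration sketch, not part of the original file: Split below
// reads the SCANNER_BUFFER_SIZE key via viper, and the CLI's error hint refers
// to the SUPABASE_SCANNER_BUFFER_SIZE environment variable. Setting the key
// programmatically is shown here for illustration only.
func configureScannerBufferSketch() {
	// Raise the per-token limit to 5MB; viper.GetSizeInBytes parses the suffix.
	viper.Set("SCANNER_BUFFER_SIZE", "5mb")
}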

// State transition table for the tokenizer:
//
//	Ready -> Ready (default)
//	Ready -> Error (on invalid syntax)
//	Ready -> Done (on ;, emit token)
//	Ready -> Done (on EOF, emit token)
//
//	Ready -> Comment (on --)
//	Comment -> Comment (default)
//	Comment -> Ready (on \n)
//
//	Ready -> Block (on /*)
//	Block -> Block (on /* or */, depth +/- 1)
//	Block -> Ready (on */, depth 0)
//
//	Ready -> Quote (on ')
//	Quote -> Quote (on '', default)
//	Quote -> Ready (on ')
//
//	Ready -> Dollar (on $tag$)
//	Dollar -> Dollar (default)
//	Dollar -> Ready (on $tag$)
//
//	Ready -> Escape (on \)
//	Escape -> Ready (on next)
//
// The illustrative sketch after ScanToken below shows these transitions in action.
type tokenizer struct {
	state State
	last  int
}

func (t *tokenizer) ScanToken(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// If we requested more data, resume from last position.
	for width := 1; t.last < len(data); t.last += width {
		r, width := utf8.DecodeRune(data[t.last:])
		end := t.last + width
		t.state = t.state.Next(r, data[:end])
		// Emit token
		if t.state == nil {
			t.last = 0
			t.state = &ReadyState{}
			return end, data[:end], nil
		}
	}
	if !atEOF || len(data) == 0 {
		// Request more data or end the stream
		return 0, nil, nil
	}
	// We're at EOF. If we have a final, non-terminated token, return it.
	return len(data), data, nil
}
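
// The following sketch is illustrative and not part of the original file: it
// wires ScanToken into bufio.Scanner as a SplitFunc, assuming the State
// implementations elsewhere in this package follow the transition table above.
// The function name scanTokenSketch is made up for illustration only.
func scanTokenSketch() ([]string, error) {
	t := tokenizer{state: &ReadyState{}}
	scanner := bufio.NewScanner(strings.NewReader("select 1; select 'a;b'; -- trailing comment"))
	scanner.Split(t.ScanToken)
	var tokens []string
	for scanner.Scan() {
		// Expected tokens: "select 1;", " select 'a;b';", " -- trailing comment".
		// The ; inside the quoted literal does not emit a token because the
		// tokenizer is in the quote state at that point.
		tokens = append(tokens, scanner.Text())
	}
	return tokens, scanner.Err()
}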

// Split uses bufio.Scanner to split a PostgreSQL string into multiple statements.
//
// The core problem is to figure out whether the current ; separator is inside
// a string literal or comment. PostgreSQL has multiple ways of opening these,
// e.g. $$, ', --, and /*. We use an FSM to guarantee these states are entered
// exclusively. If we are not in one of those escape states, the next ; token
// can be parsed as a statement separator.
//
// Each statement is returned as is, without removing comments or whitespace.
// See the sketch after this function for a usage example.
func Split(sql io.Reader, transform ...func(string) string) (stats []string, err error) {
	t := tokenizer{state: &ReadyState{}}
	scanner := bufio.NewScanner(sql)

	// Increase scanner capacity to support very long lines containing e.g. geodata
	buf := make([]byte, startBufSize)
	maxbuf := int(viper.GetSizeInBytes("SCANNER_BUFFER_SIZE"))
	if maxbuf == 0 {
		maxbuf = MaxScannerCapacity
	}
	scanner.Buffer(buf, maxbuf)
	scanner.Split(t.ScanToken)

	var token string
	for scanner.Scan() {
		token = scanner.Text()
		trim := token
		for _, apply := range transform {
			trim = apply(trim)
		}
		if len(trim) > 0 {
			stats = append(stats, trim)
		}
	}
	err = scanner.Err()
	if err != nil {
		err = errors.Errorf("%w\nAfter statement %d: %s", err, len(stats), token)
	}
	if errors.Is(err, bufio.ErrTooLong) {
		err = errors.Errorf("%w\nTry setting SUPABASE_SCANNER_BUFFER_SIZE=5MB (current size is %dKB)", err, maxbuf>>10)
	}
	return stats, err
}
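
// splitUsageSketch is likewise illustrative and not part of the original file:
// it shows the core problem described above, namely that a ; inside a
// dollar-quoted function body must not terminate the statement. The function
// name is made up for illustration only.
func splitUsageSketch() ([]string, error) {
	sql := `create function one() returns int as $$ select 1; $$ language sql; select 2;`
	// Expect two statements: the whole create function (with its $$ ... $$
	// body intact) and " select 2;".
	return Split(strings.NewReader(sql))
}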

func SplitAndTrim(sql io.Reader) (stats []string, err error) {
	return Split(sql, func(token string) string {
		return strings.TrimRight(token, ";")
	}, strings.TrimSpace)
}