// This file belongs to package parser and relies on the standard-library
// packages bytes, strings, unicode and unicode/utf8.

const (
	// Omit BEGIN to allow arbitrary whitespaces between BEGIN and ATOMIC keywords.
	// This can fail if ATOMIC is used as column name because it is not a reserved
	// keyword: https://www.postgresql.org/docs/current/sql-keywords-appendix.html
	BEGIN_ATOMIC = "ATOMIC"
	END_ATOMIC   = "END"
)

// State is one node of the state machine that splits SQL text into statements.
type State interface {
	// Next consumes the rune r and returns the state that handles the following
	// rune. data holds the bytes of the current token up to and including r;
	// states index into it, so it must keep the same origin for a whole token.
	// Return nil to emit token.
	Next(r rune, data []byte) State
}

// isIdentRune reports whether r is a letter, a digit or an underscore, i.e. a
// rune accepted inside a dollar-quote tag and treated as part of a word.
func isIdentRune(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_'
}

// endsWithToken reports whether data ends with token, ignoring case. When the
// token starts with a word rune it must also begin on a word boundary, so that
// "END" is not found at the end of "append".
func endsWithToken(data []byte, token string) bool {
	offset := len(data) - len(token)
	if offset < 0 || !strings.EqualFold(string(data[offset:]), token) {
		return false
	}
	if first, _ := utf8.DecodeRuneInString(token); !isIdentRune(first) {
		return true
	}
	before, size := utf8.DecodeLastRune(data[:offset])
	return size == 0 || !isIdentRune(before)
}

// ReadyState is the initial state: ready to parse the next token.
type ReadyState struct{}

// Next dispatches on r to the state that handles the construct it opens.
// A semicolon returns nil, which ends the current token; any rune that opens
// nothing keeps the machine in the ready state.
func (s *ReadyState) Next(r rune, data []byte) State {
	switch r {
	case '$':
		// Remember where the tag starts so the full $tag$ can be sliced later.
		return &TagState{offset: len(data) - utf8.RuneLen(r)}
	case '\'', '"':
		return &QuoteState{delimiter: r}
	case '-':
		return &CommentState{}
	case '/':
		return &BlockState{}
	case '\\':
		return &EscapeState{}
	case ';':
		// Emit token
		return nil
	case '(':
		return &AtomicState{prev: s, delimiter: []byte{')'}}
	case 'c', 'C':
		// Possibly the last letter of the ATOMIC keyword.
		if endsWithToken(data, BEGIN_ATOMIC) {
			return &AtomicState{prev: s, delimiter: []byte(END_ATOMIC)}
		}
	}
	return s
}

// CommentState is entered after a single dash, which may open a line comment.
type CommentState struct{}

// Next turns a second dash into a comment that runs until the next newline.
// Any other rune means the dash was not a comment, so the rune is handed to a
// fresh ReadyState.
func (s *CommentState) Next(r rune, data []byte) State {
	if r != '-' {
		// Break out of comment state
		return (&ReadyState{}).Next(r, data)
	}
	// No characters are escaped in comments, which is the same as dollar
	return &DollarState{delimiter: []byte{'\n'}}
}

// BlockState is entered after a slash, which may open a block comment.
type BlockState struct {
	// depth is the number of currently open /* markers.
	depth int
	// skip is set right after a two-byte marker was matched, so that the last
	// byte of that marker cannot also start the next one (as in "/*/").
	skip bool
}

// Next tracks nested /* ... */ comments. It returns a ReadyState once the
// outermost comment is closed, and delegates to a ReadyState when the slash
// that created this state did not open a comment at all.
func (s *BlockState) Next(r rune, data []byte) State {
	const (
		opening = "/*"
		closing = "*/"
	)
	if s.skip {
		s.skip = false
		return s
	}
	// Markers are two bytes long; shorter input simply matches nothing.
	var window string
	if n := len(data); n >= 2 {
		window = string(data[n-2:])
	}
	if window == opening {
		s.depth++
		s.skip = true
		return s
	}
	if s.depth == 0 {
		// Break out of block state
		return (&ReadyState{}).Next(r, data)
	}
	if window == closing {
		s.depth--
		if s.depth == 0 {
			return &ReadyState{}
		}
		s.skip = true
	}
	return s
}

// QuoteState is entered after an opening single quote ' or double quote ".
type QuoteState struct {
	// delimiter is the quote rune that opened the literal.
	delimiter rune
	// escape is true when the previous rune was the delimiter, which either
	// closed the literal or is the first half of a doubled (escaped) quote.
	escape bool
}

// Next stays in the literal until the delimiter is seen and then followed by a
// different rune; that rune is handed to a fresh ReadyState.
func (s *QuoteState) Next(r rune, data []byte) State {
	switch {
	case !s.escape:
		s.escape = r == s.delimiter
		return s
	case r == s.delimiter:
		// Preserve escaped quote ''
		s.escape = false
		return s
	}
	// Break out of quote state
	return (&ReadyState{}).Next(r, data)
}

// DollarState is entered inside a dollar quote or line comment: no characters
// are ever escaped, only the closing delimiter matters.
type DollarState struct {
	// delimiter is the byte sequence that ends this state.
	delimiter []byte
	// start is the token length at the time the quote was opened. The closing
	// delimiter must begin at or after it, so it cannot overlap the opening
	// tag. The zero value disables the check.
	start int
}

// Next returns a ReadyState once data ends with the delimiter, otherwise s.
func (s *DollarState) Next(r rune, data []byte) State {
	offset := len(data) - len(s.delimiter)
	if offset < 0 || offset < s.start {
		return s
	}
	if bytes.Equal(data[offset:], s.delimiter) {
		// Break out of dollar state
		return &ReadyState{}
	}
	return s
}

// TagState is entered after a dollar sign, which may open a tag, ie. $tag$
type TagState struct {
	// offset is the index in data of the dollar sign that opened the tag.
	offset int
}

// Next collects tag runes until the closing dollar sign, then switches to a
// DollarState delimited by the complete tag. A rune that is neither part of a
// tag nor a dollar sign is handed to a fresh ReadyState.
func (s *TagState) Next(r rune, data []byte) State {
	switch {
	case r == '$':
		// Make a copy since the data slice may be overwritten
		tag := append([]byte(nil), data[s.offset:]...)
		return &DollarState{delimiter: tag, start: len(data)}
	case isIdentRune(r):
		// Valid tag: https://www.postgresql.org/docs/current/sql-syntax-lexical.html
		return s
	}
	// Break out of tag state
	return (&ReadyState{}).Next(r, data)
}

// EscapeState is entered after a backslash.
type EscapeState struct{}

// Next consumes the rune following the backslash and returns to ReadyState.
func (s *EscapeState) Next(r rune, data []byte) State {
	return &ReadyState{}
}

// AtomicState is entered inside a BEGIN ATOMIC function body or a
// parenthesised group. Semicolons inside it do not end the token.
//
// Known limitation: an END keyword that closes a CASE expression inside the
// body cannot be told apart from the END that terminates the body.
type AtomicState struct {
	// prev is the state of the inner machine that parses the body.
	prev State
	// delimiter ends the body; it is compared case-insensitively.
	delimiter []byte
}

// Next feeds r to the inner machine and returns a ReadyState when the inner
// machine is ready and data ends with the delimiter. It never returns nil.
func (s *AtomicState) Next(r rune, data []byte) State {
	if s.prev == nil {
		s.prev = &ReadyState{}
	}
	// A nested atomic block consumes its own closing delimiter; that rune must
	// not be counted a second time for this block.
	_, nested := s.prev.(*AtomicState)
	next := s.prev.Next(r, data)
	if next == nil {
		// Only the ready state asks to emit, so a separator inside the body
		// leaves the inner machine ready instead of in a stale state.
		next = &ReadyState{}
	}
	s.prev = next
	// If we are in a quoted state, the current delimiter doesn't count.
	if _, ready := s.prev.(*ReadyState); !ready || nested {
		return s
	}
	// Treat delimiter as case insensitive
	if endsWithToken(data, string(s.delimiter)) {
		return &ReadyState{}
	}
	return s
}