/*
This lexer is based on the text/template lexer, which has the same recursive
function construction.

Parser directives are handled by the parser. Trailing whitespace is not passed
to the parser, which may or may not be significant. This is not a
general-purpose dockerfile lexer, it's only intended to handle just enough of
valid dockerfiles to extract the labels.
*/

// lexer turns a dockerfile into a stream of items, one per comment or logical
// (continuation-joined) instruction line.
type lexer struct {
	rd      *bufio.Reader   // input; pointed at a new source by Reset
	state   lexFn           // next state function to run; nil once lexing has stopped
	sb      strings.Builder // text of the line currently being collected
	items   chan item       // one-slot stash between the state functions and Next
	pos     int             // input bytes accounted for so far; copied onto emitted items
	escchar rune            // escape metacharacter, see Escape
}

// newLexer returns a lexer in its initial state. Reset must be called to give
// it an input (and an items channel) before Next is used.
func newLexer() *lexer {
	return &lexer{
		state: start,
		rd:    bufio.NewReader(nil),
	}
}

// Reset resets the lexer to read from r.
//
// The position is zeroed, the escape metacharacter is restored to a backslash
// and any item still stashed from a previous run is discarded.
func (l *lexer) Reset(r io.Reader) {
	// The strings.Builder is handled by the 'start' state.
	l.rd.Reset(r)
	l.items = make(chan item, 1)
	l.pos = 0
	l.escchar = '\\'
	l.state = start
}

// Escape changes the escape metacharacter (used for line continuations).
func (l *lexer) Escape(r rune) {
	l.escchar = r
}

// item is a single lexed unit handed to the parser.
type item struct {
	val  string   // collected text; the message for itemError, empty for itemEOF
	kind itemKind // what val represents
	pos  int      // value of the lexer's position counter when the item was emitted
}

// itemKind identifies the kind of an item.
type itemKind int

//go:generate -command stringer go run golang.org/x/tools/cmd/stringer
//go:generate stringer -type itemKind

const (
	itemError       itemKind = iota // lexing failed; val holds the error text
	itemComment                     // comment line; val is the text after the marker
	itemInstruction                 // any instruction not listed below; val is the whole line
	itemLabel                       // LABEL instruction; val is the trimmed remainder of the line
	itemArg                         // ARG instruction; val is the trimmed remainder of the line
	itemEnv                         // ENV instruction; val is the trimmed remainder of the line
	itemEOF                         // end of input
)

// eof is the sentinel rune returned by peek at end of input.
const eof = -1

// lexFn is a lexer state: it consumes some input, may stash one item, and
// returns the next state. Returning nil stops the lexer.
type lexFn func(*lexer) lexFn

// Next yields the next item.
//
// Once the input is exhausted, or after an itemError has been returned, every
// further call returns an itemEOF item.
func (l *lexer) Next() item {
	// The text/template lexer this is based on uses a goroutine, but that's not
	// workable because we need to be able to swap the escape metacharacter
	// after the lexer has started running, and without restarting. A goroutine
	// would make reads and writes on l.escchar race.
	//
	// This construction uses a buffered channel to stash one item and the fact
	// that a nil channel never succeeds in a select switch.
	for {
		// Always look at the stash before looking at the state: a state
		// function may stash a final item (an error or EOF) and return nil in
		// the same step, and that item must still be delivered.
		select {
		case i := <-l.items:
			if i.kind == itemEOF {
				close(l.items)
				l.items = nil
			}
			return i
		default:
		}
		if l.state == nil {
			return item{kind: itemEOF}
		}
		l.state = l.state(l)
	}
}

// consumeWhitespace discards runes up to the next non-space rune, which is
// left unread, advancing l.pos past what was discarded. Reaching end of input
// is not an error.
func (l *lexer) consumeWhitespace() (err error) {
	var r rune
	var sz int
	for r, sz, err = l.rd.ReadRune(); err == nil; r, sz, err = l.rd.ReadRune() {
		if !unicode.IsSpace(r) {
			err = l.rd.UnreadRune()
			break
		}
		l.pos += sz
	}
	if err != nil && !errors.Is(err, io.EOF) {
		return err
	}
	return nil
}

// collectLine appends one logical line to l.sb.
//
// Reading stops at the first unescaped newline, which is left unread, or at
// end of input, which is not an error. An escape rune followed by a newline
// (optionally with carriage returns in between) joins the next physical line.
// After such a join, a line whose first non-space rune is '#' is skipped as a
// comment. Any other escape sequence is copied through verbatim.
func (l *lexer) collectLine() (err error) {
	var r rune
	var sz int
	var esc, inComment, started bool
Read:
	for r, sz, err = l.rd.ReadRune(); err == nil; r, sz, err = l.rd.ReadRune() {
		switch {
		case inComment && r == '\n':
			inComment = false
			started = false
		case inComment: // Skip
		case esc && r == '\r': // Lexer hack: why do some things have DOS line endings?
		case esc && r == '\n':
			esc = false
			started = false
		case esc:
			// This little lexer only cares about constructing the lines
			// correctly, so everything else gets passed through.
			esc = false
			// The escape rune's width was added to l.pos when it was read, so
			// it is only written here, not counted a second time.
			if _, err = l.sb.WriteRune(l.escchar); err == nil {
				_, err = l.sb.WriteRune(r)
			}
		case r == l.escchar:
			esc = true
			started = true
		case !esc && r == '\n':
			err = l.rd.UnreadRune()
			break Read
		case !started && !esc && r == '#':
			inComment = true
		case !started:
			if !unicode.IsSpace(r) {
				started = true
			}
			fallthrough
		default:
			_, err = l.sb.WriteRune(r)
		}
		if err != nil {
			break Read
		}
		l.pos += sz
	}
	if err != nil && !errors.Is(err, io.EOF) {
		return err
	}
	return nil
}

// error stashes the item that corresponds to e and stops the lexer by
// returning a nil state: io.EOF becomes itemEOF, any other non-nil error
// becomes itemError carrying the error text. A nil e stashes nothing.
func (l *lexer) error(e error) lexFn {
	switch {
	case e == nil: // Nothing to report; just stop.
	case errors.Is(e, io.EOF):
		l.items <- item{kind: itemEOF}
	default:
		l.items <- item{val: e.Error(), kind: itemError, pos: l.pos}
	}
	return nil
}

// peek returns the next rune without consuming it, or eof at end of input.
func (l *lexer) peek() rune {
	r, _, err := l.rd.ReadRune()
	if errors.Is(err, io.EOF) {
		return eof
	}
	// NOTE(review): a read error other than io.EOF is not reported here; the
	// caller just sees whatever rune ReadRune returned alongside it.
	l.rd.UnreadRune()
	return r
}

// start is the initial state. It clears the line buffer, skips leading
// whitespace and dispatches on the first rune of the next line.
func start(l *lexer) lexFn {
	l.sb.Reset()
	if err := l.consumeWhitespace(); err != nil {
		return l.error(err)
	}
	switch r := l.peek(); {
	case r == '#':
		return lexComment
	case unicode.IsLetter(r):
		return lexInstruction
	case r == eof:
		l.items <- item{kind: itemEOF}
	default:
		return l.error(fmt.Errorf("unknown rune %q", r))
	}
	return nil
}

// lexComment emits an itemComment holding the rest of the line after the
// comment marker and any whitespace that follows it.
func lexComment(l *lexer) lexFn {
	l.rd.ReadRune() // comment marker
	if err := l.consumeWhitespace(); err != nil {
		return l.error(err)
	}
	if err := l.collectLine(); err != nil {
		return l.error(err)
	}
	l.items <- item{
		val:  l.sb.String(),
		kind: itemComment,
		pos:  l.pos,
	}
	return start
}

// lexInstruction collects one logical line and emits it.
//
// ARG, ENV and LABEL (matched case-insensitively) are emitted as their own
// kinds with the instruction word removed and the remainder trimmed. Every
// other instruction is emitted whole as itemInstruction. A line with no
// whitespace in it, i.e. an instruction without arguments, is an error.
func lexInstruction(l *lexer) lexFn {
	if err := l.collectLine(); err != nil {
		return l.error(err)
	}

	ln := l.sb.String()
	i := strings.IndexFunc(ln, unicode.IsSpace)
	if i == -1 {
		return l.error(fmt.Errorf("unexpected line: %#q", ln))
	}
	cmd := ln[:i]
	rest := strings.TrimSpace(ln[i:])
	switch {
	case strings.EqualFold(cmd, `arg`):
		l.items <- item{
			val:  rest,
			kind: itemArg,
			pos:  l.pos,
		}
	case strings.EqualFold(cmd, `env`):
		l.items <- item{
			val:  rest,
			kind: itemEnv,
			pos:  l.pos,
		}
	case strings.EqualFold(cmd, `label`):
		l.items <- item{
			val:  rest,
			kind: itemLabel,
			pos:  l.pos,
		}
	default:
		l.items <- item{
			val:  ln,
			kind: itemInstruction,
			pos:  l.pos,
		}
	}
	return start
}