github.com/quay/claircore@v1.5.28/rhel/dockerfile/lex.go (about)

     1  package dockerfile
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  	"unicode"
    10  )
    11  
    12  /*
    13  This lexer is based on the text/template lexer, which has the same recursive
    14  function construction.
    15  
    16  Parser directives are handled by the parser.  Trailing whitespace is not passed
    17  to the parser, which may or may not be significant. This is not a
    18  general-purpose dockerfile lexer, it's only intended to handle just enough of
    19  valid dockerfiles to extract the labels.
    20  */
    21  
// lexer tokenizes a dockerfile into comment and instruction items.
//
// The zero value is not usable; construct with newLexer and arm with Reset.
type lexer struct {
	rd      *bufio.Reader   // buffered view of the current input
	state   lexFn           // next state function; nil once lexing is finished
	sb      strings.Builder // accumulates the current logical line
	items   chan item       // capacity-1 hand-off from state functions to Next
	pos     int             // byte offset into the input, used for item positions
	escchar rune            // escape metacharacter; '\\' by default, see Escape
}
    30  
    31  func newLexer() *lexer {
    32  	return &lexer{
    33  		state: start,
    34  		rd:    bufio.NewReader(nil),
    35  	}
    36  }
    37  
    38  // Reset resets the lexer to read from r.
    39  func (l *lexer) Reset(r io.Reader) {
    40  	// The strings.Builder is handled by the 'start' state.
    41  	l.rd.Reset(r)
    42  	l.items = make(chan item, 1)
    43  	l.pos = 0
    44  	l.escchar = '\\'
    45  	l.state = start
    46  }
    47  
    48  // Escape changes the escape metacharacter (used for line continuations).
    49  func (l *lexer) Escape(r rune) {
    50  	l.escchar = r
    51  }
    52  
// item is one token handed from the lexer to the parser.
type item struct {
	val  string   // token text; for itemError, the error message
	kind itemKind // which kind of token this is
	pos  int      // the lexer's byte offset when the item was emitted
}
    58  
// itemKind discriminates the tokens produced by the lexer.
type itemKind int

//go:generate -command stringer go run golang.org/x/tools/cmd/stringer
//go:generate stringer -type itemKind

// Only ARG, ENV, and LABEL instructions get dedicated kinds; anything else
// is reported as a generic itemInstruction. Do not reorder: the generated
// stringer output depends on these values.
const (
	itemError itemKind = iota // lexing failed; val carries the error text
	itemComment               // a comment line, marker and leading space removed
	itemInstruction           // any instruction other than ARG/ENV/LABEL
	itemLabel                 // the arguments of a LABEL instruction
	itemArg                   // the arguments of an ARG instruction
	itemEnv                   // the arguments of an ENV instruction
	itemEOF                   // end of input
)

// eof is the out-of-band rune peek returns when input is exhausted.
const eof = -1

// lexFn is a state function: it performs one step and returns the next
// state, or nil when the machine should halt.
type lexFn func(*lexer) lexFn
    77  
    78  // Next yields the next item.
    79  func (l *lexer) Next() item {
    80  	// The text/template lexer this is based on uses a goroutine, but that's not
    81  	// workable because we need to be able to swap the escape metacharacter
    82  	// after the lexer has started running, and without restarting. A goroutine
    83  	// would make reads and writes on l.escchar race.
    84  	//
    85  	// This construction uses a buffered channel to stash one item and the fact
    86  	// that a nil channel never succeeds in a select switch.
    87  	for l.state != nil {
    88  		select {
    89  		case i := <-l.items:
    90  			if i.kind == itemEOF {
    91  				close(l.items)
    92  				l.items = nil
    93  			}
    94  			return i
    95  		default:
    96  			l.state = l.state(l)
    97  		}
    98  	}
    99  	return item{kind: itemEOF}
   100  }
   101  
   102  func (l *lexer) consumeWhitespace() (err error) {
   103  	var r rune
   104  	var sz int
   105  	for r, sz, err = l.rd.ReadRune(); err == nil; r, sz, err = l.rd.ReadRune() {
   106  		if !unicode.IsSpace(r) {
   107  			err = l.rd.UnreadRune()
   108  			break
   109  		}
   110  		l.pos += sz
   111  	}
   112  	switch {
   113  	case errors.Is(err, nil):
   114  	case errors.Is(err, io.EOF):
   115  	default:
   116  		return err
   117  	}
   118  	return nil
   119  }
   120  
// collectLine accumulates one logical line into l.sb, honoring line
// continuations (escchar followed by a newline, with an optional CR) and
// skipping comment-only physical lines that appear inside a continuation.
// The terminating newline is left unread for the caller; EOF is not
// reported as an error.
func (l *lexer) collectLine() (err error) {
	var r rune
	var sz int
	// esc: the previous rune was the escape metacharacter, held pending.
	// inComment: currently discarding an interior comment line.
	// started: a non-space rune has been seen on the current physical line.
	var esc, inComment, started bool
Read:
	for r, sz, err = l.rd.ReadRune(); err == nil; r, sz, err = l.rd.ReadRune() {
		switch {
		case inComment && r == '\n':
			// Comment ends at the newline; resume on a fresh physical line.
			inComment = false
			started = false
		case inComment: // Skip
		case esc && r == '\r': // Lexer hack: why do some things have DOS line endings?
		case esc && r == '\n':
			// Escaped newline: the logical line continues below.
			esc = false
			started = false
		case esc:
			// This little lexer only cares about constructing the lines
			// correctly, so everything else gets passed through.
			esc = false
			// NOTE(review): the escape rune's size was already added to l.pos
			// when it was read, so this looks like it double-counts — confirm
			// whether pos drift matters (it's only used in item positions).
			sz, _ := l.sb.WriteRune(l.escchar)
			l.pos += sz
			_, err = l.sb.WriteRune(r)
		case r == l.escchar:
			// Hold the escape; the next rune decides what it meant.
			esc = true
			started = true
		case !esc && r == '\n':
			// Unescaped newline ends the line; leave it for the caller.
			err = l.rd.UnreadRune()
			break Read
		case !started && !esc && r == '#':
			// '#' before any non-space rune starts an interior comment.
			inComment = true
		case !started:
			if !unicode.IsSpace(r) {
				started = true
			}
			fallthrough
		default:
			_, err = l.sb.WriteRune(r)
		}
		if err != nil {
			break Read
		}
		l.pos += sz
	}
	switch {
	case errors.Is(err, nil):
	case errors.Is(err, io.EOF):
	default:
		return err
	}
	return nil
}
   172  
   173  func (l *lexer) error(e error) lexFn {
   174  	switch {
   175  	case errors.Is(e, nil): // ???
   176  	case errors.Is(e, io.EOF):
   177  		l.items <- item{kind: itemEOF}
   178  	default:
   179  		l.items <- item{val: e.Error(), kind: itemError, pos: l.pos}
   180  	}
   181  	return nil
   182  }
   183  
   184  func (l *lexer) peek() rune {
   185  	r, _, err := l.rd.ReadRune()
   186  	if errors.Is(err, io.EOF) {
   187  		return eof
   188  	}
   189  	l.rd.UnreadRune()
   190  	return r
   191  }
   192  
   193  func start(l *lexer) lexFn {
   194  	l.sb.Reset()
   195  	if err := l.consumeWhitespace(); err != nil {
   196  		return l.error(err)
   197  	}
   198  	switch r := l.peek(); {
   199  	case r == '#':
   200  		return lexComment
   201  	case unicode.IsLetter(r):
   202  		return lexInstruction
   203  	case r == eof:
   204  		l.items <- item{kind: itemEOF}
   205  	default:
   206  		return l.error(fmt.Errorf("unknown rune %q", r))
   207  	}
   208  	return nil
   209  }
   210  
   211  func lexComment(l *lexer) lexFn {
   212  	l.rd.ReadRune() // comment marker
   213  	if err := l.consumeWhitespace(); err != nil {
   214  		return l.error(err)
   215  	}
   216  	if err := l.collectLine(); err != nil {
   217  		return l.error(err)
   218  	}
   219  	l.items <- item{
   220  		val:  l.sb.String(),
   221  		kind: itemComment,
   222  		pos:  l.pos,
   223  	}
   224  	return start
   225  }
   226  
   227  func lexInstruction(l *lexer) lexFn {
   228  	if err := l.collectLine(); err != nil {
   229  		return l.error(err)
   230  	}
   231  
   232  	ln := l.sb.String()
   233  	i := strings.IndexFunc(ln, unicode.IsSpace)
   234  	if i == -1 {
   235  		return l.error(fmt.Errorf("unexpected line: %#q", ln))
   236  	}
   237  	cmd := ln[:i]
   238  	rest := strings.TrimSpace(ln[i:])
   239  	switch {
   240  	case strings.EqualFold(cmd, `arg`):
   241  		l.items <- item{
   242  			val:  rest,
   243  			kind: itemArg,
   244  			pos:  l.pos,
   245  		}
   246  	case strings.EqualFold(cmd, `env`):
   247  		l.items <- item{
   248  			val:  rest,
   249  			kind: itemEnv,
   250  			pos:  l.pos,
   251  		}
   252  	case strings.EqualFold(cmd, `label`):
   253  		l.items <- item{
   254  			val:  rest,
   255  			kind: itemLabel,
   256  			pos:  l.pos,
   257  		}
   258  	default:
   259  		l.items <- item{
   260  			val:  l.sb.String(),
   261  			kind: itemInstruction,
   262  			pos:  l.pos,
   263  		}
   264  	}
   265  	return start
   266  }