github.com/quay/claircore@v1.5.28/rhel/dockerfile/dockerfile.go (about)

     1  // Package dockerfile implements a minimal dockerfile parser.
     2  package dockerfile
     3  
     4  import (
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  
    13  	"golang.org/x/text/transform"
    14  )
    15  
    16  // GetLabels parses the Dockerfile in the provided Reader and returns all
    17  // discovered labels as provided by the LABEL instruction, with variables
    18  // resolved and expanded.
    19  //
    20  // ARG and ENV instructions are understood. This will yield different results
    21  // if a build argument is supplied at build time.
    22  func GetLabels(_ context.Context, r io.Reader) (map[string]string, error) {
    23  	p := getParser()
    24  	defer putParser(p)
    25  	p.Init(r)
    26  	return p.Labels, p.Run()
    27  }
    28  
// LabelParser is the state for one pass over a Dockerfile: it pulls items
// from a lexer, remembers variables assigned along the way, and accumulates
// the labels it finds.
type labelParser struct {
	// Labels holds the labels collected by Run. Init replaces it with a
	// fresh map.
	Labels  map[string]string
	// Lex is the source of items consumed by Run.
	lex     *lexer
	// Unquote is the transformer applied to assignment keys and, chained
	// with vars, to assignment values (presumably removing quoting; see its
	// definition).
	unquote *Unquote
	// Vars stores variables set by ENV and ARG and is also used as a
	// transformer on values.
	vars    *Vars
	// Escchar is the escape metacharacter recorded by Escape and handed to
	// splitKV.
	escchar rune
}
    36  
    37  func newLabelParser() *labelParser {
    38  	return &labelParser{
    39  		unquote: NewUnquote(),
    40  		vars:    NewVars(),
    41  		lex:     newLexer(),
    42  	}
    43  }
    44  
    45  // Init sets up the parser to read from r.
    46  func (p *labelParser) Init(r io.Reader) {
    47  	p.Labels = make(map[string]string)
    48  	p.lex.Reset(r)
    49  	p.vars.Clear()
    50  	p.Escape('\\')
    51  }
    52  
    53  // Escape sets the escape metacharacter for the lexer and the current
    54  // transformers.
    55  func (p *labelParser) Escape(r rune) {
    56  	p.escchar = r
    57  	p.lex.Escape(p.escchar)
    58  	p.unquote.Escape(p.escchar)
    59  	p.vars.Escape(p.escchar)
    60  }
    61  
    62  // Run consumes items and keeps track of variables and labels.
    63  //
    64  // A nil error is reported on encountering io.EOF.
    65  func (p *labelParser) Run() error {
    66  	var i item
    67  	for i = p.lex.Next(); ; i = p.lex.Next() {
    68  		switch i.kind {
    69  		case itemEOF:
    70  			return nil
    71  		case itemError:
    72  			return errors.New(i.val)
    73  		case itemEnv:
    74  			if err := p.handleAssign(i.val, p.vars.Set); err != nil {
    75  				return err
    76  			}
    77  		case itemArg:
    78  			idx := strings.IndexByte(i.val, '=')
    79  			if idx == -1 {
    80  				continue
    81  			}
    82  			k, _, err := transform.String(p.unquote, i.val[:idx])
    83  			if err != nil {
    84  				return err
    85  			}
    86  			v, _, err := transform.String(transform.Chain(p.unquote, p.vars), i.val[idx+1:])
    87  			if err != nil {
    88  				return err
    89  			}
    90  			p.vars.Set(k, v)
    91  		case itemLabel:
    92  			// NOTE(hank) This sucks. This is not documented to work this way
    93  			// but experimentally, does.
    94  			//	skopeo inspect docker://registry.redhat.io/rhel7/etcd:3.2.32-14
    95  			if err := p.handleAssign(i.val, func(k, v string) { p.Labels[k] = v }); err != nil {
    96  				return err
    97  			}
    98  		case itemComment:
    99  			v := strings.ToLower(strings.TrimSpace(i.val))
   100  			if strings.Contains(v, `escape=`) {
   101  				eq := strings.IndexByte(v, '=')
   102  				if eq == -1 {
   103  					return fmt.Errorf("botched parser directive: %#q", i.val)
   104  				}
   105  				esc, _ := utf8.DecodeRuneInString(v[:eq+1])
   106  				p.lex.Escape(esc)
   107  				p.unquote.Escape(esc)
   108  				p.vars.Escape(esc)
   109  			}
   110  		default: // discard
   111  		}
   112  	}
   113  }
   114  
   115  // HandleAssign handles the assignment commands.
   116  //
   117  // Only `ENV` commands should have this ambiguity in their handling, but some
   118  // Dockerfiles in the wild have `LABEL` commands that work this way, also.
   119  func (p *labelParser) handleAssign(val string, f func(k, v string)) error {
   120  	if isKV(val) {
   121  		// This is a bunch of k=v pairs. First, we need to split the pairs.
   122  		// Values can be quoted strings, so using FieldsFunc is incorrect.
   123  		pairs, err := splitKV(p.escchar, val)
   124  		if err != nil {
   125  			return err
   126  		}
   127  		for _, kv := range pairs {
   128  			idx := strings.IndexByte(kv, '=')
   129  			if idx == -1 {
   130  				return fmt.Errorf(`invalid assignment syntax: %+#q`, val)
   131  			}
   132  			k, _, err := transform.String(p.unquote, kv[:idx])
   133  			if err != nil {
   134  				return err
   135  			}
   136  			v, _, err := transform.String(transform.Chain(p.unquote, p.vars), kv[idx+1:])
   137  			if err != nil {
   138  				return err
   139  			}
   140  			f(k, v)
   141  		}
   142  		return nil
   143  	}
   144  	idxSp := strings.IndexFunc(val, unicode.IsSpace)
   145  	k, _, err := transform.String(p.unquote, val[:idxSp])
   146  	if err != nil {
   147  		return err
   148  	}
   149  	v, _, err := transform.String(p.vars, strings.TrimLeftFunc(val[idxSp:], unicode.IsSpace))
   150  	if err != nil {
   151  		return err
   152  	}
   153  	f(k, v)
   154  	return nil
   155  }
   156  
   157  // SplitKV splits a string on unquoted or un-escaped whitespace.
   158  //
   159  // Label and Env instructions allow for key-value pairs with this syntax.
   160  func splitKV(escchar rune, in string) ([]string, error) {
   161  	var ret []string
   162  	var esc, quote, ws bool
   163  	var quotechar rune
   164  	start := 0
   165  	for cur, r := range in {
   166  	Backup:
   167  		switch {
   168  		case esc:
   169  			esc = false
   170  		case !esc && r == escchar:
   171  			esc = true
   172  		case !esc && !quote && (r == '"' || r == '\''):
   173  			if ws {
   174  				// If this ends a whitespace run, update the starting position.
   175  				start = cur
   176  			}
   177  			ws = false
   178  			quote = true
   179  			quotechar = r
   180  		case !esc && quote && r == quotechar:
   181  			quote = false
   182  			quotechar = 0
   183  		case !esc && !quote && ws:
   184  			// In a run of unquoted whitespace.
   185  			if isWhitespace(r) {
   186  				break
   187  			}
   188  			// A non-quote character has ended the whitespace run; reset flags
   189  			// and re-process the character.
   190  			start = cur
   191  			ws = false
   192  			goto Backup
   193  		case !esc && !quote && isWhitespace(r):
   194  			ret = append(ret, in[start:cur])
   195  			ws = true
   196  		default: // advance
   197  		}
   198  	}
   199  	if rem := in[start:]; len(rem) > 0 {
   200  		ret = append(ret, rem)
   201  	}
   202  	return ret, nil
   203  }
   204  
   205  // IsWhitespace reports whether the rune is valid intraline whitespace.
   206  func isWhitespace(r rune) bool {
   207  	return unicode.IsSpace(r) && r != '\n'
   208  }
   209  
   210  func isKV(s string) bool {
   211  	idxEq := strings.IndexByte(s, '=')
   212  	idxSp := strings.IndexFunc(s, unicode.IsSpace)
   213  	return idxEq != -1 && (idxSp == -1 || idxSp >= idxEq)
   214  }