github.com/lazyboychen7/engine@v17.12.1-ce-rc2+incompatible/builder/dockerfile/parser/parser.go (about)

     1  // Package parser implements a parser and parse tree dumper for Dockerfiles.
     2  package parser
     3  
     4  import (
     5  	"bufio"
     6  	"bytes"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  	"runtime"
    11  	"strconv"
    12  	"strings"
    13  	"unicode"
    14  
    15  	"github.com/docker/docker/builder/dockerfile/command"
    16  	"github.com/docker/docker/pkg/system"
    17  	"github.com/pkg/errors"
    18  )
    19  
// Node is a structure used to represent a parse tree.
//
// In the node there are three fields, Value, Next, and Children. Value is the
// current token's string value. Next is always the next non-child token, and
// Children contains all the children. Here's an example:
//
// (value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
//
// Line information (StartLine, endLine) is maintained by lines and AddChild.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node, as returned by the line parser
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	endLine    int             // the line in the original dockerfile where the node ends
}
    42  
    43  // Dump dumps the AST defined by `node` as a list of sexps.
    44  // Returns a string suitable for printing.
    45  func (node *Node) Dump() string {
    46  	str := ""
    47  	str += node.Value
    48  
    49  	if len(node.Flags) > 0 {
    50  		str += fmt.Sprintf(" %q", node.Flags)
    51  	}
    52  
    53  	for _, n := range node.Children {
    54  		str += "(" + n.Dump() + ")\n"
    55  	}
    56  
    57  	for n := node.Next; n != nil; n = n.Next {
    58  		if len(n.Children) > 0 {
    59  			str += " " + n.Dump()
    60  		} else {
    61  			str += " " + strconv.Quote(n.Value)
    62  		}
    63  	}
    64  
    65  	return strings.TrimSpace(str)
    66  }
    67  
    68  func (node *Node) lines(start, end int) {
    69  	node.StartLine = start
    70  	node.endLine = end
    71  }
    72  
    73  // AddChild adds a new child node, and updates line information
    74  func (node *Node) AddChild(child *Node, startLine, endLine int) {
    75  	child.lines(startLine, endLine)
    76  	if node.StartLine < 0 {
    77  		node.StartLine = startLine
    78  	}
    79  	node.endLine = endLine
    80  	node.Children = append(node.Children, child)
    81  }
    82  
var (
	// dispatch maps an instruction name to the line parser that handles its
	// arguments. It is populated in init.
	dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
	// tokenWhitespace matches a run of tabs, vertical tabs, form feeds,
	// carriage returns or spaces.
	tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
	// tokenEscapeCommand matches a `# escape=<char>` parser directive and
	// captures the single character following the `=` as "escapechar".
	tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	// tokenPlatformCommand matches a `# platform=<string>` parser directive
	// and captures the remainder of the line as "platform".
	tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`)
	// tokenComment matches a line that starts with `#`.
	tokenComment = regexp.MustCompile(`^#.*$`)
)
    90  
// DefaultEscapeToken is the default escape token (a backslash). It is the
// token installed by NewDefaultDirective.
const DefaultEscapeToken = '\\'
    93  
// Directive is the structure used during a build run to hold the state of
// parsing directives.
//
// escapeToken and lineContinuationRegex are always updated together by
// setEscapeToken; platformToken is set by setPlatformToken. The remaining
// flags are bookkeeping for possibleParserDirective.
type Directive struct {
	escapeToken           rune           // Current escape token
	platformToken         string         // Current platform token
	lineContinuationRegex *regexp.Regexp // Current line continuation regex
	processingComplete    bool           // Whether we are done looking for directives
	escapeSeen            bool           // Whether the escape directive has been seen
	platformSeen          bool           // Whether the platform directive has been seen
}
   104  
   105  // setEscapeToken sets the default token for escaping characters in a Dockerfile.
   106  func (d *Directive) setEscapeToken(s string) error {
   107  	if s != "`" && s != "\\" {
   108  		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
   109  	}
   110  	d.escapeToken = rune(s[0])
   111  	d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
   112  	return nil
   113  }
   114  
   115  // setPlatformToken sets the default platform for pulling images in a Dockerfile.
   116  func (d *Directive) setPlatformToken(s string) error {
   117  	s = strings.ToLower(s)
   118  	valid := []string{runtime.GOOS}
   119  	if system.LCOWSupported() {
   120  		valid = append(valid, "linux")
   121  	}
   122  	for _, item := range valid {
   123  		if s == item {
   124  			d.platformToken = s
   125  			return nil
   126  		}
   127  	}
   128  	return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid)
   129  }
   130  
   131  // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and
   132  // '# platform=<string>'. Parser directives must precede any builder instruction
   133  // or other comments, and cannot be repeated.
   134  func (d *Directive) possibleParserDirective(line string) error {
   135  	if d.processingComplete {
   136  		return nil
   137  	}
   138  
   139  	tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
   140  	if len(tecMatch) != 0 {
   141  		for i, n := range tokenEscapeCommand.SubexpNames() {
   142  			if n == "escapechar" {
   143  				if d.escapeSeen {
   144  					return errors.New("only one escape parser directive can be used")
   145  				}
   146  				d.escapeSeen = true
   147  				return d.setEscapeToken(tecMatch[i])
   148  			}
   149  		}
   150  	}
   151  
   152  	// Only recognise a platform token if LCOW is supported
   153  	if system.LCOWSupported() {
   154  		tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line))
   155  		if len(tpcMatch) != 0 {
   156  			for i, n := range tokenPlatformCommand.SubexpNames() {
   157  				if n == "platform" {
   158  					if d.platformSeen {
   159  						return errors.New("only one platform parser directive can be used")
   160  					}
   161  					d.platformSeen = true
   162  					return d.setPlatformToken(tpcMatch[i])
   163  				}
   164  			}
   165  		}
   166  	}
   167  
   168  	d.processingComplete = true
   169  	return nil
   170  }
   171  
   172  // NewDefaultDirective returns a new Directive with the default escapeToken token
   173  func NewDefaultDirective() *Directive {
   174  	directive := Directive{}
   175  	directive.setEscapeToken(string(DefaultEscapeToken))
   176  	return &directive
   177  }
   178  
   179  func init() {
   180  	// Dispatch Table. see line_parsers.go for the parse functions.
   181  	// The command is parsed and mapped to the line parser. The line parser
   182  	// receives the arguments but not the command, and returns an AST after
   183  	// reformulating the arguments according to the rules in the parser
   184  	// functions. Errors are propagated up by Parse() and the resulting AST can
   185  	// be incorporated directly into the existing AST as a next.
   186  	dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
   187  		command.Add:         parseMaybeJSONToList,
   188  		command.Arg:         parseNameOrNameVal,
   189  		command.Cmd:         parseMaybeJSON,
   190  		command.Copy:        parseMaybeJSONToList,
   191  		command.Entrypoint:  parseMaybeJSON,
   192  		command.Env:         parseEnv,
   193  		command.Expose:      parseStringsWhitespaceDelimited,
   194  		command.From:        parseStringsWhitespaceDelimited,
   195  		command.Healthcheck: parseHealthConfig,
   196  		command.Label:       parseLabel,
   197  		command.Maintainer:  parseString,
   198  		command.Onbuild:     parseSubCommand,
   199  		command.Run:         parseMaybeJSON,
   200  		command.Shell:       parseMaybeJSON,
   201  		command.StopSignal:  parseString,
   202  		command.User:        parseString,
   203  		command.Volume:      parseMaybeJSONToList,
   204  		command.Workdir:     parseString,
   205  	}
   206  }
   207  
   208  // newNodeFromLine splits the line into parts, and dispatches to a function
   209  // based on the command and command arguments. A Node is created from the
   210  // result of the dispatch.
   211  func newNodeFromLine(line string, directive *Directive) (*Node, error) {
   212  	cmd, flags, args, err := splitCommand(line)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  
   217  	fn := dispatch[cmd]
   218  	// Ignore invalid Dockerfile instructions
   219  	if fn == nil {
   220  		fn = parseIgnore
   221  	}
   222  	next, attrs, err := fn(args, directive)
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  
   227  	return &Node{
   228  		Value:      cmd,
   229  		Original:   line,
   230  		Flags:      flags,
   231  		Next:       next,
   232  		Attributes: attrs,
   233  	}, nil
   234  }
   235  
// Result is the result of parsing a Dockerfile
type Result struct {
	// AST is the root node; each parsed instruction is one of its Children.
	AST *Node
	// EscapeToken is the escape token in effect once parsing finished.
	EscapeToken rune
	// TODO @jhowardmsft - see https://github.com/moby/moby/issues/34617
	// This next field will be removed in a future update for LCOW support.
	// OS holds the platform token taken from the parser directive state.
	OS string
	// Warnings collects the non-fatal messages produced by Parse.
	Warnings []string
}
   245  
   246  // PrintWarnings to the writer
   247  func (r *Result) PrintWarnings(out io.Writer) {
   248  	if len(r.Warnings) == 0 {
   249  		return
   250  	}
   251  	fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n")
   252  }
   253  
// Parse reads lines from a Reader, parses the lines into an AST and returns
// the AST and escape token.
//
// Each physical line is run through processLine (which also handles parser
// directives). Lines ending in the current continuation token are joined
// with the lines that follow into one logical line, and every logical line
// becomes a child of the root node via newNodeFromLine.
//
// Errors from processLine or newNodeFromLine abort parsing and return a nil
// Result. A scanner error, on the other hand, is returned (after translation
// by handleScannerError) together with the Result built so far.
func Parse(rwc io.Reader) (*Result, error) {
	d := NewDefaultDirective()
	currentLine := 0
	// StartLine is negative so that AddChild takes it from the first child.
	root := &Node{StartLine: -1}
	scanner := bufio.NewScanner(rwc)
	warnings := []string{}

	var err error
	for scanner.Scan() {
		bytesRead := scanner.Bytes()
		if currentLine == 0 {
			// First line, strip the byte-order-marker if present
			bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
		}
		bytesRead, err = processLine(d, bytesRead, true)
		if err != nil {
			return nil, err
		}
		currentLine++

		startLine := currentLine
		line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
		// Nothing left after stripping and no continuation: skip the line.
		if isEndOfLine && line == "" {
			continue
		}

		// Keep consuming physical lines until one does not end with the
		// continuation token (or the input runs out).
		var hasEmptyContinuationLine bool
		for !isEndOfLine && scanner.Scan() {
			// bytesRead and err here shadow the outer variables.
			bytesRead, err := processLine(d, scanner.Bytes(), false)
			if err != nil {
				return nil, err
			}
			currentLine++

			if isComment(scanner.Bytes()) {
				// original line was a comment (processLine strips comments)
				continue
			}
			if isEmptyContinuationLine(bytesRead) {
				// Remembered so a warning can be emitted for this instruction.
				hasEmptyContinuationLine = true
				continue
			}

			continuationLine := string(bytesRead)
			continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
			line += continuationLine
		}

		if hasEmptyContinuationLine {
			warning := "[WARNING]: Empty continuation line found in:\n    " + line
			warnings = append(warnings, warning)
		}

		child, err := newNodeFromLine(line, d)
		if err != nil {
			return nil, err
		}
		// currentLine is the last physical line consumed for this instruction.
		root.AddChild(child, startLine, currentLine)
	}

	if len(warnings) > 0 {
		warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.")
	}
	// The Result is returned even when the scanner reported an error.
	return &Result{
		AST:         root,
		Warnings:    warnings,
		EscapeToken: d.escapeToken,
		OS:          d.platformToken,
	}, handleScannerError(scanner.Err())
}
   326  
   327  func trimComments(src []byte) []byte {
   328  	return tokenComment.ReplaceAll(src, []byte{})
   329  }
   330  
   331  func trimWhitespace(src []byte) []byte {
   332  	return bytes.TrimLeftFunc(src, unicode.IsSpace)
   333  }
   334  
   335  func isComment(line []byte) bool {
   336  	return tokenComment.Match(trimWhitespace(line))
   337  }
   338  
   339  func isEmptyContinuationLine(line []byte) bool {
   340  	return len(trimWhitespace(line)) == 0
   341  }
   342  
// utf8bom is the UTF-8 encoded byte order mark. Parse strips it from the
// first line of the input when present.
var utf8bom = []byte{0xEF, 0xBB, 0xBF}
   344  
   345  func trimContinuationCharacter(line string, d *Directive) (string, bool) {
   346  	if d.lineContinuationRegex.MatchString(line) {
   347  		line = d.lineContinuationRegex.ReplaceAllString(line, "")
   348  		return line, false
   349  	}
   350  	return line, true
   351  }
   352  
   353  // TODO: remove stripLeftWhitespace after deprecation period. It seems silly
   354  // to preserve whitespace on continuation lines. Why is that done?
   355  func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) {
   356  	if stripLeftWhitespace {
   357  		token = trimWhitespace(token)
   358  	}
   359  	return trimComments(token), d.possibleParserDirective(string(token))
   360  }
   361  
   362  func handleScannerError(err error) error {
   363  	switch err {
   364  	case bufio.ErrTooLong:
   365  		return errors.Errorf("dockerfile line greater than max allowed size of %d", bufio.MaxScanTokenSize-1)
   366  	default:
   367  		return err
   368  	}
   369  }