github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/builder/dockerfile/parser/parser.go (about)

     1  // Package parser implements a parser and parse tree dumper for Dockerfiles.
     2  package parser
     3  
     4  import (
     5  	"bufio"
     6  	"bytes"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  	"unicode"
    13  
    14  	"github.com/docker/docker/builder/dockerfile/command"
    15  	"github.com/pkg/errors"
    16  )
    17  
    18  // Node is a structure used to represent a parse tree.
    19  //
    20  // In the node there are three fields, Value, Next, and Children. Value is the
    21  // current token's string value. Next is always the next non-child token, and
    22  // children contains all the children. Here's an example:
    23  //
    24  // (value next (child child-next child-next-next) next-next)
    25  //
    26  // This data structure is frankly pretty lousy for handling complex languages,
    27  // but lucky for us the Dockerfile isn't very complicated. This structure
    28  // works a little more effectively than a "proper" parse tree for our needs.
    29  //
    30  type Node struct {
    31  	Value      string          // actual content
    32  	Next       *Node           // the next item in the current sexp
    33  	Children   []*Node         // the children of this sexp
    34  	Attributes map[string]bool // special attributes for this node
    35  	Original   string          // original line used before parsing
    36  	Flags      []string        // only top Node should have this set
    37  	StartLine  int             // the line in the original dockerfile where the node begins
    38  	endLine    int             // the line in the original dockerfile where the node ends
    39  }
    40  
    41  // Dump dumps the AST defined by `node` as a list of sexps.
    42  // Returns a string suitable for printing.
    43  func (node *Node) Dump() string {
    44  	str := ""
    45  	str += node.Value
    46  
    47  	if len(node.Flags) > 0 {
    48  		str += fmt.Sprintf(" %q", node.Flags)
    49  	}
    50  
    51  	for _, n := range node.Children {
    52  		str += "(" + n.Dump() + ")\n"
    53  	}
    54  
    55  	for n := node.Next; n != nil; n = n.Next {
    56  		if len(n.Children) > 0 {
    57  			str += " " + n.Dump()
    58  		} else {
    59  			str += " " + strconv.Quote(n.Value)
    60  		}
    61  	}
    62  
    63  	return strings.TrimSpace(str)
    64  }
    65  
    66  func (node *Node) lines(start, end int) {
    67  	node.StartLine = start
    68  	node.endLine = end
    69  }
    70  
    71  // AddChild adds a new child node, and updates line information
    72  func (node *Node) AddChild(child *Node, startLine, endLine int) {
    73  	child.lines(startLine, endLine)
    74  	if node.StartLine < 0 {
    75  		node.StartLine = startLine
    76  	}
    77  	node.endLine = endLine
    78  	node.Children = append(node.Children, child)
    79  }
    80  
var (
	// dispatch maps a command name to the function that parses that
	// command's arguments. It is populated in init.
	dispatch           map[string]func(string, *Directive) (*Node, map[string]bool, error)
	// tokenWhitespace matches a run of tabs, vertical tabs, form feeds,
	// carriage returns and spaces.
	tokenWhitespace    = regexp.MustCompile(`[\t\v\f\r ]+`)
	// tokenEscapeCommand matches a '#' line of the form `# escape=<char>` and
	// captures the single character after '=' in the named group "escapechar".
	tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	// tokenComment matches input that starts with '#', through the end of the line.
	tokenComment       = regexp.MustCompile(`^#.*$`)
)
    87  
// DefaultEscapeToken is the default escape token: a backslash. It is the
// token NewDefaultDirective installs on a fresh Directive.
const DefaultEscapeToken = '\\'
    90  
    91  // Directive is the structure used during a build run to hold the state of
    92  // parsing directives.
    93  type Directive struct {
    94  	escapeToken           rune           // Current escape token
    95  	lineContinuationRegex *regexp.Regexp // Current line continuation regex
    96  	processingComplete    bool           // Whether we are done looking for directives
    97  	escapeSeen            bool           // Whether the escape directive has been seen
    98  }
    99  
   100  // setEscapeToken sets the default token for escaping characters in a Dockerfile.
   101  func (d *Directive) setEscapeToken(s string) error {
   102  	if s != "`" && s != "\\" {
   103  		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
   104  	}
   105  	d.escapeToken = rune(s[0])
   106  	d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
   107  	return nil
   108  }
   109  
   110  // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and
   111  // '# platform=<string>'. Parser directives must precede any builder instruction
   112  // or other comments, and cannot be repeated.
   113  func (d *Directive) possibleParserDirective(line string) error {
   114  	if d.processingComplete {
   115  		return nil
   116  	}
   117  
   118  	tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
   119  	if len(tecMatch) != 0 {
   120  		for i, n := range tokenEscapeCommand.SubexpNames() {
   121  			if n == "escapechar" {
   122  				if d.escapeSeen == true {
   123  					return errors.New("only one escape parser directive can be used")
   124  				}
   125  				d.escapeSeen = true
   126  				return d.setEscapeToken(tecMatch[i])
   127  			}
   128  		}
   129  	}
   130  
   131  	d.processingComplete = true
   132  	return nil
   133  }
   134  
   135  // NewDefaultDirective returns a new Directive with the default escapeToken token
   136  func NewDefaultDirective() *Directive {
   137  	directive := Directive{}
   138  	directive.setEscapeToken(string(DefaultEscapeToken))
   139  	return &directive
   140  }
   141  
   142  func init() {
   143  	// Dispatch Table. see line_parsers.go for the parse functions.
   144  	// The command is parsed and mapped to the line parser. The line parser
   145  	// receives the arguments but not the command, and returns an AST after
   146  	// reformulating the arguments according to the rules in the parser
   147  	// functions. Errors are propagated up by Parse() and the resulting AST can
   148  	// be incorporated directly into the existing AST as a next.
   149  	dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
   150  		command.Add:         parseMaybeJSONToList,
   151  		command.Arg:         parseNameOrNameVal,
   152  		command.Cmd:         parseMaybeJSON,
   153  		command.Copy:        parseMaybeJSONToList,
   154  		command.Entrypoint:  parseMaybeJSON,
   155  		command.Env:         parseEnv,
   156  		command.Expose:      parseStringsWhitespaceDelimited,
   157  		command.From:        parseStringsWhitespaceDelimited,
   158  		command.Healthcheck: parseHealthConfig,
   159  		command.Label:       parseLabel,
   160  		command.Maintainer:  parseString,
   161  		command.Onbuild:     parseSubCommand,
   162  		command.Run:         parseMaybeJSON,
   163  		command.Shell:       parseMaybeJSON,
   164  		command.StopSignal:  parseString,
   165  		command.User:        parseString,
   166  		command.Volume:      parseMaybeJSONToList,
   167  		command.Workdir:     parseString,
   168  	}
   169  }
   170  
   171  // newNodeFromLine splits the line into parts, and dispatches to a function
   172  // based on the command and command arguments. A Node is created from the
   173  // result of the dispatch.
   174  func newNodeFromLine(line string, directive *Directive) (*Node, error) {
   175  	cmd, flags, args, err := splitCommand(line)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	fn := dispatch[cmd]
   181  	// Ignore invalid Dockerfile instructions
   182  	if fn == nil {
   183  		fn = parseIgnore
   184  	}
   185  	next, attrs, err := fn(args, directive)
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  
   190  	return &Node{
   191  		Value:      cmd,
   192  		Original:   line,
   193  		Flags:      flags,
   194  		Next:       next,
   195  		Attributes: attrs,
   196  	}, nil
   197  }
   198  
// Result is the result of parsing a Dockerfile
type Result struct {
	AST         *Node // root of the parse tree; Parse adds one child per parsed instruction
	EscapeToken rune  // escape token in effect when parsing finished
}
   204  
   205  // Parse reads lines from a Reader, parses the lines into an AST and returns
   206  // the AST and escape token
   207  func Parse(rwc io.Reader) (*Result, error) {
   208  	d := NewDefaultDirective()
   209  	currentLine := 0
   210  	root := &Node{StartLine: -1}
   211  	scanner := bufio.NewScanner(rwc)
   212  
   213  	var err error
   214  	for scanner.Scan() {
   215  		bytesRead := scanner.Bytes()
   216  		if currentLine == 0 {
   217  			// First line, strip the byte-order-marker if present
   218  			bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
   219  		}
   220  		bytesRead, err = processLine(d, bytesRead, true)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  		currentLine++
   225  
   226  		startLine := currentLine
   227  		line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
   228  		if isEndOfLine && line == "" {
   229  			continue
   230  		}
   231  
   232  		for !isEndOfLine && scanner.Scan() {
   233  			bytesRead, err := processLine(d, scanner.Bytes(), false)
   234  			if err != nil {
   235  				return nil, err
   236  			}
   237  			currentLine++
   238  
   239  			// TODO: warn this is being deprecated/removed
   240  			if isEmptyContinuationLine(bytesRead) {
   241  				continue
   242  			}
   243  
   244  			continuationLine := string(bytesRead)
   245  			continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
   246  			line += continuationLine
   247  		}
   248  
   249  		child, err := newNodeFromLine(line, d)
   250  		if err != nil {
   251  			return nil, err
   252  		}
   253  		root.AddChild(child, startLine, currentLine)
   254  	}
   255  	return &Result{AST: root, EscapeToken: d.escapeToken}, nil
   256  }
   257  
   258  func trimComments(src []byte) []byte {
   259  	return tokenComment.ReplaceAll(src, []byte{})
   260  }
   261  
   262  func trimWhitespace(src []byte) []byte {
   263  	return bytes.TrimLeftFunc(src, unicode.IsSpace)
   264  }
   265  
   266  func isEmptyContinuationLine(line []byte) bool {
   267  	return len(trimComments(trimWhitespace(line))) == 0
   268  }
   269  
// utf8bom is the UTF-8 byte-order mark; Parse strips it from the first line
// of the input.
var utf8bom = []byte{0xEF, 0xBB, 0xBF}
   271  
   272  func trimContinuationCharacter(line string, d *Directive) (string, bool) {
   273  	if d.lineContinuationRegex.MatchString(line) {
   274  		line = d.lineContinuationRegex.ReplaceAllString(line, "")
   275  		return line, false
   276  	}
   277  	return line, true
   278  }
   279  
   280  // TODO: remove stripLeftWhitespace after deprecation period. It seems silly
   281  // to preserve whitespace on continuation lines. Why is that done?
   282  func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) {
   283  	if stripLeftWhitespace {
   284  		token = trimWhitespace(token)
   285  	}
   286  	err := d.possibleParserDirective(string(token))
   287  	return trimComments(token), err
   288  }