github.com/kobeld/docker@v1.12.0-rc1/builder/dockerfile/parser/parser.go (about)

     1  // Package parser implements a parser and parse tree dumper for Dockerfiles.
     2  package parser
     3  
     4  import (
     5  	"bufio"
     6  	"bytes"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  	"strings"
    11  	"unicode"
    12  
    13  	"github.com/docker/docker/builder/dockerfile/command"
    14  )
    15  
    16  // Node is a structure used to represent a parse tree.
    17  //
    18  // In the node there are three fields, Value, Next, and Children. Value is the
    19  // current token's string value. Next is always the next non-child token, and
    20  // children contains all the children. Here's an example:
    21  //
    22  // (value next (child child-next child-next-next) next-next)
    23  //
    24  // This data structure is frankly pretty lousy for handling complex languages,
    25  // but lucky for us the Dockerfile isn't very complicated. This structure
    26  // works a little more effectively than a "proper" parse tree for our needs.
    27  //
    28  type Node struct {
    29  	Value      string          // actual content
    30  	Next       *Node           // the next item in the current sexp
    31  	Children   []*Node         // the children of this sexp
    32  	Attributes map[string]bool // special attributes for this node
    33  	Original   string          // original line used before parsing
    34  	Flags      []string        // only top Node should have this set
    35  	StartLine  int             // the line in the original dockerfile where the node begins
    36  	EndLine    int             // the line in the original dockerfile where the node ends
    37  }
    38  
    39  var (
    40  	dispatch              map[string]func(string) (*Node, map[string]bool, error)
    41  	tokenWhitespace       = regexp.MustCompile(`[\t\v\f\r ]+`)
    42  	tokenLineContinuation *regexp.Regexp
    43  	tokenEscape           rune
    44  	tokenEscapeCommand    = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
    45  	tokenComment          = regexp.MustCompile(`^#.*$`)
    46  	lookingForDirectives  bool
    47  	directiveEscapeSeen   bool
    48  )
    49  
    50  const defaultTokenEscape = "\\"
    51  
    52  // setTokenEscape sets the default token for escaping characters in a Dockerfile.
    53  func setTokenEscape(s string) error {
    54  	if s != "`" && s != "\\" {
    55  		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
    56  	}
    57  	tokenEscape = rune(s[0])
    58  	tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`)
    59  	return nil
    60  }
    61  
    62  func init() {
    63  	// Dispatch Table. see line_parsers.go for the parse functions.
    64  	// The command is parsed and mapped to the line parser. The line parser
    65  	// receives the arguments but not the command, and returns an AST after
    66  	// reformulating the arguments according to the rules in the parser
    67  	// functions. Errors are propagated up by Parse() and the resulting AST can
    68  	// be incorporated directly into the existing AST as a next.
    69  	dispatch = map[string]func(string) (*Node, map[string]bool, error){
    70  		command.Add:         parseMaybeJSONToList,
    71  		command.Arg:         parseNameOrNameVal,
    72  		command.Cmd:         parseMaybeJSON,
    73  		command.Copy:        parseMaybeJSONToList,
    74  		command.Entrypoint:  parseMaybeJSON,
    75  		command.Env:         parseEnv,
    76  		command.Expose:      parseStringsWhitespaceDelimited,
    77  		command.From:        parseString,
    78  		command.Healthcheck: parseHealthConfig,
    79  		command.Label:       parseLabel,
    80  		command.Maintainer:  parseString,
    81  		command.Onbuild:     parseSubCommand,
    82  		command.Run:         parseMaybeJSON,
    83  		command.Shell:       parseMaybeJSON,
    84  		command.StopSignal:  parseString,
    85  		command.User:        parseString,
    86  		command.Volume:      parseMaybeJSONToList,
    87  		command.Workdir:     parseString,
    88  	}
    89  }
    90  
    91  // ParseLine parse a line and return the remainder.
    92  func ParseLine(line string) (string, *Node, error) {
    93  
    94  	// Handle the parser directive '# escape=<char>. Parser directives must precede
    95  	// any builder instruction or other comments, and cannot be repeated.
    96  	if lookingForDirectives {
    97  		tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
    98  		if len(tecMatch) > 0 {
    99  			if directiveEscapeSeen == true {
   100  				return "", nil, fmt.Errorf("only one escape parser directive can be used")
   101  			}
   102  			for i, n := range tokenEscapeCommand.SubexpNames() {
   103  				if n == "escapechar" {
   104  					if err := setTokenEscape(tecMatch[i]); err != nil {
   105  						return "", nil, err
   106  					}
   107  					directiveEscapeSeen = true
   108  					return "", nil, nil
   109  				}
   110  			}
   111  		}
   112  	}
   113  
   114  	lookingForDirectives = false
   115  
   116  	if line = stripComments(line); line == "" {
   117  		return "", nil, nil
   118  	}
   119  
   120  	if tokenLineContinuation.MatchString(line) {
   121  		line = tokenLineContinuation.ReplaceAllString(line, "")
   122  		return line, nil, nil
   123  	}
   124  
   125  	cmd, flags, args, err := splitCommand(line)
   126  	if err != nil {
   127  		return "", nil, err
   128  	}
   129  
   130  	node := &Node{}
   131  	node.Value = cmd
   132  
   133  	sexp, attrs, err := fullDispatch(cmd, args)
   134  	if err != nil {
   135  		return "", nil, err
   136  	}
   137  
   138  	node.Next = sexp
   139  	node.Attributes = attrs
   140  	node.Original = line
   141  	node.Flags = flags
   142  
   143  	return "", node, nil
   144  }
   145  
   146  // Parse is the main parse routine.
   147  // It handles an io.ReadWriteCloser and returns the root of the AST.
   148  func Parse(rwc io.Reader) (*Node, error) {
   149  	directiveEscapeSeen = false
   150  	lookingForDirectives = true
   151  	setTokenEscape(defaultTokenEscape) // Assume the default token for escape
   152  	currentLine := 0
   153  	root := &Node{}
   154  	root.StartLine = -1
   155  	scanner := bufio.NewScanner(rwc)
   156  
   157  	utf8bom := []byte{0xEF, 0xBB, 0xBF}
   158  	for scanner.Scan() {
   159  		scannedBytes := scanner.Bytes()
   160  		// We trim UTF8 BOM
   161  		if currentLine == 0 {
   162  			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
   163  		}
   164  		scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
   165  		currentLine++
   166  		line, child, err := ParseLine(scannedLine)
   167  		if err != nil {
   168  			return nil, err
   169  		}
   170  		startLine := currentLine
   171  
   172  		if line != "" && child == nil {
   173  			for scanner.Scan() {
   174  				newline := scanner.Text()
   175  				currentLine++
   176  
   177  				if stripComments(strings.TrimSpace(newline)) == "" {
   178  					continue
   179  				}
   180  
   181  				line, child, err = ParseLine(line + newline)
   182  				if err != nil {
   183  					return nil, err
   184  				}
   185  
   186  				if child != nil {
   187  					break
   188  				}
   189  			}
   190  			if child == nil && line != "" {
   191  				_, child, err = ParseLine(line)
   192  				if err != nil {
   193  					return nil, err
   194  				}
   195  			}
   196  		}
   197  
   198  		if child != nil {
   199  			// Update the line information for the current child.
   200  			child.StartLine = startLine
   201  			child.EndLine = currentLine
   202  			// Update the line information for the root. The starting line of the root is always the
   203  			// starting line of the first child and the ending line is the ending line of the last child.
   204  			if root.StartLine < 0 {
   205  				root.StartLine = currentLine
   206  			}
   207  			root.EndLine = currentLine
   208  			root.Children = append(root.Children, child)
   209  		}
   210  	}
   211  
   212  	return root, nil
   213  }