// Source: github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/builder/dockerfile/parser/parser.go

// Package parser implements a parser and parse tree dumper for Dockerfiles.
package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strconv"
	"strings"
	"unicode"

	"github.com/docker/docker/builder/dockerfile/command"
	"github.com/pkg/errors"
)

// Node is a structure used to represent a parse tree.
//
// The tree shape is carried by three fields: Value, Next, and Children. Value
// is the current token's string value. Next is always the next non-child
// token, and Children contains all the children. Here's an example:
//
//	(value next (child child-next child-next-next) next-next)
//
// The remaining fields (Attributes, Original, Flags, StartLine, endLine) carry
// per-node metadata and are described next to each field.
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	endLine    int             // the line in the original dockerfile where the node ends
}

// Dump dumps the AST defined by `node` as a list of sexps.
// Returns a string suitable for printing.
43 func (node *Node) Dump() string { 44 str := "" 45 str += node.Value 46 47 if len(node.Flags) > 0 { 48 str += fmt.Sprintf(" %q", node.Flags) 49 } 50 51 for _, n := range node.Children { 52 str += "(" + n.Dump() + ")\n" 53 } 54 55 for n := node.Next; n != nil; n = n.Next { 56 if len(n.Children) > 0 { 57 str += " " + n.Dump() 58 } else { 59 str += " " + strconv.Quote(n.Value) 60 } 61 } 62 63 return strings.TrimSpace(str) 64 } 65 66 func (node *Node) lines(start, end int) { 67 node.StartLine = start 68 node.endLine = end 69 } 70 71 // AddChild adds a new child node, and updates line information 72 func (node *Node) AddChild(child *Node, startLine, endLine int) { 73 child.lines(startLine, endLine) 74 if node.StartLine < 0 { 75 node.StartLine = startLine 76 } 77 node.endLine = endLine 78 node.Children = append(node.Children, child) 79 } 80 81 var ( 82 dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) 83 tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) 84 tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`) 85 tokenComment = regexp.MustCompile(`^#.*$`) 86 ) 87 88 // DefaultEscapeToken is the default escape token 89 const DefaultEscapeToken = '\\' 90 91 // Directive is the structure used during a build run to hold the state of 92 // parsing directives. 93 type Directive struct { 94 escapeToken rune // Current escape token 95 lineContinuationRegex *regexp.Regexp // Current line continuation regex 96 processingComplete bool // Whether we are done looking for directives 97 escapeSeen bool // Whether the escape directive has been seen 98 } 99 100 // setEscapeToken sets the default token for escaping characters in a Dockerfile. 101 func (d *Directive) setEscapeToken(s string) error { 102 if s != "`" && s != "\\" { 103 return fmt.Errorf("invalid ESCAPE '%s'. 
Must be ` or \\", s) 104 } 105 d.escapeToken = rune(s[0]) 106 d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) 107 return nil 108 } 109 110 // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and 111 // '# platform=<string>'. Parser directives must precede any builder instruction 112 // or other comments, and cannot be repeated. 113 func (d *Directive) possibleParserDirective(line string) error { 114 if d.processingComplete { 115 return nil 116 } 117 118 tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) 119 if len(tecMatch) != 0 { 120 for i, n := range tokenEscapeCommand.SubexpNames() { 121 if n == "escapechar" { 122 if d.escapeSeen == true { 123 return errors.New("only one escape parser directive can be used") 124 } 125 d.escapeSeen = true 126 return d.setEscapeToken(tecMatch[i]) 127 } 128 } 129 } 130 131 d.processingComplete = true 132 return nil 133 } 134 135 // NewDefaultDirective returns a new Directive with the default escapeToken token 136 func NewDefaultDirective() *Directive { 137 directive := Directive{} 138 directive.setEscapeToken(string(DefaultEscapeToken)) 139 return &directive 140 } 141 142 func init() { 143 // Dispatch Table. see line_parsers.go for the parse functions. 144 // The command is parsed and mapped to the line parser. The line parser 145 // receives the arguments but not the command, and returns an AST after 146 // reformulating the arguments according to the rules in the parser 147 // functions. Errors are propagated up by Parse() and the resulting AST can 148 // be incorporated directly into the existing AST as a next. 
149 dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ 150 command.Add: parseMaybeJSONToList, 151 command.Arg: parseNameOrNameVal, 152 command.Cmd: parseMaybeJSON, 153 command.Copy: parseMaybeJSONToList, 154 command.Entrypoint: parseMaybeJSON, 155 command.Env: parseEnv, 156 command.Expose: parseStringsWhitespaceDelimited, 157 command.From: parseStringsWhitespaceDelimited, 158 command.Healthcheck: parseHealthConfig, 159 command.Label: parseLabel, 160 command.Maintainer: parseString, 161 command.Onbuild: parseSubCommand, 162 command.Run: parseMaybeJSON, 163 command.Shell: parseMaybeJSON, 164 command.StopSignal: parseString, 165 command.User: parseString, 166 command.Volume: parseMaybeJSONToList, 167 command.Workdir: parseString, 168 } 169 } 170 171 // newNodeFromLine splits the line into parts, and dispatches to a function 172 // based on the command and command arguments. A Node is created from the 173 // result of the dispatch. 174 func newNodeFromLine(line string, directive *Directive) (*Node, error) { 175 cmd, flags, args, err := splitCommand(line) 176 if err != nil { 177 return nil, err 178 } 179 180 fn := dispatch[cmd] 181 // Ignore invalid Dockerfile instructions 182 if fn == nil { 183 fn = parseIgnore 184 } 185 next, attrs, err := fn(args, directive) 186 if err != nil { 187 return nil, err 188 } 189 190 return &Node{ 191 Value: cmd, 192 Original: line, 193 Flags: flags, 194 Next: next, 195 Attributes: attrs, 196 }, nil 197 } 198 199 // Result is the result of parsing a Dockerfile 200 type Result struct { 201 AST *Node 202 EscapeToken rune 203 } 204 205 // Parse reads lines from a Reader, parses the lines into an AST and returns 206 // the AST and escape token 207 func Parse(rwc io.Reader) (*Result, error) { 208 d := NewDefaultDirective() 209 currentLine := 0 210 root := &Node{StartLine: -1} 211 scanner := bufio.NewScanner(rwc) 212 213 var err error 214 for scanner.Scan() { 215 bytesRead := scanner.Bytes() 216 if currentLine == 0 { 
217 // First line, strip the byte-order-marker if present 218 bytesRead = bytes.TrimPrefix(bytesRead, utf8bom) 219 } 220 bytesRead, err = processLine(d, bytesRead, true) 221 if err != nil { 222 return nil, err 223 } 224 currentLine++ 225 226 startLine := currentLine 227 line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d) 228 if isEndOfLine && line == "" { 229 continue 230 } 231 232 for !isEndOfLine && scanner.Scan() { 233 bytesRead, err := processLine(d, scanner.Bytes(), false) 234 if err != nil { 235 return nil, err 236 } 237 currentLine++ 238 239 // TODO: warn this is being deprecated/removed 240 if isEmptyContinuationLine(bytesRead) { 241 continue 242 } 243 244 continuationLine := string(bytesRead) 245 continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d) 246 line += continuationLine 247 } 248 249 child, err := newNodeFromLine(line, d) 250 if err != nil { 251 return nil, err 252 } 253 root.AddChild(child, startLine, currentLine) 254 } 255 return &Result{AST: root, EscapeToken: d.escapeToken}, nil 256 } 257 258 func trimComments(src []byte) []byte { 259 return tokenComment.ReplaceAll(src, []byte{}) 260 } 261 262 func trimWhitespace(src []byte) []byte { 263 return bytes.TrimLeftFunc(src, unicode.IsSpace) 264 } 265 266 func isEmptyContinuationLine(line []byte) bool { 267 return len(trimComments(trimWhitespace(line))) == 0 268 } 269 270 var utf8bom = []byte{0xEF, 0xBB, 0xBF} 271 272 func trimContinuationCharacter(line string, d *Directive) (string, bool) { 273 if d.lineContinuationRegex.MatchString(line) { 274 line = d.lineContinuationRegex.ReplaceAllString(line, "") 275 return line, false 276 } 277 return line, true 278 } 279 280 // TODO: remove stripLeftWhitespace after deprecation period. It seems silly 281 // to preserve whitespace on continuation lines. Why is that done? 
282 func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) { 283 if stripLeftWhitespace { 284 token = trimWhitespace(token) 285 } 286 err := d.possibleParserDirective(string(token)) 287 return trimComments(token), err 288 }