github.com/lazyboychen7/engine@v17.12.1-ce-rc2+incompatible/builder/dockerfile/parser/parser.go (about) 1 // Package parser implements a parser and parse tree dumper for Dockerfiles. 2 package parser 3 4 import ( 5 "bufio" 6 "bytes" 7 "fmt" 8 "io" 9 "regexp" 10 "runtime" 11 "strconv" 12 "strings" 13 "unicode" 14 15 "github.com/docker/docker/builder/dockerfile/command" 16 "github.com/docker/docker/pkg/system" 17 "github.com/pkg/errors" 18 ) 19 20 // Node is a structure used to represent a parse tree. 21 // 22 // In the node there are three fields, Value, Next, and Children. Value is the 23 // current token's string value. Next is always the next non-child token, and 24 // children contains all the children. Here's an example: 25 // 26 // (value next (child child-next child-next-next) next-next) 27 // 28 // This data structure is frankly pretty lousy for handling complex languages, 29 // but lucky for us the Dockerfile isn't very complicated. This structure 30 // works a little more effectively than a "proper" parse tree for our needs. 31 // 32 type Node struct { 33 Value string // actual content 34 Next *Node // the next item in the current sexp 35 Children []*Node // the children of this sexp 36 Attributes map[string]bool // special attributes for this node 37 Original string // original line used before parsing 38 Flags []string // only top Node should have this set 39 StartLine int // the line in the original dockerfile where the node begins 40 endLine int // the line in the original dockerfile where the node ends 41 } 42 43 // Dump dumps the AST defined by `node` as a list of sexps. 44 // Returns a string suitable for printing. 45 func (node *Node) Dump() string { 46 str := "" 47 str += node.Value 48 49 if len(node.Flags) > 0 { 50 str += fmt.Sprintf(" %q", node.Flags) 51 } 52 53 for _, n := range node.Children { 54 str += "(" + n.Dump() + ")\n" 55 } 56 57 for n := node.Next; n != nil; n = n.Next { 58 if len(n.Children) > 0 { 59 str += " " + n.Dump() 60 } else { 61 str += " " + strconv.Quote(n.Value) 62 } 63 } 64 65 return strings.TrimSpace(str) 66 } 67 68 func (node *Node) lines(start, end int) { 69 node.StartLine = start 70 node.endLine = end 71 } 72 73 // AddChild adds a new child node, and updates line information 74 func (node *Node) AddChild(child *Node, startLine, endLine int) { 75 child.lines(startLine, endLine) 76 if node.StartLine < 0 { 77 node.StartLine = startLine 78 } 79 node.endLine = endLine 80 node.Children = append(node.Children, child) 81 } 82 83 var ( 84 dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) 85 tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) 86 tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`) 87 tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`) 88 tokenComment = regexp.MustCompile(`^#.*$`) 89 ) 90 91 // DefaultEscapeToken is the default escape token 92 const DefaultEscapeToken = '\\' 93 94 // Directive is the structure used during a build run to hold the state of 95 // parsing directives. 96 type Directive struct { 97 escapeToken rune // Current escape token 98 platformToken string // Current platform token 99 lineContinuationRegex *regexp.Regexp // Current line continuation regex 100 processingComplete bool // Whether we are done looking for directives 101 escapeSeen bool // Whether the escape directive has been seen 102 platformSeen bool // Whether the platform directive has been seen 103 } 104 105 // setEscapeToken sets the default token for escaping characters in a Dockerfile. 106 func (d *Directive) setEscapeToken(s string) error { 107 if s != "`" && s != "\\" { 108 return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) 109 } 110 d.escapeToken = rune(s[0]) 111 d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) 112 return nil 113 } 114 115 // setPlatformToken sets the default platform for pulling images in a Dockerfile. 116 func (d *Directive) setPlatformToken(s string) error { 117 s = strings.ToLower(s) 118 valid := []string{runtime.GOOS} 119 if system.LCOWSupported() { 120 valid = append(valid, "linux") 121 } 122 for _, item := range valid { 123 if s == item { 124 d.platformToken = s 125 return nil 126 } 127 } 128 return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid) 129 } 130 131 // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and 132 // '# platform=<string>'. Parser directives must precede any builder instruction 133 // or other comments, and cannot be repeated. 134 func (d *Directive) possibleParserDirective(line string) error { 135 if d.processingComplete { 136 return nil 137 } 138 139 tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) 140 if len(tecMatch) != 0 { 141 for i, n := range tokenEscapeCommand.SubexpNames() { 142 if n == "escapechar" { 143 if d.escapeSeen { 144 return errors.New("only one escape parser directive can be used") 145 } 146 d.escapeSeen = true 147 return d.setEscapeToken(tecMatch[i]) 148 } 149 } 150 } 151 152 // Only recognise a platform token if LCOW is supported 153 if system.LCOWSupported() { 154 tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line)) 155 if len(tpcMatch) != 0 { 156 for i, n := range tokenPlatformCommand.SubexpNames() { 157 if n == "platform" { 158 if d.platformSeen { 159 return errors.New("only one platform parser directive can be used") 160 } 161 d.platformSeen = true 162 return d.setPlatformToken(tpcMatch[i]) 163 } 164 } 165 } 166 } 167 168 d.processingComplete = true 169 return nil 170 } 171 172 // NewDefaultDirective returns a new Directive with the default escapeToken token 173 func NewDefaultDirective() *Directive { 174 directive := Directive{} 175 directive.setEscapeToken(string(DefaultEscapeToken)) 176 return &directive 177 } 178 179 func init() { 180 // Dispatch Table. see line_parsers.go for the parse functions. 181 // The command is parsed and mapped to the line parser. The line parser 182 // receives the arguments but not the command, and returns an AST after 183 // reformulating the arguments according to the rules in the parser 184 // functions. Errors are propagated up by Parse() and the resulting AST can 185 // be incorporated directly into the existing AST as a next. 186 dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ 187 command.Add: parseMaybeJSONToList, 188 command.Arg: parseNameOrNameVal, 189 command.Cmd: parseMaybeJSON, 190 command.Copy: parseMaybeJSONToList, 191 command.Entrypoint: parseMaybeJSON, 192 command.Env: parseEnv, 193 command.Expose: parseStringsWhitespaceDelimited, 194 command.From: parseStringsWhitespaceDelimited, 195 command.Healthcheck: parseHealthConfig, 196 command.Label: parseLabel, 197 command.Maintainer: parseString, 198 command.Onbuild: parseSubCommand, 199 command.Run: parseMaybeJSON, 200 command.Shell: parseMaybeJSON, 201 command.StopSignal: parseString, 202 command.User: parseString, 203 command.Volume: parseMaybeJSONToList, 204 command.Workdir: parseString, 205 } 206 } 207 208 // newNodeFromLine splits the line into parts, and dispatches to a function 209 // based on the command and command arguments. A Node is created from the 210 // result of the dispatch. 211 func newNodeFromLine(line string, directive *Directive) (*Node, error) { 212 cmd, flags, args, err := splitCommand(line) 213 if err != nil { 214 return nil, err 215 } 216 217 fn := dispatch[cmd] 218 // Ignore invalid Dockerfile instructions 219 if fn == nil { 220 fn = parseIgnore 221 } 222 next, attrs, err := fn(args, directive) 223 if err != nil { 224 return nil, err 225 } 226 227 return &Node{ 228 Value: cmd, 229 Original: line, 230 Flags: flags, 231 Next: next, 232 Attributes: attrs, 233 }, nil 234 } 235 236 // Result is the result of parsing a Dockerfile 237 type Result struct { 238 AST *Node 239 EscapeToken rune 240 // TODO @jhowardmsft - see https://github.com/moby/moby/issues/34617 241 // This next field will be removed in a future update for LCOW support. 242 OS string 243 Warnings []string 244 } 245 246 // PrintWarnings to the writer 247 func (r *Result) PrintWarnings(out io.Writer) { 248 if len(r.Warnings) == 0 { 249 return 250 } 251 fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n") 252 } 253 254 // Parse reads lines from a Reader, parses the lines into an AST and returns 255 // the AST and escape token 256 func Parse(rwc io.Reader) (*Result, error) { 257 d := NewDefaultDirective() 258 currentLine := 0 259 root := &Node{StartLine: -1} 260 scanner := bufio.NewScanner(rwc) 261 warnings := []string{} 262 263 var err error 264 for scanner.Scan() { 265 bytesRead := scanner.Bytes() 266 if currentLine == 0 { 267 // First line, strip the byte-order-marker if present 268 bytesRead = bytes.TrimPrefix(bytesRead, utf8bom) 269 } 270 bytesRead, err = processLine(d, bytesRead, true) 271 if err != nil { 272 return nil, err 273 } 274 currentLine++ 275 276 startLine := currentLine 277 line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d) 278 if isEndOfLine && line == "" { 279 continue 280 } 281 282 var hasEmptyContinuationLine bool 283 for !isEndOfLine && scanner.Scan() { 284 bytesRead, err := processLine(d, scanner.Bytes(), false) 285 if err != nil { 286 return nil, err 287 } 288 currentLine++ 289 290 if isComment(scanner.Bytes()) { 291 // original line was a comment (processLine strips comments) 292 continue 293 } 294 if isEmptyContinuationLine(bytesRead) { 295 hasEmptyContinuationLine = true 296 continue 297 } 298 299 continuationLine := string(bytesRead) 300 continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d) 301 line += continuationLine 302 } 303 304 if hasEmptyContinuationLine { 305 warning := "[WARNING]: Empty continuation line found in:\n " + line 306 warnings = append(warnings, warning) 307 } 308 309 child, err := newNodeFromLine(line, d) 310 if err != nil { 311 return nil, err 312 } 313 root.AddChild(child, startLine, currentLine) 314 } 315 316 if len(warnings) > 0 { 317 warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.") 318 } 319 return &Result{ 320 AST: root, 321 Warnings: warnings, 322 EscapeToken: d.escapeToken, 323 OS: d.platformToken, 324 }, handleScannerError(scanner.Err()) 325 } 326 327 func trimComments(src []byte) []byte { 328 return tokenComment.ReplaceAll(src, []byte{}) 329 } 330 331 func trimWhitespace(src []byte) []byte { 332 return bytes.TrimLeftFunc(src, unicode.IsSpace) 333 } 334 335 func isComment(line []byte) bool { 336 return tokenComment.Match(trimWhitespace(line)) 337 } 338 339 func isEmptyContinuationLine(line []byte) bool { 340 return len(trimWhitespace(line)) == 0 341 } 342 343 var utf8bom = []byte{0xEF, 0xBB, 0xBF} 344 345 func trimContinuationCharacter(line string, d *Directive) (string, bool) { 346 if d.lineContinuationRegex.MatchString(line) { 347 line = d.lineContinuationRegex.ReplaceAllString(line, "") 348 return line, false 349 } 350 return line, true 351 } 352 353 // TODO: remove stripLeftWhitespace after deprecation period. It seems silly 354 // to preserve whitespace on continuation lines. Why is that done? 355 func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) { 356 if stripLeftWhitespace { 357 token = trimWhitespace(token) 358 } 359 return trimComments(token), d.possibleParserDirective(string(token)) 360 } 361 362 func handleScannerError(err error) error { 363 switch err { 364 case bufio.ErrTooLong: 365 return errors.Errorf("dockerfile line greater than max allowed size of %d", bufio.MaxScanTokenSize-1) 366 default: 367 return err 368 } 369 }