github.com/sealerio/sealer@v0.11.1-0.20240507115618-f4f89c5853ae/build/kubefile/parser/parse.go (about) 1 // Copyright © 2022 Alibaba Group Holding Ltd. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package parser implements a parser and parse tree dumper for Dockerfiles. 16 package parser 17 18 import ( 19 "bufio" 20 "bytes" 21 "fmt" 22 "io" 23 "regexp" 24 "runtime" 25 "strconv" 26 "strings" 27 "unicode" 28 29 "github.com/docker/docker/pkg/system" 30 "github.com/pkg/errors" 31 32 "github.com/sealerio/sealer/build/kubefile/command" 33 ) 34 35 // Node is a structure used to represent a parse tree. 36 // 37 // In the node there are three fields, Value, Next, and Children. Value is the 38 // current token's string value. Next is always the next non-child token, and 39 // children contains all the children. Here's an example: 40 // 41 // (value next (child child-next child-next-next) next-next) 42 // 43 // This data structure is frankly pretty lousy for handling complex languages, 44 // but lucky for us the Dockerfile isn't very complicated. This structure 45 // works a little more effectively than a "proper" parse tree for our needs. 46 type Node struct { 47 Value string // actual content 48 Next *Node // the next item in the current sexp 49 Children []*Node // the children of this sexp 50 Attributes map[string]bool // special attributes for this node 51 Original string // original line used before parsing 52 Flags []string // only top Node should have this set 53 StartLine int // the line in the original dockerfile where the node begins 54 EndLine int // the line in the original dockerfile where the node ends 55 } 56 57 // Dump dumps the AST defined by `node` as a list of sexps. 58 // Returns a string suitable for printing. 59 func (node *Node) Dump() string { 60 str := "" 61 str += node.Value 62 63 if len(node.Flags) > 0 { 64 str += fmt.Sprintf(" %q", node.Flags) 65 } 66 67 for _, n := range node.Children { 68 str += "(" + n.Dump() + ")\n" 69 } 70 71 for n := node.Next; n != nil; n = n.Next { 72 if len(n.Children) > 0 { 73 str += " " + n.Dump() 74 } else { 75 str += " " + strconv.Quote(n.Value) 76 } 77 } 78 79 return strings.TrimSpace(str) 80 } 81 82 func (node *Node) lines(start, end int) { 83 node.StartLine = start 84 node.EndLine = end 85 } 86 87 // AddChild adds a new child node, and updates line information 88 func (node *Node) AddChild(child *Node, startLine, endLine int) { 89 child.lines(startLine, endLine) 90 if node.StartLine < 0 { 91 node.StartLine = startLine 92 } 93 node.EndLine = endLine 94 node.Children = append(node.Children, child) 95 } 96 97 var ( 98 dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) 99 tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) 100 tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`) 101 tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`) 102 tokenComment = regexp.MustCompile(`^#.*$`) 103 ) 104 105 // DefaultEscapeToken is the default escape token 106 const DefaultEscapeToken = '\\' 107 108 // defaultPlatformToken is the platform assumed for the build if not explicitly provided 109 var defaultPlatformToken = runtime.GOOS 110 111 // Directive is the structure used during a build run to hold the state of 112 // parsing directives. 113 type Directive struct { 114 escapeToken rune // Current escape token 115 platformToken string // Current platform token 116 lineContinuationRegex *regexp.Regexp // Current line continuation regex 117 processingComplete bool // Whether we are done looking for directives 118 escapeSeen bool // Whether the escape directive has been seen 119 platformSeen bool // Whether the platform directive has been seen 120 } 121 122 // setEscapeToken sets the default token for escaping characters in a Dockerfile. 123 func (d *Directive) setEscapeToken(s string) error { 124 if s != "`" && s != "\\" { 125 return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) 126 } 127 d.escapeToken = rune(s[0]) 128 d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) 129 return nil 130 } 131 132 // setPlatformToken sets the default platform for pulling images in a Dockerfile. 133 func (d *Directive) setPlatformToken(s string) error { 134 s = strings.ToLower(s) 135 valid := []string{runtime.GOOS} 136 if system.LCOWSupported() { 137 valid = append(valid, "linux") 138 } 139 for _, item := range valid { 140 if s == item { 141 d.platformToken = s 142 return nil 143 } 144 } 145 return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid) 146 } 147 148 // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and 149 // '# platform=<string>'. Parser directives must precede any builder instruction 150 // or other comments, and cannot be repeated. 151 func (d *Directive) possibleParserDirective(line string) error { 152 if d.processingComplete { 153 return nil 154 } 155 156 tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) 157 if len(tecMatch) != 0 { 158 for i, n := range tokenEscapeCommand.SubexpNames() { 159 if n == "escapechar" { 160 if d.escapeSeen { 161 return errors.New("only one escape parser directive can be used") 162 } 163 d.escapeSeen = true 164 return d.setEscapeToken(tecMatch[i]) 165 } 166 } 167 } 168 169 // TODO @jhowardmsft LCOW Support: Eventually this check can be removed, 170 // but only recognise a platform token if running in LCOW mode. 171 if system.LCOWSupported() { 172 tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line)) 173 if len(tpcMatch) != 0 { 174 for i, n := range tokenPlatformCommand.SubexpNames() { 175 if n == "platform" { 176 if d.platformSeen { 177 return errors.New("only one platform parser directive can be used") 178 } 179 d.platformSeen = true 180 return d.setPlatformToken(tpcMatch[i]) 181 } 182 } 183 } 184 } 185 186 d.processingComplete = true 187 return nil 188 } 189 190 // NewDefaultDirective returns a new Directive with the default escapeToken token 191 func NewDefaultDirective() *Directive { 192 directive := Directive{} 193 // #nosec 194 _ = directive.setEscapeToken(string(DefaultEscapeToken)) 195 // #nosec 196 _ = directive.setPlatformToken(defaultPlatformToken) 197 return &directive 198 } 199 200 func init() { 201 // Dispatch Table. see line_parsers.go for the parse functions. 202 // The command is parsed and mapped to the line parser. The line parser 203 // receives the arguments but not the command, and returns an AST after 204 // reformulating the arguments according to the rules in the parser 205 // functions. Errors are propagated up by Parse() and the resulting AST can 206 // be incorporated directly into the existing AST as a next. 207 dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ 208 command.Add: parseMaybeJSONToList, 209 command.Arg: parseNameOrNameVal, 210 command.Copy: parseMaybeJSONToList, 211 command.From: parseStringsWhitespaceDelimited, 212 command.Label: parseLabel, 213 command.Maintainer: parseString, 214 command.Run: parseMaybeJSON, 215 command.App: parseMaybeJSONToList, 216 command.AppCmds: parseMaybeJSONToList, 217 command.Env: parseNameOrNameVal, 218 command.AppEnv: parseMaybeJSONToList, 219 command.KUBEVERSION: parseString, 220 command.CNI: parseMaybeJSONToList, 221 command.CSI: parseMaybeJSONToList, 222 command.Launch: parseMaybeJSONToList, 223 command.Cmds: parseMaybeJSONToList, 224 command.Cmd: parseMaybeJSONToList, 225 } 226 } 227 228 // newNodeFromLine splits the line into parts, and dispatches to a function 229 // based on the command and command arguments. A Node is created from the 230 // result of the dispatch. 231 func newNodeFromLine(line string, directive *Directive) (*Node, error) { 232 cmd, flags, args, err := splitCommand(line) 233 if err != nil { 234 return nil, err 235 } 236 237 fn := dispatch[cmd] 238 // Ignore invalid Dockerfile instructions 239 if fn == nil { 240 fn = parseIgnore 241 } 242 next, attrs, err := fn(args, directive) 243 if err != nil { 244 return nil, err 245 } 246 247 return &Node{ 248 Value: cmd, 249 Original: line, 250 Flags: flags, 251 Next: next, 252 Attributes: attrs, 253 }, nil 254 } 255 256 // Result is the result of parsing a Dockerfile 257 type Result struct { 258 AST *Node 259 EscapeToken rune 260 Platform string 261 Warnings []string 262 } 263 264 // PrintWarnings to the writer 265 func (r *Result) PrintWarnings(out io.Writer) { 266 if len(r.Warnings) == 0 { 267 return 268 } 269 fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n") 270 } 271 272 // Parse reads lines from a Reader, parses the lines into an AST and returns 273 // the AST and escape token 274 func parse(rwc io.Reader) (*Result, error) { 275 d := NewDefaultDirective() 276 currentLine := 0 277 root := &Node{StartLine: -1} 278 scanner := bufio.NewScanner(rwc) 279 warnings := []string{} 280 281 var err error 282 for scanner.Scan() { 283 bytesRead := scanner.Bytes() 284 if currentLine == 0 { 285 // First line, strip the byte-order-marker if present 286 bytesRead = bytes.TrimPrefix(bytesRead, utf8bom) 287 } 288 bytesRead, err = processLine(d, bytesRead, true) 289 if err != nil { 290 return nil, err 291 } 292 currentLine++ 293 294 startLine := currentLine 295 line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d) 296 if isEndOfLine && line == "" { 297 continue 298 } 299 300 var hasEmptyContinuationLine bool 301 for !isEndOfLine && scanner.Scan() { 302 bytesRead, err := processLine(d, scanner.Bytes(), false) 303 if err != nil { 304 return nil, err 305 } 306 currentLine++ 307 308 if isEmptyContinuationLine(bytesRead) { 309 hasEmptyContinuationLine = true 310 continue 311 } 312 313 continuationLine := string(bytesRead) 314 continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d) 315 line += continuationLine 316 } 317 318 if hasEmptyContinuationLine { 319 warning := "[WARNING]: Empty continuation line found in:\n " + line 320 warnings = append(warnings, warning) 321 } 322 323 child, err := newNodeFromLine(line, d) 324 if err != nil { 325 return nil, err 326 } 327 root.AddChild(child, startLine, currentLine) 328 } 329 330 if len(warnings) > 0 { 331 warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.") 332 } 333 return &Result{ 334 AST: root, 335 Warnings: warnings, 336 EscapeToken: d.escapeToken, 337 Platform: d.platformToken, 338 }, nil 339 } 340 341 func trimComments(src []byte) []byte { 342 return tokenComment.ReplaceAll(src, []byte{}) 343 } 344 345 func trimWhitespace(src []byte) []byte { 346 return bytes.TrimLeftFunc(src, unicode.IsSpace) 347 } 348 349 func isEmptyContinuationLine(line []byte) bool { 350 return len(trimComments(trimWhitespace(line))) == 0 351 } 352 353 var utf8bom = []byte{0xEF, 0xBB, 0xBF} 354 355 func trimContinuationCharacter(line string, d *Directive) (string, bool) { 356 if d.lineContinuationRegex.MatchString(line) { 357 line = d.lineContinuationRegex.ReplaceAllString(line, "") 358 return line, false 359 } 360 return line, true 361 } 362 363 // TODO: remove stripLeftWhitespace after deprecation period. It seems silly 364 // to preserve whitespace on continuation lines. Why is that done? 365 func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) { 366 if stripLeftWhitespace { 367 token = trimWhitespace(token) 368 } 369 return trimComments(token), d.possibleParserDirective(string(token)) 370 }