github.com/sealerio/sealer@v0.11.1-0.20240507115618-f4f89c5853ae/build/kubefile/parser/parse.go (about)

     1  // Copyright © 2022 Alibaba Group Holding Ltd.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package parser implements a parser and parse tree dumper for Dockerfiles.
    16  package parser
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"fmt"
    22  	"io"
    23  	"regexp"
    24  	"runtime"
    25  	"strconv"
    26  	"strings"
    27  	"unicode"
    28  
    29  	"github.com/docker/docker/pkg/system"
    30  	"github.com/pkg/errors"
    31  
    32  	"github.com/sealerio/sealer/build/kubefile/command"
    33  )
    34  
    35  // Node is a structure used to represent a parse tree.
    36  //
    37  // In the node there are three fields, Value, Next, and Children. Value is the
    38  // current token's string value. Next is always the next non-child token, and
    39  // children contains all the children. Here's an example:
    40  //
    41  // (value next (child child-next child-next-next) next-next)
    42  //
    43  // This data structure is frankly pretty lousy for handling complex languages,
    44  // but lucky for us the Dockerfile isn't very complicated. This structure
    45  // works a little more effectively than a "proper" parse tree for our needs.
    46  type Node struct {
    47  	Value      string          // actual content
    48  	Next       *Node           // the next item in the current sexp
    49  	Children   []*Node         // the children of this sexp
    50  	Attributes map[string]bool // special attributes for this node
    51  	Original   string          // original line used before parsing
    52  	Flags      []string        // only top Node should have this set
    53  	StartLine  int             // the line in the original dockerfile where the node begins
    54  	EndLine    int             // the line in the original dockerfile where the node ends
    55  }
    56  
    57  // Dump dumps the AST defined by `node` as a list of sexps.
    58  // Returns a string suitable for printing.
    59  func (node *Node) Dump() string {
    60  	str := ""
    61  	str += node.Value
    62  
    63  	if len(node.Flags) > 0 {
    64  		str += fmt.Sprintf(" %q", node.Flags)
    65  	}
    66  
    67  	for _, n := range node.Children {
    68  		str += "(" + n.Dump() + ")\n"
    69  	}
    70  
    71  	for n := node.Next; n != nil; n = n.Next {
    72  		if len(n.Children) > 0 {
    73  			str += " " + n.Dump()
    74  		} else {
    75  			str += " " + strconv.Quote(n.Value)
    76  		}
    77  	}
    78  
    79  	return strings.TrimSpace(str)
    80  }
    81  
    82  func (node *Node) lines(start, end int) {
    83  	node.StartLine = start
    84  	node.EndLine = end
    85  }
    86  
    87  // AddChild adds a new child node, and updates line information
    88  func (node *Node) AddChild(child *Node, startLine, endLine int) {
    89  	child.lines(startLine, endLine)
    90  	if node.StartLine < 0 {
    91  		node.StartLine = startLine
    92  	}
    93  	node.EndLine = endLine
    94  	node.Children = append(node.Children, child)
    95  }
    96  
    97  var (
    98  	dispatch             map[string]func(string, *Directive) (*Node, map[string]bool, error)
    99  	tokenWhitespace      = regexp.MustCompile(`[\t\v\f\r ]+`)
   100  	tokenEscapeCommand   = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
   101  	tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`)
   102  	tokenComment         = regexp.MustCompile(`^#.*$`)
   103  )
   104  
   105  // DefaultEscapeToken is the default escape token
   106  const DefaultEscapeToken = '\\'
   107  
   108  // defaultPlatformToken is the platform assumed for the build if not explicitly provided
   109  var defaultPlatformToken = runtime.GOOS
   110  
   111  // Directive is the structure used during a build run to hold the state of
   112  // parsing directives.
   113  type Directive struct {
   114  	escapeToken           rune           // Current escape token
   115  	platformToken         string         // Current platform token
   116  	lineContinuationRegex *regexp.Regexp // Current line continuation regex
   117  	processingComplete    bool           // Whether we are done looking for directives
   118  	escapeSeen            bool           // Whether the escape directive has been seen
   119  	platformSeen          bool           // Whether the platform directive has been seen
   120  }
   121  
   122  // setEscapeToken sets the default token for escaping characters in a Dockerfile.
   123  func (d *Directive) setEscapeToken(s string) error {
   124  	if s != "`" && s != "\\" {
   125  		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
   126  	}
   127  	d.escapeToken = rune(s[0])
   128  	d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
   129  	return nil
   130  }
   131  
   132  // setPlatformToken sets the default platform for pulling images in a Dockerfile.
   133  func (d *Directive) setPlatformToken(s string) error {
   134  	s = strings.ToLower(s)
   135  	valid := []string{runtime.GOOS}
   136  	if system.LCOWSupported() {
   137  		valid = append(valid, "linux")
   138  	}
   139  	for _, item := range valid {
   140  		if s == item {
   141  			d.platformToken = s
   142  			return nil
   143  		}
   144  	}
   145  	return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid)
   146  }
   147  
   148  // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and
   149  // '# platform=<string>'. Parser directives must precede any builder instruction
   150  // or other comments, and cannot be repeated.
   151  func (d *Directive) possibleParserDirective(line string) error {
   152  	if d.processingComplete {
   153  		return nil
   154  	}
   155  
   156  	tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
   157  	if len(tecMatch) != 0 {
   158  		for i, n := range tokenEscapeCommand.SubexpNames() {
   159  			if n == "escapechar" {
   160  				if d.escapeSeen {
   161  					return errors.New("only one escape parser directive can be used")
   162  				}
   163  				d.escapeSeen = true
   164  				return d.setEscapeToken(tecMatch[i])
   165  			}
   166  		}
   167  	}
   168  
   169  	// TODO @jhowardmsft LCOW Support: Eventually this check can be removed,
   170  	// but only recognise a platform token if running in LCOW mode.
   171  	if system.LCOWSupported() {
   172  		tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line))
   173  		if len(tpcMatch) != 0 {
   174  			for i, n := range tokenPlatformCommand.SubexpNames() {
   175  				if n == "platform" {
   176  					if d.platformSeen {
   177  						return errors.New("only one platform parser directive can be used")
   178  					}
   179  					d.platformSeen = true
   180  					return d.setPlatformToken(tpcMatch[i])
   181  				}
   182  			}
   183  		}
   184  	}
   185  
   186  	d.processingComplete = true
   187  	return nil
   188  }
   189  
   190  // NewDefaultDirective returns a new Directive with the default escapeToken token
   191  func NewDefaultDirective() *Directive {
   192  	directive := Directive{}
   193  	// #nosec
   194  	_ = directive.setEscapeToken(string(DefaultEscapeToken))
   195  	// #nosec
   196  	_ = directive.setPlatformToken(defaultPlatformToken)
   197  	return &directive
   198  }
   199  
   200  func init() {
   201  	// Dispatch Table. see line_parsers.go for the parse functions.
   202  	// The command is parsed and mapped to the line parser. The line parser
   203  	// receives the arguments but not the command, and returns an AST after
   204  	// reformulating the arguments according to the rules in the parser
   205  	// functions. Errors are propagated up by Parse() and the resulting AST can
   206  	// be incorporated directly into the existing AST as a next.
   207  	dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
   208  		command.Add:         parseMaybeJSONToList,
   209  		command.Arg:         parseNameOrNameVal,
   210  		command.Copy:        parseMaybeJSONToList,
   211  		command.From:        parseStringsWhitespaceDelimited,
   212  		command.Label:       parseLabel,
   213  		command.Maintainer:  parseString,
   214  		command.Run:         parseMaybeJSON,
   215  		command.App:         parseMaybeJSONToList,
   216  		command.AppCmds:     parseMaybeJSONToList,
   217  		command.Env:         parseNameOrNameVal,
   218  		command.AppEnv:      parseMaybeJSONToList,
   219  		command.KUBEVERSION: parseString,
   220  		command.CNI:         parseMaybeJSONToList,
   221  		command.CSI:         parseMaybeJSONToList,
   222  		command.Launch:      parseMaybeJSONToList,
   223  		command.Cmds:        parseMaybeJSONToList,
   224  		command.Cmd:         parseMaybeJSONToList,
   225  	}
   226  }
   227  
   228  // newNodeFromLine splits the line into parts, and dispatches to a function
   229  // based on the command and command arguments. A Node is created from the
   230  // result of the dispatch.
   231  func newNodeFromLine(line string, directive *Directive) (*Node, error) {
   232  	cmd, flags, args, err := splitCommand(line)
   233  	if err != nil {
   234  		return nil, err
   235  	}
   236  
   237  	fn := dispatch[cmd]
   238  	// Ignore invalid Dockerfile instructions
   239  	if fn == nil {
   240  		fn = parseIgnore
   241  	}
   242  	next, attrs, err := fn(args, directive)
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	return &Node{
   248  		Value:      cmd,
   249  		Original:   line,
   250  		Flags:      flags,
   251  		Next:       next,
   252  		Attributes: attrs,
   253  	}, nil
   254  }
   255  
   256  // Result is the result of parsing a Dockerfile
   257  type Result struct {
   258  	AST         *Node
   259  	EscapeToken rune
   260  	Platform    string
   261  	Warnings    []string
   262  }
   263  
   264  // PrintWarnings to the writer
   265  func (r *Result) PrintWarnings(out io.Writer) {
   266  	if len(r.Warnings) == 0 {
   267  		return
   268  	}
   269  	fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n")
   270  }
   271  
   272  // Parse reads lines from a Reader, parses the lines into an AST and returns
   273  // the AST and escape token
   274  func parse(rwc io.Reader) (*Result, error) {
   275  	d := NewDefaultDirective()
   276  	currentLine := 0
   277  	root := &Node{StartLine: -1}
   278  	scanner := bufio.NewScanner(rwc)
   279  	warnings := []string{}
   280  
   281  	var err error
   282  	for scanner.Scan() {
   283  		bytesRead := scanner.Bytes()
   284  		if currentLine == 0 {
   285  			// First line, strip the byte-order-marker if present
   286  			bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
   287  		}
   288  		bytesRead, err = processLine(d, bytesRead, true)
   289  		if err != nil {
   290  			return nil, err
   291  		}
   292  		currentLine++
   293  
   294  		startLine := currentLine
   295  		line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
   296  		if isEndOfLine && line == "" {
   297  			continue
   298  		}
   299  
   300  		var hasEmptyContinuationLine bool
   301  		for !isEndOfLine && scanner.Scan() {
   302  			bytesRead, err := processLine(d, scanner.Bytes(), false)
   303  			if err != nil {
   304  				return nil, err
   305  			}
   306  			currentLine++
   307  
   308  			if isEmptyContinuationLine(bytesRead) {
   309  				hasEmptyContinuationLine = true
   310  				continue
   311  			}
   312  
   313  			continuationLine := string(bytesRead)
   314  			continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
   315  			line += continuationLine
   316  		}
   317  
   318  		if hasEmptyContinuationLine {
   319  			warning := "[WARNING]: Empty continuation line found in:\n    " + line
   320  			warnings = append(warnings, warning)
   321  		}
   322  
   323  		child, err := newNodeFromLine(line, d)
   324  		if err != nil {
   325  			return nil, err
   326  		}
   327  		root.AddChild(child, startLine, currentLine)
   328  	}
   329  
   330  	if len(warnings) > 0 {
   331  		warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.")
   332  	}
   333  	return &Result{
   334  		AST:         root,
   335  		Warnings:    warnings,
   336  		EscapeToken: d.escapeToken,
   337  		Platform:    d.platformToken,
   338  	}, nil
   339  }
   340  
   341  func trimComments(src []byte) []byte {
   342  	return tokenComment.ReplaceAll(src, []byte{})
   343  }
   344  
   345  func trimWhitespace(src []byte) []byte {
   346  	return bytes.TrimLeftFunc(src, unicode.IsSpace)
   347  }
   348  
   349  func isEmptyContinuationLine(line []byte) bool {
   350  	return len(trimComments(trimWhitespace(line))) == 0
   351  }
   352  
   353  var utf8bom = []byte{0xEF, 0xBB, 0xBF}
   354  
   355  func trimContinuationCharacter(line string, d *Directive) (string, bool) {
   356  	if d.lineContinuationRegex.MatchString(line) {
   357  		line = d.lineContinuationRegex.ReplaceAllString(line, "")
   358  		return line, false
   359  	}
   360  	return line, true
   361  }
   362  
   363  // TODO: remove stripLeftWhitespace after deprecation period. It seems silly
   364  // to preserve whitespace on continuation lines. Why is that done?
   365  func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) {
   366  	if stripLeftWhitespace {
   367  		token = trimWhitespace(token)
   368  	}
   369  	return trimComments(token), d.possibleParserDirective(string(token))
   370  }