github.com/inturn/pre-commit-gobuild@v1.0.12/internal/dockerfile/parser.go (about)

     1  package parser
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  	"strings"
    10  	"unicode"
    11  
    12  	"github.com/inturn/pre-commit-gobuild/internal/dockerfile/command"
    13  )
    14  
// Node is a structure used to represent a parse tree.
//
// In the node there are three main fields: Value, Next, and Children. Value
// is the current token's string value. Next is always the next non-child
// token, and Children contains all the children. Here's an example:
//
//	(value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
//
// Parse returns a root Node whose Children are the nodes produced by
// ParseLine, one per instruction; StartLine and EndLine are filled in by
// Parse using 1-based line numbers of the scanned input.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	EndLine    int             // the line in the original dockerfile where the node ends
}
    37  
// Directive is the structure used during a build run to hold the state of
// parsing directives.
//
// EscapeToken and LineContinuationRegex are set together by SetEscapeToken.
// ParseLine uses LineContinuationRegex without a nil check, so it must be
// initialised before any line is parsed. LookingForDirectives is cleared by
// ParseLine as soon as it processes a line that is not an escape directive,
// and EscapeSeen is set once an escape directive has been accepted.
type Directive struct {
	EscapeToken           rune           // Current escape token
	LineContinuationRegex *regexp.Regexp // Current line continuation regex
	LookingForDirectives  bool           // Whether we are currently looking for directives
	EscapeSeen            bool           // Whether the escape directive has been seen
}
    46  
var (
	// dispatch maps a command name to the line parser that handles its
	// arguments. It is populated in init.
	dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
	// tokenWhitespace matches a run of tabs, vertical tabs, form feeds,
	// carriage returns, or spaces (newlines are not included).
	tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
	// tokenEscapeCommand matches a `# escape=<char>` parser directive line and
	// captures the single character after '=' in the "escapechar" group.
	// ParseLine applies it to the lower-cased line.
	tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	// tokenComment matches a whole line that begins with '#'.
	tokenComment = regexp.MustCompile(`^#.*$`)
)
    53  
// DefaultEscapeToken is the default escape token: a single backslash.
// It is one of the two values accepted by SetEscapeToken.
const DefaultEscapeToken = "\\"
    56  
    57  // SetEscapeToken sets the default token for escaping characters in a Dockerfile.
    58  func SetEscapeToken(s string, d *Directive) error {
    59  	if s != "`" && s != "\\" {
    60  		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
    61  	}
    62  	d.EscapeToken = rune(s[0])
    63  	d.LineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
    64  	return nil
    65  }
    66  
    67  func init() {
    68  	// Dispatch Table. see line_parsers.go for the parse functions.
    69  	// The command is parsed and mapped to the line parser. The line parser
    70  	// receives the arguments but not the command, and returns an AST after
    71  	// reformulating the arguments according to the rules in the parser
    72  	// functions. Errors are propagated up by Parse() and the resulting AST can
    73  	// be incorporated directly into the existing AST as a next.
    74  	dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
    75  		command.Add:         parseMaybeJSONToList,
    76  		command.Arg:         parseNameOrNameVal,
    77  		command.Cmd:         parseMaybeJSON,
    78  		command.Copy:        parseMaybeJSONToList,
    79  		command.Entrypoint:  parseMaybeJSON,
    80  		command.Env:         parseEnv,
    81  		command.Expose:      parseStringsWhitespaceDelimited,
    82  		command.From:        parseString,
    83  		command.Healthcheck: parseHealthConfig,
    84  		command.Label:       parseLabel,
    85  		command.Maintainer:  parseString,
    86  		command.Onbuild:     parseSubCommand,
    87  		command.Run:         parseMaybeJSON,
    88  		command.Shell:       parseMaybeJSON,
    89  		command.StopSignal:  parseString,
    90  		command.User:        parseString,
    91  		command.Volume:      parseMaybeJSONToList,
    92  		command.Workdir:     parseString,
    93  	}
    94  }
    95  
    96  // ParseLine parses a line and returns the remainder.
    97  func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) {
    98  	// Handle the parser directive '# escape=<char>. Parser directives must precede
    99  	// any builder instruction or other comments, and cannot be repeated.
   100  	if d.LookingForDirectives {
   101  		tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
   102  		if len(tecMatch) > 0 {
   103  			if d.EscapeSeen == true {
   104  				return "", nil, fmt.Errorf("only one escape parser directive can be used")
   105  			}
   106  			for i, n := range tokenEscapeCommand.SubexpNames() {
   107  				if n == "escapechar" {
   108  					if err := SetEscapeToken(tecMatch[i], d); err != nil {
   109  						return "", nil, err
   110  					}
   111  					d.EscapeSeen = true
   112  					return "", nil, nil
   113  				}
   114  			}
   115  		}
   116  	}
   117  
   118  	d.LookingForDirectives = false
   119  
   120  	if line = stripComments(line); line == "" {
   121  		return "", nil, nil
   122  	}
   123  
   124  	if !ignoreCont && d.LineContinuationRegex.MatchString(line) {
   125  		line = d.LineContinuationRegex.ReplaceAllString(line, "")
   126  		return line, nil, nil
   127  	}
   128  
   129  	cmd, flags, args, err := splitCommand(line)
   130  	if err != nil {
   131  		return "", nil, err
   132  	}
   133  
   134  	node := &Node{}
   135  	node.Value = cmd
   136  
   137  	sexp, attrs, err := fullDispatch(cmd, args, d)
   138  	if err != nil {
   139  		return "", nil, err
   140  	}
   141  
   142  	node.Next = sexp
   143  	node.Attributes = attrs
   144  	node.Original = line
   145  	node.Flags = flags
   146  
   147  	return "", node, nil
   148  }
   149  
   150  // Parse is the main parse routine.
   151  // It handles an io.ReadWriteCloser and returns the root of the AST.
   152  func Parse(rwc io.Reader, d *Directive) (*Node, error) {
   153  	currentLine := 0
   154  	root := &Node{}
   155  	root.StartLine = -1
   156  	scanner := bufio.NewScanner(rwc)
   157  
   158  	utf8bom := []byte{0xEF, 0xBB, 0xBF}
   159  	for scanner.Scan() {
   160  		scannedBytes := scanner.Bytes()
   161  		// We trim UTF8 BOM
   162  		if currentLine == 0 {
   163  			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
   164  		}
   165  		scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
   166  		currentLine++
   167  		line, child, err := ParseLine(scannedLine, d, false)
   168  		if err != nil {
   169  			return nil, err
   170  		}
   171  		startLine := currentLine
   172  
   173  		if line != "" && child == nil {
   174  			for scanner.Scan() {
   175  				newline := scanner.Text()
   176  				currentLine++
   177  
   178  				if stripComments(strings.TrimSpace(newline)) == "" {
   179  					continue
   180  				}
   181  
   182  				line, child, err = ParseLine(line+newline, d, false)
   183  				if err != nil {
   184  					return nil, err
   185  				}
   186  
   187  				if child != nil {
   188  					break
   189  				}
   190  			}
   191  			if child == nil && line != "" {
   192  				// When we call ParseLine we'll pass in 'true' for
   193  				// the ignoreCont param if we're at the EOF. This will
   194  				// prevent the func from returning immediately w/o
   195  				// parsing the line thinking that there's more input
   196  				// to come.
   197  
   198  				_, child, err = ParseLine(line, d, scanner.Err() == nil)
   199  				if err != nil {
   200  					return nil, err
   201  				}
   202  			}
   203  		}
   204  
   205  		if child != nil {
   206  			// Update the line information for the current child.
   207  			child.StartLine = startLine
   208  			child.EndLine = currentLine
   209  			// Update the line information for the root. The starting line of the root is always the
   210  			// starting line of the first child and the ending line is the ending line of the last child.
   211  			if root.StartLine < 0 {
   212  				root.StartLine = currentLine
   213  			}
   214  			root.EndLine = currentLine
   215  			root.Children = append(root.Children, child)
   216  		}
   217  	}
   218  
   219  	return root, nil
   220  }