// Source: github.com/inturn/pre-commit-gobuild@v1.0.12/internal/dockerfile/parser.go

package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode"

	"github.com/inturn/pre-commit-gobuild/internal/dockerfile/command"
)

// Node is a structure used to represent a parse tree.
//
// The tree shape is carried by three fields: Value, Next, and Children.
// Value is the current token's string value. Next is always the next
// non-child token, and Children contains all the children. Here's an example:
//
//	(value next (child child-next child-next-next) next-next)
//
// The remaining fields are metadata about the node: its Attributes, the
// Original line it was parsed from, its Flags, and the StartLine/EndLine
// range it occupies in the original Dockerfile.
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	EndLine    int             // the line in the original dockerfile where the node ends
}

// Directive is the structure used during a build run to hold the state of
// parsing directives.
40 type Directive struct { 41 EscapeToken rune // Current escape token 42 LineContinuationRegex *regexp.Regexp // Current line contination regex 43 LookingForDirectives bool // Whether we are currently looking for directives 44 EscapeSeen bool // Whether the escape directive has been seen 45 } 46 47 var ( 48 dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) 49 tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) 50 tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`) 51 tokenComment = regexp.MustCompile(`^#.*$`) 52 ) 53 54 // DefaultEscapeToken is the default escape token 55 const DefaultEscapeToken = "\\" 56 57 // SetEscapeToken sets the default token for escaping characters in a Dockerfile. 58 func SetEscapeToken(s string, d *Directive) error { 59 if s != "`" && s != "\\" { 60 return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) 61 } 62 d.EscapeToken = rune(s[0]) 63 d.LineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) 64 return nil 65 } 66 67 func init() { 68 // Dispatch Table. see line_parsers.go for the parse functions. 69 // The command is parsed and mapped to the line parser. The line parser 70 // receives the arguments but not the command, and returns an AST after 71 // reformulating the arguments according to the rules in the parser 72 // functions. Errors are propagated up by Parse() and the resulting AST can 73 // be incorporated directly into the existing AST as a next. 
74 dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ 75 command.Add: parseMaybeJSONToList, 76 command.Arg: parseNameOrNameVal, 77 command.Cmd: parseMaybeJSON, 78 command.Copy: parseMaybeJSONToList, 79 command.Entrypoint: parseMaybeJSON, 80 command.Env: parseEnv, 81 command.Expose: parseStringsWhitespaceDelimited, 82 command.From: parseString, 83 command.Healthcheck: parseHealthConfig, 84 command.Label: parseLabel, 85 command.Maintainer: parseString, 86 command.Onbuild: parseSubCommand, 87 command.Run: parseMaybeJSON, 88 command.Shell: parseMaybeJSON, 89 command.StopSignal: parseString, 90 command.User: parseString, 91 command.Volume: parseMaybeJSONToList, 92 command.Workdir: parseString, 93 } 94 } 95 96 // ParseLine parses a line and returns the remainder. 97 func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) { 98 // Handle the parser directive '# escape=<char>. Parser directives must precede 99 // any builder instruction or other comments, and cannot be repeated. 
100 if d.LookingForDirectives { 101 tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) 102 if len(tecMatch) > 0 { 103 if d.EscapeSeen == true { 104 return "", nil, fmt.Errorf("only one escape parser directive can be used") 105 } 106 for i, n := range tokenEscapeCommand.SubexpNames() { 107 if n == "escapechar" { 108 if err := SetEscapeToken(tecMatch[i], d); err != nil { 109 return "", nil, err 110 } 111 d.EscapeSeen = true 112 return "", nil, nil 113 } 114 } 115 } 116 } 117 118 d.LookingForDirectives = false 119 120 if line = stripComments(line); line == "" { 121 return "", nil, nil 122 } 123 124 if !ignoreCont && d.LineContinuationRegex.MatchString(line) { 125 line = d.LineContinuationRegex.ReplaceAllString(line, "") 126 return line, nil, nil 127 } 128 129 cmd, flags, args, err := splitCommand(line) 130 if err != nil { 131 return "", nil, err 132 } 133 134 node := &Node{} 135 node.Value = cmd 136 137 sexp, attrs, err := fullDispatch(cmd, args, d) 138 if err != nil { 139 return "", nil, err 140 } 141 142 node.Next = sexp 143 node.Attributes = attrs 144 node.Original = line 145 node.Flags = flags 146 147 return "", node, nil 148 } 149 150 // Parse is the main parse routine. 151 // It handles an io.ReadWriteCloser and returns the root of the AST. 
152 func Parse(rwc io.Reader, d *Directive) (*Node, error) { 153 currentLine := 0 154 root := &Node{} 155 root.StartLine = -1 156 scanner := bufio.NewScanner(rwc) 157 158 utf8bom := []byte{0xEF, 0xBB, 0xBF} 159 for scanner.Scan() { 160 scannedBytes := scanner.Bytes() 161 // We trim UTF8 BOM 162 if currentLine == 0 { 163 scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom) 164 } 165 scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace) 166 currentLine++ 167 line, child, err := ParseLine(scannedLine, d, false) 168 if err != nil { 169 return nil, err 170 } 171 startLine := currentLine 172 173 if line != "" && child == nil { 174 for scanner.Scan() { 175 newline := scanner.Text() 176 currentLine++ 177 178 if stripComments(strings.TrimSpace(newline)) == "" { 179 continue 180 } 181 182 line, child, err = ParseLine(line+newline, d, false) 183 if err != nil { 184 return nil, err 185 } 186 187 if child != nil { 188 break 189 } 190 } 191 if child == nil && line != "" { 192 // When we call ParseLine we'll pass in 'true' for 193 // the ignoreCont param if we're at the EOF. This will 194 // prevent the func from returning immediately w/o 195 // parsing the line thinking that there's more input 196 // to come. 197 198 _, child, err = ParseLine(line, d, scanner.Err() == nil) 199 if err != nil { 200 return nil, err 201 } 202 } 203 } 204 205 if child != nil { 206 // Update the line information for the current child. 207 child.StartLine = startLine 208 child.EndLine = currentLine 209 // Update the line information for the root. The starting line of the root is always the 210 // starting line of the first child and the ending line is the ending line of the last child. 211 if root.StartLine < 0 { 212 root.StartLine = currentLine 213 } 214 root.EndLine = currentLine 215 root.Children = append(root.Children, child) 216 } 217 } 218 219 return root, nil 220 }