github.com/kobeld/docker@v1.12.0-rc1/builder/dockerfile/parser/parser.go

// Package parser implements a parser and parse tree dumper for Dockerfiles.
package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode"

	"github.com/docker/docker/builder/dockerfile/command"
)

// Node is a structure used to represent a parse tree.
//
// In the node there are three fields, Value, Next, and Children. Value is the
// current token's string value. Next is always the next non-child token, and
// children contains all the children. Here's an example:
//
//	(value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
//
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	EndLine    int             // the line in the original dockerfile where the node ends
}

var (
	dispatch              map[string]func(string) (*Node, map[string]bool, error)
	tokenWhitespace       = regexp.MustCompile(`[\t\v\f\r ]+`)
	tokenLineContinuation *regexp.Regexp
	tokenEscape           rune
	tokenEscapeCommand    = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	tokenComment          = regexp.MustCompile(`^#.*$`)
	lookingForDirectives  bool
	directiveEscapeSeen   bool
)

const defaultTokenEscape = "\\"

// setTokenEscape sets the default token for escaping characters in a Dockerfile.
func setTokenEscape(s string) error {
	if s != "`" && s != "\\" {
		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
	}
	tokenEscape = rune(s[0])
	tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`)
	return nil
}

func init() {
	// Dispatch Table. See line_parsers.go for the parse functions.
	// The command is parsed and mapped to the line parser. The line parser
	// receives the arguments but not the command, and returns an AST after
	// reformulating the arguments according to the rules in the parser
	// functions. Errors are propagated up by Parse() and the resulting AST can
	// be incorporated directly into the existing AST as a next.
	dispatch = map[string]func(string) (*Node, map[string]bool, error){
		command.Add:         parseMaybeJSONToList,
		command.Arg:         parseNameOrNameVal,
		command.Cmd:         parseMaybeJSON,
		command.Copy:        parseMaybeJSONToList,
		command.Entrypoint:  parseMaybeJSON,
		command.Env:         parseEnv,
		command.Expose:      parseStringsWhitespaceDelimited,
		command.From:        parseString,
		command.Healthcheck: parseHealthConfig,
		command.Label:       parseLabel,
		command.Maintainer:  parseString,
		command.Onbuild:     parseSubCommand,
		command.Run:         parseMaybeJSON,
		command.Shell:       parseMaybeJSON,
		command.StopSignal:  parseString,
		command.User:        parseString,
		command.Volume:      parseMaybeJSONToList,
		command.Workdir:     parseString,
	}
}
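// Illustrative note (not part of the original file): given the dispatch table
// above, a hypothetical line such as
//
//	EXPOSE 80 443
//
// is routed to parseStringsWhitespaceDelimited, which produces an instruction
// node whose Value is "expose" and whose arguments are chained through Next,
// roughly the sexp
//
//	(expose 80 443)
//
// in the notation used in the Node documentation above.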
// ParseLine parses a line and returns the remainder.
func ParseLine(line string) (string, *Node, error) {

	// Handle the parser directive '# escape=<char>'. Parser directives must precede
	// any builder instruction or other comments, and cannot be repeated.
	if lookingForDirectives {
		tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
		if len(tecMatch) > 0 {
			if directiveEscapeSeen {
				return "", nil, fmt.Errorf("only one escape parser directive can be used")
			}
			for i, n := range tokenEscapeCommand.SubexpNames() {
				if n == "escapechar" {
					if err := setTokenEscape(tecMatch[i]); err != nil {
						return "", nil, err
					}
					directiveEscapeSeen = true
					return "", nil, nil
				}
			}
		}
	}

	lookingForDirectives = false

	if line = stripComments(line); line == "" {
		return "", nil, nil
	}

	if tokenLineContinuation.MatchString(line) {
		line = tokenLineContinuation.ReplaceAllString(line, "")
		return line, nil, nil
	}

	cmd, flags, args, err := splitCommand(line)
	if err != nil {
		return "", nil, err
	}

	node := &Node{}
	node.Value = cmd

	sexp, attrs, err := fullDispatch(cmd, args)
	if err != nil {
		return "", nil, err
	}

	node.Next = sexp
	node.Attributes = attrs
	node.Original = line
	node.Flags = flags

	return "", node, nil
}

// Parse is the main parse routine.
// It reads a Dockerfile from an io.Reader and returns the root of the AST.
func Parse(rwc io.Reader) (*Node, error) {
	directiveEscapeSeen = false
	lookingForDirectives = true
	setTokenEscape(defaultTokenEscape) // Assume the default token for escape
	currentLine := 0
	root := &Node{}
	root.StartLine = -1
	scanner := bufio.NewScanner(rwc)

	utf8bom := []byte{0xEF, 0xBB, 0xBF}
	for scanner.Scan() {
		scannedBytes := scanner.Bytes()
		// We trim UTF8 BOM
		if currentLine == 0 {
			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
		}
		scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
		currentLine++
		line, child, err := ParseLine(scannedLine)
		if err != nil {
			return nil, err
		}
		startLine := currentLine

		if line != "" && child == nil {
			for scanner.Scan() {
				newline := scanner.Text()
				currentLine++

				if stripComments(strings.TrimSpace(newline)) == "" {
					continue
				}

				line, child, err = ParseLine(line + newline)
				if err != nil {
					return nil, err
				}

				if child != nil {
					break
				}
			}
			if child == nil && line != "" {
				_, child, err = ParseLine(line)
				if err != nil {
					return nil, err
				}
			}
		}

		if child != nil {
			// Update the line information for the current child.
			child.StartLine = startLine
			child.EndLine = currentLine
			// Update the line information for the root. The starting line of the root is always the
			// starting line of the first child and the ending line is the ending line of the last child.
			if root.StartLine < 0 {
				root.StartLine = currentLine
			}
			root.EndLine = currentLine
			root.Children = append(root.Children, child)
		}
	}

	return root, nil
}
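// exampleWalk is an illustrative sketch, not part of the original file: it
// shows how a caller might feed a Dockerfile to Parse and walk the resulting
// tree using the exported Node fields declared above. The function name and
// the sample Dockerfile are hypothetical.
func exampleWalk() {
	// A two-instruction Dockerfile; the second instruction spans two physical
	// lines via the default backslash line continuation.
	dockerfile := "FROM busybox\nRUN echo hello \\\n    world\n"

	root, err := Parse(strings.NewReader(dockerfile))
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}

	// Each top-level instruction becomes one child of the root; its parsed
	// arguments are chained through Next.
	for _, child := range root.Children {
		fmt.Printf("lines %d-%d: %s", child.StartLine, child.EndLine, child.Value)
		for n := child.Next; n != nil; n = n.Next {
			fmt.Printf(" %q", n.Value)
		}
		fmt.Println()
	}
}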