// Source: github.com/vieux/docker@v0.6.3-0.20161004191708-e097c2a938c7/builder/dockerfile/parser/parser.go

// Package parser implements a parser and parse tree dumper for Dockerfiles.
package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode"

	"github.com/docker/docker/builder/dockerfile/command"
)

// Node is a structure used to represent a parse tree.
//
// The shape of the tree is carried by three fields: Value, Next, and
// Children. Value is the current token's string value. Next is always the
// next non-child token, and Children contains all the children. Here's an
// example:
//
// (value next (child child-next child-next-next) next-next)
//
// The remaining fields hold per-node metadata: attributes, the original
// source line, flags, and the line range the node was parsed from.
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	EndLine    int             // the line in the original dockerfile where the node ends
}

// Directive is the structure used during a build run to hold the state of
// parsing directives.
41 type Directive struct { 42 EscapeToken rune // Current escape token 43 LineContinuationRegex *regexp.Regexp // Current line contination regex 44 LookingForDirectives bool // Whether we are currently looking for directives 45 EscapeSeen bool // Whether the escape directive has been seen 46 } 47 48 var ( 49 dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) 50 tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) 51 tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`) 52 tokenComment = regexp.MustCompile(`^#.*$`) 53 ) 54 55 // DefaultEscapeToken is the default escape token 56 const DefaultEscapeToken = "\\" 57 58 // SetEscapeToken sets the default token for escaping characters in a Dockerfile. 59 func SetEscapeToken(s string, d *Directive) error { 60 if s != "`" && s != "\\" { 61 return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) 62 } 63 d.EscapeToken = rune(s[0]) 64 d.LineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) 65 return nil 66 } 67 68 func init() { 69 // Dispatch Table. see line_parsers.go for the parse functions. 70 // The command is parsed and mapped to the line parser. The line parser 71 // receives the arguments but not the command, and returns an AST after 72 // reformulating the arguments according to the rules in the parser 73 // functions. Errors are propagated up by Parse() and the resulting AST can 74 // be incorporated directly into the existing AST as a next. 
75 dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ 76 command.Add: parseMaybeJSONToList, 77 command.Arg: parseNameOrNameVal, 78 command.Cmd: parseMaybeJSON, 79 command.Copy: parseMaybeJSONToList, 80 command.Entrypoint: parseMaybeJSON, 81 command.Env: parseEnv, 82 command.Expose: parseStringsWhitespaceDelimited, 83 command.From: parseString, 84 command.Healthcheck: parseHealthConfig, 85 command.Label: parseLabel, 86 command.Maintainer: parseString, 87 command.Onbuild: parseSubCommand, 88 command.Run: parseMaybeJSON, 89 command.Shell: parseMaybeJSON, 90 command.StopSignal: parseString, 91 command.User: parseString, 92 command.Volume: parseMaybeJSONToList, 93 command.Workdir: parseString, 94 } 95 } 96 97 // ParseLine parses a line and returns the remainder. 98 func ParseLine(line string, d *Directive) (string, *Node, error) { 99 // Handle the parser directive '# escape=<char>. Parser directives must precede 100 // any builder instruction or other comments, and cannot be repeated. 
101 if d.LookingForDirectives { 102 tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) 103 if len(tecMatch) > 0 { 104 if d.EscapeSeen == true { 105 return "", nil, fmt.Errorf("only one escape parser directive can be used") 106 } 107 for i, n := range tokenEscapeCommand.SubexpNames() { 108 if n == "escapechar" { 109 if err := SetEscapeToken(tecMatch[i], d); err != nil { 110 return "", nil, err 111 } 112 d.EscapeSeen = true 113 return "", nil, nil 114 } 115 } 116 } 117 } 118 119 d.LookingForDirectives = false 120 121 if line = stripComments(line); line == "" { 122 return "", nil, nil 123 } 124 125 if d.LineContinuationRegex.MatchString(line) { 126 line = d.LineContinuationRegex.ReplaceAllString(line, "") 127 return line, nil, nil 128 } 129 130 cmd, flags, args, err := splitCommand(line) 131 if err != nil { 132 return "", nil, err 133 } 134 135 node := &Node{} 136 node.Value = cmd 137 138 sexp, attrs, err := fullDispatch(cmd, args, d) 139 if err != nil { 140 return "", nil, err 141 } 142 143 node.Next = sexp 144 node.Attributes = attrs 145 node.Original = line 146 node.Flags = flags 147 148 return "", node, nil 149 } 150 151 // Parse is the main parse routine. 152 // It handles an io.ReadWriteCloser and returns the root of the AST. 
153 func Parse(rwc io.Reader, d *Directive) (*Node, error) { 154 currentLine := 0 155 root := &Node{} 156 root.StartLine = -1 157 scanner := bufio.NewScanner(rwc) 158 159 utf8bom := []byte{0xEF, 0xBB, 0xBF} 160 for scanner.Scan() { 161 scannedBytes := scanner.Bytes() 162 // We trim UTF8 BOM 163 if currentLine == 0 { 164 scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom) 165 } 166 scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace) 167 currentLine++ 168 line, child, err := ParseLine(scannedLine, d) 169 if err != nil { 170 return nil, err 171 } 172 startLine := currentLine 173 174 if line != "" && child == nil { 175 for scanner.Scan() { 176 newline := scanner.Text() 177 currentLine++ 178 179 if stripComments(strings.TrimSpace(newline)) == "" { 180 continue 181 } 182 183 line, child, err = ParseLine(line+newline, d) 184 if err != nil { 185 return nil, err 186 } 187 188 if child != nil { 189 break 190 } 191 } 192 if child == nil && line != "" { 193 _, child, err = ParseLine(line, d) 194 if err != nil { 195 return nil, err 196 } 197 } 198 } 199 200 if child != nil { 201 // Update the line information for the current child. 202 child.StartLine = startLine 203 child.EndLine = currentLine 204 // Update the line information for the root. The starting line of the root is always the 205 // starting line of the first child and the ending line is the ending line of the last child. 206 if root.StartLine < 0 { 207 root.StartLine = currentLine 208 } 209 root.EndLine = currentLine 210 root.Children = append(root.Children, child) 211 } 212 } 213 214 return root, nil 215 }