github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/builder/dockerfile/parser/parser.go (about) 1 // Package parser implements a parser and parse tree dumper for Dockerfiles. 2 package parser 3 4 import ( 5 "bufio" 6 "bytes" 7 "fmt" 8 "io" 9 "regexp" 10 "strings" 11 "unicode" 12 13 "github.com/docker/docker/builder/dockerfile/command" 14 ) 15 16 // Node is a structure used to represent a parse tree. 17 // 18 // In the node there are three fields, Value, Next, and Children. Value is the 19 // current token's string value. Next is always the next non-child token, and 20 // children contains all the children. Here's an example: 21 // 22 // (value next (child child-next child-next-next) next-next) 23 // 24 // This data structure is frankly pretty lousy for handling complex languages, 25 // but lucky for us the Dockerfile isn't very complicated. This structure 26 // works a little more effectively than a "proper" parse tree for our needs. 27 // 28 type Node struct { 29 Value string // actual content 30 Next *Node // the next item in the current sexp 31 Children []*Node // the children of this sexp 32 Attributes map[string]bool // special attributes for this node 33 Original string // original line used before parsing 34 Flags []string // only top Node should have this set 35 StartLine int // the line in the original dockerfile where the node begins 36 EndLine int // the line in the original dockerfile where the node ends 37 } 38 39 // Directive is the structure used during a build run to hold the state of 40 // parsing directives. 41 type Directive struct { 42 EscapeToken rune // Current escape token 43 LineContinuationRegex *regexp.Regexp // Current line contination regex 44 LookingForDirectives bool // Whether we are currently looking for directives 45 EscapeSeen bool // Whether the escape directive has been seen 46 } 47 48 var ( 49 dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) 50 tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) 51 tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`) 52 tokenComment = regexp.MustCompile(`^#.*$`) 53 ) 54 55 // DefaultEscapeToken is the default escape token 56 const DefaultEscapeToken = "\\" 57 58 // SetEscapeToken sets the default token for escaping characters in a Dockerfile. 59 func SetEscapeToken(s string, d *Directive) error { 60 if s != "`" && s != "\\" { 61 return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) 62 } 63 d.EscapeToken = rune(s[0]) 64 d.LineContinuationRegex = regexp.MustCompile(`\` + s + `$`) 65 return nil 66 } 67 68 func init() { 69 // Dispatch Table. see line_parsers.go for the parse functions. 70 // The command is parsed and mapped to the line parser. The line parser 71 // receives the arguments but not the command, and returns an AST after 72 // reformulating the arguments according to the rules in the parser 73 // functions. Errors are propagated up by Parse() and the resulting AST can 74 // be incorporated directly into the existing AST as a next. 75 dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ 76 command.Add: parseMaybeJSONToList, 77 command.Arg: parseNameOrNameVal, 78 command.Cmd: parseMaybeJSON, 79 command.Copy: parseMaybeJSONToList, 80 command.Entrypoint: parseMaybeJSON, 81 command.Env: parseEnv, 82 command.Expose: parseStringsWhitespaceDelimited, 83 command.From: parseString, 84 command.Healthcheck: parseHealthConfig, 85 command.Label: parseLabel, 86 command.Maintainer: parseString, 87 command.Onbuild: parseSubCommand, 88 command.Run: parseMaybeJSON, 89 command.Shell: parseMaybeJSON, 90 command.StopSignal: parseString, 91 command.User: parseString, 92 command.Volume: parseMaybeJSONToList, 93 command.Workdir: parseString, 94 } 95 } 96 97 // ParseLine parses a line and returns the remainder. 98 func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) { 99 // Handle the parser directive '# escape=<char>. Parser directives must precede 100 // any builder instruction or other comments, and cannot be repeated. 101 if d.LookingForDirectives { 102 tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) 103 if len(tecMatch) > 0 { 104 if d.EscapeSeen == true { 105 return "", nil, fmt.Errorf("only one escape parser directive can be used") 106 } 107 for i, n := range tokenEscapeCommand.SubexpNames() { 108 if n == "escapechar" { 109 if err := SetEscapeToken(tecMatch[i], d); err != nil { 110 return "", nil, err 111 } 112 d.EscapeSeen = true 113 return "", nil, nil 114 } 115 } 116 } 117 } 118 119 d.LookingForDirectives = false 120 121 if line = stripComments(line); line == "" { 122 return "", nil, nil 123 } 124 125 if !ignoreCont && d.LineContinuationRegex.MatchString(line) { 126 line = d.LineContinuationRegex.ReplaceAllString(line, "") 127 return line, nil, nil 128 } 129 130 cmd, flags, args, err := splitCommand(line) 131 if err != nil { 132 return "", nil, err 133 } 134 135 node := &Node{} 136 node.Value = cmd 137 138 sexp, attrs, err := fullDispatch(cmd, args, d) 139 if err != nil { 140 return "", nil, err 141 } 142 143 node.Next = sexp 144 node.Attributes = attrs 145 node.Original = line 146 node.Flags = flags 147 148 return "", node, nil 149 } 150 151 // Parse is the main parse routine. 152 // It handles an io.ReadWriteCloser and returns the root of the AST. 153 func Parse(rwc io.Reader, d *Directive) (*Node, error) { 154 currentLine := 0 155 root := &Node{} 156 root.StartLine = -1 157 scanner := bufio.NewScanner(rwc) 158 159 utf8bom := []byte{0xEF, 0xBB, 0xBF} 160 for scanner.Scan() { 161 scannedBytes := scanner.Bytes() 162 // We trim UTF8 BOM 163 if currentLine == 0 { 164 scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom) 165 } 166 scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace) 167 currentLine++ 168 line, child, err := ParseLine(scannedLine, d, false) 169 if err != nil { 170 return nil, err 171 } 172 startLine := currentLine 173 174 if line != "" && child == nil { 175 for scanner.Scan() { 176 newline := scanner.Text() 177 currentLine++ 178 179 if stripComments(strings.TrimSpace(newline)) == "" { 180 continue 181 } 182 183 line, child, err = ParseLine(line+newline, d, false) 184 if err != nil { 185 return nil, err 186 } 187 188 if child != nil { 189 break 190 } 191 } 192 if child == nil && line != "" { 193 // When we call ParseLine we'll pass in 'true' for 194 // the ignoreCont param if we're at the EOF. This will 195 // prevent the func from returning immediately w/o 196 // parsing the line thinking that there's more input 197 // to come. 198 199 _, child, err = ParseLine(line, d, scanner.Err() == nil) 200 if err != nil { 201 return nil, err 202 } 203 } 204 } 205 206 if child != nil { 207 // Update the line information for the current child. 208 child.StartLine = startLine 209 child.EndLine = currentLine 210 // Update the line information for the root. The starting line of the root is always the 211 // starting line of the first child and the ending line is the ending line of the last child. 212 if root.StartLine < 0 { 213 root.StartLine = currentLine 214 } 215 root.EndLine = currentLine 216 root.Children = append(root.Children, child) 217 } 218 } 219 220 return root, nil 221 }