gitee.com/mysnapcore/mysnapd@v0.1.0/strutil/shlex/shlex.go (about) 1 /* 2 Copyright 2012 Google Inc. All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 Package shlex implements a simple lexer which splits input in to tokens using 19 shell-style rules for quoting and commenting. 20 21 The basic use case uses the default ASCII lexer to split a string into sub-strings: 22 23 shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"} 24 25 To process a stream of strings: 26 27 l := NewLexer(os.Stdin) 28 for ; token, err := l.Next(); err != nil { 29 // process token 30 } 31 32 To access the raw token stream (which includes tokens for comments): 33 34 t := NewTokenizer(os.Stdin) 35 for ; token, err := t.Next(); err != nil { 36 // process token 37 } 38 39 */ 40 package shlex 41 42 import ( 43 "bufio" 44 "fmt" 45 "io" 46 "strings" 47 ) 48 49 // TokenType is a top-level token classification: A word, space, comment, unknown. 50 type TokenType int 51 52 // runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape. 53 type runeTokenClass int 54 55 // the internal state used by the lexer state machine 56 type lexerState int 57 58 // Token is a (type, value) pair representing a lexographical token. 59 type Token struct { 60 tokenType TokenType 61 value string 62 } 63 64 // Equal reports whether tokens a, and b, are equal. 65 // Two tokens are equal if both their types and values are equal. A nil token can 66 // never be equal to another token. 67 func (a *Token) Equal(b *Token) bool { 68 if a == nil || b == nil { 69 return false 70 } 71 if a.tokenType != b.tokenType { 72 return false 73 } 74 return a.value == b.value 75 } 76 77 // Named classes of UTF-8 runes 78 const ( 79 spaceRunes = " \t\r\n" 80 escapingQuoteRunes = `"` 81 nonEscapingQuoteRunes = "'" 82 escapeRunes = `\` 83 commentRunes = "#" 84 ) 85 86 // Classes of rune token 87 const ( 88 //nolint:deadcode 89 unknownRuneClass runeTokenClass = iota 90 spaceRuneClass 91 escapingQuoteRuneClass 92 nonEscapingQuoteRuneClass 93 escapeRuneClass 94 commentRuneClass 95 eofRuneClass 96 ) 97 98 // Classes of lexographic token 99 const ( 100 UnknownToken TokenType = iota 101 WordToken 102 SpaceToken 103 CommentToken 104 ) 105 106 // Lexer state machine states 107 const ( 108 startState lexerState = iota // no runes have been seen 109 inWordState // processing regular runes in a word 110 escapingState // we have just consumed an escape rune; the next rune is literal 111 escapingQuotedState // we have just consumed an escape rune within a quoted string 112 quotingEscapingState // we are within a quoted string that supports escaping ("...") 113 quotingState // we are within a string that does not support escaping ('...') 114 commentState // we are within a comment (everything following an unquoted or unescaped # 115 ) 116 117 // tokenClassifier is used for classifying rune characters. 118 type tokenClassifier map[rune]runeTokenClass 119 120 func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) { 121 for _, runeChar := range runes { 122 typeMap[runeChar] = tokenType 123 } 124 } 125 126 // newDefaultClassifier creates a new classifier for ASCII characters. 127 func newDefaultClassifier() tokenClassifier { 128 t := tokenClassifier{} 129 t.addRuneClass(spaceRunes, spaceRuneClass) 130 t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass) 131 t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass) 132 t.addRuneClass(escapeRunes, escapeRuneClass) 133 t.addRuneClass(commentRunes, commentRuneClass) 134 return t 135 } 136 137 // ClassifyRune classifiees a rune 138 func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass { 139 return t[runeVal] 140 } 141 142 // Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped. 143 type Lexer Tokenizer 144 145 // NewLexer creates a new lexer from an input stream. 146 func NewLexer(r io.Reader) *Lexer { 147 148 return (*Lexer)(NewTokenizer(r)) 149 } 150 151 // Next returns the next word, or an error. If there are no more words, 152 // the error will be io.EOF. 153 func (l *Lexer) Next() (string, error) { 154 for { 155 token, err := (*Tokenizer)(l).Next() 156 if err != nil { 157 return "", err 158 } 159 switch token.tokenType { 160 case WordToken: 161 return token.value, nil 162 case CommentToken: 163 // skip comments 164 default: 165 return "", fmt.Errorf("Unknown token type: %v", token.tokenType) 166 } 167 } 168 } 169 170 // Tokenizer turns an input stream into a sequence of typed tokens 171 type Tokenizer struct { 172 input bufio.Reader 173 classifier tokenClassifier 174 } 175 176 // NewTokenizer creates a new tokenizer from an input stream. 177 func NewTokenizer(r io.Reader) *Tokenizer { 178 input := bufio.NewReader(r) 179 classifier := newDefaultClassifier() 180 return &Tokenizer{ 181 input: *input, 182 classifier: classifier} 183 } 184 185 // scanStream scans the stream for the next token using the internal state machine. 186 // It will panic if it encounters a rune which it does not know how to handle. 187 func (t *Tokenizer) scanStream() (*Token, error) { 188 state := startState 189 var tokenType TokenType 190 var value []rune 191 var nextRune rune 192 var nextRuneType runeTokenClass 193 var err error 194 195 for { 196 nextRune, _, err = t.input.ReadRune() 197 nextRuneType = t.classifier.ClassifyRune(nextRune) 198 199 if err == io.EOF { 200 nextRuneType = eofRuneClass 201 err = nil 202 } else if err != nil { 203 return nil, err 204 } 205 206 switch state { 207 case startState: // no runes read yet 208 { 209 switch nextRuneType { 210 case eofRuneClass: 211 { 212 return nil, io.EOF 213 } 214 case spaceRuneClass: 215 { 216 } 217 case escapingQuoteRuneClass: 218 { 219 tokenType = WordToken 220 state = quotingEscapingState 221 } 222 case nonEscapingQuoteRuneClass: 223 { 224 tokenType = WordToken 225 state = quotingState 226 } 227 case escapeRuneClass: 228 { 229 tokenType = WordToken 230 state = escapingState 231 } 232 case commentRuneClass: 233 { 234 tokenType = CommentToken 235 state = commentState 236 } 237 default: 238 { 239 tokenType = WordToken 240 value = append(value, nextRune) 241 state = inWordState 242 } 243 } 244 } 245 case inWordState: // in a regular word 246 { 247 switch nextRuneType { 248 case eofRuneClass: 249 { 250 token := &Token{ 251 tokenType: tokenType, 252 value: string(value)} 253 return token, err 254 } 255 case spaceRuneClass: 256 { 257 t.input.UnreadRune() 258 token := &Token{ 259 tokenType: tokenType, 260 value: string(value)} 261 return token, err 262 } 263 case escapingQuoteRuneClass: 264 { 265 state = quotingEscapingState 266 } 267 case nonEscapingQuoteRuneClass: 268 { 269 state = quotingState 270 } 271 case escapeRuneClass: 272 { 273 state = escapingState 274 } 275 default: 276 { 277 value = append(value, nextRune) 278 } 279 } 280 } 281 case escapingState: // the rune after an escape character 282 { 283 switch nextRuneType { 284 case eofRuneClass: 285 { 286 err = fmt.Errorf("EOF found after escape character") 287 token := &Token{ 288 tokenType: tokenType, 289 value: string(value)} 290 return token, err 291 } 292 default: 293 { 294 state = inWordState 295 value = append(value, nextRune) 296 } 297 } 298 } 299 case escapingQuotedState: // the next rune after an escape character, in double quotes 300 { 301 switch nextRuneType { 302 case eofRuneClass: 303 { 304 err = fmt.Errorf("EOF found after escape character") 305 token := &Token{ 306 tokenType: tokenType, 307 value: string(value)} 308 return token, err 309 } 310 default: 311 { 312 state = quotingEscapingState 313 value = append(value, nextRune) 314 } 315 } 316 } 317 case quotingEscapingState: // in escaping double quotes 318 { 319 switch nextRuneType { 320 case eofRuneClass: 321 { 322 err = fmt.Errorf("EOF found when expecting closing quote") 323 token := &Token{ 324 tokenType: tokenType, 325 value: string(value)} 326 return token, err 327 } 328 case escapingQuoteRuneClass: 329 { 330 state = inWordState 331 } 332 case escapeRuneClass: 333 { 334 state = escapingQuotedState 335 } 336 default: 337 { 338 value = append(value, nextRune) 339 } 340 } 341 } 342 case quotingState: // in non-escaping single quotes 343 { 344 switch nextRuneType { 345 case eofRuneClass: 346 { 347 err = fmt.Errorf("EOF found when expecting closing quote") 348 token := &Token{ 349 tokenType: tokenType, 350 value: string(value)} 351 return token, err 352 } 353 case nonEscapingQuoteRuneClass: 354 { 355 state = inWordState 356 } 357 default: 358 { 359 value = append(value, nextRune) 360 } 361 } 362 } 363 case commentState: // in a comment 364 { 365 switch nextRuneType { 366 case eofRuneClass: 367 { 368 token := &Token{ 369 tokenType: tokenType, 370 value: string(value)} 371 return token, err 372 } 373 case spaceRuneClass: 374 { 375 if nextRune == '\n' { 376 token := &Token{ 377 tokenType: tokenType, 378 value: string(value)} 379 return token, err 380 } else { 381 value = append(value, nextRune) 382 } 383 } 384 default: 385 { 386 value = append(value, nextRune) 387 } 388 } 389 } 390 default: 391 { 392 return nil, fmt.Errorf("Unexpected state: %v", state) 393 } 394 } 395 } 396 } 397 398 // Next returns the next token in the stream. 399 func (t *Tokenizer) Next() (*Token, error) { 400 return t.scanStream() 401 } 402 403 // Split partitions a string into a slice of strings. 404 func Split(s string) ([]string, error) { 405 l := NewLexer(strings.NewReader(s)) 406 subStrings := make([]string, 0) 407 for { 408 word, err := l.Next() 409 if err != nil { 410 if err == io.EOF { 411 return subStrings, nil 412 } 413 return subStrings, err 414 } 415 subStrings = append(subStrings, word) 416 } 417 }