// Copyright 2019 GRAIL, Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package config

import (
	"errors"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"text/scanner"
	"unicode"
)

// insertionToks defines the set of tokens after which a semicolon
// is inserted: parser.next records whether the token it returns is
// in this set, and parser.peek then reports a following newline
// token as ';'.
var insertionToks = map[rune]bool{
	scanner.Ident:     true,
	scanner.String:    true,
	scanner.RawString: true,
	scanner.Int:       true,
	scanner.Float:     true,
	scanner.Char:      true,
	')':               true,
	'}':               true,
	']':               true,
}

// def wraps a value to indicate that it is a default.
type def struct{ value any }

// unwrap returns the value v with every enclosing def wrapper
// removed. wasDef reports whether v itself was a def.
func unwrap(v interface{}) (_ any, wasDef bool) {
	if v, ok := v.(def); ok {
		// Recurse so that a def nested inside a def is unwrapped too.
		u, _ := unwrap(v.value)
		return u, true
	}
	return v, false
}

// indirect is a type that indicates an indirection. The parser
// produces it for a value written as a bare identifier; the empty
// string stands for the identifier nil.
type indirect string

// GoString renders an indirect type as a string without quotes,
// matching the concrete representation of indirections. The empty
// indirection is rendered as "nil".
func (i indirect) GoString() string {
	if i == "" {
		return "nil"
	}
	return string(i)
}

// An instance stores a parsed configuration clause.
type instance struct {
	// name is the global name of the instance.
	name string
	// parent is the instance from which this one is derived, if any.
	parent string
	// params contains the set of parameters defined by this instance.
	// The values of the parameter map take on valid config literal
	// values. They are: indirect, bool, int, float64, and string.
	// A value may additionally be wrapped in def.
	params map[string]interface{}
}

// Merge merges the provided instance into inst. A non-empty parent
// in other replaces the parent of inst, and every parameter value
// in other that is not a def is set in inst.
74 func (inst *instance) Merge(other *instance) { 75 if other.parent != "" { 76 inst.parent = other.parent 77 } 78 for k, v := range other.params { 79 if _, ok := v.(def); ok { 80 continue 81 } 82 inst.params[k] = v 83 } 84 } 85 86 // Equal tells whether two instances are equal. 87 func (inst *instance) Equal(other *instance) bool { 88 if inst.name != other.name || inst.parent != other.parent || len(inst.params) != len(other.params) { 89 return false 90 } 91 for k, v := range inst.params { 92 w, ok := other.params[k] 93 if !ok { 94 return false 95 } 96 v, _ = unwrap(v) 97 w, _ = unwrap(w) 98 switch vval := v.(type) { 99 case indirect: 100 wval, ok := w.(indirect) 101 if !ok || vval != wval { 102 return false 103 } 104 case string: 105 wval, ok := w.(string) 106 if !ok || vval != wval { 107 return false 108 } 109 case bool: 110 wval, ok := w.(bool) 111 if !ok || vval != wval { 112 return false 113 } 114 case int: 115 wval, ok := w.(int) 116 if !ok || vval != wval { 117 return false 118 } 119 case float64: 120 wval, ok := w.(float64) 121 if !ok || vval != wval { 122 return false 123 } 124 } 125 } 126 return true 127 } 128 129 // instances stores a collection of named instanes. 130 type instances map[string]*instance 131 132 // Merge merges an instance into this collection. 133 func (m instances) Merge(inst *instance) { 134 if m[inst.name] == nil { 135 m[inst.name] = inst 136 return 137 } 138 m[inst.name].Merge(inst) 139 } 140 141 // Equal tells whether instances m is equal to instances n. 142 func (m instances) Equal(n instances) bool { 143 if len(m) != len(n) { 144 return false 145 } 146 for name, minst := range m { 147 ninst, ok := n[name] 148 if !ok { 149 return false 150 } 151 if !minst.Equal(ninst) { 152 return false 153 } 154 } 155 return true 156 } 157 158 // SyntaxString returns a string representation of this instance 159 // which is also valid config syntax. Docs optionally provides 160 // documentation for the parameters in the instance. 
161 func (inst *instance) SyntaxString(docs map[string]string) string { 162 // TODO: Consider printing floats with minimum precision (1 appears as 1.0) so users 163 // can easily contrast them with integers. 164 var b strings.Builder 165 writeDoc(&b, "", docs[""]) 166 if inst.parent == "" { 167 b.WriteString("param ") 168 b.WriteString(inst.name) 169 if len(inst.params) == 0 { 170 b.WriteString(" ()\n") 171 return b.String() 172 } 173 b.WriteString(" (\n") 174 writeParams(&b, inst.params, docs) 175 b.WriteString(")\n") 176 return b.String() 177 } 178 b.WriteString("instance ") 179 b.WriteString(inst.name) 180 b.WriteString(" ") 181 b.WriteString(inst.parent) 182 if len(inst.params) > 0 { 183 b.WriteString(" (\n") 184 writeParams(&b, inst.params, docs) 185 b.WriteString(")") 186 } 187 b.WriteString("\n") 188 return b.String() 189 } 190 191 func writeDoc(b *strings.Builder, prefix string, doc string) { 192 if doc == "" { 193 return 194 } 195 for _, line := range strings.Split(doc, "\n") { 196 b.WriteString(prefix) 197 b.WriteString("// ") 198 b.WriteString(line) 199 b.WriteString("\n") 200 } 201 } 202 203 func writeParams(b *strings.Builder, params map[string]any, docs map[string]string) { 204 forEachParam(params, func(name string, v any) { 205 writeDoc(b, "\t", docs[name]) 206 v, wasDef := unwrap(v) 207 var repr string 208 switch vt := v.(type) { 209 case string: 210 // Improve readability by using a raw literal (no quote-escaping), if possible. 
211 if strings.ContainsRune(vt, '"') && !strings.ContainsRune(vt, '`') { 212 repr = "`" + vt + "`" 213 } else { 214 repr = strconv.Quote(vt) 215 } 216 default: 217 repr = fmt.Sprintf("%#v", v) 218 } 219 fmt.Fprintf(b, "\t%s = %s", name, repr) 220 if wasDef { 221 b.WriteString(" // default") 222 } 223 b.WriteString("\n") 224 }) 225 } 226 227 func forEachParam(params map[string]any, fn func(k string, v any)) { 228 keys := make([]string, 0, len(params)) 229 for k := range params { 230 keys = append(keys, k) 231 } 232 sort.Strings(keys) 233 for _, k := range keys { 234 fn(k, params[k]) 235 } 236 } 237 238 // A parser stores parser state defines the productions 239 // in the profile grammar. 240 type parser struct { 241 scanner scanner.Scanner 242 errors []string 243 244 insertion bool 245 scanned rune 246 } 247 248 // parse parses the config read by the provided reader into a 249 // concrete profile into a set of instances. If the reader r 250 // implements 251 // 252 // Name() string 253 // 254 // then this is used as a filename to display positional information 255 // in error messages. 
256 func parse(r io.Reader) (instances, error) { 257 var p parser 258 p.scanner.Whitespace &= ^uint64(1 << '\n') 259 p.scanner.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | 260 scanner.ScanStrings | scanner.ScanRawStrings 261 p.scanner.IsIdentRune = func(ch rune, i int) bool { 262 return unicode.IsLetter(ch) || (unicode.IsDigit(ch) || ch == '_' || ch == '/' || ch == '-') && i > 0 263 } 264 if named, ok := r.(interface{ Name() string }); ok { 265 filename := named.Name() 266 if cwd, err := os.Getwd(); err == nil { 267 if rel, err := filepath.Rel(cwd, filename); err == nil && len(rel) < len(filename) { 268 filename = rel 269 } 270 } 271 p.scanner.Position.Filename = filename 272 } 273 p.scanner.Error = func(s *scanner.Scanner, msg string) { 274 // TODO(marius): report these in error 275 log.Printf("%s: %s", s.Position, msg) 276 } 277 p.scanner.Init(r) 278 if insts, ok := p.toplevel(); ok { 279 return insts, nil 280 } 281 switch len(p.errors) { 282 case 0: 283 return nil, errors.New("parse error") 284 case 1: 285 return nil, fmt.Errorf("parse error: %s", p.errors[0]) 286 default: 287 return nil, fmt.Errorf("parse error:\n%s", strings.Join(p.errors, "\n")) 288 } 289 } 290 291 // toplevel parses the config grammar. It is as follows: 292 // 293 // toplevel: 294 // clause 295 // clause ';' toplevel 296 // <eof> 297 // 298 // clause: 299 // param 300 // instance 301 // 302 // param: 303 // ident assign 304 // ident assignlist 305 // 306 // instance: 307 // ident ident 308 // ident ident assignlist 309 // 310 // assign: 311 // key = value 312 // 313 // assignlist: 314 // ( list ) 315 // 316 // list: 317 // assign 318 // assign ';' list 319 // 320 // value: 321 // 'true' 322 // 'false' 323 // 'nil' 324 // ident 325 // integer 326 // float 327 // string 328 func (p *parser) toplevel() (insts instances, ok bool) { 329 ok = true // Empty input is okay. 
330 insts = make(instances) 331 for { 332 switch p.next() { 333 case scanner.EOF: 334 return 335 case ';': 336 case scanner.Ident: 337 switch p.text() { 338 case "param": 339 var ( 340 name string 341 params map[string]interface{} 342 ) 343 name, params, ok = p.param() 344 if !ok { 345 return 346 } 347 insts.Merge(&instance{name: name, params: params}) 348 case "instance": 349 var inst *instance 350 inst, ok = p.instance() 351 if !ok { 352 return 353 } 354 insts.Merge(inst) 355 default: 356 p.errorf("unrecognized toplevel clause: %s", p.text()) 357 return nil, false 358 } 359 } 360 } 361 } 362 363 // param: 364 // ident assign 365 // ident assignlist 366 func (p *parser) param() (instance string, params map[string]interface{}, ok bool) { 367 if p.next() != scanner.Ident { 368 p.errorf("expected identifier") 369 return 370 } 371 instance = p.text() 372 switch tok := p.peek(); tok { 373 case scanner.Ident: 374 var ( 375 key string 376 value interface{} 377 ) 378 key, value, ok = p.assign() 379 if !ok { 380 return 381 } 382 params = map[string]interface{}{key: value} 383 case '(': 384 params, ok = p.assignlist() 385 default: 386 p.next() 387 p.errorf("unexpected: %s", scanner.TokenString(tok)) 388 } 389 return 390 } 391 392 // instance: 393 // ident ident 394 // ident ident assignlist 395 func (p *parser) instance() (inst *instance, ok bool) { 396 if p.next() != scanner.Ident { 397 p.errorf("expected identifier") 398 return 399 } 400 inst = &instance{name: p.text()} 401 if p.next() != scanner.Ident { 402 p.errorf("expected identifier") 403 return 404 } 405 inst.parent = p.text() 406 if p.peek() != '(' { 407 ok = true 408 return 409 } 410 inst.params, ok = p.assignlist() 411 return 412 } 413 414 // assign: 415 // key = value 416 func (p *parser) assign() (key string, value interface{}, ok bool) { 417 if p.next() != scanner.Ident { 418 p.errorf("expected identifier") 419 return 420 } 421 key = p.text() 422 if p.next() != '=' { 423 p.errorf(`expected "="`) 424 return 425 
} 426 value, ok = p.value() 427 return 428 } 429 430 // assignlist: 431 // ( list ) 432 // 433 // list: 434 // assign 435 // assign ';' list 436 func (p *parser) assignlist() (assigns map[string]interface{}, ok bool) { 437 if p.next() != '(' { 438 p.errorf(`parse error: expected "("`) 439 return 440 } 441 assigns = make(map[string]interface{}) 442 for { 443 switch p.peek() { 444 default: 445 var ( 446 key string 447 value interface{} 448 ) 449 key, value, ok = p.assign() 450 if !ok { 451 return 452 } 453 assigns[key] = value 454 case ';': 455 p.next() 456 case ')': 457 p.next() 458 ok = true 459 return 460 } 461 } 462 } 463 464 // value: 465 // 'true' 466 // 'false' 467 // 'nil' 468 // identifier 469 // integer 470 // float 471 // string 472 func (p *parser) value() (value any, ok bool) { 473 switch tok := p.next(); tok { 474 case scanner.Ident: 475 switch p.text() { 476 case "true": 477 return true, true 478 case "false": 479 return false, true 480 case "nil": 481 return indirect(""), true 482 default: 483 return indirect(p.text()), true 484 } 485 case scanner.String, scanner.RawString: 486 text, err := strconv.Unquote(p.text()) 487 if err != nil { 488 p.errorf("could not parse string: %v", err) 489 return nil, false 490 } 491 return text, true 492 case '-': 493 return p.parseNumber(p.next(), true) 494 default: 495 return p.parseNumber(tok, false) 496 } 497 } 498 499 func (p *parser) parseNumber(tok rune, negate bool) (value any, ok bool) { 500 switch tok { 501 case scanner.Int: 502 v, err := strconv.ParseInt(p.text(), 0, 64) 503 if err != nil { 504 p.errorf("could not parse integer: %v", err) 505 return nil, false 506 } 507 if negate { 508 v = -v 509 } 510 return int(v), true 511 case scanner.Float: 512 v, err := strconv.ParseFloat(p.text(), 64) 513 if err != nil { 514 p.errorf("could not parse float: %v", err) 515 return nil, false 516 } 517 if negate { 518 v = -v 519 } 520 return v, true 521 default: 522 p.errorf("parse error: not a value") 523 return nil, 
false 524 } 525 } 526 527 func (p *parser) next() rune { 528 tok := p.peek() 529 p.insertion = insertionToks[tok] 530 p.scanned = 0 531 return tok 532 } 533 534 func (p *parser) peek() rune { 535 if p.scanned == 0 { 536 p.scanned = p.scanner.Scan() 537 } 538 if p.insertion && p.scanned == '\n' { 539 return ';' 540 } 541 return p.scanned 542 } 543 544 func (p *parser) text() string { 545 return p.scanner.TokenText() 546 } 547 548 func (p *parser) errorf(format string, args ...interface{}) { 549 e := fmt.Sprintf("%s: %s", p.scanner.Position, fmt.Sprintf(format, args...)) 550 p.errors = append(p.errors, e) 551 }