github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/config/parse.go (about)

     1  // Copyright 2019 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package config
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"os"
    13  	"path/filepath"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  	"text/scanner"
    18  	"unicode"
    19  )
    20  
    21  // insertionToks defines the sets of tokens after which
    22  // a semicolon is inserted.
    23  var insertionToks = map[rune]bool{
    24  	scanner.Ident:     true,
    25  	scanner.String:    true,
    26  	scanner.RawString: true,
    27  	scanner.Int:       true,
    28  	scanner.Float:     true,
    29  	scanner.Char:      true,
    30  	')':               true,
    31  	'}':               true,
    32  	']':               true,
    33  }
    34  
    35  // def wraps a value to indicate that it is a default.
    36  type def struct{ value any }
    37  
    38  // unwrap returns the value v, unwrapped from def.
    39  func unwrap(v interface{}) (_ any, wasDef bool) {
    40  	if v, ok := v.(def); ok {
    41  		u, _ := unwrap(v.value)
    42  		return u, true
    43  	}
    44  	return v, false
    45  }
    46  
    47  // indirect is a type that indicates an indirection.
    48  type indirect string
    49  
    50  // GoString renders an indirect type as a string without quotes,
    51  // matching the concrete representation of indirections.
    52  func (i indirect) GoString() string {
    53  	if i == "" {
    54  		return "nil"
    55  	}
    56  	return string(i)
    57  }
    58  
    59  // An instance stores a parsed configuration clause.
    60  type instance struct {
    61  	// name is the global name of the instance.
    62  	name string
    63  	// parent is the instance of which this is derived, if any.
    64  	parent string
    65  	// params contains the set of parameters defined by this instance.
    66  	// The values of the parameter map takes on valid config literal
    67  	// values. They are: indirect, bool, int, float64, and string.
    68  	params map[string]interface{}
    69  }
    70  
    71  // Merge merges the provided instance into inst. Any
    72  // nondefault parameter values in other are set in this
    73  // instance.
    74  func (inst *instance) Merge(other *instance) {
    75  	if other.parent != "" {
    76  		inst.parent = other.parent
    77  	}
    78  	for k, v := range other.params {
    79  		if _, ok := v.(def); ok {
    80  			continue
    81  		}
    82  		inst.params[k] = v
    83  	}
    84  }
    85  
    86  // Equal tells whether two instances are equal.
    87  func (inst *instance) Equal(other *instance) bool {
    88  	if inst.name != other.name || inst.parent != other.parent || len(inst.params) != len(other.params) {
    89  		return false
    90  	}
    91  	for k, v := range inst.params {
    92  		w, ok := other.params[k]
    93  		if !ok {
    94  			return false
    95  		}
    96  		v, _ = unwrap(v)
    97  		w, _ = unwrap(w)
    98  		switch vval := v.(type) {
    99  		case indirect:
   100  			wval, ok := w.(indirect)
   101  			if !ok || vval != wval {
   102  				return false
   103  			}
   104  		case string:
   105  			wval, ok := w.(string)
   106  			if !ok || vval != wval {
   107  				return false
   108  			}
   109  		case bool:
   110  			wval, ok := w.(bool)
   111  			if !ok || vval != wval {
   112  				return false
   113  			}
   114  		case int:
   115  			wval, ok := w.(int)
   116  			if !ok || vval != wval {
   117  				return false
   118  			}
   119  		case float64:
   120  			wval, ok := w.(float64)
   121  			if !ok || vval != wval {
   122  				return false
   123  			}
   124  		}
   125  	}
   126  	return true
   127  }
   128  
   129  // instances stores a collection of named instanes.
   130  type instances map[string]*instance
   131  
   132  // Merge merges an instance into this collection.
   133  func (m instances) Merge(inst *instance) {
   134  	if m[inst.name] == nil {
   135  		m[inst.name] = inst
   136  		return
   137  	}
   138  	m[inst.name].Merge(inst)
   139  }
   140  
   141  // Equal tells whether instances m is equal to instances n.
   142  func (m instances) Equal(n instances) bool {
   143  	if len(m) != len(n) {
   144  		return false
   145  	}
   146  	for name, minst := range m {
   147  		ninst, ok := n[name]
   148  		if !ok {
   149  			return false
   150  		}
   151  		if !minst.Equal(ninst) {
   152  			return false
   153  		}
   154  	}
   155  	return true
   156  }
   157  
   158  // SyntaxString returns a string representation of this instance
   159  // which is also valid config syntax. Docs optionally provides
   160  // documentation for the parameters in the instance.
   161  func (inst *instance) SyntaxString(docs map[string]string) string {
   162  	// TODO: Consider printing floats with minimum precision (1 appears as 1.0) so users
   163  	// can easily contrast them with integers.
   164  	var b strings.Builder
   165  	writeDoc(&b, "", docs[""])
   166  	if inst.parent == "" {
   167  		b.WriteString("param ")
   168  		b.WriteString(inst.name)
   169  		if len(inst.params) == 0 {
   170  			b.WriteString(" ()\n")
   171  			return b.String()
   172  		}
   173  		b.WriteString(" (\n")
   174  		writeParams(&b, inst.params, docs)
   175  		b.WriteString(")\n")
   176  		return b.String()
   177  	}
   178  	b.WriteString("instance ")
   179  	b.WriteString(inst.name)
   180  	b.WriteString(" ")
   181  	b.WriteString(inst.parent)
   182  	if len(inst.params) > 0 {
   183  		b.WriteString(" (\n")
   184  		writeParams(&b, inst.params, docs)
   185  		b.WriteString(")")
   186  	}
   187  	b.WriteString("\n")
   188  	return b.String()
   189  }
   190  
   191  func writeDoc(b *strings.Builder, prefix string, doc string) {
   192  	if doc == "" {
   193  		return
   194  	}
   195  	for _, line := range strings.Split(doc, "\n") {
   196  		b.WriteString(prefix)
   197  		b.WriteString("// ")
   198  		b.WriteString(line)
   199  		b.WriteString("\n")
   200  	}
   201  }
   202  
   203  func writeParams(b *strings.Builder, params map[string]any, docs map[string]string) {
   204  	forEachParam(params, func(name string, v any) {
   205  		writeDoc(b, "\t", docs[name])
   206  		v, wasDef := unwrap(v)
   207  		var repr string
   208  		switch vt := v.(type) {
   209  		case string:
   210  			// Improve readability by using a raw literal (no quote-escaping), if possible.
   211  			if strings.ContainsRune(vt, '"') && !strings.ContainsRune(vt, '`') {
   212  				repr = "`" + vt + "`"
   213  			} else {
   214  				repr = strconv.Quote(vt)
   215  			}
   216  		default:
   217  			repr = fmt.Sprintf("%#v", v)
   218  		}
   219  		fmt.Fprintf(b, "\t%s = %s", name, repr)
   220  		if wasDef {
   221  			b.WriteString(" // default")
   222  		}
   223  		b.WriteString("\n")
   224  	})
   225  }
   226  
   227  func forEachParam(params map[string]any, fn func(k string, v any)) {
   228  	keys := make([]string, 0, len(params))
   229  	for k := range params {
   230  		keys = append(keys, k)
   231  	}
   232  	sort.Strings(keys)
   233  	for _, k := range keys {
   234  		fn(k, params[k])
   235  	}
   236  }
   237  
   238  // A parser stores parser state defines the productions
   239  // in the profile grammar.
   240  type parser struct {
   241  	scanner scanner.Scanner
   242  	errors  []string
   243  
   244  	insertion bool
   245  	scanned   rune
   246  }
   247  
   248  // parse parses the config read by the provided reader into a
   249  // concrete profile into a set of instances. If the reader r
   250  // implements
   251  //
   252  //	Name() string
   253  //
   254  // then this is used as a filename to display positional information
   255  // in error messages.
   256  func parse(r io.Reader) (instances, error) {
   257  	var p parser
   258  	p.scanner.Whitespace &= ^uint64(1 << '\n')
   259  	p.scanner.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars |
   260  		scanner.ScanStrings | scanner.ScanRawStrings
   261  	p.scanner.IsIdentRune = func(ch rune, i int) bool {
   262  		return unicode.IsLetter(ch) || (unicode.IsDigit(ch) || ch == '_' || ch == '/' || ch == '-') && i > 0
   263  	}
   264  	if named, ok := r.(interface{ Name() string }); ok {
   265  		filename := named.Name()
   266  		if cwd, err := os.Getwd(); err == nil {
   267  			if rel, err := filepath.Rel(cwd, filename); err == nil && len(rel) < len(filename) {
   268  				filename = rel
   269  			}
   270  		}
   271  		p.scanner.Position.Filename = filename
   272  	}
   273  	p.scanner.Error = func(s *scanner.Scanner, msg string) {
   274  		// TODO(marius): report these in error
   275  		log.Printf("%s: %s", s.Position, msg)
   276  	}
   277  	p.scanner.Init(r)
   278  	if insts, ok := p.toplevel(); ok {
   279  		return insts, nil
   280  	}
   281  	switch len(p.errors) {
   282  	case 0:
   283  		return nil, errors.New("parse error")
   284  	case 1:
   285  		return nil, fmt.Errorf("parse error: %s", p.errors[0])
   286  	default:
   287  		return nil, fmt.Errorf("parse error:\n%s", strings.Join(p.errors, "\n"))
   288  	}
   289  }
   290  
   291  // toplevel parses the config grammar. It is as follows:
   292  //
   293  //	toplevel:
   294  //		clause
   295  //		clause ';' toplevel
   296  //		<eof>
   297  //
   298  //	clause:
   299  //		param
   300  //		instance
   301  //
   302  //	param:
   303  //		ident assign
   304  //		ident assignlist
   305  //
   306  //	instance:
   307  //		ident ident
   308  //		ident ident assignlist
   309  //
   310  //	assign:
   311  //		key = value
   312  //
   313  //	assignlist:
   314  //		( list )
   315  //
   316  //	list:
   317  //		assign
   318  //		assign ';' list
   319  //
   320  //	value:
   321  //		'true'
   322  //		'false'
   323  //		'nil'
   324  //		ident
   325  //		integer
   326  //		float
   327  //		string
   328  func (p *parser) toplevel() (insts instances, ok bool) {
   329  	ok = true // Empty input is okay.
   330  	insts = make(instances)
   331  	for {
   332  		switch p.next() {
   333  		case scanner.EOF:
   334  			return
   335  		case ';':
   336  		case scanner.Ident:
   337  			switch p.text() {
   338  			case "param":
   339  				var (
   340  					name   string
   341  					params map[string]interface{}
   342  				)
   343  				name, params, ok = p.param()
   344  				if !ok {
   345  					return
   346  				}
   347  				insts.Merge(&instance{name: name, params: params})
   348  			case "instance":
   349  				var inst *instance
   350  				inst, ok = p.instance()
   351  				if !ok {
   352  					return
   353  				}
   354  				insts.Merge(inst)
   355  			default:
   356  				p.errorf("unrecognized toplevel clause: %s", p.text())
   357  				return nil, false
   358  			}
   359  		}
   360  	}
   361  }
   362  
   363  // param:
   364  //	ident assign
   365  //	ident assignlist
   366  func (p *parser) param() (instance string, params map[string]interface{}, ok bool) {
   367  	if p.next() != scanner.Ident {
   368  		p.errorf("expected identifier")
   369  		return
   370  	}
   371  	instance = p.text()
   372  	switch tok := p.peek(); tok {
   373  	case scanner.Ident:
   374  		var (
   375  			key   string
   376  			value interface{}
   377  		)
   378  		key, value, ok = p.assign()
   379  		if !ok {
   380  			return
   381  		}
   382  		params = map[string]interface{}{key: value}
   383  	case '(':
   384  		params, ok = p.assignlist()
   385  	default:
   386  		p.next()
   387  		p.errorf("unexpected: %s", scanner.TokenString(tok))
   388  	}
   389  	return
   390  }
   391  
   392  // instance:
   393  //	ident ident
   394  //	ident ident assignlist
   395  func (p *parser) instance() (inst *instance, ok bool) {
   396  	if p.next() != scanner.Ident {
   397  		p.errorf("expected identifier")
   398  		return
   399  	}
   400  	inst = &instance{name: p.text()}
   401  	if p.next() != scanner.Ident {
   402  		p.errorf("expected identifier")
   403  		return
   404  	}
   405  	inst.parent = p.text()
   406  	if p.peek() != '(' {
   407  		ok = true
   408  		return
   409  	}
   410  	inst.params, ok = p.assignlist()
   411  	return
   412  }
   413  
   414  // assign:
   415  //	key = value
   416  func (p *parser) assign() (key string, value interface{}, ok bool) {
   417  	if p.next() != scanner.Ident {
   418  		p.errorf("expected identifier")
   419  		return
   420  	}
   421  	key = p.text()
   422  	if p.next() != '=' {
   423  		p.errorf(`expected "="`)
   424  		return
   425  	}
   426  	value, ok = p.value()
   427  	return
   428  }
   429  
   430  // assignlist:
   431  //	( list )
   432  //
   433  // list:
   434  //	assign
   435  //	assign ';' list
   436  func (p *parser) assignlist() (assigns map[string]interface{}, ok bool) {
   437  	if p.next() != '(' {
   438  		p.errorf(`parse error: expected "("`)
   439  		return
   440  	}
   441  	assigns = make(map[string]interface{})
   442  	for {
   443  		switch p.peek() {
   444  		default:
   445  			var (
   446  				key   string
   447  				value interface{}
   448  			)
   449  			key, value, ok = p.assign()
   450  			if !ok {
   451  				return
   452  			}
   453  			assigns[key] = value
   454  		case ';':
   455  			p.next()
   456  		case ')':
   457  			p.next()
   458  			ok = true
   459  			return
   460  		}
   461  	}
   462  }
   463  
   464  // value:
   465  //	'true'
   466  //	'false'
   467  //	'nil'
   468  //	identifier
   469  //	integer
   470  //	float
   471  //	string
   472  func (p *parser) value() (value any, ok bool) {
   473  	switch tok := p.next(); tok {
   474  	case scanner.Ident:
   475  		switch p.text() {
   476  		case "true":
   477  			return true, true
   478  		case "false":
   479  			return false, true
   480  		case "nil":
   481  			return indirect(""), true
   482  		default:
   483  			return indirect(p.text()), true
   484  		}
   485  	case scanner.String, scanner.RawString:
   486  		text, err := strconv.Unquote(p.text())
   487  		if err != nil {
   488  			p.errorf("could not parse string: %v", err)
   489  			return nil, false
   490  		}
   491  		return text, true
   492  	case '-':
   493  		return p.parseNumber(p.next(), true)
   494  	default:
   495  		return p.parseNumber(tok, false)
   496  	}
   497  }
   498  
   499  func (p *parser) parseNumber(tok rune, negate bool) (value any, ok bool) {
   500  	switch tok {
   501  	case scanner.Int:
   502  		v, err := strconv.ParseInt(p.text(), 0, 64)
   503  		if err != nil {
   504  			p.errorf("could not parse integer: %v", err)
   505  			return nil, false
   506  		}
   507  		if negate {
   508  			v = -v
   509  		}
   510  		return int(v), true
   511  	case scanner.Float:
   512  		v, err := strconv.ParseFloat(p.text(), 64)
   513  		if err != nil {
   514  			p.errorf("could not parse float: %v", err)
   515  			return nil, false
   516  		}
   517  		if negate {
   518  			v = -v
   519  		}
   520  		return v, true
   521  	default:
   522  		p.errorf("parse error: not a value")
   523  		return nil, false
   524  	}
   525  }
   526  
   527  func (p *parser) next() rune {
   528  	tok := p.peek()
   529  	p.insertion = insertionToks[tok]
   530  	p.scanned = 0
   531  	return tok
   532  }
   533  
   534  func (p *parser) peek() rune {
   535  	if p.scanned == 0 {
   536  		p.scanned = p.scanner.Scan()
   537  	}
   538  	if p.insertion && p.scanned == '\n' {
   539  		return ';'
   540  	}
   541  	return p.scanned
   542  }
   543  
   544  func (p *parser) text() string {
   545  	return p.scanner.TokenText()
   546  }
   547  
   548  func (p *parser) errorf(format string, args ...interface{}) {
   549  	e := fmt.Sprintf("%s: %s", p.scanner.Position, fmt.Sprintf(format, args...))
   550  	p.errors = append(p.errors, e)
   551  }