cuelang.org/go@v0.10.1/encoding/toml/decode.go (about)

     1  // Copyright 2024 The CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package toml converts TOML to and from CUE.
    16  //
    17  // WARNING: THIS PACKAGE IS EXPERIMENTAL.
    18  // ITS API MAY CHANGE AT ANY TIME.
    19  package toml
    20  
    21  import (
    22  	"fmt"
    23  	"io"
    24  	"strconv"
    25  	"strings"
    26  
    27  	toml "github.com/pelletier/go-toml/v2/unstable"
    28  
    29  	"cuelang.org/go/cue/ast"
    30  	"cuelang.org/go/cue/errors"
    31  	"cuelang.org/go/cue/literal"
    32  	"cuelang.org/go/cue/token"
    33  )
    34  
    35  // TODO(mvdan): schema and decode options
    36  
    37  // NewDecoder creates a decoder from a stream of TOML input.
    38  func NewDecoder(filename string, r io.Reader) *Decoder {
    39  	// Note that we don't consume the reader here,
    40  	// as there's no need, and we can't return an error either.
    41  	return &Decoder{r: r, filename: filename, seenTableKeys: make(map[string]bool)}
    42  }
    43  
// Decoder implements the decoding state.
//
// Note that TOML files and streams never decode multiple CUE nodes;
// subsequent calls to [Decoder.Decode] may return [io.EOF].
type Decoder struct {
	// r is the TOML input, which is read in full by the first call to [Decoder.Decode].
	r io.Reader

	// filename is the name given to tokenFile, and so to any positions derived from it.
	filename string

	decoded bool // whether [Decoder.Decode] has been called already
	parser  toml.Parser

	// seenTableKeys tracks which rooted keys we have already decoded as tables,
	// as duplicate table keys in TOML are not allowed.
	seenTableKeys map[rootedKey]bool

	// topFile is the top-level CUE file we are decoding into.
	// TODO(mvdan): make an *ast.File once the decoder returns ast.Node rather than ast.Expr.
	topFile *ast.StructLit

	// tokenFile is used to create positions which can be used for error values and syntax tree nodes.
	tokenFile *token.File

	// openTableArrays keeps track of all the declared table arrays so that
	// later headers can append a new table array element, or add a field
	// to the last element in a table array.
	//
	// TODO(mvdan): an unsorted slice means we do two linear searches per header key.
	// For N distinct `[[keys]]`, this means a decoding runtime of O(2*N*N).
	// Consider either sorting this array so we can do a binary search for O(N*log2(N)),
	// or perhaps a tree, although for a nesting level D, that could cause O(N*D),
	// and a tree would use more slices and so more allocations.
	//
	// Note that a map is not a good option either, because even though it makes
	// exact lookups cheap, prefix matches are still linear and relatively slow.
	// A sorted slice allows both mechanisms to use a form of binary search.
	openTableArrays []openTableArray

	// currentTableKey is the rooted key for the current table where the following
	// TOML `key = value` lines will be inserted.
	currentTableKey rootedKey

	// currentTable is the CUE struct literal for currentTableKey.
	// It is nil before the first [header] or [[header]],
	// in which case any key-values are inserted in topFile.
	currentTable *ast.StructLit
}
    91  
// rootedKey is a dot-separated path from the root of the TOML document.
// The string elements in between the dots may be quoted to avoid ambiguity,
// for example when a key name is not a valid CUE identifier.
// For the time being, this is just an alias for the sake of documentation.
//
// A path into an array element is like "arr.3",
// which looks very similar to a table's "tbl.key",
// particularly since a table key can be any string.
// However, we just need these keys to detect duplicates,
// and a path cannot be both an array and table, so it's OK.
type rootedKey = string
   102  
// openTableArray records information about a declared table array.
type openTableArray struct {
	// key is the rooted key from the `[[key]]` header which declared the array.
	key       rootedKey
	level     int // the level of nesting, 1 or higher, e.g. 2 for key="foo.bar"
	// list is the CUE list literal which holds one struct per array element.
	list      *ast.ListLit
	// lastTable is the most recently added element of list;
	// headers nested under the array insert their fields here.
	lastTable *ast.StructLit
}
   110  
   111  // TODO(mvdan): support decoding comments
   112  // TODO(mvdan): support ast.Node positions
   113  
   114  // Decode parses the input stream as TOML and converts it to a CUE [*ast.File].
   115  // Because TOML files only contain a single top-level expression,
   116  // subsequent calls to this method may return [io.EOF].
   117  func (d *Decoder) Decode() (ast.Expr, error) {
   118  	if d.decoded {
   119  		return nil, io.EOF
   120  	}
   121  	d.decoded = true
   122  	// TODO(mvdan): unfortunately go-toml does not support streaming as of v2.2.2.
   123  	data, err := io.ReadAll(d.r)
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  	d.tokenFile = token.NewFile(d.filename, 0, len(data))
   128  	d.tokenFile.SetLinesForContent(data)
   129  	d.parser.Reset(data)
   130  	// Note that if the input is empty the result will be the same
   131  	// as for an empty table: an empty struct.
   132  	// The TOML spec and other decoders also work this way.
   133  	d.topFile = &ast.StructLit{}
   134  	for d.parser.NextExpression() {
   135  		if err := d.nextRootNode(d.parser.Expression()); err != nil {
   136  			return nil, err
   137  		}
   138  	}
   139  	if err := d.parser.Error(); err != nil {
   140  		if err, ok := err.(*toml.ParserError); ok {
   141  			shape := d.parser.Shape(d.parser.Range(err.Highlight))
   142  			return nil, d.posErrf(shape.Start, "%s", err.Message)
   143  		}
   144  		return nil, err
   145  	}
   146  	return d.topFile, nil
   147  }
   148  
   149  func (d *Decoder) nodeErrf(tnode *toml.Node, format string, args ...any) error {
   150  	if tnode.Raw.Length == 0 {
   151  		// Otherwise the Shape method call below happily returns a position like 1:1,
   152  		// which is worse than no position information as it confuses the user.
   153  		panic("Decoder.errf was given an empty toml.Node as position")
   154  	}
   155  	pos := d.parser.Shape(tnode.Raw).Start
   156  	return d.posErrf(pos, format, args...)
   157  }
   158  
   159  func (d *Decoder) posErrf(pos toml.Position, format string, args ...any) error {
   160  	return errors.Newf(d.tokenFile.Pos(pos.Offset, token.NoRelPos), format, args...)
   161  }
   162  
// nextRootNode is called for every top-level expression from the TOML parser.
//
// This method does not return a syntax tree node directly,
// because some kinds of top-level expressions like comments and table headers
// require recording some state in the decoder to produce a node at a later time.
//
// Three kinds of expression are handled: `key = value` lines, `[table]` headers,
// and `[[table array]]` headers. Any other kind results in an error.
func (d *Decoder) nextRootNode(tnode *toml.Node) error {
	switch tnode.Kind {
	// Key-Values in TOML are in the form of:
	//
	//   foo.title = "Foo"
	//   foo.bar.baz = "value"
	//
	// We decode them as "inline" structs in CUE, which keeps the original shape:
	//
	//   foo: title: "Foo"
	//   foo: bar: baz: "value"
	//
	// An alternative would be to join struct literals, which avoids some repetition,
	// but also introduces extra lines and may break some comment positions:
	//
	//   foo: {
	//       title: "Foo"
	//       bar: baz: "value"
	//   }
	case toml.KeyValue:
		// Top-level fields begin a new line.
		// The field's key is rooted at the current table's key,
		// so that duplicates are detected per table.
		field, err := d.decodeField(d.currentTableKey, tnode, token.Newline)
		if err != nil {
			return err
		}
		// Before any header has been seen, fields belong directly to the top-level struct.
		if d.currentTable != nil {
			d.currentTable.Elts = append(d.currentTable.Elts, field)
		} else {
			d.topFile.Elts = append(d.topFile.Elts, field)
		}

	case toml.Table:
		// Tables always begin a new line.
		key, keyElems := decodeKey("", tnode.Key())
		// All table keys must be unique, including for the top-level table.
		//
		// NOTE(review): the key checked here does not include a table array element index,
		// so declaring the same [array.table] header under two different [[array]] elements
		// is reported as a duplicate key. Confirm against the TOML spec whether
		// such input should be accepted.
		if d.seenTableKeys[key] {
			return d.nodeErrf(tnode.Child(), "duplicate key: %s", key)
		}
		d.seenTableKeys[key] = true

		// We want a multi-line struct with curly braces,
		// just like TOML's tables are on multiple lines.
		d.currentTable = &ast.StructLit{
			Lbrace: token.NoPos.WithRel(token.Blank),
			Rbrace: token.NoPos.WithRel(token.Newline),
		}
		array := d.findArrayPrefix(key)
		if array != nil { // [last_array.new_table]
			if array.key == key {
				return d.nodeErrf(tnode.Child(), "cannot redeclare table array %q as a table", key)
			}
			// Drop the leading elements which name the array itself,
			// as the new table is inserted relative to the array's last element.
			subKeyElems := keyElems[array.level:]
			topField, leafField := inlineFields(subKeyElems, token.Newline)
			array.lastTable.Elts = append(array.lastTable.Elts, topField)
			leafField.Value = d.currentTable
		} else { // [new_table]
			topField, leafField := inlineFields(keyElems, token.Newline)
			d.topFile.Elts = append(d.topFile.Elts, topField)
			leafField.Value = d.currentTable
		}
		d.currentTableKey = key

	case toml.ArrayTable:
		// Table array elements always begin a new line.
		key, keyElems := decodeKey("", tnode.Key())
		// Note that the key is not recorded in seenTableKeys,
		// as repeating a [[key]] header is how further elements are appended.
		if d.seenTableKeys[key] {
			return d.nodeErrf(tnode.Child(), "cannot redeclare key %q as a table array", key)
		}
		// Each struct inside a table array sits on separate lines.
		d.currentTable = &ast.StructLit{
			Lbrace: token.NoPos.WithRel(token.Newline),
			Rbrace: token.NoPos.WithRel(token.Newline),
		}
		// NOTE(review): declared arrays are never removed from openTableArrays,
		// so a nested [[a.b]] following a new [[a]] element matches the list created
		// under the previous [[a]] element and appends to it. Confirm whether
		// a new list under the latest element is what is wanted instead.
		if array := d.findArrayPrefix(key); array != nil && array.level == len(keyElems) {
			// [[last_array]] - appending to an existing array.
			// Key-values in this element are rooted at "key.<index>",
			// so that each element may use the same field names.
			d.currentTableKey = key + "." + strconv.Itoa(len(array.list.Elts))
			array.lastTable = d.currentTable
			array.list.Elts = append(array.list.Elts, d.currentTable)
		} else {
			// Creating a new array via either [[new_array]] or [[last_array.new_array]].
			// We want a multi-line list with square braces,
			// since TOML's table arrays are on multiple lines.
			list := &ast.ListLit{
				Lbrack: token.NoPos.WithRel(token.Blank),
				Rbrack: token.NoPos.WithRel(token.Newline),
			}
			if array == nil {
				// [[new_array]] - at the top level
				topField, leafField := inlineFields(keyElems, token.Newline)
				d.topFile.Elts = append(d.topFile.Elts, topField)
				leafField.Value = list
			} else {
				// [[last_array.new_array]] - on the last array element
				subKeyElems := keyElems[array.level:]
				topField, leafField := inlineFields(subKeyElems, token.Newline)
				array.lastTable.Elts = append(array.lastTable.Elts, topField)
				leafField.Value = list
			}

			// The new array starts out with the current table as its first element,
			// and is remembered so that later headers can find it.
			d.currentTableKey = key + ".0"
			list.Elts = append(list.Elts, d.currentTable)
			d.openTableArrays = append(d.openTableArrays, openTableArray{
				key:       key,
				level:     len(keyElems),
				list:      list,
				lastTable: d.currentTable,
			})
		}

	default:
		return fmt.Errorf("encoding/toml.Decoder.nextRootNode: unknown %s %#v", tnode.Kind, tnode)
	}
	return nil
}
   282  
   283  // decodeField decodes a single table key and its value as a struct field.
   284  func (d *Decoder) decodeField(key rootedKey, tnode *toml.Node, relPos token.RelPos) (*ast.Field, error) {
   285  	key, keyElems := decodeKey(key, tnode.Key())
   286  	if d.findArray(key) != nil {
   287  		return nil, d.nodeErrf(tnode.Child().Next(), "cannot redeclare table array %q as a table", key)
   288  	}
   289  	topField, leafField := inlineFields(keyElems, relPos)
   290  	// All table keys must be unique, including inner table ones.
   291  	if d.seenTableKeys[key] {
   292  		return nil, d.nodeErrf(tnode.Child().Next(), "duplicate key: %s", key)
   293  	}
   294  	d.seenTableKeys[key] = true
   295  	value, err := d.decodeExpr(key, tnode.Value())
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  	leafField.Value = value
   300  	return topField, nil
   301  }
   302  
   303  // findArray returns an existing table array if one exists at exactly the given key.
   304  func (d *Decoder) findArray(key rootedKey) *openTableArray {
   305  	for i, arr := range d.openTableArrays {
   306  		if arr.key == key {
   307  			return &d.openTableArrays[i]
   308  		}
   309  	}
   310  	return nil
   311  }
   312  
   313  // findArray returns an existing table array if one exists at exactly the given key
   314  // or as a prefix to the given key.
   315  func (d *Decoder) findArrayPrefix(key rootedKey) *openTableArray {
   316  	// TODO(mvdan): see the performance TODO on [Decoder.openTableArrays].
   317  
   318  	// Prefer an exact match over a relative prefix match.
   319  	if arr := d.findArray(key); arr != nil {
   320  		return arr
   321  	}
   322  	// The longest relative key match wins.
   323  	maxLevel := 0
   324  	var maxLevelArr *openTableArray
   325  	for i, arr := range d.openTableArrays {
   326  		if strings.HasPrefix(key, arr.key+".") && arr.level > maxLevel {
   327  			maxLevel = arr.level
   328  			maxLevelArr = &d.openTableArrays[i]
   329  		}
   330  	}
   331  	if maxLevel > 0 {
   332  		return maxLevelArr
   333  	}
   334  	return nil
   335  }
   336  
   337  // decodeKey extracts a rootedKey from a TOML node key iterator,
   338  // appending to the given parent key and returning the unquoted string elements.
   339  func decodeKey(key rootedKey, iter toml.Iterator) (rootedKey, []string) {
   340  	var elems []string
   341  	for iter.Next() {
   342  		name := string(iter.Node().Data)
   343  		// TODO(mvdan): use an append-like API once we have benchmarks
   344  		if len(key) > 0 {
   345  			key += "."
   346  		}
   347  		key += quoteLabelIfNeeded(name)
   348  		elems = append(elems, name)
   349  	}
   350  	return key, elems
   351  }
   352  
   353  // inlineFields constructs a single-line chain of CUE fields joined with structs,
   354  // so that an input like:
   355  //
   356  //	["foo", "bar.baz", "zzz"]
   357  //
   358  // results in the CUE fields:
   359  //
   360  //	foo: "bar.baz": zzz: <nil>
   361  //
   362  // The "top" field, in this case "foo", can then be added as an element to a struct.
   363  // The "leaf" field, in this case "zzz", leaves its value as nil to be filled out.
   364  func inlineFields(names []string, relPos token.RelPos) (top, leaf *ast.Field) {
   365  	curField := &ast.Field{
   366  		Label: label(names[0], token.NoPos.WithRel(relPos)),
   367  	}
   368  
   369  	topField := curField
   370  	for _, elem := range names[1:] {
   371  		nextField := &ast.Field{
   372  			Label: label(elem, token.NoPos.WithRel(token.Blank)), // on the same line
   373  		}
   374  		curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}}
   375  		curField = nextField
   376  	}
   377  	return topField, curField
   378  }
   379  
   380  // quoteLabelIfNeeded quotes a label name only if it needs quoting.
   381  //
   382  // TODO(mvdan): this exists in multiple packages; move to cue/literal or cue/ast?
   383  func quoteLabelIfNeeded(name string) string {
   384  	if ast.IsValidIdent(name) {
   385  		return name
   386  	}
   387  	return literal.Label.Quote(name)
   388  }
   389  
   390  // label creates an ast.Label that represents a key with exactly the literal string name.
   391  // This means a quoted string literal for the key "_", as TOML never means "top",
   392  // as well as for any keys beginning with an underscore, as we don't want to hide any fields.
   393  // cue/format knows how to quote any other identifiers correctly.
   394  func label(name string, pos token.Pos) ast.Label {
   395  	if strings.HasPrefix(name, "_") {
   396  		return &ast.BasicLit{
   397  			ValuePos: pos,
   398  			Kind:     token.STRING,
   399  			Value:    literal.String.Quote(name),
   400  		}
   401  	}
   402  	return &ast.Ident{
   403  		NamePos: pos,
   404  		Name:    name,
   405  	}
   406  }
   407  
   408  // decodeExpr decodes a single TOML value expression, found on the right side
   409  // of a `key = value` line.
   410  func (d *Decoder) decodeExpr(key rootedKey, tnode *toml.Node) (ast.Expr, error) {
   411  	// TODO(mvdan): we currently assume that TOML basic literals (string, int, float)
   412  	// are also valid CUE literals; we should double check this, perhaps via fuzzing.
   413  	data := string(tnode.Data)
   414  	switch tnode.Kind {
   415  	case toml.String:
   416  		return ast.NewString(data), nil
   417  	case toml.Integer:
   418  		return ast.NewLit(token.INT, data), nil
   419  	case toml.Float:
   420  		return ast.NewLit(token.FLOAT, data), nil
   421  	case toml.Bool:
   422  		return ast.NewBool(data == "true"), nil
   423  	case toml.Array:
   424  		list := &ast.ListLit{}
   425  		elems := tnode.Children()
   426  		for elems.Next() {
   427  			key := key + "." + strconv.Itoa(len(list.Elts))
   428  			elem, err := d.decodeExpr(key, elems.Node())
   429  			if err != nil {
   430  				return nil, err
   431  			}
   432  			list.Elts = append(list.Elts, elem)
   433  		}
   434  		return list, nil
   435  	case toml.InlineTable:
   436  		strct := &ast.StructLit{
   437  			// We want a single-line struct, just like TOML's inline tables are on a single line.
   438  			Lbrace: token.NoPos.WithRel(token.Blank),
   439  			Rbrace: token.NoPos.WithRel(token.Blank),
   440  		}
   441  		elems := tnode.Children()
   442  		for elems.Next() {
   443  			// Inline table fields are on the same line.
   444  			field, err := d.decodeField(key, elems.Node(), token.Blank)
   445  			if err != nil {
   446  				return nil, err
   447  			}
   448  			strct.Elts = append(strct.Elts, field)
   449  		}
   450  		return strct, nil
   451  	// TODO(mvdan): dates and times
   452  	default:
   453  		return nil, fmt.Errorf("encoding/toml.Decoder.decodeExpr: unknown %s %#v", tnode.Kind, tnode)
   454  	}
   455  }