// Source: cuelang.org/go@v0.10.1/encoding/toml/decode.go

// Copyright 2024 The CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package toml converts TOML to and from CUE.
//
// WARNING: THIS PACKAGE IS EXPERIMENTAL.
// ITS API MAY CHANGE AT ANY TIME.
package toml

import (
	"fmt"
	"io"
	"strconv"
	"strings"

	toml "github.com/pelletier/go-toml/v2/unstable"

	"cuelang.org/go/cue/ast"
	"cuelang.org/go/cue/errors"
	"cuelang.org/go/cue/literal"
	"cuelang.org/go/cue/token"
)

// TODO(mvdan): schema and decode options

// NewDecoder creates a decoder from a stream of TOML input.
//
// The filename is only stored at this point; [Decoder.Decode] later uses it
// to name the token file that error positions refer to.
// The reader is not touched until [Decoder.Decode] is called.
func NewDecoder(filename string, r io.Reader) *Decoder {
	// Note that we don't consume the reader here,
	// as there's no need, and we can't return an error either.
	return &Decoder{r: r, filename: filename, seenTableKeys: make(map[string]bool)}
}

// Decoder implements the decoding state.
//
// Note that TOML files and streams never decode multiple CUE nodes;
// subsequent calls to [Decoder.Decode] may return [io.EOF].
type Decoder struct {
	// r is the TOML input; it is read in full by the first call to [Decoder.Decode].
	r io.Reader

	// filename names the input when creating tokenFile.
	filename string

	decoded bool // whether [Decoder.Decode] has been called already

	// parser is the go-toml parser; it is reset with the entire input on decode.
	parser toml.Parser

	// seenTableKeys tracks which rooted keys we have already decoded as tables,
	// as duplicate table keys in TOML are not allowed.
	// Keys of `key = value` fields are recorded here as well.
	seenTableKeys map[rootedKey]bool

	// topFile is the top-level CUE file we are decoding into.
	// TODO(mvdan): make an *ast.File once the decoder returns ast.Node rather than ast.Expr.
	topFile *ast.StructLit

	// tokenFile is used to create positions which can be used for error values and syntax tree nodes.
	tokenFile *token.File

	// openTableArrays keeps track of all the declared table arrays so that
	// later headers can append a new table array element, or add a field
	// to the last element in a table array.
	//
	// TODO(mvdan): an unsorted slice means we do two linear searches per header key.
	// For N distinct `[[keys]]`, this means a decoding runtime of O(2*N*N).
	// Consider either sorting this array so we can do a binary search for O(N*log2(N)),
	// or perhaps a tree, although for a nesting level D, that could cause O(N*D),
	// and a tree would use more slices and so more allocations.
	//
	// Note that a map is not a good option either, because even though it makes
	// exact lookups cheap, prefix matches are still linear and relatively slow.
	// A sorted slice allows both mechanisms to use a form of binary search.
	openTableArrays []openTableArray

	// currentTableKey is the rooted key for the current table where the following
	// TOML `key = value` lines will be inserted.
	currentTableKey rootedKey

	// currentTable is the CUE struct literal for currentTableKey.
	// It is nil before the first [header] or [[header]],
	// in which case any key-values are inserted in topFile.
	currentTable *ast.StructLit
}

// rootedKey is a dot-separated path from the root of the TOML document.
// The string elements in between the dots may be quoted to avoid ambiguity.
// For the time being, this is just an alias for the sake of documentation.
95 // 96 // A path into an array element is like "arr.3", 97 // which looks very similar to a table's "tbl.key", 98 // particularly since a table key can be any string. 99 // However, we just need these keys to detect duplicates, 100 // and a path cannot be both an array and table, so it's OK. 101 type rootedKey = string 102 103 // openTableArray records information about a declared table array. 104 type openTableArray struct { 105 key rootedKey 106 level int // the level of nesting, 1 or higher, e.g. 2 for key="foo.bar" 107 list *ast.ListLit 108 lastTable *ast.StructLit 109 } 110 111 // TODO(mvdan): support decoding comments 112 // TODO(mvdan): support ast.Node positions 113 114 // Decode parses the input stream as TOML and converts it to a CUE [*ast.File]. 115 // Because TOML files only contain a single top-level expression, 116 // subsequent calls to this method may return [io.EOF]. 117 func (d *Decoder) Decode() (ast.Expr, error) { 118 if d.decoded { 119 return nil, io.EOF 120 } 121 d.decoded = true 122 // TODO(mvdan): unfortunately go-toml does not support streaming as of v2.2.2. 123 data, err := io.ReadAll(d.r) 124 if err != nil { 125 return nil, err 126 } 127 d.tokenFile = token.NewFile(d.filename, 0, len(data)) 128 d.tokenFile.SetLinesForContent(data) 129 d.parser.Reset(data) 130 // Note that if the input is empty the result will be the same 131 // as for an empty table: an empty struct. 132 // The TOML spec and other decoders also work this way. 
133 d.topFile = &ast.StructLit{} 134 for d.parser.NextExpression() { 135 if err := d.nextRootNode(d.parser.Expression()); err != nil { 136 return nil, err 137 } 138 } 139 if err := d.parser.Error(); err != nil { 140 if err, ok := err.(*toml.ParserError); ok { 141 shape := d.parser.Shape(d.parser.Range(err.Highlight)) 142 return nil, d.posErrf(shape.Start, "%s", err.Message) 143 } 144 return nil, err 145 } 146 return d.topFile, nil 147 } 148 149 func (d *Decoder) nodeErrf(tnode *toml.Node, format string, args ...any) error { 150 if tnode.Raw.Length == 0 { 151 // Otherwise the Shape method call below happily returns a position like 1:1, 152 // which is worse than no position information as it confuses the user. 153 panic("Decoder.errf was given an empty toml.Node as position") 154 } 155 pos := d.parser.Shape(tnode.Raw).Start 156 return d.posErrf(pos, format, args...) 157 } 158 159 func (d *Decoder) posErrf(pos toml.Position, format string, args ...any) error { 160 return errors.Newf(d.tokenFile.Pos(pos.Offset, token.NoRelPos), format, args...) 161 } 162 163 // nextRootNode is called for every top-level expression from the TOML parser. 164 // 165 // This method does not return a syntax tree node directly, 166 // because some kinds of top-level expressions like comments and table headers 167 // require recording some state in the decoder to produce a node at a later time. 
func (d *Decoder) nextRootNode(tnode *toml.Node) error {
	// Each case below either appends a field to the table that is currently open,
	// or opens a new table (or table array element) and makes it current
	// by updating d.currentTable and d.currentTableKey.
	switch tnode.Kind {
	// Key-Values in TOML are in the form of:
	//
	//	foo.title = "Foo"
	//	foo.bar.baz = "value"
	//
	// We decode them as "inline" structs in CUE, which keeps the original shape:
	//
	//	foo: title: "Foo"
	//	foo: bar: baz: "value"
	//
	// An alternative would be to join struct literals, which avoids some repetition,
	// but also introduces extra lines and may break some comment positions:
	//
	//	foo: {
	//		title: "Foo"
	//		bar: baz: "value"
	//	}
	case toml.KeyValue:
		// Top-level fields begin a new line.
		field, err := d.decodeField(d.currentTableKey, tnode, token.Newline)
		if err != nil {
			return err
		}
		// Before any header has been seen there is no current table,
		// so the field belongs directly to the top-level struct.
		if d.currentTable != nil {
			d.currentTable.Elts = append(d.currentTable.Elts, field)
		} else {
			d.topFile.Elts = append(d.topFile.Elts, field)
		}

	case toml.Table:
		// Tables always begin a new line.
		key, keyElems := decodeKey("", tnode.Key())
		// All table keys must be unique, including for the top-level table.
		//
		// NOTE(review): key is rooted at the document and carries no table array
		// element index, so a `[arr.sub]` header repeated under a later `[[arr]]`
		// element looks like it would be rejected here as a duplicate,
		// even though TOML allows one sub-table per element — confirm with a test.
		if d.seenTableKeys[key] {
			return d.nodeErrf(tnode.Child(), "duplicate key: %s", key)
		}
		d.seenTableKeys[key] = true

		// We want a multi-line struct with curly braces,
		// just like TOML's tables are on multiple lines.
		d.currentTable = &ast.StructLit{
			Lbrace: token.NoPos.WithRel(token.Blank),
			Rbrace: token.NoPos.WithRel(token.Newline),
		}
		array := d.findArrayPrefix(key)
		if array != nil { // [last_array.new_table]
			if array.key == key {
				return d.nodeErrf(tnode.Child(), "cannot redeclare table array %q as a table", key)
			}
			// Only the key elements past the array's own key are needed,
			// since the new table is attached to the array's last element.
			subKeyElems := keyElems[array.level:]
			topField, leafField := inlineFields(subKeyElems, token.Newline)
			array.lastTable.Elts = append(array.lastTable.Elts, topField)
			leafField.Value = d.currentTable
		} else { // [new_table]
			topField, leafField := inlineFields(keyElems, token.Newline)
			d.topFile.Elts = append(d.topFile.Elts, topField)
			leafField.Value = d.currentTable
		}
		d.currentTableKey = key

	case toml.ArrayTable:
		// Table array elements always begin a new line.
		key, keyElems := decodeKey("", tnode.Key())
		// Table array keys are tracked in d.openTableArrays, not in d.seenTableKeys,
		// so a hit here means the key was previously used by a table or a key-value.
		if d.seenTableKeys[key] {
			return d.nodeErrf(tnode.Child(), "cannot redeclare key %q as a table array", key)
		}
		// Each struct inside a table array sits on separate lines.
		d.currentTable = &ast.StructLit{
			Lbrace: token.NoPos.WithRel(token.Newline),
			Rbrace: token.NoPos.WithRel(token.Newline),
		}
		// An array at the same nesting level as the header can only be an exact key match;
		// any other non-nil result is a parent array found by prefix.
		//
		// NOTE(review): entries are never removed from d.openTableArrays, so a
		// `[[a.b]]` header following a new `[[a]]` element appears to append to the
		// list created under the previous `a` element rather than starting a new
		// list under the latest one — confirm with a test.
		if array := d.findArrayPrefix(key); array != nil && array.level == len(keyElems) {
			// [[last_array]] - appending to an existing array.
			d.currentTableKey = key + "." + strconv.Itoa(len(array.list.Elts))
			array.lastTable = d.currentTable
			array.list.Elts = append(array.list.Elts, d.currentTable)
		} else {
			// Creating a new array via either [[new_array]] or [[last_array.new_array]].
			// We want a multi-line list with square braces,
			// since TOML's table arrays are on multiple lines.
			list := &ast.ListLit{
				Lbrack: token.NoPos.WithRel(token.Blank),
				Rbrack: token.NoPos.WithRel(token.Newline),
			}
			if array == nil {
				// [[new_array]] - at the top level
				topField, leafField := inlineFields(keyElems, token.Newline)
				d.topFile.Elts = append(d.topFile.Elts, topField)
				leafField.Value = list
			} else {
				// [[last_array.new_array]] - on the last array element
				subKeyElems := keyElems[array.level:]
				topField, leafField := inlineFields(subKeyElems, token.Newline)
				array.lastTable.Elts = append(array.lastTable.Elts, topField)
				leafField.Value = list
			}

			// The new array starts with the current table as its first element,
			// and is remembered so that later headers can find it.
			d.currentTableKey = key + ".0"
			list.Elts = append(list.Elts, d.currentTable)
			d.openTableArrays = append(d.openTableArrays, openTableArray{
				key:       key,
				level:     len(keyElems),
				list:      list,
				lastTable: d.currentTable,
			})
		}

	default:
		return fmt.Errorf("encoding/toml.Decoder.nextRootNode: unknown %s %#v", tnode.Kind, tnode)
	}
	return nil
}

// decodeField decodes a single table key and its value as a struct field.
//
// The key parameter is the rooted key of the enclosing table; the field's own
// key elements are appended to it to form the rooted key used for duplicate detection.
// The relPos parameter is the relative position given to the first label of the field.
//
// The returned field is the outermost one in the inline chain built from
// the key elements; the decoded value is attached to the innermost one.
// An error is returned if the rooted key matches a declared table array,
// if it was already seen, or if the value fails to decode.
func (d *Decoder) decodeField(key rootedKey, tnode *toml.Node, relPos token.RelPos) (*ast.Field, error) {
	key, keyElems := decodeKey(key, tnode.Key())
	// Errors below are positioned at tnode.Child().Next(),
	// presumably the first key node of the key-value — confirm against go-toml's node layout.
	if d.findArray(key) != nil {
		return nil, d.nodeErrf(tnode.Child().Next(), "cannot redeclare table array %q as a table", key)
	}
	topField, leafField := inlineFields(keyElems, relPos)
	// All table keys must be unique, including inner table ones.
	if d.seenTableKeys[key] {
		return nil, d.nodeErrf(tnode.Child().Next(), "duplicate key: %s", key)
	}
	d.seenTableKeys[key] = true
	// The field's rooted key becomes the parent key for any nested arrays or inline tables.
	value, err := d.decodeExpr(key, tnode.Value())
	if err != nil {
		return nil, err
	}
	leafField.Value = value
	return topField, nil
}

// findArray returns an existing table array if one exists at exactly the given key.
304 func (d *Decoder) findArray(key rootedKey) *openTableArray { 305 for i, arr := range d.openTableArrays { 306 if arr.key == key { 307 return &d.openTableArrays[i] 308 } 309 } 310 return nil 311 } 312 313 // findArray returns an existing table array if one exists at exactly the given key 314 // or as a prefix to the given key. 315 func (d *Decoder) findArrayPrefix(key rootedKey) *openTableArray { 316 // TODO(mvdan): see the performance TODO on [Decoder.openTableArrays]. 317 318 // Prefer an exact match over a relative prefix match. 319 if arr := d.findArray(key); arr != nil { 320 return arr 321 } 322 // The longest relative key match wins. 323 maxLevel := 0 324 var maxLevelArr *openTableArray 325 for i, arr := range d.openTableArrays { 326 if strings.HasPrefix(key, arr.key+".") && arr.level > maxLevel { 327 maxLevel = arr.level 328 maxLevelArr = &d.openTableArrays[i] 329 } 330 } 331 if maxLevel > 0 { 332 return maxLevelArr 333 } 334 return nil 335 } 336 337 // decodeKey extracts a rootedKey from a TOML node key iterator, 338 // appending to the given parent key and returning the unquoted string elements. 339 func decodeKey(key rootedKey, iter toml.Iterator) (rootedKey, []string) { 340 var elems []string 341 for iter.Next() { 342 name := string(iter.Node().Data) 343 // TODO(mvdan): use an append-like API once we have benchmarks 344 if len(key) > 0 { 345 key += "." 346 } 347 key += quoteLabelIfNeeded(name) 348 elems = append(elems, name) 349 } 350 return key, elems 351 } 352 353 // inlineFields constructs a single-line chain of CUE fields joined with structs, 354 // so that an input like: 355 // 356 // ["foo", "bar.baz", "zzz"] 357 // 358 // results in the CUE fields: 359 // 360 // foo: "bar.baz": zzz: <nil> 361 // 362 // The "top" field, in this case "foo", can then be added as an element to a struct. 363 // The "leaf" field, in this case "zzz", leaves its value as nil to be filled out. 
364 func inlineFields(names []string, relPos token.RelPos) (top, leaf *ast.Field) { 365 curField := &ast.Field{ 366 Label: label(names[0], token.NoPos.WithRel(relPos)), 367 } 368 369 topField := curField 370 for _, elem := range names[1:] { 371 nextField := &ast.Field{ 372 Label: label(elem, token.NoPos.WithRel(token.Blank)), // on the same line 373 } 374 curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}} 375 curField = nextField 376 } 377 return topField, curField 378 } 379 380 // quoteLabelIfNeeded quotes a label name only if it needs quoting. 381 // 382 // TODO(mvdan): this exists in multiple packages; move to cue/literal or cue/ast? 383 func quoteLabelIfNeeded(name string) string { 384 if ast.IsValidIdent(name) { 385 return name 386 } 387 return literal.Label.Quote(name) 388 } 389 390 // label creates an ast.Label that represents a key with exactly the literal string name. 391 // This means a quoted string literal for the key "_", as TOML never means "top", 392 // as well as for any keys beginning with an underscore, as we don't want to hide any fields. 393 // cue/format knows how to quote any other identifiers correctly. 394 func label(name string, pos token.Pos) ast.Label { 395 if strings.HasPrefix(name, "_") { 396 return &ast.BasicLit{ 397 ValuePos: pos, 398 Kind: token.STRING, 399 Value: literal.String.Quote(name), 400 } 401 } 402 return &ast.Ident{ 403 NamePos: pos, 404 Name: name, 405 } 406 } 407 408 // decodeExpr decodes a single TOML value expression, found on the right side 409 // of a `key = value` line. 410 func (d *Decoder) decodeExpr(key rootedKey, tnode *toml.Node) (ast.Expr, error) { 411 // TODO(mvdan): we currently assume that TOML basic literals (string, int, float) 412 // are also valid CUE literals; we should double check this, perhaps via fuzzing. 
413 data := string(tnode.Data) 414 switch tnode.Kind { 415 case toml.String: 416 return ast.NewString(data), nil 417 case toml.Integer: 418 return ast.NewLit(token.INT, data), nil 419 case toml.Float: 420 return ast.NewLit(token.FLOAT, data), nil 421 case toml.Bool: 422 return ast.NewBool(data == "true"), nil 423 case toml.Array: 424 list := &ast.ListLit{} 425 elems := tnode.Children() 426 for elems.Next() { 427 key := key + "." + strconv.Itoa(len(list.Elts)) 428 elem, err := d.decodeExpr(key, elems.Node()) 429 if err != nil { 430 return nil, err 431 } 432 list.Elts = append(list.Elts, elem) 433 } 434 return list, nil 435 case toml.InlineTable: 436 strct := &ast.StructLit{ 437 // We want a single-line struct, just like TOML's inline tables are on a single line. 438 Lbrace: token.NoPos.WithRel(token.Blank), 439 Rbrace: token.NoPos.WithRel(token.Blank), 440 } 441 elems := tnode.Children() 442 for elems.Next() { 443 // Inline table fields are on the same line. 444 field, err := d.decodeField(key, elems.Node(), token.Blank) 445 if err != nil { 446 return nil, err 447 } 448 strct.Elts = append(strct.Elts, field) 449 } 450 return strct, nil 451 // TODO(mvdan): dates and times 452 default: 453 return nil, fmt.Errorf("encoding/toml.Decoder.decodeExpr: unknown %s %#v", tnode.Kind, tnode) 454 } 455 }