cuelang.org/go@v0.13.0/encoding/toml/decode.go (about) 1 // Copyright 2024 The CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package toml converts TOML to and from CUE. 16 // 17 // WARNING: THIS PACKAGE IS EXPERIMENTAL. 18 // ITS API MAY CHANGE AT ANY TIME. 19 package toml 20 21 import ( 22 "fmt" 23 "io" 24 "strconv" 25 "strings" 26 "time" 27 28 toml "github.com/pelletier/go-toml/v2/unstable" 29 30 "cuelang.org/go/cue/ast" 31 "cuelang.org/go/cue/errors" 32 "cuelang.org/go/cue/literal" 33 "cuelang.org/go/cue/token" 34 ) 35 36 // TODO(mvdan): schema and decode options 37 38 // NewDecoder creates a decoder from a stream of TOML input. 39 func NewDecoder(filename string, r io.Reader) *Decoder { 40 // Note that we don't consume the reader here, 41 // as there's no need, and we can't return an error either. 42 return &Decoder{r: r, filename: filename, seenTableKeys: make(map[string]bool)} 43 } 44 45 // Decoder implements the decoding state. 46 // 47 // Note that TOML files and streams never decode multiple CUE nodes; 48 // subsequent calls to [Decoder.Decode] may return [io.EOF]. 49 type Decoder struct { 50 r io.Reader 51 52 filename string 53 54 decoded bool // whether [Decoder.Decoded] has been called already 55 parser toml.Parser 56 57 // seenTableKeys tracks which rooted keys we have already decoded as tables, 58 // as duplicate table keys in TOML are not allowed. 59 seenTableKeys map[rootedKey]bool 60 61 // topFile is the top-level CUE file we are decoding into. 62 // TODO(mvdan): make an *ast.File once the decoder returns ast.Node rather than ast.Expr. 63 topFile *ast.StructLit 64 65 // tokenFile is used to create positions which can be used for error values and syntax tree nodes. 66 tokenFile *token.File 67 68 // openTableArrays keeps track of all the declared table arrays so that 69 // later headers can append a new table array element, or add a field 70 // to the last element in a table array. 71 // 72 // TODO(mvdan): an unsorted slice means we do two linear searches per header key. 73 // For N distinct `[[keys]]`, this means a decoding runtime of O(2*N*N). 74 // Consider either sorting this array so we can do a binary search for O(N*log2(N)), 75 // or perhaps a tree, although for a nesting level D, that could cause O(N*D), 76 // and a tree would use more slices and so more allocations. 77 // 78 // Note that a map is not a good option either, because even though it makes 79 // exact lookups cheap, prefix matches are still linear and relatively slow. 80 // A sorted slice allows both mechanisms to use a form of binary search. 81 openTableArrays []openTableArray 82 83 // currentTableKey is the rooted key for the current table where the following 84 // TOML `key = value` lines will be inserted. 85 currentTableKey rootedKey 86 87 // currentTable is the CUE struct literal for currentTableKey. 88 // It is nil before the first [header] or [[header]], 89 // in which case any key-values are inserted in topFile. 90 currentTable *ast.StructLit 91 } 92 93 // rootedKey is a dot-separated path from the root of the TOML document. 94 // The string elements in between the dots may be quoted to avoid ambiguity. 95 // For the time being, this is just an alias for the sake of documentation. 96 // 97 // A path into an array element is like "arr.3", 98 // which looks very similar to a table's "tbl.key", 99 // particularly since a table key can be any string. 100 // However, we just need these keys to detect duplicates, 101 // and a path cannot be both an array and table, so it's OK. 102 type rootedKey = string 103 104 // openTableArray records information about a declared table array. 105 type openTableArray struct { 106 rkey rootedKey 107 level int // the level of nesting, 1 or higher, e.g. 2 for key="foo.bar" 108 list *ast.ListLit 109 lastTable *ast.StructLit 110 } 111 112 // TODO(mvdan): support decoding comments 113 114 // Decode parses the input stream as TOML and converts it to a CUE [*ast.File]. 115 // Because TOML files only contain a single top-level expression, 116 // subsequent calls to this method may return [io.EOF]. 117 func (d *Decoder) Decode() (ast.Expr, error) { 118 if d.decoded { 119 return nil, io.EOF 120 } 121 d.decoded = true 122 // TODO(mvdan): unfortunately go-toml does not support streaming as of v2.2.2. 123 data, err := io.ReadAll(d.r) 124 if err != nil { 125 return nil, err 126 } 127 d.tokenFile = token.NewFile(d.filename, 0, len(data)) 128 d.tokenFile.SetLinesForContent(data) 129 d.parser.Reset(data) 130 // Note that if the input is empty the result will be the same 131 // as for an empty table: an empty struct. 132 // The TOML spec and other decoders also work this way. 133 d.topFile = &ast.StructLit{} 134 for d.parser.NextExpression() { 135 if err := d.nextRootNode(d.parser.Expression()); err != nil { 136 return nil, err 137 } 138 } 139 if err := d.parser.Error(); err != nil { 140 if err, ok := err.(*toml.ParserError); ok { 141 shape := d.parser.Shape(d.parser.Range(err.Highlight)) 142 return nil, d.posErrf(shape.Start, "%s", err.Message) 143 } 144 return nil, err 145 } 146 return d.topFile, nil 147 } 148 149 func (d *Decoder) shape(tnode *toml.Node) toml.Shape { 150 if tnode.Raw.Length == 0 { 151 // Otherwise the Shape method call below happily returns a position like 1:1, 152 // which is worse than no position information as it confuses the user. 153 panic("Decoder.nodePos was given an empty toml.Node as position") 154 } 155 return d.parser.Shape(tnode.Raw) 156 } 157 158 func (d *Decoder) nodeErrf(tnode *toml.Node, format string, args ...any) error { 159 return d.posErrf(d.shape(tnode).Start, format, args...) 160 } 161 162 func (d *Decoder) posErrf(pos toml.Position, format string, args ...any) error { 163 return errors.Newf(d.tokenFile.Pos(pos.Offset, token.NoRelPos), format, args...) 164 } 165 166 // nextRootNode is called for every top-level expression from the TOML parser. 167 // 168 // This method does not return a syntax tree node directly, 169 // because some kinds of top-level expressions like comments and table headers 170 // require recording some state in the decoder to produce a node at a later time. 171 func (d *Decoder) nextRootNode(tnode *toml.Node) error { 172 switch tnode.Kind { 173 // Key-Values in TOML are in the form of: 174 // 175 // foo.title = "Foo" 176 // foo.bar.baz = "value" 177 // 178 // We decode them as "inline" structs in CUE, which keeps the original shape: 179 // 180 // foo: title: "Foo" 181 // foo: bar: baz: "value" 182 // 183 // An alternative would be to join struct literals, which avoids some repetition, 184 // but also introduces extra lines and may break some comment positions: 185 // 186 // foo: { 187 // title: "Foo" 188 // bar: baz: "value" 189 // } 190 case toml.KeyValue: 191 // Top-level fields begin a new line. 192 field, err := d.decodeField(d.currentTableKey, tnode, token.Newline) 193 if err != nil { 194 return err 195 } 196 if d.currentTable != nil { 197 d.currentTable.Elts = append(d.currentTable.Elts, field) 198 } else { 199 d.topFile.Elts = append(d.topFile.Elts, field) 200 } 201 202 case toml.Table: 203 // Tables always begin a new line. 204 key, keyElems := d.decodeKey("", tnode.Key()) 205 // All table keys must be unique, including for the top-level table. 206 if d.seenTableKeys[key] { 207 return d.nodeErrf(tnode.Child(), "duplicate key: %s", key) 208 } 209 d.seenTableKeys[key] = true 210 211 // We want a multi-line struct with curly braces, 212 // just like TOML's tables are on multiple lines. 213 d.currentTable = &ast.StructLit{ 214 // No positions, as TOML doesn't have table delimiters. 215 Lbrace: token.NoPos.WithRel(token.Blank), 216 Rbrace: token.NoPos.WithRel(token.Newline), 217 } 218 array := d.findArrayPrefix(key) 219 if array != nil { // [last_array.new_table] 220 if array.rkey == key { 221 return d.nodeErrf(tnode.Child(), "cannot redeclare table array %q as a table", key) 222 } 223 subKeyElems := keyElems[array.level:] 224 topField, leafField := d.inlineFields(subKeyElems, token.Newline) 225 array.lastTable.Elts = append(array.lastTable.Elts, topField) 226 leafField.Value = d.currentTable 227 } else { // [new_table] 228 topField, leafField := d.inlineFields(keyElems, token.Newline) 229 d.topFile.Elts = append(d.topFile.Elts, topField) 230 leafField.Value = d.currentTable 231 } 232 d.currentTableKey = key 233 234 case toml.ArrayTable: 235 // Table array elements always begin a new line. 236 key, keyElems := d.decodeKey("", tnode.Key()) 237 if d.seenTableKeys[key] { 238 return d.nodeErrf(tnode.Child(), "cannot redeclare key %q as a table array", key) 239 } 240 // Each struct inside a table array sits on separate lines. 241 d.currentTable = &ast.StructLit{ 242 // No positions, as TOML doesn't have table delimiters. 243 Lbrace: token.NoPos.WithRel(token.Newline), 244 Rbrace: token.NoPos.WithRel(token.Newline), 245 } 246 if array := d.findArrayPrefix(key); array != nil && array.level == len(keyElems) { 247 // [[last_array]] - appending to an existing array. 248 d.currentTableKey = key + "." + strconv.Itoa(len(array.list.Elts)) 249 array.lastTable = d.currentTable 250 array.list.Elts = append(array.list.Elts, d.currentTable) 251 } else { 252 // Creating a new array via either [[new_array]] or [[last_array.new_array]]. 253 // We want a multi-line list with square braces, 254 // since TOML's table arrays are on multiple lines. 255 list := &ast.ListLit{ 256 // No positions, as TOML doesn't have array table delimiters. 257 Lbrack: token.NoPos.WithRel(token.Blank), 258 Rbrack: token.NoPos.WithRel(token.Newline), 259 } 260 if array == nil { 261 // [[new_array]] - at the top level 262 topField, leafField := d.inlineFields(keyElems, token.Newline) 263 d.topFile.Elts = append(d.topFile.Elts, topField) 264 leafField.Value = list 265 } else { 266 // [[last_array.new_array]] - on the last array element 267 subKeyElems := keyElems[array.level:] 268 topField, leafField := d.inlineFields(subKeyElems, token.Newline) 269 array.lastTable.Elts = append(array.lastTable.Elts, topField) 270 leafField.Value = list 271 } 272 273 d.currentTableKey = key + ".0" 274 list.Elts = append(list.Elts, d.currentTable) 275 d.openTableArrays = append(d.openTableArrays, openTableArray{ 276 rkey: key, 277 level: len(keyElems), 278 list: list, 279 lastTable: d.currentTable, 280 }) 281 } 282 283 default: 284 return fmt.Errorf("encoding/toml.Decoder.nextRootNode: unknown %s %#v", tnode.Kind, tnode) 285 } 286 return nil 287 } 288 289 // decodeField decodes a single table key and its value as a struct field. 290 func (d *Decoder) decodeField(rkey rootedKey, tnode *toml.Node, relPos token.RelPos) (*ast.Field, error) { 291 rkey, keyElems := d.decodeKey(rkey, tnode.Key()) 292 if d.findArray(rkey) != nil { 293 return nil, d.nodeErrf(tnode.Child().Next(), "cannot redeclare table array %q as a table", rkey) 294 } 295 topField, leafField := d.inlineFields(keyElems, relPos) 296 // All table keys must be unique, including inner table ones. 297 if d.seenTableKeys[rkey] { 298 return nil, d.nodeErrf(tnode.Child().Next(), "duplicate key: %s", rkey) 299 } 300 d.seenTableKeys[rkey] = true 301 value, err := d.decodeExpr(rkey, tnode.Value()) 302 if err != nil { 303 return nil, err 304 } 305 leafField.Value = value 306 return topField, nil 307 } 308 309 // findArray returns an existing table array if one exists at exactly the given key. 310 func (d *Decoder) findArray(rkey rootedKey) *openTableArray { 311 for i, arr := range d.openTableArrays { 312 if arr.rkey == rkey { 313 return &d.openTableArrays[i] 314 } 315 } 316 return nil 317 } 318 319 // findArray returns an existing table array if one exists at exactly the given key 320 // or as a prefix to the given key. 321 func (d *Decoder) findArrayPrefix(rkey rootedKey) *openTableArray { 322 // TODO(mvdan): see the performance TODO on [Decoder.openTableArrays]. 323 324 // Prefer an exact match over a relative prefix match. 325 if arr := d.findArray(rkey); arr != nil { 326 return arr 327 } 328 // The longest relative key match wins. 329 maxLevel := 0 330 var maxLevelArr *openTableArray 331 for i, arr := range d.openTableArrays { 332 if strings.HasPrefix(rkey, arr.rkey+".") && arr.level > maxLevel { 333 maxLevel = arr.level 334 maxLevelArr = &d.openTableArrays[i] 335 } 336 } 337 if maxLevel > 0 { 338 return maxLevelArr 339 } 340 return nil 341 } 342 343 // tomlKey represents a name with a position which forms part of a TOML dotted key, 344 // such as "foo" from "[foo.bar.baz]". 345 type tomlKey struct { 346 name string 347 shape toml.Shape 348 } 349 350 // decodeKey extracts a rootedKey from a TOML node key iterator, 351 // appending to the given parent key and returning the unquoted string elements. 352 func (d *Decoder) decodeKey(rkey rootedKey, iter toml.Iterator) (rootedKey, []tomlKey) { 353 var elems []tomlKey 354 for iter.Next() { 355 node := iter.Node() 356 name := string(node.Data) 357 // TODO(mvdan): use an append-like API once we have benchmarks 358 if len(rkey) > 0 { 359 rkey += "." 360 } 361 rkey += quoteLabelIfNeeded(name) 362 elems = append(elems, tomlKey{name, d.shape(node)}) 363 } 364 return rkey, elems 365 } 366 367 // inlineFields constructs a single-line chain of CUE fields joined with structs, 368 // so that an input like: 369 // 370 // ["foo", "bar.baz", "zzz"] 371 // 372 // results in the CUE fields: 373 // 374 // foo: "bar.baz": zzz: <nil> 375 // 376 // The "top" field, in this case "foo", can then be added as an element to a struct. 377 // The "leaf" field, in this case "zzz", leaves its value as nil to be filled out. 378 func (d *Decoder) inlineFields(tkeys []tomlKey, relPos token.RelPos) (top, leaf *ast.Field) { 379 curField := &ast.Field{ 380 Label: d.label(tkeys[0], relPos), 381 } 382 383 topField := curField 384 for _, tkey := range tkeys[1:] { 385 nextField := &ast.Field{ 386 Label: d.label(tkey, token.Blank), // on the same line 387 } 388 curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}} 389 curField = nextField 390 } 391 return topField, curField 392 } 393 394 // quoteLabelIfNeeded quotes a label name only if it needs quoting. 395 // 396 // TODO(mvdan): this exists in multiple packages; move to cue/literal or cue/ast? 397 func quoteLabelIfNeeded(name string) string { 398 if ast.IsValidIdent(name) { 399 return name 400 } 401 return literal.Label.Quote(name) 402 } 403 404 // label creates an ast.Label that represents a key with exactly the literal string name. 405 // This means a quoted string literal for the key "_", as TOML never means "top", 406 // as well as for any keys beginning with an underscore, as we don't want to hide any fields. 407 // cue/format knows how to quote any other identifiers correctly. 408 func (d *Decoder) label(tkey tomlKey, relPos token.RelPos) ast.Label { 409 pos := d.tokenFile.Pos(tkey.shape.Start.Offset, relPos) 410 if strings.HasPrefix(tkey.name, "_") { 411 return &ast.BasicLit{ 412 ValuePos: pos, 413 Kind: token.STRING, 414 Value: literal.String.Quote(tkey.name), 415 } 416 } 417 return &ast.Ident{ 418 NamePos: pos, 419 Name: tkey.name, 420 } 421 } 422 423 // decodeExpr decodes a single TOML value expression, found on the right side 424 // of a `key = value` line. 425 func (d *Decoder) decodeExpr(rkey rootedKey, tnode *toml.Node) (ast.Expr, error) { 426 // TODO(mvdan): we currently assume that TOML basic literals (string, int, float) 427 // are also valid CUE literals; we should double check this, perhaps via fuzzing. 428 data := string(tnode.Data) 429 var expr ast.Expr 430 switch tnode.Kind { 431 case toml.String: 432 expr = ast.NewString(data) 433 case toml.Integer: 434 expr = ast.NewLit(token.INT, data) 435 case toml.Float: 436 expr = ast.NewLit(token.FLOAT, data) 437 case toml.Bool: 438 expr = ast.NewBool(data == "true") 439 case toml.Array: 440 list := &ast.ListLit{} 441 elems := tnode.Children() 442 for elems.Next() { 443 key := rkey + "." + strconv.Itoa(len(list.Elts)) 444 elem, err := d.decodeExpr(key, elems.Node()) 445 if err != nil { 446 return nil, err 447 } 448 list.Elts = append(list.Elts, elem) 449 } 450 expr = list 451 case toml.InlineTable: 452 strct := &ast.StructLit{ 453 // We want a single-line struct, just like TOML's inline tables are on a single line. 454 Lbrace: token.NoPos.WithRel(token.Blank), 455 Rbrace: token.NoPos.WithRel(token.Blank), 456 } 457 elems := tnode.Children() 458 for elems.Next() { 459 // Inline table fields are on the same line. 460 field, err := d.decodeField(rkey, elems.Node(), token.Blank) 461 if err != nil { 462 return nil, err 463 } 464 strct.Elts = append(strct.Elts, field) 465 } 466 expr = strct 467 case toml.LocalDate, toml.LocalTime, toml.LocalDateTime, toml.DateTime: 468 // CUE does not have native date nor time literal kinds, 469 // so we decode these as strings exactly as they came in 470 // and we validate them with time.Format using the corresponding format string. 471 // Not only does this ensure that the resulting CUE can be used with our time package, 472 // but it also means that we can roundtrip a TOML timestamp without confusing it for a string. 473 var format ast.Expr 474 switch tnode.Kind { 475 case toml.LocalDate: 476 // TODO(mvdan): rename time.RFC3339Date to time.DateOnly to mirror Go 477 format = ast.NewSel(&ast.Ident{ 478 Name: "time", 479 Node: ast.NewImport(nil, "time"), 480 }, "RFC3339Date") 481 case toml.LocalTime: 482 // TODO(mvdan): add TimeOnly to CUE's time package to mirror Go 483 format = ast.NewString(time.TimeOnly) 484 case toml.LocalDateTime: 485 // RFC3339 minus the timezone; this seems like a format peculiar to TOML. 486 format = ast.NewString("2006-01-02T15:04:05") 487 default: // DateTime 488 format = ast.NewSel(&ast.Ident{ 489 Name: "time", 490 Node: ast.NewImport(nil, "time"), 491 }, "RFC3339") 492 } 493 expr = ast.NewBinExpr(token.AND, ast.NewString(data), ast.NewCall( 494 ast.NewSel(&ast.Ident{ 495 Name: "time", 496 Node: ast.NewImport(nil, "time"), 497 }, "Format"), format), 498 ) 499 default: 500 return nil, fmt.Errorf("encoding/toml.Decoder.decodeExpr: unknown %s %#v", tnode.Kind, tnode) 501 } 502 // TODO(mvdan): some go-toml nodes such as Kind=toml.Bool do not seem to have a Raw Range 503 // which would let us grab their position information; fix this upstream. 504 if tnode.Raw.Length > 0 { 505 ast.SetPos(expr, d.tokenFile.Pos(d.shape(tnode).Start.Offset, token.NoRelPos)) 506 } 507 return expr, nil 508 }