golang.org/x/arch@v0.17.0/internal/unify/yaml.go (about) 1 // Copyright 2025 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unify 6 7 import ( 8 "errors" 9 "fmt" 10 "io" 11 "regexp" 12 "strings" 13 14 "gopkg.in/yaml.v3" 15 ) 16 17 // UnmarshalOpts provides options to unmarshaling. The zero value is the default 18 // options. 19 type UnmarshalOpts struct { 20 // Path is the file path to store in the [Pos] of all [Value]s. 21 Path string 22 23 // StringReplacer, if non-nil, is called for each string value to perform 24 // any application-specific string interpolation. 25 StringReplacer func(string) string 26 } 27 28 // UnmarshalYAML unmarshals a YAML node into a Closure. 29 // 30 // This is how UnmarshalYAML maps YAML nodes into terminal Values: 31 // 32 // - "_" or !top _ is the top value ([Top]). 33 // 34 // - "_|_" or !bottom _ is the bottom value. This is an error during 35 // unmarshaling, but can appear in marshaled values. 36 // 37 // - "$<name>" or !var <name> is a variable ([Var]). Everywhere the same name 38 // appears within a single unmarshal operation, it is mapped to the same 39 // variable. Different unmarshal operations get different variables, even if 40 // they have the same string name. 41 // 42 // - !regex "x" is a regular expression ([String]), as is any string that 43 // doesn't match "_", "_|_", or "$...". Regular expressions are implicitly 44 // anchored at the beginning and end. If the string doesn't contain any 45 // meta-characters (that is, it's a "literal" regular expression), then it's 46 // treated as an exact string. 47 // 48 // - !string "x", or any int, float, bool, or binary value is an exact string 49 // ([String]). 50 // 51 // - !regex [x, y, ...] is an intersection of regular expressions ([String]). 52 // 53 // This is how UnmarshalYAML maps YAML nodes into non-terminal Values: 54 // 55 // - Sequence nodes like [x, y, z] are tuples ([Tuple]). 56 // 57 // - !repeat [x] is a repeated tuple ([Tuple]), which is 0 or more instances of 58 // x. There must be exactly one element in the list. 59 // 60 // - Mapping nodes like {a: x, b: y} are defs ([Def]). Any fields not listed are 61 // implicitly top. 62 // 63 // - !sum [x, y, z] is a sum of its children. This can be thought of as a union 64 // of the values x, y, and z, or as a non-deterministic choice between x, y, and 65 // z. If a variable appears both inside the sum and outside of it, only the 66 // non-deterministic choice view really works. The unifier does not directly 67 // implement sums; instead, this is decoded as a fresh variable that's 68 // simultaneously bound to x, y, and z. 69 func (c *Closure) UnmarshalYAML(node *yaml.Node) error { 70 return c.unmarshal(node, UnmarshalOpts{}) 71 } 72 73 // Unmarshal is like [UnmarshalYAML], but accepts options and reads from r. If 74 // opts.Path is "" and r has a Name() string method, the result of r.Name() is 75 // used as the path for all [Value]s read from r. 76 func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { 77 if opts.Path == "" { 78 type named interface{ Name() string } 79 if n, ok := r.(named); ok { 80 opts.Path = n.Name() 81 } 82 } 83 84 var node yaml.Node 85 if err := yaml.NewDecoder(r).Decode(&node); err != nil { 86 return err 87 } 88 np := &node 89 if np.Kind == yaml.DocumentNode { 90 np = node.Content[0] 91 } 92 return c.unmarshal(np, opts) 93 } 94 95 func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { 96 dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident)} 97 val, err := dec.value(node) 98 if err != nil { 99 return err 100 } 101 vars := make(map[*ident]*Value) 102 for _, id := range dec.vars { 103 vars[id] = topValue 104 } 105 *c = Closure{val, dec.env} 106 return nil 107 } 108 109 type yamlDecoder struct { 110 opts UnmarshalOpts 111 112 vars map[string]*ident 113 nSums int 114 115 env nonDetEnv 116 } 117 118 func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { 119 pos := &Pos{Path: dec.opts.Path, Line: node.Line} 120 121 // Resolve alias nodes. 122 if node.Kind == yaml.AliasNode { 123 node = node.Alias 124 } 125 126 mk := func(d Domain) (*Value, error) { 127 v := &Value{Domain: d, pos: pos} 128 return v, nil 129 } 130 mk2 := func(d Domain, err error) (*Value, error) { 131 if err != nil { 132 return nil, err 133 } 134 return mk(d) 135 } 136 137 // is tests the kind and long tag of node. 138 is := func(kind yaml.Kind, tag string) bool { 139 return node.Kind == kind && node.LongTag() == tag 140 } 141 isExact := func() bool { 142 if node.Kind != yaml.ScalarNode { 143 return false 144 } 145 // We treat any string-ish YAML node as a string. 146 switch node.LongTag() { 147 case "!string", "tag:yaml.org,2002:int", "tag:yaml.org,2002:float", "tag:yaml.org,2002:bool", "tag:yaml.org,2002:binary": 148 return true 149 } 150 return false 151 } 152 153 // !!str nodes provide a short-hand syntax for several leaf domains that are 154 // also available under explicit tags. To simplify checking below, we set 155 // strVal to non-"" only for !!str nodes. 156 strVal := "" 157 isStr := is(yaml.ScalarNode, "tag:yaml.org,2002:str") 158 if isStr { 159 strVal = node.Value 160 } 161 162 switch { 163 case is(yaml.ScalarNode, "!var"): 164 strVal = "$" + node.Value 165 fallthrough 166 case strings.HasPrefix(strVal, "$"): 167 id, ok := dec.vars[strVal] 168 if !ok { 169 // We encode different idents with the same string name by adding a 170 // #N suffix. Strip that off so it doesn't accumulate. This isn't 171 // meant to be used in user-written input, though nothing stops that. 172 name, _, _ := strings.Cut(strVal, "#") 173 id = &ident{name: name} 174 dec.vars[strVal] = id 175 dec.env = dec.env.bind(id, topValue) 176 } 177 return mk(Var{id: id}) 178 179 case strVal == "_" || is(yaml.ScalarNode, "!top"): 180 return mk(Top{}) 181 182 case strVal == "_|_" || is(yaml.ScalarNode, "!bottom"): 183 return nil, errors.New("found bottom") 184 185 case isExact(): 186 val := node.Value 187 if dec.opts.StringReplacer != nil { 188 val = dec.opts.StringReplacer(val) 189 } 190 return mk(NewStringExact(val)) 191 192 case isStr || is(yaml.ScalarNode, "!regex"): 193 // Any other string we treat as a regex. This will produce an exact 194 // string anyway if the regex is literal. 195 val := node.Value 196 if dec.opts.StringReplacer != nil { 197 val = dec.opts.StringReplacer(val) 198 } 199 return mk2(NewStringRegex(val)) 200 201 case is(yaml.SequenceNode, "!regex"): 202 var vals []string 203 if err := node.Decode(&vals); err != nil { 204 return nil, err 205 } 206 return mk2(NewStringRegex(vals...)) 207 208 case is(yaml.MappingNode, "tag:yaml.org,2002:map"): 209 var fields []string 210 var vals []*Value 211 for i := 0; i < len(node.Content); i += 2 { 212 key := node.Content[i] 213 if key.Kind != yaml.ScalarNode { 214 return nil, fmt.Errorf("non-scalar key %q", key.Value) 215 } 216 val, err := dec.value(node.Content[i+1]) 217 if err != nil { 218 return nil, err 219 } 220 fields = append(fields, key.Value) 221 vals = append(vals, val) 222 } 223 return mk(NewDef(fields, vals)) 224 225 case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): 226 elts := node.Content 227 vs := make([]*Value, 0, len(elts)) 228 for _, elt := range elts { 229 v, err := dec.value(elt) 230 if err != nil { 231 return nil, err 232 } 233 vs = append(vs, v) 234 } 235 return mk(NewTuple(vs...)) 236 237 case is(yaml.SequenceNode, "!repeat") || is(yaml.SequenceNode, "!repeat-unify"): 238 // !repeat must have one child. !repeat-unify is used internally for 239 // delayed unification, and is the same, it's just allowed to have more 240 // than one child. 241 if node.LongTag() == "!repeat" && len(node.Content) != 1 { 242 return nil, fmt.Errorf("!repeat must have exactly one child") 243 } 244 245 // Decode the children to make sure they're well-formed, but otherwise 246 // discard that decoding and do it again every time we need a new 247 // element. 248 var gen []func(e nonDetEnv) (*Value, nonDetEnv) 249 origEnv := dec.env 250 elts := node.Content 251 for i, elt := range elts { 252 _, err := dec.value(elt) 253 if err != nil { 254 return nil, err 255 } 256 // Undo any effects on the environment. We *do* keep any named 257 // variables that were added to the vars map in case they were 258 // introduced within the element. 259 dec.env = origEnv 260 // Add a generator function 261 gen = append(gen, func(e nonDetEnv) (*Value, nonDetEnv) { 262 dec.env = e 263 // TODO: If this is in a sum, this tends to generate a ton of 264 // fresh variables that are different on each branch of the 265 // parent sum. Does it make sense to hold on to the i'th value 266 // of the tuple after we've generated it? 267 v, err := dec.value(elts[i]) 268 if err != nil { 269 // It worked the first time, so this really shouldn't hapen. 270 panic("decoding repeat element failed") 271 } 272 return v, dec.env 273 }) 274 } 275 return mk(NewRepeat(gen...)) 276 277 case is(yaml.SequenceNode, "!sum"): 278 vs := make([]*Value, 0, len(node.Content)) 279 for _, elt := range node.Content { 280 v, err := dec.value(elt) 281 if err != nil { 282 return nil, err 283 } 284 vs = append(vs, v) 285 } 286 if len(vs) == 1 { 287 return vs[0], nil 288 } 289 290 // A sum is implemented as a fresh variable that's simultaneously bound 291 // to each of the descendants. 292 id := &ident{name: fmt.Sprintf("sum%d", dec.nSums)} 293 dec.nSums++ 294 dec.env = dec.env.bind(id, vs...) 295 return mk(Var{id: id}) 296 } 297 298 return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) 299 } 300 301 type yamlEncoder struct { 302 idp identPrinter 303 e nonDetEnv // We track the environment for !repeat nodes. 304 } 305 306 // TODO: Switch some Value marshaling to Closure? 307 308 func (c Closure) MarshalYAML() (any, error) { 309 // TODO: If the environment is trivial, just marshal the value. 310 enc := &yamlEncoder{} 311 return enc.closure(c), nil 312 } 313 314 func (c Closure) String() string { 315 b, err := yaml.Marshal(c) 316 if err != nil { 317 return fmt.Sprintf("marshal failed: %s", err) 318 } 319 return string(b) 320 } 321 322 func (v *Value) MarshalYAML() (any, error) { 323 enc := &yamlEncoder{} 324 return enc.value(v), nil 325 } 326 327 func (v *Value) String() string { 328 b, err := yaml.Marshal(v) 329 if err != nil { 330 return fmt.Sprintf("marshal failed: %s", err) 331 } 332 return string(b) 333 } 334 335 func (enc *yamlEncoder) closure(c Closure) *yaml.Node { 336 enc.e = c.env 337 var n yaml.Node 338 n.Kind = yaml.MappingNode 339 n.Tag = "!closure" 340 n.Content = make([]*yaml.Node, 4) 341 n.Content[0] = new(yaml.Node) 342 n.Content[0].SetString("env") 343 n.Content[2] = new(yaml.Node) 344 n.Content[2].SetString("in") 345 n.Content[3] = enc.value(c.val) 346 // Fill in the env after we've written the value in case value encoding 347 // affects the env. 348 n.Content[1] = enc.env(enc.e) 349 enc.e = nonDetEnv{} // Allow GC'ing the env 350 return &n 351 } 352 353 func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { 354 var n yaml.Node 355 n.Kind = yaml.SequenceNode 356 n.Tag = "!env" 357 for _, term := range e.factors { 358 var nTerm yaml.Node 359 n.Content = append(n.Content, &nTerm) 360 nTerm.Kind = yaml.SequenceNode 361 for _, det := range term.terms { 362 var nDet yaml.Node 363 nTerm.Content = append(nTerm.Content, &nDet) 364 nDet.Kind = yaml.MappingNode 365 for i, val := range det.vals { 366 var nLabel yaml.Node 367 nLabel.SetString(enc.idp.unique(term.ids[i])) 368 nDet.Content = append(nDet.Content, &nLabel, enc.value(val)) 369 } 370 } 371 } 372 return &n 373 } 374 375 var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) 376 377 func (enc *yamlEncoder) value(v *Value) *yaml.Node { 378 var n yaml.Node 379 switch d := v.Domain.(type) { 380 case nil: 381 // Not allowed by unmarshaler, but useful for understanding when 382 // something goes horribly wrong. 383 // 384 // TODO: We might be able to track useful provenance for this, which 385 // would really help with debugging unexpected bottoms. 386 n.SetString("_|_") 387 return &n 388 389 case Top: 390 n.SetString("_") 391 return &n 392 393 case Def: 394 n.Kind = yaml.MappingNode 395 for k, elt := range d.All() { 396 var kn yaml.Node 397 kn.SetString(k) 398 n.Content = append(n.Content, &kn, enc.value(elt)) 399 } 400 n.HeadComment = v.PosString() 401 return &n 402 403 case Tuple: 404 n.Kind = yaml.SequenceNode 405 if d.repeat == nil { 406 for _, elt := range d.vs { 407 n.Content = append(n.Content, enc.value(elt)) 408 } 409 } else { 410 if len(d.repeat) == 1 { 411 n.Tag = "!repeat" 412 } else { 413 n.Tag = "!repeat-unify" 414 } 415 // TODO: I'm not positive this will round-trip everything correctly. 416 for _, gen := range d.repeat { 417 v, e := gen(enc.e) 418 enc.e = e 419 n.Content = append(n.Content, enc.value(v)) 420 } 421 } 422 return &n 423 424 case String: 425 switch d.kind { 426 case stringExact: 427 // Make this into a "nice" !!int node if I can. 428 if yamlIntRe.MatchString(d.exact) { 429 n.SetString(d.exact) 430 n.Tag = "tag:yaml.org,2002:int" 431 return &n 432 } 433 n.SetString(regexp.QuoteMeta(d.exact)) 434 return &n 435 case stringRegex: 436 o := make([]string, 0, 1) 437 for _, re := range d.re { 438 s := re.String() 439 s = strings.TrimSuffix(strings.TrimPrefix(s, `\A(?:`), `)\z`) 440 o = append(o, s) 441 } 442 if len(o) == 1 { 443 n.SetString(o[0]) 444 return &n 445 } 446 n.Encode(o) 447 n.Tag = "!regex" 448 return &n 449 } 450 panic("bad String kind") 451 452 case Var: 453 // TODO: If Var only appears once in the whole Value and is independent 454 // in the environment (part of a term that is only over Var), then emit 455 // this as a !sum instead. 456 if false { 457 var vs []*Value // TODO: Get values of this var. 458 if len(vs) == 1 { 459 return enc.value(vs[0]) 460 } 461 n.Kind = yaml.SequenceNode 462 n.Tag = "!sum" 463 for _, elt := range vs { 464 n.Content = append(n.Content, enc.value(elt)) 465 } 466 return &n 467 } 468 n.SetString(enc.idp.unique(d.id)) 469 if !strings.HasPrefix(d.id.name, "$") { 470 n.Tag = "!var" 471 } 472 return &n 473 } 474 panic(fmt.Sprintf("unknown domain type %T", v.Domain)) 475 }