golang.org/x/arch@v0.17.0/internal/unify/yaml.go (about)

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package unify
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"regexp"
    12  	"strings"
    13  
    14  	"gopkg.in/yaml.v3"
    15  )
    16  
// UnmarshalOpts provides options to unmarshaling. The zero value is the default
// options.
type UnmarshalOpts struct {
	// Path is the file path to store in the [Pos] of all [Value]s.
	//
	// If Path is empty, [Closure.Unmarshal] falls back to the reader's
	// Name() method when the reader has one.
	Path string

	// StringReplacer, if non-nil, is called for each string value to perform
	// any application-specific string interpolation.
	//
	// It is applied to the text of scalar nodes that decode to a [String].
	// Variables, top, and bottom are recognized before replacement and are
	// not passed through it.
	StringReplacer func(string) string
}
    27  
    28  // UnmarshalYAML unmarshals a YAML node into a Closure.
    29  //
    30  // This is how UnmarshalYAML maps YAML nodes into terminal Values:
    31  //
    32  // - "_" or !top _ is the top value ([Top]).
    33  //
    34  // - "_|_" or !bottom _ is the bottom value. This is an error during
    35  // unmarshaling, but can appear in marshaled values.
    36  //
    37  // - "$<name>" or !var <name> is a variable ([Var]). Everywhere the same name
    38  // appears within a single unmarshal operation, it is mapped to the same
    39  // variable. Different unmarshal operations get different variables, even if
    40  // they have the same string name.
    41  //
    42  // - !regex "x" is a regular expression ([String]), as is any string that
    43  // doesn't match "_", "_|_", or "$...". Regular expressions are implicitly
    44  // anchored at the beginning and end. If the string doesn't contain any
    45  // meta-characters (that is, it's a "literal" regular expression), then it's
    46  // treated as an exact string.
    47  //
    48  // - !string "x", or any int, float, bool, or binary value is an exact string
    49  // ([String]).
    50  //
    51  // - !regex [x, y, ...] is an intersection of regular expressions ([String]).
    52  //
    53  // This is how UnmarshalYAML maps YAML nodes into non-terminal Values:
    54  //
    55  // - Sequence nodes like [x, y, z] are tuples ([Tuple]).
    56  //
    57  // - !repeat [x] is a repeated tuple ([Tuple]), which is 0 or more instances of
    58  // x. There must be exactly one element in the list.
    59  //
    60  // - Mapping nodes like {a: x, b: y} are defs ([Def]). Any fields not listed are
    61  // implicitly top.
    62  //
    63  // - !sum [x, y, z] is a sum of its children. This can be thought of as a union
    64  // of the values x, y, and z, or as a non-deterministic choice between x, y, and
    65  // z. If a variable appears both inside the sum and outside of it, only the
    66  // non-deterministic choice view really works. The unifier does not directly
    67  // implement sums; instead, this is decoded as a fresh variable that's
    68  // simultaneously bound to x, y, and z.
    69  func (c *Closure) UnmarshalYAML(node *yaml.Node) error {
    70  	return c.unmarshal(node, UnmarshalOpts{})
    71  }
    72  
    73  // Unmarshal is like [UnmarshalYAML], but accepts options and reads from r. If
    74  // opts.Path is "" and r has a Name() string method, the result of r.Name() is
    75  // used as the path for all [Value]s read from r.
    76  func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error {
    77  	if opts.Path == "" {
    78  		type named interface{ Name() string }
    79  		if n, ok := r.(named); ok {
    80  			opts.Path = n.Name()
    81  		}
    82  	}
    83  
    84  	var node yaml.Node
    85  	if err := yaml.NewDecoder(r).Decode(&node); err != nil {
    86  		return err
    87  	}
    88  	np := &node
    89  	if np.Kind == yaml.DocumentNode {
    90  		np = node.Content[0]
    91  	}
    92  	return c.unmarshal(np, opts)
    93  }
    94  
    95  func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error {
    96  	dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident)}
    97  	val, err := dec.value(node)
    98  	if err != nil {
    99  		return err
   100  	}
   101  	vars := make(map[*ident]*Value)
   102  	for _, id := range dec.vars {
   103  		vars[id] = topValue
   104  	}
   105  	*c = Closure{val, dec.env}
   106  	return nil
   107  }
   108  
// yamlDecoder holds the state of a single unmarshal operation.
type yamlDecoder struct {
	opts UnmarshalOpts

	// vars maps each variable name seen so far (in its "$name" form) to its
	// ident, so that repeated uses of a name share one variable.
	vars  map[string]*ident
	// nSums counts the multi-element !sum nodes decoded so far. It is used to
	// name the fresh variable created for each such sum.
	nSums int

	// env accumulates the bindings of the variables and sums created while
	// decoding.
	env nonDetEnv
}
   117  
   118  func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) {
   119  	pos := &Pos{Path: dec.opts.Path, Line: node.Line}
   120  
   121  	// Resolve alias nodes.
   122  	if node.Kind == yaml.AliasNode {
   123  		node = node.Alias
   124  	}
   125  
   126  	mk := func(d Domain) (*Value, error) {
   127  		v := &Value{Domain: d, pos: pos}
   128  		return v, nil
   129  	}
   130  	mk2 := func(d Domain, err error) (*Value, error) {
   131  		if err != nil {
   132  			return nil, err
   133  		}
   134  		return mk(d)
   135  	}
   136  
   137  	// is tests the kind and long tag of node.
   138  	is := func(kind yaml.Kind, tag string) bool {
   139  		return node.Kind == kind && node.LongTag() == tag
   140  	}
   141  	isExact := func() bool {
   142  		if node.Kind != yaml.ScalarNode {
   143  			return false
   144  		}
   145  		// We treat any string-ish YAML node as a string.
   146  		switch node.LongTag() {
   147  		case "!string", "tag:yaml.org,2002:int", "tag:yaml.org,2002:float", "tag:yaml.org,2002:bool", "tag:yaml.org,2002:binary":
   148  			return true
   149  		}
   150  		return false
   151  	}
   152  
   153  	// !!str nodes provide a short-hand syntax for several leaf domains that are
   154  	// also available under explicit tags. To simplify checking below, we set
   155  	// strVal to non-"" only for !!str nodes.
   156  	strVal := ""
   157  	isStr := is(yaml.ScalarNode, "tag:yaml.org,2002:str")
   158  	if isStr {
   159  		strVal = node.Value
   160  	}
   161  
   162  	switch {
   163  	case is(yaml.ScalarNode, "!var"):
   164  		strVal = "$" + node.Value
   165  		fallthrough
   166  	case strings.HasPrefix(strVal, "$"):
   167  		id, ok := dec.vars[strVal]
   168  		if !ok {
   169  			// We encode different idents with the same string name by adding a
   170  			// #N suffix. Strip that off so it doesn't accumulate. This isn't
   171  			// meant to be used in user-written input, though nothing stops that.
   172  			name, _, _ := strings.Cut(strVal, "#")
   173  			id = &ident{name: name}
   174  			dec.vars[strVal] = id
   175  			dec.env = dec.env.bind(id, topValue)
   176  		}
   177  		return mk(Var{id: id})
   178  
   179  	case strVal == "_" || is(yaml.ScalarNode, "!top"):
   180  		return mk(Top{})
   181  
   182  	case strVal == "_|_" || is(yaml.ScalarNode, "!bottom"):
   183  		return nil, errors.New("found bottom")
   184  
   185  	case isExact():
   186  		val := node.Value
   187  		if dec.opts.StringReplacer != nil {
   188  			val = dec.opts.StringReplacer(val)
   189  		}
   190  		return mk(NewStringExact(val))
   191  
   192  	case isStr || is(yaml.ScalarNode, "!regex"):
   193  		// Any other string we treat as a regex. This will produce an exact
   194  		// string anyway if the regex is literal.
   195  		val := node.Value
   196  		if dec.opts.StringReplacer != nil {
   197  			val = dec.opts.StringReplacer(val)
   198  		}
   199  		return mk2(NewStringRegex(val))
   200  
   201  	case is(yaml.SequenceNode, "!regex"):
   202  		var vals []string
   203  		if err := node.Decode(&vals); err != nil {
   204  			return nil, err
   205  		}
   206  		return mk2(NewStringRegex(vals...))
   207  
   208  	case is(yaml.MappingNode, "tag:yaml.org,2002:map"):
   209  		var fields []string
   210  		var vals []*Value
   211  		for i := 0; i < len(node.Content); i += 2 {
   212  			key := node.Content[i]
   213  			if key.Kind != yaml.ScalarNode {
   214  				return nil, fmt.Errorf("non-scalar key %q", key.Value)
   215  			}
   216  			val, err := dec.value(node.Content[i+1])
   217  			if err != nil {
   218  				return nil, err
   219  			}
   220  			fields = append(fields, key.Value)
   221  			vals = append(vals, val)
   222  		}
   223  		return mk(NewDef(fields, vals))
   224  
   225  	case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"):
   226  		elts := node.Content
   227  		vs := make([]*Value, 0, len(elts))
   228  		for _, elt := range elts {
   229  			v, err := dec.value(elt)
   230  			if err != nil {
   231  				return nil, err
   232  			}
   233  			vs = append(vs, v)
   234  		}
   235  		return mk(NewTuple(vs...))
   236  
   237  	case is(yaml.SequenceNode, "!repeat") || is(yaml.SequenceNode, "!repeat-unify"):
   238  		// !repeat must have one child. !repeat-unify is used internally for
   239  		// delayed unification, and is the same, it's just allowed to have more
   240  		// than one child.
   241  		if node.LongTag() == "!repeat" && len(node.Content) != 1 {
   242  			return nil, fmt.Errorf("!repeat must have exactly one child")
   243  		}
   244  
   245  		// Decode the children to make sure they're well-formed, but otherwise
   246  		// discard that decoding and do it again every time we need a new
   247  		// element.
   248  		var gen []func(e nonDetEnv) (*Value, nonDetEnv)
   249  		origEnv := dec.env
   250  		elts := node.Content
   251  		for i, elt := range elts {
   252  			_, err := dec.value(elt)
   253  			if err != nil {
   254  				return nil, err
   255  			}
   256  			// Undo any effects on the environment. We *do* keep any named
   257  			// variables that were added to the vars map in case they were
   258  			// introduced within the element.
   259  			dec.env = origEnv
   260  			// Add a generator function
   261  			gen = append(gen, func(e nonDetEnv) (*Value, nonDetEnv) {
   262  				dec.env = e
   263  				// TODO: If this is in a sum, this tends to generate a ton of
   264  				// fresh variables that are different on each branch of the
   265  				// parent sum. Does it make sense to hold on to the i'th value
   266  				// of the tuple after we've generated it?
   267  				v, err := dec.value(elts[i])
   268  				if err != nil {
   269  					// It worked the first time, so this really shouldn't hapen.
   270  					panic("decoding repeat element failed")
   271  				}
   272  				return v, dec.env
   273  			})
   274  		}
   275  		return mk(NewRepeat(gen...))
   276  
   277  	case is(yaml.SequenceNode, "!sum"):
   278  		vs := make([]*Value, 0, len(node.Content))
   279  		for _, elt := range node.Content {
   280  			v, err := dec.value(elt)
   281  			if err != nil {
   282  				return nil, err
   283  			}
   284  			vs = append(vs, v)
   285  		}
   286  		if len(vs) == 1 {
   287  			return vs[0], nil
   288  		}
   289  
   290  		// A sum is implemented as a fresh variable that's simultaneously bound
   291  		// to each of the descendants.
   292  		id := &ident{name: fmt.Sprintf("sum%d", dec.nSums)}
   293  		dec.nSums++
   294  		dec.env = dec.env.bind(id, vs...)
   295  		return mk(Var{id: id})
   296  	}
   297  
   298  	return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag)
   299  }
   300  
// yamlEncoder holds the state of a single marshal operation.
type yamlEncoder struct {
	// idp produces the printed name for each ident via its unique method.
	idp identPrinter
	e   nonDetEnv // We track the environment for !repeat nodes.
}
   305  
   306  // TODO: Switch some Value marshaling to Closure?
   307  
   308  func (c Closure) MarshalYAML() (any, error) {
   309  	// TODO: If the environment is trivial, just marshal the value.
   310  	enc := &yamlEncoder{}
   311  	return enc.closure(c), nil
   312  }
   313  
   314  func (c Closure) String() string {
   315  	b, err := yaml.Marshal(c)
   316  	if err != nil {
   317  		return fmt.Sprintf("marshal failed: %s", err)
   318  	}
   319  	return string(b)
   320  }
   321  
   322  func (v *Value) MarshalYAML() (any, error) {
   323  	enc := &yamlEncoder{}
   324  	return enc.value(v), nil
   325  }
   326  
   327  func (v *Value) String() string {
   328  	b, err := yaml.Marshal(v)
   329  	if err != nil {
   330  		return fmt.Sprintf("marshal failed: %s", err)
   331  	}
   332  	return string(b)
   333  }
   334  
   335  func (enc *yamlEncoder) closure(c Closure) *yaml.Node {
   336  	enc.e = c.env
   337  	var n yaml.Node
   338  	n.Kind = yaml.MappingNode
   339  	n.Tag = "!closure"
   340  	n.Content = make([]*yaml.Node, 4)
   341  	n.Content[0] = new(yaml.Node)
   342  	n.Content[0].SetString("env")
   343  	n.Content[2] = new(yaml.Node)
   344  	n.Content[2].SetString("in")
   345  	n.Content[3] = enc.value(c.val)
   346  	// Fill in the env after we've written the value in case value encoding
   347  	// affects the env.
   348  	n.Content[1] = enc.env(enc.e)
   349  	enc.e = nonDetEnv{} // Allow GC'ing the env
   350  	return &n
   351  }
   352  
   353  func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node {
   354  	var n yaml.Node
   355  	n.Kind = yaml.SequenceNode
   356  	n.Tag = "!env"
   357  	for _, term := range e.factors {
   358  		var nTerm yaml.Node
   359  		n.Content = append(n.Content, &nTerm)
   360  		nTerm.Kind = yaml.SequenceNode
   361  		for _, det := range term.terms {
   362  			var nDet yaml.Node
   363  			nTerm.Content = append(nTerm.Content, &nDet)
   364  			nDet.Kind = yaml.MappingNode
   365  			for i, val := range det.vals {
   366  				var nLabel yaml.Node
   367  				nLabel.SetString(enc.idp.unique(term.ids[i]))
   368  				nDet.Content = append(nDet.Content, &nLabel, enc.value(val))
   369  			}
   370  		}
   371  	}
   372  	return &n
   373  }
   374  
// yamlIntRe matches strings that consist solely of an optional leading minus
// sign followed by one or more ASCII decimal digits.
var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`)
   376  
   377  func (enc *yamlEncoder) value(v *Value) *yaml.Node {
   378  	var n yaml.Node
   379  	switch d := v.Domain.(type) {
   380  	case nil:
   381  		// Not allowed by unmarshaler, but useful for understanding when
   382  		// something goes horribly wrong.
   383  		//
   384  		// TODO: We might be able to track useful provenance for this, which
   385  		// would really help with debugging unexpected bottoms.
   386  		n.SetString("_|_")
   387  		return &n
   388  
   389  	case Top:
   390  		n.SetString("_")
   391  		return &n
   392  
   393  	case Def:
   394  		n.Kind = yaml.MappingNode
   395  		for k, elt := range d.All() {
   396  			var kn yaml.Node
   397  			kn.SetString(k)
   398  			n.Content = append(n.Content, &kn, enc.value(elt))
   399  		}
   400  		n.HeadComment = v.PosString()
   401  		return &n
   402  
   403  	case Tuple:
   404  		n.Kind = yaml.SequenceNode
   405  		if d.repeat == nil {
   406  			for _, elt := range d.vs {
   407  				n.Content = append(n.Content, enc.value(elt))
   408  			}
   409  		} else {
   410  			if len(d.repeat) == 1 {
   411  				n.Tag = "!repeat"
   412  			} else {
   413  				n.Tag = "!repeat-unify"
   414  			}
   415  			// TODO: I'm not positive this will round-trip everything correctly.
   416  			for _, gen := range d.repeat {
   417  				v, e := gen(enc.e)
   418  				enc.e = e
   419  				n.Content = append(n.Content, enc.value(v))
   420  			}
   421  		}
   422  		return &n
   423  
   424  	case String:
   425  		switch d.kind {
   426  		case stringExact:
   427  			// Make this into a "nice" !!int node if I can.
   428  			if yamlIntRe.MatchString(d.exact) {
   429  				n.SetString(d.exact)
   430  				n.Tag = "tag:yaml.org,2002:int"
   431  				return &n
   432  			}
   433  			n.SetString(regexp.QuoteMeta(d.exact))
   434  			return &n
   435  		case stringRegex:
   436  			o := make([]string, 0, 1)
   437  			for _, re := range d.re {
   438  				s := re.String()
   439  				s = strings.TrimSuffix(strings.TrimPrefix(s, `\A(?:`), `)\z`)
   440  				o = append(o, s)
   441  			}
   442  			if len(o) == 1 {
   443  				n.SetString(o[0])
   444  				return &n
   445  			}
   446  			n.Encode(o)
   447  			n.Tag = "!regex"
   448  			return &n
   449  		}
   450  		panic("bad String kind")
   451  
   452  	case Var:
   453  		// TODO: If Var only appears once in the whole Value and is independent
   454  		// in the environment (part of a term that is only over Var), then emit
   455  		// this as a !sum instead.
   456  		if false {
   457  			var vs []*Value // TODO: Get values of this var.
   458  			if len(vs) == 1 {
   459  				return enc.value(vs[0])
   460  			}
   461  			n.Kind = yaml.SequenceNode
   462  			n.Tag = "!sum"
   463  			for _, elt := range vs {
   464  				n.Content = append(n.Content, enc.value(elt))
   465  			}
   466  			return &n
   467  		}
   468  		n.SetString(enc.idp.unique(d.id))
   469  		if !strings.HasPrefix(d.id.name, "$") {
   470  			n.Tag = "!var"
   471  		}
   472  		return &n
   473  	}
   474  	panic(fmt.Sprintf("unknown domain type %T", v.Domain))
   475  }