golang.org/x/arch@v0.17.0/internal/unify/env.go (about) 1 // Copyright 2025 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unify 6 7 import ( 8 "fmt" 9 "iter" 10 "reflect" 11 "slices" 12 "strings" 13 ) 14 15 // A nonDetEnv is a non-deterministic mapping from [ident]s to [Value]s. 16 // 17 // Logically, this is just a set of deterministic environments, where each 18 // deterministic environment is a complete mapping from each [ident]s to exactly 19 // one [Value]. In particular, [ident]s are NOT necessarily independent of each 20 // other. For example, an environment may have both {x: 1, y: 1} and {x: 2, y: 21 // 2}, but not {x: 1, y: 2}. 22 // 23 // A nonDetEnv is immutable. 24 // 25 // Often [ident]s are independent of each other, so the representation optimizes 26 // for this by using a cross-product of environment factors, where each factor 27 // is a sum of deterministic environments. These operations obey the usual 28 // distributional laws, so we can always canonicalize into this form. (It MAY be 29 // worthwhile to allow more general expressions of sums and products.) 30 // 31 // For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, in which the 32 // variables x and y are dependent, we need a single factor that covers x and y 33 // and consists of two terms: {x: 1, y: 1} + {x: 2, y: 2}. 34 // 35 // If we add a third variable z that can be 1 or 2, independent of x and y, we 36 // get four logical environments: 37 // 38 // {x: 1, y: 1, z: 1} 39 // {x: 2, y: 2, z: 1} 40 // {x: 1, y: 1, z: 2} 41 // {x: 2, y: 2, z: 2} 42 // 43 // This could be represented as a single factor that is the sum of these four 44 // detEnvs, but because z is independent, it can be a separate factor. Hence, 45 // the most compact representation of this environment is: 46 // 47 // ({x: 1, y: 1} + {x: 2, y: 2}) тип ({z: 1} + {z: 2}) 48 // 49 // That is, two factors, where each is the sum of two terms. 50 type nonDetEnv struct { 51 // factors is a list of the multiplicative factors in this environment. The 52 // set of deterministic environments is the cross-product of these factors. 53 // All factors must have disjoint variables. 54 factors []*envSum 55 } 56 57 // envSum is a sum of deterministic environments, all with the same set of 58 // variables. 59 type envSum struct { 60 ids []*ident // TODO: Do we ever use this as a slice? Should it be a map? 61 terms []detEnv 62 } 63 64 type detEnv struct { 65 vals []*Value // Indexes correspond to envSum.ids 66 } 67 68 var ( 69 // zeroEnvFactor is the "0" value of an [envSum]. It's a a factor with no 70 // sum terms. This is easiest to think of as: an empty sum must be the 71 // additive identity, 0. 72 zeroEnvFactor = &envSum{} 73 74 // topEnv is the algebraic one value of a [nonDetEnv]. It has no factors 75 // because the product of no factors is the multiplicative identity. 76 topEnv = nonDetEnv{} 77 // bottomEnv is the algebraic zero value of a [nonDetEnv]. The product of 78 // bottomEnv with x is bottomEnv, and the sum of bottomEnv with y is y. 79 bottomEnv = nonDetEnv{factors: []*envSum{zeroEnvFactor}} 80 ) 81 82 // bind binds id to each of vals in e. 83 // 84 // Its panics if id is already bound in e. 85 // 86 // Environments are typically initially constructed by starting with [topEnv] 87 // and calling bind one or more times. 88 func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { 89 if e.isBottom() { 90 return bottomEnv 91 } 92 93 // TODO: If any of vals are _, should we just not do anything? We're kind of 94 // inconsistent about whether an id missing from e means id is invalid or 95 // means id is _. 96 97 // Check that id isn't present in e. 98 for _, f := range e.factors { 99 if slices.Contains(f.ids, id) { 100 panic("id " + id.name + " already present in environment") 101 } 102 } 103 104 // Create the new sum term. 105 sum := &envSum{ids: []*ident{id}} 106 for _, val := range vals { 107 sum.terms = append(sum.terms, detEnv{vals: []*Value{val}}) 108 } 109 // Multiply it in. 110 factors := append(e.factors[:len(e.factors):len(e.factors)], sum) 111 return nonDetEnv{factors} 112 } 113 114 func (e nonDetEnv) isBottom() bool { 115 if len(e.factors) == 0 { 116 // This is top. 117 return false 118 } 119 return len(e.factors[0].terms) == 0 120 } 121 122 func (e nonDetEnv) vars() iter.Seq[*ident] { 123 return func(yield func(*ident) bool) { 124 for _, t := range e.factors { 125 for _, id := range t.ids { 126 if !yield(id) { 127 return 128 } 129 } 130 } 131 } 132 } 133 134 // all enumerates all deterministic environments in e. 135 // 136 // The result slice is in the same order as the slice returned by 137 // [nonDetEnv2.vars]. The slice is reused between iterations. 138 func (e nonDetEnv) all() iter.Seq[[]*Value] { 139 return func(yield func([]*Value) bool) { 140 var vals []*Value 141 var walk func(int) bool 142 walk = func(i int) bool { 143 if i == len(e.factors) { 144 return yield(vals) 145 } 146 start := len(vals) 147 for _, term := range e.factors[i].terms { 148 vals = append(vals[:start], term.vals...) 149 if !walk(i + 1) { 150 return false 151 } 152 } 153 return true 154 } 155 walk(0) 156 } 157 } 158 159 // allOrdered is like all, but idOrder controls the order of the values in the 160 // resulting slice. Any [ident]s in idOrder that are missing from e are set to 161 // topValue. The values of idOrder must be a bijection with [0, n). 162 func (e nonDetEnv) allOrdered(idOrder map[*ident]int) iter.Seq[[]*Value] { 163 valsLen := 0 164 for _, idx := range idOrder { 165 valsLen = max(valsLen, idx+1) 166 } 167 168 return func(yield func([]*Value) bool) { 169 vals := make([]*Value, valsLen) 170 // e may not have all of the IDs in idOrder. Make sure any missing 171 // values are top. 172 for i := range vals { 173 vals[i] = topValue 174 } 175 var walk func(int) bool 176 walk = func(i int) bool { 177 if i == len(e.factors) { 178 return yield(vals) 179 } 180 for _, term := range e.factors[i].terms { 181 for j, id := range e.factors[i].ids { 182 vals[idOrder[id]] = term.vals[j] 183 } 184 if !walk(i + 1) { 185 return false 186 } 187 } 188 return true 189 } 190 walk(0) 191 } 192 } 193 194 func crossEnvs(envs ...nonDetEnv) nonDetEnv { 195 // Combine the factors of envs 196 var factors []*envSum 197 haveIDs := map[*ident]struct{}{} 198 for _, e := range envs { 199 if e.isBottom() { 200 // The environment is bottom, so the whole product goes to 201 // bottom. 202 return bottomEnv 203 } 204 // Check that all ids are disjoint. 205 for _, f := range e.factors { 206 for _, id := range f.ids { 207 if _, ok := haveIDs[id]; ok { 208 panic("conflict on " + id.name) 209 } 210 haveIDs[id] = struct{}{} 211 } 212 } 213 // Everything checks out. Multiply the factors. 214 factors = append(factors, e.factors...) 215 } 216 return nonDetEnv{factors: factors} 217 } 218 219 func sumEnvs(envs ...nonDetEnv) nonDetEnv { 220 // nonDetEnv is a product at the top level, so we implement summation using 221 // the distributive law. We also use associativity to keep as many top-level 222 // factors as we can, since those are what keep the environment compact. 223 // 224 // a * b * c + a * d (where a, b, c, and d are factors) 225 // (combine common factors) 226 // = a * (b * c + d) 227 // (expand factors into their sum terms) 228 // = a * ((b_1 + b_2 + ...) * (c_1 + c_2 + ...) + d) 229 // (where b_i and c_i are deterministic environments) 230 // (FOIL) 231 // = a * (b_1 * c_1 + b_1 * c_2 + b_2 * c_1 + b_2 * c2 + d) 232 // (all factors are now in canonical form) 233 // = a * e 234 // 235 // The product of two deterministic environments is a deterministic 236 // environment, and the sum of deterministic environments is a factor, so 237 // this process results in the canonical product-of-sums form. 238 // 239 // TODO: This is a bit of a one-way process. We could try to factor the 240 // environment to reduce the number of sums. I'm not sure how to do this 241 // efficiently. It might be possible to guide it by gathering the 242 // distributions of each ID's bindings. E.g., if there are 12 deterministic 243 // environments in a sum and $x is bound to 4 different values, each 3 244 // times, then it *might* be possible to factor out $x into a 4-way sum of 245 // its own. 246 247 factors, toSum := commonFactors(envs) 248 249 if len(toSum) > 0 { 250 // Collect all IDs into a single order. 251 var ids []*ident 252 idOrder := make(map[*ident]int) 253 for _, e := range toSum { 254 for v := range e.vars() { 255 if _, ok := idOrder[v]; !ok { 256 idOrder[v] = len(ids) 257 ids = append(ids, v) 258 } 259 } 260 } 261 262 // Flatten out each term in the sum. 263 var summands []detEnv 264 for _, env := range toSum { 265 for vals := range env.allOrdered(idOrder) { 266 summands = append(summands, detEnv{vals: slices.Clone(vals)}) 267 } 268 } 269 factors = append(factors, &envSum{ids: ids, terms: summands}) 270 } 271 272 return nonDetEnv{factors: factors} 273 } 274 275 // commonFactors finds common factors that can be factored out of a summation of 276 // [nonDetEnv]s. 277 func commonFactors(envs []nonDetEnv) (common []*envSum, toSum []nonDetEnv) { 278 // Drop any bottom environments. They don't contribute to the sum and they 279 // would complicate some logic below. 280 envs = slices.DeleteFunc(envs, func(e nonDetEnv) bool { 281 return e.isBottom() 282 }) 283 if len(envs) == 0 { 284 return bottomEnv.factors, nil 285 } 286 287 // It's very common that the exact same factor will appear across all envs. 288 // Keep those factored out. 289 // 290 // TODO: Is it also common to have vars that are bound to the same value 291 // across all envs? If so, we could also factor those into common terms. 292 counts := map[*envSum]int{} 293 for _, e := range envs { 294 for _, f := range e.factors { 295 counts[f]++ 296 } 297 } 298 for _, f := range envs[0].factors { 299 if counts[f] == len(envs) { 300 // Common factor 301 common = append(common, f) 302 } 303 } 304 305 // Any other factors need to be multiplied out. 306 for _, env := range envs { 307 var newFactors []*envSum 308 for _, f := range env.factors { 309 if counts[f] != len(envs) { 310 newFactors = append(newFactors, f) 311 } 312 } 313 if len(newFactors) > 0 { 314 toSum = append(toSum, nonDetEnv{factors: newFactors}) 315 } 316 } 317 318 return common, toSum 319 } 320 321 // envPartition is a subset of an env where id is bound to value in all 322 // deterministic environments. 323 type envPartition struct { 324 id *ident 325 value *Value 326 env nonDetEnv 327 } 328 329 func (e nonDetEnv) partitionBy(id *ident) []envPartition { 330 if e.isBottom() { 331 // Bottom contains all variables 332 return []envPartition{{id: id, value: bottomValue, env: e}} 333 } 334 335 // Find the factor containing id and id's index in that factor. 336 idFactor, idIndex := -1, -1 337 var newIDs []*ident 338 for factI, fact := range e.factors { 339 idI := slices.Index(fact.ids, id) 340 if idI < 0 { 341 continue 342 } else if idFactor != -1 { 343 panic("multiple factors containing id " + id.name) 344 } else { 345 idFactor, idIndex = factI, idI 346 // Drop id from this factor's IDs 347 newIDs = without(fact.ids, idI) 348 } 349 } 350 if idFactor == -1 { 351 panic("id " + id.name + " not found in environment") 352 } 353 354 // If id is the only term in its factor, then dropping it is equivalent to 355 // making the factor be the unit value, so we can just drop the factor. (And 356 // if this is the only factor, we'll arrive at [topEnv], which is exactly 357 // what we want!). In this case we can use the same nonDetEnv in all of the 358 // partitions. 359 isUnit := len(newIDs) == 0 360 var unitFactors []*envSum 361 if isUnit { 362 unitFactors = without(e.factors, idFactor) 363 } 364 365 // Create a partition for each distinct value of id. 366 var parts []envPartition 367 partIndex := map[*Value]int{} 368 for _, det := range e.factors[idFactor].terms { 369 val := det.vals[idIndex] 370 i, ok := partIndex[val] 371 if !ok { 372 i = len(parts) 373 var factors []*envSum 374 if isUnit { 375 factors = unitFactors 376 } else { 377 // Copy all other factor 378 factors = slices.Clone(e.factors) 379 factors[idFactor] = &envSum{ids: newIDs} 380 } 381 parts = append(parts, envPartition{id: id, value: val, env: nonDetEnv{factors: factors}}) 382 partIndex[val] = i 383 } 384 385 if !isUnit { 386 factor := parts[i].env.factors[idFactor] 387 newVals := without(det.vals, idIndex) 388 factor.terms = append(factor.terms, detEnv{vals: newVals}) 389 } 390 } 391 return parts 392 } 393 394 type ident struct { 395 _ [0]func() // Not comparable (only compare *ident) 396 name string 397 } 398 399 type Var struct { 400 id *ident 401 } 402 403 func (d Var) Exact() bool { 404 // These can't appear in concrete Values. 405 panic("Exact called on non-concrete Value") 406 } 407 408 func (d Var) decode(rv reflect.Value) error { 409 return &inexactError{"var", rv.Type().String()} 410 } 411 412 func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { 413 // TODO: Vars from !sums in the input can have a huge number of values. 414 // Unifying these could be way more efficient with some indexes over any 415 // exact values we can pull out, like Def fields that are exact Strings. 416 // Maybe we try to produce an array of yes/no/maybe matches and then we only 417 // have to do deeper evaluation of the maybes. We could probably cache this 418 // on an envTerm. It may also help to special-case Var/Var unification to 419 // pick which one to index versus enumerate. 420 421 if vd, ok := w.Domain.(Var); ok && d.id == vd.id { 422 // Unifying $x with $x results in $x. If we descend into this we'll have 423 // problems because we strip $x out of the environment to keep ourselves 424 // honest and then can't find it on the other side. 425 // 426 // TODO: I'm not positive this is the right fix. 427 return vd, e, nil 428 } 429 430 // We need to unify w with the value of d in each possible environment. We 431 // can save some work by grouping environments by the value of d, since 432 // there will be a lot of redundancy here. 433 var nEnvs []nonDetEnv 434 envParts := e.partitionBy(d.id) 435 for i, envPart := range envParts { 436 exit := uf.enterVar(d.id, i) 437 // Each branch logically gets its own copy of the initial environment 438 // (narrowed down to just this binding of the variable), and each branch 439 // may result in different changes to that starting environment. 440 res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) 441 exit.exit() 442 if err != nil { 443 return nil, nonDetEnv{}, err 444 } 445 if res.Domain == nil { 446 // This branch entirely failed to unify, so it's gone. 447 continue 448 } 449 nEnv := e2.bind(d.id, res) 450 nEnvs = append(nEnvs, nEnv) 451 } 452 453 if len(nEnvs) == 0 { 454 // All branches failed 455 return nil, bottomEnv, nil 456 } 457 458 // The effect of this is entirely captured in the environment. We can return 459 // back the same Bind node. 460 return d, sumEnvs(nEnvs...), nil 461 } 462 463 // An identPrinter maps [ident]s to unique string names. 464 type identPrinter struct { 465 ids map[*ident]string 466 idGen map[string]int 467 } 468 469 func (p *identPrinter) unique(id *ident) string { 470 if p.ids == nil { 471 p.ids = make(map[*ident]string) 472 p.idGen = make(map[string]int) 473 } 474 475 name, ok := p.ids[id] 476 if !ok { 477 gen := p.idGen[id.name] 478 p.idGen[id.name]++ 479 if gen == 0 { 480 name = id.name 481 } else { 482 name = fmt.Sprintf("%s#%d", id.name, gen) 483 } 484 p.ids[id] = name 485 } 486 487 return name 488 } 489 490 func (p *identPrinter) slice(ids []*ident) string { 491 var strs []string 492 for _, id := range ids { 493 strs = append(strs, p.unique(id)) 494 } 495 return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) 496 } 497 498 func without[Elt any](s []Elt, i int) []Elt { 499 return append(s[:i:i], s[i+1:]...) 500 }