github.com/rsc/tmp@v0.0.0-20240517235954-6deaab19748b/bootstrap/internal/gc/reg.go

github.com/rsc/tmp@v0.0.0-20240517235954-6deaab19748b/bootstrap/internal/gc/reg.go (about)

     1  // Do not edit. Bootstrap copy of /Users/rsc/g/go/src/cmd/internal/gc/reg.go
     2  
     3  // Derived from Inferno utils/6c/reg.c
     4  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     5  //
     6  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     7  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     8  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     9  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
    10  //	Portions Copyright © 2004,2006 Bruce Ellis
    11  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    12  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    13  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    14  //
    15  // Permission is hereby granted, free of charge, to any person obtaining a copy
    16  // of this software and associated documentation files (the "Software"), to deal
    17  // in the Software without restriction, including without limitation the rights
    18  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    19  // copies of the Software, and to permit persons to whom the Software is
    20  // furnished to do so, subject to the following conditions:
    21  //
    22  // The above copyright notice and this permission notice shall be included in
    23  // all copies or substantial portions of the Software.
    24  //
    25  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    26  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    27  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    28  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    29  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    30  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    31  // THE SOFTWARE.
    32  
    33  package gc
    34  
    35  import (
    36  	"bytes"
    37  	"rsc.io/tmp/bootstrap/internal/obj"
    38  	"fmt"
    39  	"sort"
    40  	"strings"
    41  )
    42  
// A Var represents a single variable that may be stored in a register.
// That variable may itself correspond to a hardware register,
// to represent the use of registers in the unoptimized instruction stream.
//
// Vars live in the package-level vars array; mkvar fills in a new entry
// the first time it sees a given (node, name, offset, etype, width)
// combination.
type Var struct {
	offset     int64 // operand offset (copied from obj.Addr.Offset by mkvar)
	node       *Node // Go variable (node.Orig) this piece belongs to
	nextinnode *Var  // next Var belonging to the same node; list head is node.Opt
	width      int   // operand width (copied from obj.Addr.Width by mkvar)
	id         int   // index in vars
	name       int8  // operand name class (copied from obj.Addr.Name, e.g. obj.NAME_PARAM/obj.NAME_AUTO)
	etype      int8  // operand element type (copied from obj.Addr.Etype)
	addr       int8  // nonzero when flagged by mkvar (1) or setaddrs (2); the consumer of this flag is outside this chunk
}
    56  
// Bits represents a set of Vars, stored as a bit set of var numbers
// (the index in vars, or equivalently v.id).
// Var number i is bit i%64 of word b[i/64].
type Bits struct {
	b [BITS]uint64
}
    62  
const (
	BITS = 3         // number of 64-bit words in a Bits set
	NVAR = BITS * 64 // capacity of vars, i.e. the most Vars a Bits set can describe
)
    67  
// Package-level state of the register optimizer.
// regopt resets regbits and the Bits sets below at the start of each run.
var (
	vars [NVAR]Var // variables under consideration
	nvar int       // number of vars

	regbits uint64 // bits for hardware registers

	zbits   Bits // zero
	externs Bits // global variables
	params  Bits // function parameters and results
	ivar    Bits // function parameters (inputs)
	ovar    Bits // function results (outputs)
	consts  Bits // constant values
	addrs   Bits // variables with address taken
)
    82  
// A Reg is a wrapper around a single Prog (one instruction) that holds
// register optimization information while the optimizer runs.
// It is stored in the Data field of the instruction's Flow node, whose
// Prog field is the instruction itself.
type Reg struct {
	set  Bits // regopt variables written by this instruction.
	use1 Bits // regopt variables read by prog->from.
	use2 Bits // regopt variables read by prog->to.

	// refahead/refbehind are the regopt variables whose current
	// value may be used in the following/preceding instructions
	// up to a CALL (or the value is clobbered).
	refbehind Bits
	refahead  Bits

	// calahead/calbehind are similar, but for variables in
	// instructions that are reachable after hitting at least one
	// CALL.
	calbehind Bits
	calahead  Bits

	regdiff Bits   // maintained by synch; paint1/paint3 combine it with STORE to decide where a store back to memory is needed
	act     Bits   // set by walkvardef for variables with a recent VARDEF; also used as the per-variable visited mark by paint1/paint2/paint3
	regu    uint64 // register used bitmap
}
   107  
// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches.
// A single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
type Rgn struct {
	enter *Flow // instruction at which the flood fill of the region starts
	cost  int16 // benefit of registerizing the region; allreg only assigns a register when cost > 0
	varno int16 // index in vars of the variable the region covers
	regno int16 // register chosen by allreg, or 0 if none was assigned
}
   123  
// MaxRgn is the upper limit on the number of regions.
//
// The Plan 9 C compilers used a limit of 600 regions,
// but the yacc-generated parser in y.go has 3100 regions.
// We set MaxRgn large enough to handle that.
// There's not a huge cost to having too many regions:
// the main processing traces the live area for each variable,
// which is limited by the number of variables times the area,
// not the raw region count. If there are many regions, they
// are almost certainly small and easy to trace.
// The only operation that scales with region count is the
// sorting by cost, which uses sort.Sort and is therefore
// guaranteed n log n.
const MaxRgn = 6000
   136  
var (
	region  []Rgn // candidate regions; filled and sorted outside this chunk — presumably by regopt, TODO confirm
	nregion int   // presumably the number of entries of region in use — TODO confirm against regopt
)
   141  
   142  type rcmp []Rgn
   143  
   144  func (x rcmp) Len() int {
   145  	return len(x)
   146  }
   147  
   148  func (x rcmp) Swap(i, j int) {
   149  	x[i], x[j] = x[j], x[i]
   150  }
   151  
   152  func (x rcmp) Less(i, j int) bool {
   153  	p1 := &x[i]
   154  	p2 := &x[j]
   155  	if p1.cost != p2.cost {
   156  		return int(p2.cost)-int(p1.cost) < 0
   157  	}
   158  	if p1.varno != p2.varno {
   159  		return int(p2.varno)-int(p1.varno) < 0
   160  	}
   161  	if p1.enter != p2.enter {
   162  		return int(p2.enter.Id-p1.enter.Id) < 0
   163  	}
   164  	return false
   165  }
   166  
   167  func setaddrs(bit Bits) {
   168  	var i int
   169  	var n int
   170  	var v *Var
   171  	var node *Node
   172  
   173  	for bany(&bit) {
   174  		// convert each bit to a variable
   175  		i = bnum(bit)
   176  
   177  		node = vars[i].node
   178  		n = int(vars[i].name)
   179  		biclr(&bit, uint(i))
   180  
   181  		// disable all pieces of that variable
   182  		for i = 0; i < nvar; i++ {
   183  			v = &vars[i]
   184  			if v.node == node && int(v.name) == n {
   185  				v.addr = 2
   186  			}
   187  		}
   188  	}
   189  }
   190  
// regnodes caches the Nodes standing for the hardware-register
// pseudo-variables. regopt creates entry i on first use (via newname)
// and reuses it as vars[i].node on later runs.
var regnodes [64]*Node
   192  
   193  func walkvardef(n *Node, f *Flow, active int) {
   194  	var f1 *Flow
   195  	var bn int
   196  	var v *Var
   197  
   198  	for f1 = f; f1 != nil; f1 = f1.S1 {
   199  		if f1.Active == int32(active) {
   200  			break
   201  		}
   202  		f1.Active = int32(active)
   203  		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
   204  			break
   205  		}
   206  		for v, _ = n.Opt.(*Var); v != nil; v = v.nextinnode {
   207  			bn = v.id
   208  			biset(&(f1.Data.(*Reg)).act, uint(bn))
   209  		}
   210  
   211  		if f1.Prog.As == obj.ACALL {
   212  			break
   213  		}
   214  	}
   215  
   216  	for f2 := f; f2 != f1; f2 = f2.S1 {
   217  		if f2.S2 != nil {
   218  			walkvardef(n, f2.S2, active)
   219  		}
   220  	}
   221  }
   222  
   223  /*
   224   * add mov b,rn
   225   * just after r
   226   */
   227  func addmove(r *Flow, bn int, rn int, f int) {
   228  	p1 := Ctxt.NewProg()
   229  	Clearp(p1)
   230  	p1.Pc = 9999
   231  
   232  	p := r.Prog
   233  	p1.Link = p.Link
   234  	p.Link = p1
   235  	p1.Lineno = p.Lineno
   236  
   237  	v := &vars[bn]
   238  
   239  	a := &p1.To
   240  	a.Offset = v.offset
   241  	a.Etype = uint8(v.etype)
   242  	a.Type = obj.TYPE_MEM
   243  	a.Name = v.name
   244  	a.Node = v.node
   245  	a.Sym = Linksym(v.node.Sym)
   246  
   247  	/* NOTE(rsc): 9g did
   248  	if(a->etype == TARRAY)
   249  		a->type = TYPE_ADDR;
   250  	else if(a->sym == nil)
   251  		a->type = TYPE_CONST;
   252  	*/
   253  	p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)]))
   254  
   255  	// TODO(rsc): Remove special case here.
   256  	if (Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL {
   257  		p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8]))
   258  	}
   259  	p1.From.Type = obj.TYPE_REG
   260  	p1.From.Reg = int16(rn)
   261  	p1.From.Name = obj.NAME_NONE
   262  	if f == 0 {
   263  		p1.From = *a
   264  		*a = obj.Addr{}
   265  		a.Type = obj.TYPE_REG
   266  		a.Reg = int16(rn)
   267  	}
   268  
   269  	if Debug['R'] != 0 && Debug['v'] != 0 {
   270  		fmt.Printf("%v ===add=== %v\n", p, p1)
   271  	}
   272  	Ostats.Nspill++
   273  }
   274  
   275  func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
   276  	t1 := o1 + int64(w1)
   277  	t2 := o2 + int64(w2)
   278  
   279  	if t1 <= o2 || t2 <= o1 {
   280  		return false
   281  	}
   282  
   283  	return true
   284  }
   285  
// mkvar returns the set of regopt variables referenced by operand a of
// the instruction f, allocating a new Var in vars when a is a stack
// variable (NAME_PARAM or NAME_AUTO) that has not been seen before.
//
// Summary of the cases handled:
//   - TYPE_NONE yields zbits.
//   - Register-like operands (the default case) yield the register bits
//     in word 0 of the result.
//   - TYPE_ADDR, except on architectures '5', '7' and '9', is analyzed as
//     a memory operand, the variables found are passed to setaddrs, and
//     zbits is returned. On '5', '7' and '9' it is handled like TYPE_MEM.
//   - TYPE_MEM yields the bit of the matching (or newly created) Var, or
//     zbits if the operand is not a trackable variable.
//
// As side effects it records register uses in the instruction's
// Reg.use1, may set the addr flag on overlapping or untracked Vars, and
// updates the externs/params/ivar/ovar sets for a newly created Var.
func mkvar(f *Flow, a *obj.Addr) Bits {
	/*
	 * mark registers used
	 */
	if a.Type == obj.TYPE_NONE {
		return zbits
	}

	r := f.Data.(*Reg)
	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB

	// n is the name class of the variable; it is only assigned in the
	// NAME_PARAM/NAME_AUTO case below.
	var n int
	switch a.Type {
	default:
		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
		if regu == 0 {
			return zbits
		}
		bit := zbits
		bit.b[0] = regu
		return bit

		// TODO(rsc): Remove special case here.
	case obj.TYPE_ADDR:
		var bit Bits
		if Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' {
			goto memcase
		}
		// Temporarily view the operand as a memory reference to find the
		// variable, flag it via setaddrs, then restore the operand type.
		a.Type = obj.TYPE_MEM
		bit = mkvar(f, a)
		setaddrs(bit)
		a.Type = obj.TYPE_ADDR
		Ostats.Naddr++
		return zbits

	memcase:
		fallthrough

	case obj.TYPE_MEM:
		// NOTE(review): r was already dereferenced above, so this nil
		// check cannot be false here.
		if r != nil {
			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
		}

		/* NOTE: 5g did
		if(r->f.prog->scond & (C_PBIT|C_WBIT))
			r->set.b[0] |= RtoB(a->reg);
		*/
		switch a.Name {
		default:
			// Note: This case handles NAME_EXTERN and NAME_STATIC.
			// We treat these as requiring eager writes to memory, due to
			// the possibility of a fault handler looking at them, so there is
			// not much point in registerizing the loads.
			// If we later choose the set of candidate variables from a
			// larger list, these cases could be deprioritized instead of
			// removed entirely.
			return zbits

		case obj.NAME_PARAM,
			obj.NAME_AUTO:
			n = int(a.Name)
		}
	}

	// Only named variables (ONAME nodes with an Orig) are tracked, and
	// they are always tracked through their original node.
	node, _ := a.Node.(*Node)
	if node == nil || node.Op != ONAME || node.Orig == nil {
		return zbits
	}
	node = node.Orig
	if node.Orig != node {
		Fatal("%v: bad node", Ctxt.Dconv(a))
	}
	// Variables without a symbol, or whose name starts with '.', are not tracked.
	if node.Sym == nil || node.Sym.Name[0] == '.' {
		return zbits
	}
	et := int(a.Etype)
	o := a.Offset
	w := a.Width
	if w < 0 {
		Fatal("bad width %d for %v", w, Ctxt.Dconv(a))
	}

	// Look for an existing Var describing exactly this piece of the
	// variable. flag records whether any differently-shaped piece of the
	// same variable overlaps the requested range.
	flag := 0
	var v *Var
	for i := 0; i < nvar; i++ {
		v = &vars[i]
		if v.node == node && int(v.name) == n {
			if v.offset == o {
				if int(v.etype) == et {
					if int64(v.width) == w {
						// TODO(rsc): Remove special case for arm here.
						if flag == 0 || Thearch.Thechar != '5' {
							return blsh(uint(i))
						}
					}
				}
			}

			// if they overlap, disable both
			if overlap_reg(v.offset, v.width, o, int(w)) {
				//				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
				v.addr = 1

				flag = 1
			}
		}
	}

	// Operands with no element type, or of function type, are not tracked.
	switch et {
	case 0, TFUNC:
		return zbits
	}

	if nvar >= NVAR {
		if Debug['w'] > 1 && node != nil {
			Fatal("variable not optimized: %v", Nconv(node, obj.FmtSharp))
		}
		if Debug['v'] > 0 {
			Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp))
		}

		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		var v *Var
		for i := 0; i < nvar; i++ {
			v = &vars[i]
			if v.node == node {
				v.addr = 1
			}
		}

		return zbits
	}

	// Allocate and initialize a new Var for this piece.
	i := nvar
	nvar++
	v = &vars[i]
	v.id = i
	v.offset = o
	v.name = int8(n)
	v.etype = int8(et)
	v.width = int(w)
	v.addr = int8(flag) // funny punning
	v.node = node

	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v.nextinnode, _ = node.Opt.(*Var)

	node.Opt = v

	bit := blsh(uint(i))
	// NOTE(review): with the switch above, only NAME_PARAM and NAME_AUTO
	// operands reach this point, so this branch looks unreachable — confirm.
	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
		for z := 0; z < BITS; z++ {
			externs.b[z] |= bit.b[z]
		}
	}
	if n == obj.NAME_PARAM {
		for z := 0; z < BITS; z++ {
			params.b[z] |= bit.b[z]
		}
	}

	if node.Class == PPARAM {
		for z := 0; z < BITS; z++ {
			ivar.b[z] |= bit.b[z]
		}
	}
	if node.Class == PPARAMOUT {
		for z := 0; z < BITS; z++ {
			ovar.b[z] |= bit.b[z]
		}
	}

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in ../gc/plive.c does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
	if node.Addrtaken {
		v.addr = 1
	}

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if node.Class == PEXTERN || (Hasdefer != 0 && node.Class == PPARAMOUT) {
		v.addr = 1
	}

	if Debug['R'] != 0 {
		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(int(et), 0), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr)
	}
	Ostats.Nvar++

	return bit
}
   502  
// change is scratch state shared by the passes in this file:
// prop and synch set it to 1 when they modify any Reg bit set, and
// paint1 uses it as the running cost/benefit total of a region.
var change int
   504  
// prop propagates the reference sets backward through the flow graph.
// Starting at f and following P1 (fallthrough predecessor) links, it
// merges ref and cal into each instruction's refahead/calahead, applies
// the instruction's effect (CALL, TEXT, RET, and its set/use bits), and
// stores the result in refbehind/calbehind. The walk stops after
// processing the first instruction whose Active flag was already set.
// It then recurses into the P2 (branch) predecessors of every
// instruction visited before that stopping point.
//
// ref and cal are passed by value and updated locally as the walk
// proceeds. change is set to 1 whenever a refahead or calahead set grows.
func prop(f *Flow, ref Bits, cal Bits) {
	var f1 *Flow
	var r1 *Reg
	var z int
	var i int
	var v *Var
	var v1 *Var

	for f1 = f; f1 != nil; f1 = f1.P1 {
		r1 = f1.Data.(*Reg)
		// Merge the incoming sets into what is already recorded after
		// this instruction, noting whether anything new was added.
		for z = 0; z < BITS; z++ {
			ref.b[z] |= r1.refahead.b[z]
			if ref.b[z] != r1.refahead.b[z] {
				r1.refahead.b[z] = ref.b[z]
				change = 1
			}

			cal.b[z] |= r1.calahead.b[z]
			if cal.b[z] != r1.calahead.b[z] {
				r1.calahead.b[z] = cal.b[z]
				change = 1
			}
		}

		switch f1.Prog.As {
		case obj.ACALL:
			if Noreturn(f1.Prog) {
				break
			}

			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for z = 0; z < BITS; z++ {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
				ref.b[z] = 0
			}

			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for z = 0; z < BITS; z++ {
				if cal.b[z] == 0 {
					continue
				}
				for i = 0; i < 64; i++ {
					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
						continue
					}
					v = &vars[z*64+i]
					if v.node.Opt == nil { // v represents fixed register, not Go variable
						continue
					}

					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1, _ = v.node.Opt.(*Var)

					if v == v1 || !btest(&cal, uint(v1.id)) {
						for ; v1 != nil; v1 = v1.nextinnode {
							biset(&cal, uint(v1.id))
						}
					}
				}
			}

		case obj.ATEXT:
			// Function entry: nothing is referenced before this point.
			for z = 0; z < BITS; z++ {
				cal.b[z] = 0
				ref.b[z] = 0
			}

		case obj.ARET:
			// Function exit: globals and result variables are treated as
			// used past the return.
			for z = 0; z < BITS; z++ {
				cal.b[z] = externs.b[z] | ovar.b[z]
				ref.b[z] = 0
			}
		}

		// Apply this instruction's own effect: variables it sets are no
		// longer referenced before it unless it also uses them, and
		// anything it touches is dropped from the across-a-call set.
		for z = 0; z < BITS; z++ {
			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
			r1.refbehind.b[z] = ref.b[z]
			r1.calbehind.b[z] = cal.b[z]
		}

		// An already-active instruction is still processed above, but the
		// walk does not continue past it.
		if f1.Active != 0 {
			break
		}
		f1.Active = 1
	}

	// Propagate into branch predecessors of every instruction in [f, f1).
	var r *Reg
	var f2 *Flow
	for ; f != f1; f = f.P1 {
		r = f.Data.(*Reg)
		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
			prop(f2, r.refbehind, r.calbehind)
		}
	}
}
   627  
   628  func synch(f *Flow, dif Bits) {
   629  	var r1 *Reg
   630  	var z int
   631  
   632  	for f1 := f; f1 != nil; f1 = f1.S1 {
   633  		r1 = f1.Data.(*Reg)
   634  		for z = 0; z < BITS; z++ {
   635  			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
   636  			if dif.b[z] != r1.regdiff.b[z] {
   637  				r1.regdiff.b[z] = dif.b[z]
   638  				change = 1
   639  			}
   640  		}
   641  
   642  		if f1.Active != 0 {
   643  			break
   644  		}
   645  		f1.Active = 1
   646  		for z = 0; z < BITS; z++ {
   647  			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
   648  		}
   649  		if f1.S2 != nil {
   650  			synch(f1.S2, dif)
   651  		}
   652  	}
   653  }
   654  
   655  func allreg(b uint64, r *Rgn) uint64 {
   656  	v := &vars[r.varno]
   657  	r.regno = 0
   658  	switch v.etype {
   659  	default:
   660  		Fatal("unknown etype %d/%v", Bitno(b), Econv(int(v.etype), 0))
   661  
   662  	case TINT8,
   663  		TUINT8,
   664  		TINT16,
   665  		TUINT16,
   666  		TINT32,
   667  		TUINT32,
   668  		TINT64,
   669  		TUINT64,
   670  		TINT,
   671  		TUINT,
   672  		TUINTPTR,
   673  		TBOOL,
   674  		TPTR32,
   675  		TPTR64:
   676  		i := Thearch.BtoR(^b)
   677  		if i != 0 && r.cost > 0 {
   678  			r.regno = int16(i)
   679  			return Thearch.RtoB(i)
   680  		}
   681  
   682  	case TFLOAT32, TFLOAT64:
   683  		i := Thearch.BtoF(^b)
   684  		if i != 0 && r.cost > 0 {
   685  			r.regno = int16(i)
   686  			return Thearch.FtoB(i)
   687  		}
   688  	}
   689  
   690  	return 0
   691  }
   692  
   693  func LOAD(r *Reg, z int) uint64 {
   694  	return ^r.refbehind.b[z] & r.refahead.b[z]
   695  }
   696  
   697  func STORE(r *Reg, z int) uint64 {
   698  	return ^r.calbehind.b[z] & r.calahead.b[z]
   699  }
   700  
// Cost parameters used when estimating the benefit of registerizing a
// region. paint1 scales CLOAD and CREF by the instruction's Loop weight.
const (
	CLOAD = 5 // cost of load
	CREF  = 5 // cost of reference if not registerized
	LOOP  = 3 // loop execution count (applied in popt.go)
)
   707  
// paint1 flood-fills the region of variable bn that contains f and
// accumulates in the package variable change the estimated benefit of
// keeping the variable in a register over that region: each use or set
// adds CREF, each load or store that would have to be inserted
// subtracts CLOAD, all weighted by the instruction's Loop count.
//
// Every instruction visited gets bit bn set in its Reg.act, which also
// serves as the visited mark; if f is already marked, paint1 returns
// immediately.
func paint1(f *Flow, bn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	// Back up along P1 links to the first instruction of this straight
	// run that belongs to the region and is not yet marked.
	var f1 *Flow
	var r1 *Reg
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	// A load is needed at the region start unless the instruction only
	// sets the variable without using it.
	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		change -= CLOAD * int(f.Loop)
	}

	// Walk forward along S1 links, recursing into branch predecessors
	// (P2) and the branch successor (S2) that belong to the region.
	for {
		r.act.b[z] |= bb

		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
			if r.use1.b[z]&bb != 0 {
				change += CREF * int(f.Loop)
			}
			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
				change += CREF * int(f.Loop)
			}
		}

		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			change -= CLOAD * int(f.Loop)
		}

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint1(f1, bn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint1(f1, bn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}
   786  
// paint2 walks the same region as paint1 for variable bn, but follows
// instructions whose act bit is set and clears that bit as it goes.
// It returns regbits OR-ed with the regu bitmap of every instruction
// visited, i.e. the registers that are unavailable across the region.
// If f's act bit is already clear it returns regbits unchanged.
//
// depth is the recursion depth and is used only for debug output.
func paint2(f *Flow, bn int, depth int) uint64 {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	vreg := regbits
	r := f.Data.(*Reg)
	if r.act.b[z]&bb == 0 {
		return vreg
	}
	// Back up along P1 links to the first still-marked instruction of
	// this straight run that belongs to the region.
	var r1 *Reg
	var f1 *Flow
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb == 0 {
			break
		}
		f = f1
		r = r1
	}

	// Walk forward, unmarking instructions and collecting used registers.
	for {
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("  paint2 %d %v\n", depth, f.Prog)
		}

		r.act.b[z] &^= bb

		vreg |= r.regu

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					vreg |= paint2(f1, bn, depth+1)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				vreg |= paint2(f1, bn, depth+1)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb == 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}

	return vreg
}
   857  
// paint3 commits variable bn to register rn over the region containing
// f. It traverses the region the same way paint1 does (marking act
// bits as it goes) and, for each instruction visited:
//   - rewrites the From and/or To operand to the register via addreg
//     where the instruction uses or sets the variable,
//   - inserts a load (addmove with f == 0) at the region start and a
//     store (addmove with f == 1) where STORE and regdiff require one,
//   - records rb, the register's bit, in the instruction's regu bitmap.
func paint3(f *Flow, bn int, rb uint64, rn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	// Back up along P1 links to the first unmarked instruction of this
	// straight run that belongs to the region.
	var r1 *Reg
	var f1 *Flow
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	// Load the variable into the register at the region start unless the
	// first instruction only sets it.
	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		addmove(f, bn, rn, 0)
	}
	var p *obj.Prog
	for {
		r.act.b[z] |= bb
		p = f.Prog

		if r.use1.b[z]&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.From, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.To, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		// Write the register back to memory where required.
		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			addmove(f, bn, rn, 1)
		}
		r.regu |= rb

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint3(f1, bn, rb, rn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint3(f1, bn, rb, rn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}
   949  
   950  func addreg(a *obj.Addr, rn int) {
   951  	a.Sym = nil
   952  	a.Node = nil
   953  	a.Offset = 0
   954  	a.Type = obj.TYPE_REG
   955  	a.Reg = int16(rn)
   956  	a.Name = 0
   957  
   958  	Ostats.Ncvtreg++
   959  }
   960  
   961  func dumpone(f *Flow, isreg int) {
   962  	fmt.Printf("%d:%v", f.Loop, f.Prog)
   963  	if isreg != 0 {
   964  		r := f.Data.(*Reg)
   965  		var bit Bits
   966  		for z := 0; z < BITS; z++ {
   967  			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
   968  		}
   969  		if bany(&bit) {
   970  			fmt.Printf("\t")
   971  			if bany(&r.set) {
   972  				fmt.Printf(" s:%v", &r.set)
   973  			}
   974  			if bany(&r.use1) {
   975  				fmt.Printf(" u1:%v", &r.use1)
   976  			}
   977  			if bany(&r.use2) {
   978  				fmt.Printf(" u2:%v", &r.use2)
   979  			}
   980  			if bany(&r.refbehind) {
   981  				fmt.Printf(" rb:%v ", &r.refbehind)
   982  			}
   983  			if bany(&r.refahead) {
   984  				fmt.Printf(" ra:%v ", &r.refahead)
   985  			}
   986  			if bany(&r.calbehind) {
   987  				fmt.Printf(" cb:%v ", &r.calbehind)
   988  			}
   989  			if bany(&r.calahead) {
   990  				fmt.Printf(" ca:%v ", &r.calahead)
   991  			}
   992  			if bany(&r.regdiff) {
   993  				fmt.Printf(" d:%v ", &r.regdiff)
   994  			}
   995  			if bany(&r.act) {
   996  				fmt.Printf(" a:%v ", &r.act)
   997  			}
   998  		}
   999  	}
  1000  
  1001  	fmt.Printf("\n")
  1002  }
  1003  
  1004  func Dumpit(str string, r0 *Flow, isreg int) {
  1005  	var r1 *Flow
  1006  
  1007  	fmt.Printf("\n%s\n", str)
  1008  	for r := r0; r != nil; r = r.Link {
  1009  		dumpone(r, isreg)
  1010  		r1 = r.P2
  1011  		if r1 != nil {
  1012  			fmt.Printf("\tpred:")
  1013  			for ; r1 != nil; r1 = r1.P2link {
  1014  				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
  1015  			}
  1016  			if r.P1 != nil {
  1017  				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
  1018  			} else {
  1019  				fmt.Printf(" (only)")
  1020  			}
  1021  			fmt.Printf("\n")
  1022  		}
  1023  
  1024  		// Print successors if it's not just the next one
  1025  		if r.S1 != r.Link || r.S2 != nil {
  1026  			fmt.Printf("\tsucc:")
  1027  			if r.S1 != nil {
  1028  				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
  1029  			}
  1030  			if r.S2 != nil {
  1031  				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
  1032  			}
  1033  			fmt.Printf("\n")
  1034  		}
  1035  	}
  1036  }
  1037  
        // regopt runs the register optimizer over the instruction list that
        // starts at firstp. After mergetemp it defines a pseudo-variable for each
        // machine register, builds a flow graph whose nodes each carry a *Reg,
        // and then works through the numbered passes below: recording which
        // variables every instruction uses and sets, propagating that
        // information through the graph, isolating and costing regions (paint1),
        // and picking and applying registers for them (paint2, allreg, paint3).
        // It finishes by calling the peephole optimizer (unless the debug flags
        // suppress it), unlinking NOP instructions, and, under Debug['R'],
        // printing and resetting the Ostats counters.
        //
        // If Flowstart returns nil, regopt only clears the Opt field of the
        // nodes of the variables defined so far and returns.
  1038  func regopt(firstp *obj.Prog) {
  1039  	mergetemp(firstp)
  1040  
  1041  	/*
  1042  	 * control flow is more complicated in generated go code
  1043  	 * than in generated c code.  define pseudo-variables for
  1044  	 * registers, so we have complete register usage information.
  1045  	 */
  1046  	var nreg int
  1047  	regnames := Thearch.Regnames(&nreg)
  1048  
        	// Variables 0..nreg-1 are the registers: reset those entries and bind
        	// each one to its name node, creating the node on first use.
  1049  	nvar = nreg
  1050  	for i := 0; i < nreg; i++ {
  1051  		vars[i] = Var{}
  1052  	}
  1053  	for i := 0; i < nreg; i++ {
  1054  		if regnodes[i] == nil {
  1055  			regnodes[i] = newname(Lookup(regnames[i]))
  1056  		}
  1057  		vars[i].node = regnodes[i]
  1058  	}
  1059  
        	// regbits starts from the architecture's excluded registers; the other
        	// package-level bit sets are reset to zbits.
  1060  	regbits = Thearch.Excludedregs()
  1061  	externs = zbits
  1062  	params = zbits
  1063  	consts = zbits
  1064  	addrs = zbits
  1065  	ivar = zbits
  1066  	ovar = zbits
  1067  
  1068  	/*
  1069  	 * pass 1
  1070  	 * build aux data structure
  1071  	 * allocate pcs
  1072  	 * find use and set of variables
  1073  	 */
  1074  	g := Flowstart(firstp, func() interface{} { return new(Reg) })
  1075  	if g == nil {
        		// No flow graph: clear the Opt field of every variable's node
        		// and give up.
  1076  		for i := 0; i < nvar; i++ {
  1077  			vars[i].node.Opt = nil
  1078  		}
  1079  		return
  1080  	}
  1081  
  1082  	firstf := g.Start
  1083  
  1084  	for f := firstf; f != nil; f = f.Link {
  1085  		p := f.Prog
  1086  		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
  1087  			continue
  1088  		}
  1089  
  1090  		// Avoid making variables for direct-called functions.
  1091  		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
  1092  			continue
  1093  		}
  1094  
  1095  		// from vs to doesn't matter for registers.
        		// The register pseudo-variables are the low bits of word 0, so
        		// the instruction's register masks are OR'ed straight into b[0].
  1096  		r := f.Data.(*Reg)
  1097  		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
  1098  		r.set.b[0] |= p.Info.Regset
  1099  
        		// Left (From) operand: record address-taken, read, and write
        		// according to the instruction's flags.
  1100  		bit := mkvar(f, &p.From)
  1101  		if bany(&bit) {
  1102  			if p.Info.Flags&LeftAddr != 0 {
  1103  				setaddrs(bit)
  1104  			}
  1105  			if p.Info.Flags&LeftRead != 0 {
  1106  				for z := 0; z < BITS; z++ {
  1107  					r.use1.b[z] |= bit.b[z]
  1108  				}
  1109  			}
  1110  			if p.Info.Flags&LeftWrite != 0 {
  1111  				for z := 0; z < BITS; z++ {
  1112  					r.set.b[z] |= bit.b[z]
  1113  				}
  1114  			}
  1115  		}
  1116  
  1117  		// Compute used register for reg
  1118  		if p.Info.Flags&RegRead != 0 {
  1119  			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
  1120  		}
  1121  
  1122  		// Currently we never generate three register forms.
  1123  		// If we do, this will need to change.
  1124  		if p.From3.Type != obj.TYPE_NONE {
  1125  			Fatal("regopt not implemented for from3")
  1126  		}
  1127  
        		// Right (To) operand: same treatment, except that reads are
        		// recorded in use2 rather than use1.
  1128  		bit = mkvar(f, &p.To)
  1129  		if bany(&bit) {
  1130  			if p.Info.Flags&RightAddr != 0 {
  1131  				setaddrs(bit)
  1132  			}
  1133  			if p.Info.Flags&RightRead != 0 {
  1134  				for z := 0; z < BITS; z++ {
  1135  					r.use2.b[z] |= bit.b[z]
  1136  				}
  1137  			}
  1138  			if p.Info.Flags&RightWrite != 0 {
  1139  				for z := 0; z < BITS; z++ {
  1140  					r.set.b[z] |= bit.b[z]
  1141  				}
  1142  			}
  1143  		}
  1144  	}
  1145  
        	// Fold the per-variable addr fields into the addrs bit set and, with
        	// -R -v, print one line per variable.
  1146  	for i := 0; i < nvar; i++ {
  1147  		v := &vars[i]
  1148  		if v.addr != 0 {
  1149  			bit := blsh(uint(i))
  1150  			for z := 0; z < BITS; z++ {
  1151  				addrs.b[z] |= bit.b[z]
  1152  			}
  1153  		}
  1154  
  1155  		if Debug['R'] != 0 && Debug['v'] != 0 {
  1156  			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(int(v.etype), 0), v.width, v.node, v.offset)
  1157  		}
  1158  	}
  1159  
  1160  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1161  		Dumpit("pass1", firstf, 1)
  1162  	}
  1163  
  1164  	/*
  1165  	 * pass 2
  1166  	 * find looping structure
  1167  	 */
  1168  	flowrpo(g)
  1169  
  1170  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1171  		Dumpit("pass2", firstf, 1)
  1172  	}
  1173  
  1174  	/*
  1175  	 * pass 2.5
  1176  	 * iterate propagating fat vardef covering forward
  1177  	 * r->act records vars with a VARDEF since the last CALL.
  1178  	 * (r->act will be reused in pass 5 for something else,
  1179  	 * but we'll be done with it by then.)
  1180  	 */
  1181  	active := 0
  1182  
  1183  	for f := firstf; f != nil; f = f.Link {
  1184  		f.Active = 0
  1185  		r := f.Data.(*Reg)
  1186  		r.act = zbits
  1187  	}
  1188  
        	// One walk per VARDEF of a fat variable whose node has Opt set; each
        	// walk is handed a fresh value of active.
  1189  	for f := firstf; f != nil; f = f.Link {
  1190  		p := f.Prog
  1191  		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt != nil {
  1192  			active++
  1193  			walkvardef(p.To.Node.(*Node), f, active)
  1194  		}
  1195  	}
  1196  
  1197  	/*
  1198  	 * pass 3
  1199  	 * iterate propagating usage
  1200  	 * 	back until flow graph is complete
  1201  	 */
  1202  	var f1 *Flow
  1203  	var i int
  1204  	var f *Flow
        	// loop1 is repeated until a complete sweep ends with change still 0
        	// (change is a package-level variable; presumably prop sets it).
  1205  loop1:
  1206  	change = 0
  1207  
  1208  	for f = firstf; f != nil; f = f.Link {
  1209  		f.Active = 0
  1210  	}
        	// Start a propagation from every RET instruction.
  1211  	for f = firstf; f != nil; f = f.Link {
  1212  		if f.Prog.As == obj.ARET {
  1213  			prop(f, zbits, zbits)
  1214  		}
  1215  	}
  1216  
  1217  	/* pick up unreachable code */
        	// loop11 is repeated until a sweep finds no node that is still
        	// inactive while the next node in the list is active.
  1218  loop11:
  1219  	i = 0
  1220  
  1221  	for f = firstf; f != nil; f = f1 {
  1222  		f1 = f.Link
  1223  		if f1 != nil && f1.Active != 0 && f.Active == 0 {
  1224  			prop(f, zbits, zbits)
  1225  			i = 1
  1226  		}
  1227  	}
  1228  
  1229  	if i != 0 {
  1230  		goto loop11
  1231  	}
  1232  	if change != 0 {
  1233  		goto loop1
  1234  	}
  1235  
  1236  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1237  		Dumpit("pass3", firstf, 1)
  1238  	}
  1239  
  1240  	/*
  1241  	 * pass 4
  1242  	 * iterate propagating register/variable synchrony
  1243  	 * 	forward until graph is complete
  1244  	 */
        	// loop2 is repeated until a synch sweep ends with change still 0.
  1245  loop2:
  1246  	change = 0
  1247  
  1248  	for f = firstf; f != nil; f = f.Link {
  1249  		f.Active = 0
  1250  	}
  1251  	synch(firstf, zbits)
  1252  	if change != 0 {
  1253  		goto loop2
  1254  	}
  1255  
  1256  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1257  		Dumpit("pass4", firstf, 1)
  1258  	}
  1259  
  1260  	/*
  1261  	 * pass 4.5
  1262  	 * move register pseudo-variables into regu.
  1263  	 */
        	// mask selects bits 0..nreg-1 of word 0, i.e. the register
        	// pseudo-variables. They are saved in regu and then cleared from
        	// every per-node set.
  1264  	mask := uint64((1 << uint(nreg)) - 1)
  1265  	for f := firstf; f != nil; f = f.Link {
  1266  		r := f.Data.(*Reg)
  1267  		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
  1268  		r.set.b[0] &^= mask
  1269  		r.use1.b[0] &^= mask
  1270  		r.use2.b[0] &^= mask
  1271  		r.refbehind.b[0] &^= mask
  1272  		r.refahead.b[0] &^= mask
  1273  		r.calbehind.b[0] &^= mask
  1274  		r.calahead.b[0] &^= mask
  1275  		r.regdiff.b[0] &^= mask
  1276  		r.act.b[0] &^= mask
  1277  	}
  1278  
  1279  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1280  		Dumpit("pass4.5", firstf, 1)
  1281  	}
  1282  
  1283  	/*
  1284  	 * pass 5
  1285  	 * isolate regions
  1286  	 * calculate costs (paint1)
  1287  	 */
        	// First node only: bits in its refahead/calahead that are not in
        	// externs, params, addrs, or consts are reported (under Debug['w'])
        	// as used and not set.
  1288  	var bit Bits
  1289  	if f := firstf; f != nil {
  1290  		r := f.Data.(*Reg)
  1291  		for z := 0; z < BITS; z++ {
  1292  			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
  1293  		}
  1294  		if bany(&bit) && f.Refset == 0 {
  1295  			// should never happen - all variables are preset
  1296  			if Debug['w'] != 0 {
  1297  				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
  1298  			}
  1299  			f.Refset = 1
  1300  		}
  1301  	}
  1302  
        	// act is reused from here on (see the pass 2.5 comment), so clear it.
  1303  	for f := firstf; f != nil; f = f.Link {
  1304  		(f.Data.(*Reg)).act = zbits
  1305  	}
  1306  	nregion = 0
  1307  	region = region[:0]
  1308  	var rgp *Rgn
  1309  	for f := firstf; f != nil; f = f.Link {
  1310  		r := f.Data.(*Reg)
        		// Bits set here but not in refahead, calahead, or addrs: the
        		// instruction is marked and handed to Thearch.Excise.
  1311  		for z := 0; z < BITS; z++ {
  1312  			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
  1313  		}
  1314  		if bany(&bit) && f.Refset == 0 {
  1315  			if Debug['w'] != 0 {
  1316  				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
  1317  			}
  1318  			f.Refset = 1
  1319  			Thearch.Excise(f)
  1320  		}
  1321  
        		// For each variable in LOAD(r, z) that is not already in act
        		// and not address-taken, call paint1 and record a region if it
        		// left change positive.
  1322  		for z := 0; z < BITS; z++ {
  1323  			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
  1324  		}
  1325  		for bany(&bit) {
  1326  			i = bnum(bit)
  1327  			change = 0
  1328  			paint1(f, i)
  1329  			biclr(&bit, uint(i))
  1330  			if change <= 0 {
  1331  				continue
  1332  			}
        			// Past MaxRgn regions are only counted, not stored, so
        			// the overflow can be reported below.
  1333  			if nregion >= MaxRgn {
  1334  				nregion++
  1335  				continue
  1336  			}
  1337  
  1338  			region = append(region, Rgn{
  1339  				enter: f,
  1340  				cost:  int16(change),
  1341  				varno: int16(i),
  1342  			})
  1343  			nregion++
  1344  		}
  1345  	}
  1346  
        	// Disabled debugging aid: the leading false makes this branch dead.
  1347  	if false && Debug['v'] != 0 && strings.Contains(Curfn.Nname.Sym.Name, "Parse") {
  1348  		Warn("regions: %d\n", nregion)
  1349  	}
        	// Clamp nregion back to the number of regions actually stored.
  1350  	if nregion >= MaxRgn {
  1351  		if Debug['v'] != 0 {
  1352  			Warn("too many regions: %d\n", nregion)
  1353  		}
  1354  		nregion = MaxRgn
  1355  	}
  1356  
  1357  	sort.Sort(rcmp(region[:nregion]))
  1358  
  1359  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1360  		Dumpit("pass5", firstf, 1)
  1361  	}
  1362  
  1363  	/*
  1364  	 * pass 6
  1365  	 * determine used registers (paint2)
  1366  	 * replace code (paint3)
  1367  	 */
  1368  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1369  		fmt.Printf("\nregisterizing\n")
  1370  	}
  1371  	var usedreg uint64
  1372  	var vreg uint64
        	// Visit the regions in sorted order; a region is rewritten with
        	// paint3 only if rgp.regno is nonzero after the allreg call.
  1373  	for i := 0; i < nregion; i++ {
  1374  		rgp = &region[i]
  1375  		if Debug['R'] != 0 && Debug['v'] != 0 {
  1376  			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
  1377  		}
  1378  		bit = blsh(uint(rgp.varno))
  1379  		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
  1380  		vreg = allreg(usedreg, rgp)
  1381  		if rgp.regno != 0 {
  1382  			if Debug['R'] != 0 && Debug['v'] != 0 {
  1383  				v := &vars[rgp.varno]
  1384  				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(int(v.etype), 0), obj.Rconv(int(rgp.regno)), usedreg, vreg)
  1385  			}
  1386  
  1387  			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
  1388  		}
  1389  	}
  1390  
  1391  	/*
  1392  	 * free aux structures. peep allocates new ones.
  1393  	 */
  1394  	for i := 0; i < nvar; i++ {
  1395  		vars[i].node.Opt = nil
  1396  	}
  1397  	Flowend(g)
  1398  	firstf = nil
  1399  
  1400  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1401  		// Rebuild flow graph, since we inserted instructions
  1402  		g := Flowstart(firstp, nil)
  1403  		firstf = g.Start
  1404  		Dumpit("pass6", firstf, 0)
  1405  		Flowend(g)
  1406  		firstf = nil
  1407  	}
  1408  
  1409  	/*
  1410  	 * pass 7
  1411  	 * peep-hole on basic block
  1412  	 */
        	// With -R the peephole pass is skipped unless -P is also given.
  1413  	if Debug['R'] == 0 || Debug['P'] != 0 {
  1414  		Thearch.Peep(firstp)
  1415  	}
  1416  
  1417  	/*
  1418  	 * eliminate nops
  1419  	 */
        	// Unlink every NOP that follows p, and advance branch targets that
        	// point at a NOP to the first following non-NOP instruction.
  1420  	for p := firstp; p != nil; p = p.Link {
  1421  		for p.Link != nil && p.Link.As == obj.ANOP {
  1422  			p.Link = p.Link.Link
  1423  		}
  1424  		if p.To.Type == obj.TYPE_BRANCH {
  1425  			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
  1426  				p.To.Val = p.To.Val.(*obj.Prog).Link
  1427  			}
  1428  		}
  1429  	}
  1430  
        	// Under -R, print the nonzero Ostats counters (with a heading if there
        	// are any), then reset Ostats.
  1431  	if Debug['R'] != 0 {
  1432  		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
  1433  			fmt.Printf("\nstats\n")
  1434  		}
  1435  
  1436  		if Ostats.Ncvtreg != 0 {
  1437  			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
  1438  		}
  1439  		if Ostats.Nspill != 0 {
  1440  			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
  1441  		}
  1442  		if Ostats.Nreload != 0 {
  1443  			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
  1444  		}
  1445  		if Ostats.Ndelmov != 0 {
  1446  			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
  1447  		}
  1448  		if Ostats.Nvar != 0 {
  1449  			fmt.Printf("\t%4d var\n", Ostats.Nvar)
  1450  		}
  1451  		if Ostats.Naddr != 0 {
  1452  			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
  1453  		}
  1454  
  1455  		Ostats = OptStats{}
  1456  	}
  1457  }
  1458  
  1459  // bany reports whether any bits in a are set.
  1460  func bany(a *Bits) bool {
  1461  	for _, x := range &a.b { // & to avoid making a copy of a.b
  1462  		if x != 0 {
  1463  			return true
  1464  		}
  1465  	}
  1466  	return false
  1467  }
  1468  
  1469  // bnum reports the lowest index of a 1 bit in a.
  1470  func bnum(a Bits) int {
  1471  	for i, x := range &a.b { // & to avoid making a copy of a.b
  1472  		if x != 0 {
  1473  			return 64*i + Bitno(x)
  1474  		}
  1475  	}
  1476  
  1477  	Fatal("bad in bnum")
  1478  	return 0
  1479  }
  1480  
  1481  // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
  1482  func blsh(n uint) Bits {
  1483  	c := zbits
  1484  	c.b[n/64] = 1 << (n % 64)
  1485  	return c
  1486  }
  1487  
  1488  // btest reports whether bit n is 1.
  1489  func btest(a *Bits, n uint) bool {
  1490  	return a.b[n/64]&(1<<(n%64)) != 0
  1491  }
  1492  
  1493  // biset sets bit n to 1.
  1494  func biset(a *Bits, n uint) {
  1495  	a.b[n/64] |= 1 << (n % 64)
  1496  }
  1497  
  1498  // biclr sets bit n to 0.
  1499  func biclr(a *Bits, n uint) {
  1500  	a.b[n/64] &^= (1 << (n % 64))
  1501  }
  1502  
  1503  // Bitno reports the lowest index of a 1 bit in b.
  1504  // It calls Fatal if there is no 1 bit.
  1505  func Bitno(b uint64) int {
  1506  	if b == 0 {
  1507  		Fatal("bad in bitno")
  1508  	}
  1509  	n := 0
  1510  	if b&(1<<32-1) == 0 {
  1511  		n += 32
  1512  		b >>= 32
  1513  	}
  1514  	if b&(1<<16-1) == 0 {
  1515  		n += 16
  1516  		b >>= 16
  1517  	}
  1518  	if b&(1<<8-1) == 0 {
  1519  		n += 8
  1520  		b >>= 8
  1521  	}
  1522  	if b&(1<<4-1) == 0 {
  1523  		n += 4
  1524  		b >>= 4
  1525  	}
  1526  	if b&(1<<2-1) == 0 {
  1527  		n += 2
  1528  		b >>= 2
  1529  	}
  1530  	if b&1 == 0 {
  1531  		n++
  1532  	}
  1533  	return n
  1534  }
  1535  
  1536  // String returns a space-separated list of the variables represented by bits.
  1537  func (bits Bits) String() string {
  1538  	// Note: This method takes a value receiver, both for convenience
  1539  	// and to make it safe to modify the bits as we process them.
  1540  	// Even so, most prints above use &bits, because then the value
  1541  	// being stored in the interface{} is a pointer and does not require
  1542  	// an allocation and copy to create the interface{}.
  1543  	var buf bytes.Buffer
  1544  	sep := ""
  1545  	for bany(&bits) {
  1546  		i := bnum(bits)
  1547  		buf.WriteString(sep)
  1548  		sep = " "
  1549  		v := &vars[i]
  1550  		if v.node == nil || v.node.Sym == nil {
  1551  			fmt.Fprintf(&buf, "$%d", i)
  1552  		} else {
  1553  			fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
  1554  			if v.offset != 0 {
  1555  				fmt.Fprintf(&buf, "%+d", int64(v.offset))
  1556  			}
  1557  		}
  1558  		biclr(&bits, uint(i))
  1559  	}
  1560  	return buf.String()
  1561  }