github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/cmd/compile/internal/gc/reg.go (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package gc
    32  
    33  import (
    34  	"bytes"
    35  	"cmd/internal/obj"
    36  	"fmt"
    37  	"sort"
    38  	"strings"
    39  )
    40  
    41  // A Var represents a single variable that may be stored in a register.
    42  // That variable may itself correspond to a hardware register,
    43  // to represent the use of registers in the unoptimized instruction stream.
    44  type Var struct {
    45  	offset     int64
    46  	node       *Node
    47  	nextinnode *Var
    48  	width      int
    49  	id         int // index in vars
    50  	name       int8
    51  	etype      int8
    52  	addr       int8
    53  }
    54  
    55  // Bits represents a set of Vars, stored as a bit set of var numbers
    56  // (the index in vars, or equivalently v.id).
    57  type Bits struct {
    58  	b [BITS]uint64
    59  }
    60  
    61  const (
    62  	BITS = 3
    63  	NVAR = BITS * 64
    64  )
    65  
    66  var (
    67  	vars [NVAR]Var // variables under consideration
    68  	nvar int       // number of vars
    69  
    70  	regbits uint64 // bits for hardware registers
    71  
    72  	zbits   Bits // zero
    73  	externs Bits // global variables
    74  	params  Bits // function parameters and results
    75  	ivar    Bits // function parameters (inputs)
    76  	ovar    Bits // function results (outputs)
    77  	consts  Bits // constant values
    78  	addrs   Bits // variables with address taken
    79  )
    80  
    81  // A Reg is a wrapper around a single Prog (one instruction) that holds
    82  // register optimization information while the optimizer runs.
    83  // r->prog is the instruction.
    84  type Reg struct {
    85  	set  Bits // regopt variables written by this instruction.
    86  	use1 Bits // regopt variables read by prog->from.
    87  	use2 Bits // regopt variables read by prog->to.
    88  
    89  	// refahead/refbehind are the regopt variables whose current
    90  	// value may be used in the following/preceding instructions
    91  	// up to a CALL (or the value is clobbered).
    92  	refbehind Bits
    93  	refahead  Bits
    94  
    95  	// calahead/calbehind are similar, but for variables in
    96  	// instructions that are reachable after hitting at least one
    97  	// CALL.
    98  	calbehind Bits
    99  	calahead  Bits
   100  
   101  	regdiff Bits
   102  	act     Bits
   103  	regu    uint64 // register used bitmap
   104  }
   105  
   106  // A Rgn represents a single regopt variable over a region of code
   107  // where a register could potentially be dedicated to that variable.
   108  // The code encompassed by a Rgn is defined by the flow graph,
   109  // starting at enter, flood-filling forward while varno is refahead
   110  // and backward while varno is refbehind, and following branches.
   111  // A single variable may be represented by multiple disjoint Rgns and
   112  // each Rgn may choose a different register for that variable.
   113  // Registers are allocated to regions greedily in order of descending
   114  // cost.
   115  type Rgn struct {
   116  	enter *Flow
   117  	cost  int16
   118  	varno int16
   119  	regno int16
   120  }
   121  
   122  // The Plan 9 C compilers used a limit of 600 regions,
   123  // but the yacc-generated parser in y.go has 3100 regions.
   124  // We set MaxRgn large enough to handle that.
   125  // There's not a huge cost to having too many regions:
   126  // the main processing traces the live area for each variable,
   127  // which is limited by the number of variables times the area,
   128  // not the raw region count. If there are many regions, they
   129  // are almost certainly small and easy to trace.
   130  // The only operation that scales with region count is the
   131  // sorting by cost, which uses sort.Sort and is therefore
   132  // guaranteed n log n.
   133  const MaxRgn = 6000
   134  
   135  var (
   136  	region  []Rgn
   137  	nregion int
   138  )
   139  
   140  type rcmp []Rgn
   141  
   142  func (x rcmp) Len() int {
   143  	return len(x)
   144  }
   145  
   146  func (x rcmp) Swap(i, j int) {
   147  	x[i], x[j] = x[j], x[i]
   148  }
   149  
   150  func (x rcmp) Less(i, j int) bool {
   151  	p1 := &x[i]
   152  	p2 := &x[j]
   153  	if p1.cost != p2.cost {
   154  		return int(p2.cost)-int(p1.cost) < 0
   155  	}
   156  	if p1.varno != p2.varno {
   157  		return int(p2.varno)-int(p1.varno) < 0
   158  	}
   159  	if p1.enter != p2.enter {
   160  		return int(p2.enter.Id-p1.enter.Id) < 0
   161  	}
   162  	return false
   163  }
   164  
   165  func setaddrs(bit Bits) {
   166  	var i int
   167  	var n int
   168  	var v *Var
   169  	var node *Node
   170  
   171  	for bany(&bit) {
   172  		// convert each bit to a variable
   173  		i = bnum(&bit)
   174  
   175  		node = vars[i].node
   176  		n = int(vars[i].name)
   177  		biclr(&bit, uint(i))
   178  
   179  		// disable all pieces of that variable
   180  		for i = 0; i < nvar; i++ {
   181  			v = &vars[i]
   182  			if v.node == node && int(v.name) == n {
   183  				v.addr = 2
   184  			}
   185  		}
   186  	}
   187  }
   188  
   189  var regnodes [64]*Node
   190  
   191  func walkvardef(n *Node, f *Flow, active int) {
   192  	var f1 *Flow
   193  	var bn int
   194  	var v *Var
   195  
   196  	for f1 = f; f1 != nil; f1 = f1.S1 {
   197  		if f1.Active == int32(active) {
   198  			break
   199  		}
   200  		f1.Active = int32(active)
   201  		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
   202  			break
   203  		}
   204  		for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
   205  			bn = v.id
   206  			biset(&(f1.Data.(*Reg)).act, uint(bn))
   207  		}
   208  
   209  		if f1.Prog.As == obj.ACALL {
   210  			break
   211  		}
   212  	}
   213  
   214  	for f2 := f; f2 != f1; f2 = f2.S1 {
   215  		if f2.S2 != nil {
   216  			walkvardef(n, f2.S2, active)
   217  		}
   218  	}
   219  }
   220  
   221  /*
   222   * add mov b,rn
   223   * just after r
   224   */
   225  func addmove(r *Flow, bn int, rn int, f int) {
   226  	p1 := Ctxt.NewProg()
   227  	Clearp(p1)
   228  	p1.Pc = 9999
   229  
   230  	p := r.Prog
   231  	p1.Link = p.Link
   232  	p.Link = p1
   233  	p1.Lineno = p.Lineno
   234  
   235  	v := &vars[bn]
   236  
   237  	a := &p1.To
   238  	a.Offset = v.offset
   239  	a.Etype = uint8(v.etype)
   240  	a.Type = obj.TYPE_MEM
   241  	a.Name = v.name
   242  	a.Node = v.node
   243  	a.Sym = Linksym(v.node.Sym)
   244  
   245  	/* NOTE(rsc): 9g did
   246  	if(a->etype == TARRAY)
   247  		a->type = TYPE_ADDR;
   248  	else if(a->sym == nil)
   249  		a->type = TYPE_CONST;
   250  	*/
   251  	p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)]))
   252  
   253  	// TODO(rsc): Remove special case here.
   254  	if (Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL {
   255  		p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8]))
   256  	}
   257  	p1.From.Type = obj.TYPE_REG
   258  	p1.From.Reg = int16(rn)
   259  	p1.From.Name = obj.NAME_NONE
   260  	if f == 0 {
   261  		p1.From = *a
   262  		*a = obj.Addr{}
   263  		a.Type = obj.TYPE_REG
   264  		a.Reg = int16(rn)
   265  	}
   266  
   267  	if Debug['R'] != 0 && Debug['v'] != 0 {
   268  		fmt.Printf("%v ===add=== %v\n", p, p1)
   269  	}
   270  	Ostats.Nspill++
   271  }
   272  
   273  func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
   274  	t1 := o1 + int64(w1)
   275  	t2 := o2 + int64(w2)
   276  
   277  	if t1 <= o2 || t2 <= o1 {
   278  		return false
   279  	}
   280  
   281  	return true
   282  }
   283  
   284  func mkvar(f *Flow, a *obj.Addr) Bits {
   285  	/*
   286  	 * mark registers used
   287  	 */
   288  	if a.Type == obj.TYPE_NONE {
   289  		return zbits
   290  	}
   291  
   292  	r := f.Data.(*Reg)
   293  	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB
   294  
   295  	var n int
   296  	switch a.Type {
   297  	default:
   298  		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
   299  		if regu == 0 {
   300  			return zbits
   301  		}
   302  		bit := zbits
   303  		bit.b[0] = regu
   304  		return bit
   305  
   306  		// TODO(rsc): Remove special case here.
   307  	case obj.TYPE_ADDR:
   308  		var bit Bits
   309  		if Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' {
   310  			goto memcase
   311  		}
   312  		a.Type = obj.TYPE_MEM
   313  		bit = mkvar(f, a)
   314  		setaddrs(bit)
   315  		a.Type = obj.TYPE_ADDR
   316  		Ostats.Naddr++
   317  		return zbits
   318  
   319  	memcase:
   320  		fallthrough
   321  
   322  	case obj.TYPE_MEM:
   323  		if r != nil {
   324  			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
   325  		}
   326  
   327  		/* NOTE: 5g did
   328  		if(r->f.prog->scond & (C_PBIT|C_WBIT))
   329  			r->set.b[0] |= RtoB(a->reg);
   330  		*/
   331  		switch a.Name {
   332  		default:
   333  			// Note: This case handles NAME_EXTERN and NAME_STATIC.
   334  			// We treat these as requiring eager writes to memory, due to
   335  			// the possibility of a fault handler looking at them, so there is
   336  			// not much point in registerizing the loads.
   337  			// If we later choose the set of candidate variables from a
   338  			// larger list, these cases could be deprioritized instead of
   339  			// removed entirely.
   340  			return zbits
   341  
   342  		case obj.NAME_PARAM,
   343  			obj.NAME_AUTO:
   344  			n = int(a.Name)
   345  		}
   346  	}
   347  
   348  	node, _ := a.Node.(*Node)
   349  	if node == nil || node.Op != ONAME || node.Orig == nil {
   350  		return zbits
   351  	}
   352  	node = node.Orig
   353  	if node.Orig != node {
   354  		Fatalf("%v: bad node", Ctxt.Dconv(a))
   355  	}
   356  	if node.Sym == nil || node.Sym.Name[0] == '.' {
   357  		return zbits
   358  	}
   359  	et := int(a.Etype)
   360  	o := a.Offset
   361  	w := a.Width
   362  	if w < 0 {
   363  		Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
   364  	}
   365  
   366  	flag := 0
   367  	var v *Var
   368  	for i := 0; i < nvar; i++ {
   369  		v = &vars[i]
   370  		if v.node == node && int(v.name) == n {
   371  			if v.offset == o {
   372  				if int(v.etype) == et {
   373  					if int64(v.width) == w {
   374  						// TODO(rsc): Remove special case for arm here.
   375  						if flag == 0 || Thearch.Thechar != '5' {
   376  							return blsh(uint(i))
   377  						}
   378  					}
   379  				}
   380  			}
   381  
   382  			// if they overlap, disable both
   383  			if overlap_reg(v.offset, v.width, o, int(w)) {
   384  				//				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
   385  				v.addr = 1
   386  
   387  				flag = 1
   388  			}
   389  		}
   390  	}
   391  
   392  	switch et {
   393  	case 0, TFUNC:
   394  		return zbits
   395  	}
   396  
   397  	if nvar >= NVAR {
   398  		if Debug['w'] > 1 && node != nil {
   399  			Fatalf("variable not optimized: %v", Nconv(node, obj.FmtSharp))
   400  		}
   401  		if Debug['v'] > 0 {
   402  			Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp))
   403  		}
   404  
   405  		// If we're not tracking a word in a variable, mark the rest as
   406  		// having its address taken, so that we keep the whole thing
   407  		// live at all calls. otherwise we might optimize away part of
   408  		// a variable but not all of it.
   409  		var v *Var
   410  		for i := 0; i < nvar; i++ {
   411  			v = &vars[i]
   412  			if v.node == node {
   413  				v.addr = 1
   414  			}
   415  		}
   416  
   417  		return zbits
   418  	}
   419  
   420  	i := nvar
   421  	nvar++
   422  	v = &vars[i]
   423  	v.id = i
   424  	v.offset = o
   425  	v.name = int8(n)
   426  	v.etype = int8(et)
   427  	v.width = int(w)
   428  	v.addr = int8(flag) // funny punning
   429  	v.node = node
   430  
   431  	// node->opt is the head of a linked list
   432  	// of Vars within the given Node, so that
   433  	// we can start at a Var and find all the other
   434  	// Vars in the same Go variable.
   435  	v.nextinnode, _ = node.Opt().(*Var)
   436  
   437  	node.SetOpt(v)
   438  
   439  	bit := blsh(uint(i))
   440  	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
   441  		for z := 0; z < BITS; z++ {
   442  			externs.b[z] |= bit.b[z]
   443  		}
   444  	}
   445  	if n == obj.NAME_PARAM {
   446  		for z := 0; z < BITS; z++ {
   447  			params.b[z] |= bit.b[z]
   448  		}
   449  	}
   450  
   451  	if node.Class == PPARAM {
   452  		for z := 0; z < BITS; z++ {
   453  			ivar.b[z] |= bit.b[z]
   454  		}
   455  	}
   456  	if node.Class == PPARAMOUT {
   457  		for z := 0; z < BITS; z++ {
   458  			ovar.b[z] |= bit.b[z]
   459  		}
   460  	}
   461  
   462  	// Treat values with their address taken as live at calls,
   463  	// because the garbage collector's liveness analysis in ../gc/plive.c does.
   464  	// These must be consistent or else we will elide stores and the garbage
   465  	// collector will see uninitialized data.
   466  	// The typical case where our own analysis is out of sync is when the
   467  	// node appears to have its address taken but that code doesn't actually
   468  	// get generated and therefore doesn't show up as an address being
   469  	// taken when we analyze the instruction stream.
   470  	// One instance of this case is when a closure uses the same name as
   471  	// an outer variable for one of its own variables declared with :=.
   472  	// The parser flags the outer variable as possibly shared, and therefore
   473  	// sets addrtaken, even though it ends up not being actually shared.
   474  	// If we were better about _ elision, _ = &x would suffice too.
   475  	// The broader := in a closure problem is mentioned in a comment in
   476  	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
   477  	if node.Addrtaken {
   478  		v.addr = 1
   479  	}
   480  
   481  	// Disable registerization for globals, because:
   482  	// (1) we might panic at any time and we want the recovery code
   483  	// to see the latest values (issue 1304).
   484  	// (2) we don't know what pointers might point at them and we want
   485  	// loads via those pointers to see updated values and vice versa (issue 7995).
   486  	//
   487  	// Disable registerization for results if using defer, because the deferred func
   488  	// might recover and return, causing the current values to be used.
   489  	if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
   490  		v.addr = 1
   491  	}
   492  
   493  	if Debug['R'] != 0 {
   494  		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(int(et), 0), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr)
   495  	}
   496  	Ostats.Nvar++
   497  
   498  	return bit
   499  }
   500  
   501  var change int
   502  
   503  func prop(f *Flow, ref Bits, cal Bits) {
   504  	var f1 *Flow
   505  	var r1 *Reg
   506  	var z int
   507  	var i int
   508  	var v *Var
   509  	var v1 *Var
   510  
   511  	for f1 = f; f1 != nil; f1 = f1.P1 {
   512  		r1 = f1.Data.(*Reg)
   513  		for z = 0; z < BITS; z++ {
   514  			ref.b[z] |= r1.refahead.b[z]
   515  			if ref.b[z] != r1.refahead.b[z] {
   516  				r1.refahead.b[z] = ref.b[z]
   517  				change = 1
   518  			}
   519  
   520  			cal.b[z] |= r1.calahead.b[z]
   521  			if cal.b[z] != r1.calahead.b[z] {
   522  				r1.calahead.b[z] = cal.b[z]
   523  				change = 1
   524  			}
   525  		}
   526  
   527  		switch f1.Prog.As {
   528  		case obj.ACALL:
   529  			if Noreturn(f1.Prog) {
   530  				break
   531  			}
   532  
   533  			// Mark all input variables (ivar) as used, because that's what the
   534  			// liveness bitmaps say. The liveness bitmaps say that so that a
   535  			// panic will not show stale values in the parameter dump.
   536  			// Mark variables with a recent VARDEF (r1->act) as used,
   537  			// so that the optimizer flushes initializations to memory,
   538  			// so that if a garbage collection happens during this CALL,
   539  			// the collector will see initialized memory. Again this is to
   540  			// match what the liveness bitmaps say.
   541  			for z = 0; z < BITS; z++ {
   542  				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
   543  				ref.b[z] = 0
   544  			}
   545  
   546  			// cal.b is the current approximation of what's live across the call.
   547  			// Every bit in cal.b is a single stack word. For each such word,
   548  			// find all the other tracked stack words in the same Go variable
   549  			// (struct/slice/string/interface) and mark them live too.
   550  			// This is necessary because the liveness analysis for the garbage
   551  			// collector works at variable granularity, not at word granularity.
   552  			// It is fundamental for slice/string/interface: the garbage collector
   553  			// needs the whole value, not just some of the words, in order to
   554  			// interpret the other bits correctly. Specifically, slice needs a consistent
   555  			// ptr and cap, string needs a consistent ptr and len, and interface
   556  			// needs a consistent type word and data word.
   557  			for z = 0; z < BITS; z++ {
   558  				if cal.b[z] == 0 {
   559  					continue
   560  				}
   561  				for i = 0; i < 64; i++ {
   562  					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
   563  						continue
   564  					}
   565  					v = &vars[z*64+i]
   566  					if v.node.Opt() == nil { // v represents fixed register, not Go variable
   567  						continue
   568  					}
   569  
   570  					// v->node->opt is the head of a linked list of Vars
   571  					// corresponding to tracked words from the Go variable v->node.
   572  					// Walk the list and set all the bits.
   573  					// For a large struct this could end up being quadratic:
   574  					// after the first setting, the outer loop (for z, i) would see a 1 bit
   575  					// for all of the remaining words in the struct, and for each such
   576  					// word would go through and turn on all the bits again.
   577  					// To avoid the quadratic behavior, we only turn on the bits if
   578  					// v is the head of the list or if the head's bit is not yet turned on.
   579  					// This will set the bits at most twice, keeping the overall loop linear.
   580  					v1, _ = v.node.Opt().(*Var)
   581  
   582  					if v == v1 || !btest(&cal, uint(v1.id)) {
   583  						for ; v1 != nil; v1 = v1.nextinnode {
   584  							biset(&cal, uint(v1.id))
   585  						}
   586  					}
   587  				}
   588  			}
   589  
   590  		case obj.ATEXT:
   591  			for z = 0; z < BITS; z++ {
   592  				cal.b[z] = 0
   593  				ref.b[z] = 0
   594  			}
   595  
   596  		case obj.ARET:
   597  			for z = 0; z < BITS; z++ {
   598  				cal.b[z] = externs.b[z] | ovar.b[z]
   599  				ref.b[z] = 0
   600  			}
   601  		}
   602  
   603  		for z = 0; z < BITS; z++ {
   604  			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
   605  			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
   606  			r1.refbehind.b[z] = ref.b[z]
   607  			r1.calbehind.b[z] = cal.b[z]
   608  		}
   609  
   610  		if f1.Active != 0 {
   611  			break
   612  		}
   613  		f1.Active = 1
   614  	}
   615  
   616  	var r *Reg
   617  	var f2 *Flow
   618  	for ; f != f1; f = f.P1 {
   619  		r = f.Data.(*Reg)
   620  		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
   621  			prop(f2, r.refbehind, r.calbehind)
   622  		}
   623  	}
   624  }
   625  
   626  func synch(f *Flow, dif Bits) {
   627  	var r1 *Reg
   628  	var z int
   629  
   630  	for f1 := f; f1 != nil; f1 = f1.S1 {
   631  		r1 = f1.Data.(*Reg)
   632  		for z = 0; z < BITS; z++ {
   633  			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
   634  			if dif.b[z] != r1.regdiff.b[z] {
   635  				r1.regdiff.b[z] = dif.b[z]
   636  				change = 1
   637  			}
   638  		}
   639  
   640  		if f1.Active != 0 {
   641  			break
   642  		}
   643  		f1.Active = 1
   644  		for z = 0; z < BITS; z++ {
   645  			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
   646  		}
   647  		if f1.S2 != nil {
   648  			synch(f1.S2, dif)
   649  		}
   650  	}
   651  }
   652  
   653  func allreg(b uint64, r *Rgn) uint64 {
   654  	v := &vars[r.varno]
   655  	r.regno = 0
   656  	switch v.etype {
   657  	default:
   658  		Fatalf("unknown etype %d/%v", Bitno(b), Econv(int(v.etype), 0))
   659  
   660  	case TINT8,
   661  		TUINT8,
   662  		TINT16,
   663  		TUINT16,
   664  		TINT32,
   665  		TUINT32,
   666  		TINT64,
   667  		TUINT64,
   668  		TINT,
   669  		TUINT,
   670  		TUINTPTR,
   671  		TBOOL,
   672  		TPTR32,
   673  		TPTR64:
   674  		i := Thearch.BtoR(^b)
   675  		if i != 0 && r.cost > 0 {
   676  			r.regno = int16(i)
   677  			return Thearch.RtoB(i)
   678  		}
   679  
   680  	case TFLOAT32, TFLOAT64:
   681  		i := Thearch.BtoF(^b)
   682  		if i != 0 && r.cost > 0 {
   683  			r.regno = int16(i)
   684  			return Thearch.FtoB(i)
   685  		}
   686  	}
   687  
   688  	return 0
   689  }
   690  
   691  func LOAD(r *Reg, z int) uint64 {
   692  	return ^r.refbehind.b[z] & r.refahead.b[z]
   693  }
   694  
   695  func STORE(r *Reg, z int) uint64 {
   696  	return ^r.calbehind.b[z] & r.calahead.b[z]
   697  }
   698  
   699  // Cost parameters
   700  const (
   701  	CLOAD = 5 // cost of load
   702  	CREF  = 5 // cost of reference if not registerized
   703  	LOOP  = 3 // loop execution count (applied in popt.go)
   704  )
   705  
   706  func paint1(f *Flow, bn int) {
   707  	z := bn / 64
   708  	bb := uint64(1 << uint(bn%64))
   709  	r := f.Data.(*Reg)
   710  	if r.act.b[z]&bb != 0 {
   711  		return
   712  	}
   713  	var f1 *Flow
   714  	var r1 *Reg
   715  	for {
   716  		if r.refbehind.b[z]&bb == 0 {
   717  			break
   718  		}
   719  		f1 = f.P1
   720  		if f1 == nil {
   721  			break
   722  		}
   723  		r1 = f1.Data.(*Reg)
   724  		if r1.refahead.b[z]&bb == 0 {
   725  			break
   726  		}
   727  		if r1.act.b[z]&bb != 0 {
   728  			break
   729  		}
   730  		f = f1
   731  		r = r1
   732  	}
   733  
   734  	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
   735  		change -= CLOAD * int(f.Loop)
   736  	}
   737  
   738  	for {
   739  		r.act.b[z] |= bb
   740  
   741  		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
   742  			if r.use1.b[z]&bb != 0 {
   743  				change += CREF * int(f.Loop)
   744  			}
   745  			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
   746  				change += CREF * int(f.Loop)
   747  			}
   748  		}
   749  
   750  		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
   751  			change -= CLOAD * int(f.Loop)
   752  		}
   753  
   754  		if r.refbehind.b[z]&bb != 0 {
   755  			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
   756  				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
   757  					paint1(f1, bn)
   758  				}
   759  			}
   760  		}
   761  
   762  		if r.refahead.b[z]&bb == 0 {
   763  			break
   764  		}
   765  		f1 = f.S2
   766  		if f1 != nil {
   767  			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
   768  				paint1(f1, bn)
   769  			}
   770  		}
   771  		f = f.S1
   772  		if f == nil {
   773  			break
   774  		}
   775  		r = f.Data.(*Reg)
   776  		if r.act.b[z]&bb != 0 {
   777  			break
   778  		}
   779  		if r.refbehind.b[z]&bb == 0 {
   780  			break
   781  		}
   782  	}
   783  }
   784  
   785  func paint2(f *Flow, bn int, depth int) uint64 {
   786  	z := bn / 64
   787  	bb := uint64(1 << uint(bn%64))
   788  	vreg := regbits
   789  	r := f.Data.(*Reg)
   790  	if r.act.b[z]&bb == 0 {
   791  		return vreg
   792  	}
   793  	var r1 *Reg
   794  	var f1 *Flow
   795  	for {
   796  		if r.refbehind.b[z]&bb == 0 {
   797  			break
   798  		}
   799  		f1 = f.P1
   800  		if f1 == nil {
   801  			break
   802  		}
   803  		r1 = f1.Data.(*Reg)
   804  		if r1.refahead.b[z]&bb == 0 {
   805  			break
   806  		}
   807  		if r1.act.b[z]&bb == 0 {
   808  			break
   809  		}
   810  		f = f1
   811  		r = r1
   812  	}
   813  
   814  	for {
   815  		if Debug['R'] != 0 && Debug['v'] != 0 {
   816  			fmt.Printf("  paint2 %d %v\n", depth, f.Prog)
   817  		}
   818  
   819  		r.act.b[z] &^= bb
   820  
   821  		vreg |= r.regu
   822  
   823  		if r.refbehind.b[z]&bb != 0 {
   824  			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
   825  				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
   826  					vreg |= paint2(f1, bn, depth+1)
   827  				}
   828  			}
   829  		}
   830  
   831  		if r.refahead.b[z]&bb == 0 {
   832  			break
   833  		}
   834  		f1 = f.S2
   835  		if f1 != nil {
   836  			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
   837  				vreg |= paint2(f1, bn, depth+1)
   838  			}
   839  		}
   840  		f = f.S1
   841  		if f == nil {
   842  			break
   843  		}
   844  		r = f.Data.(*Reg)
   845  		if r.act.b[z]&bb == 0 {
   846  			break
   847  		}
   848  		if r.refbehind.b[z]&bb == 0 {
   849  			break
   850  		}
   851  	}
   852  
   853  	return vreg
   854  }
   855  
   856  func paint3(f *Flow, bn int, rb uint64, rn int) {
   857  	z := bn / 64
   858  	bb := uint64(1 << uint(bn%64))
   859  	r := f.Data.(*Reg)
   860  	if r.act.b[z]&bb != 0 {
   861  		return
   862  	}
   863  	var r1 *Reg
   864  	var f1 *Flow
   865  	for {
   866  		if r.refbehind.b[z]&bb == 0 {
   867  			break
   868  		}
   869  		f1 = f.P1
   870  		if f1 == nil {
   871  			break
   872  		}
   873  		r1 = f1.Data.(*Reg)
   874  		if r1.refahead.b[z]&bb == 0 {
   875  			break
   876  		}
   877  		if r1.act.b[z]&bb != 0 {
   878  			break
   879  		}
   880  		f = f1
   881  		r = r1
   882  	}
   883  
   884  	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
   885  		addmove(f, bn, rn, 0)
   886  	}
   887  	var p *obj.Prog
   888  	for {
   889  		r.act.b[z] |= bb
   890  		p = f.Prog
   891  
   892  		if r.use1.b[z]&bb != 0 {
   893  			if Debug['R'] != 0 && Debug['v'] != 0 {
   894  				fmt.Printf("%v", p)
   895  			}
   896  			addreg(&p.From, rn)
   897  			if Debug['R'] != 0 && Debug['v'] != 0 {
   898  				fmt.Printf(" ===change== %v\n", p)
   899  			}
   900  		}
   901  
   902  		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
   903  			if Debug['R'] != 0 && Debug['v'] != 0 {
   904  				fmt.Printf("%v", p)
   905  			}
   906  			addreg(&p.To, rn)
   907  			if Debug['R'] != 0 && Debug['v'] != 0 {
   908  				fmt.Printf(" ===change== %v\n", p)
   909  			}
   910  		}
   911  
   912  		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
   913  			addmove(f, bn, rn, 1)
   914  		}
   915  		r.regu |= rb
   916  
   917  		if r.refbehind.b[z]&bb != 0 {
   918  			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
   919  				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
   920  					paint3(f1, bn, rb, rn)
   921  				}
   922  			}
   923  		}
   924  
   925  		if r.refahead.b[z]&bb == 0 {
   926  			break
   927  		}
   928  		f1 = f.S2
   929  		if f1 != nil {
   930  			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
   931  				paint3(f1, bn, rb, rn)
   932  			}
   933  		}
   934  		f = f.S1
   935  		if f == nil {
   936  			break
   937  		}
   938  		r = f.Data.(*Reg)
   939  		if r.act.b[z]&bb != 0 {
   940  			break
   941  		}
   942  		if r.refbehind.b[z]&bb == 0 {
   943  			break
   944  		}
   945  	}
   946  }
   947  
   948  func addreg(a *obj.Addr, rn int) {
   949  	a.Sym = nil
   950  	a.Node = nil
   951  	a.Offset = 0
   952  	a.Type = obj.TYPE_REG
   953  	a.Reg = int16(rn)
   954  	a.Name = 0
   955  
   956  	Ostats.Ncvtreg++
   957  }
   958  
   959  func dumpone(f *Flow, isreg int) {
   960  	fmt.Printf("%d:%v", f.Loop, f.Prog)
   961  	if isreg != 0 {
   962  		r := f.Data.(*Reg)
   963  		var bit Bits
   964  		for z := 0; z < BITS; z++ {
   965  			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
   966  		}
   967  		if bany(&bit) {
   968  			fmt.Printf("\t")
   969  			if bany(&r.set) {
   970  				fmt.Printf(" s:%v", &r.set)
   971  			}
   972  			if bany(&r.use1) {
   973  				fmt.Printf(" u1:%v", &r.use1)
   974  			}
   975  			if bany(&r.use2) {
   976  				fmt.Printf(" u2:%v", &r.use2)
   977  			}
   978  			if bany(&r.refbehind) {
   979  				fmt.Printf(" rb:%v ", &r.refbehind)
   980  			}
   981  			if bany(&r.refahead) {
   982  				fmt.Printf(" ra:%v ", &r.refahead)
   983  			}
   984  			if bany(&r.calbehind) {
   985  				fmt.Printf(" cb:%v ", &r.calbehind)
   986  			}
   987  			if bany(&r.calahead) {
   988  				fmt.Printf(" ca:%v ", &r.calahead)
   989  			}
   990  			if bany(&r.regdiff) {
   991  				fmt.Printf(" d:%v ", &r.regdiff)
   992  			}
   993  			if bany(&r.act) {
   994  				fmt.Printf(" a:%v ", &r.act)
   995  			}
   996  		}
   997  	}
   998  
   999  	fmt.Printf("\n")
  1000  }
  1001  
  1002  func Dumpit(str string, r0 *Flow, isreg int) {
  1003  	var r1 *Flow
  1004  
  1005  	fmt.Printf("\n%s\n", str)
  1006  	for r := r0; r != nil; r = r.Link {
  1007  		dumpone(r, isreg)
  1008  		r1 = r.P2
  1009  		if r1 != nil {
  1010  			fmt.Printf("\tpred:")
  1011  			for ; r1 != nil; r1 = r1.P2link {
  1012  				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
  1013  			}
  1014  			if r.P1 != nil {
  1015  				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
  1016  			} else {
  1017  				fmt.Printf(" (only)")
  1018  			}
  1019  			fmt.Printf("\n")
  1020  		}
  1021  
  1022  		// Print successors if it's not just the next one
  1023  		if r.S1 != r.Link || r.S2 != nil {
  1024  			fmt.Printf("\tsucc:")
  1025  			if r.S1 != nil {
  1026  				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
  1027  			}
  1028  			if r.S2 != nil {
  1029  				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
  1030  			}
  1031  			fmt.Printf("\n")
  1032  		}
  1033  	}
  1034  }
  1035  
  1036  func regopt(firstp *obj.Prog) {
  1037  	mergetemp(firstp)
  1038  
  1039  	/*
  1040  	 * control flow is more complicated in generated go code
  1041  	 * than in generated c code.  define pseudo-variables for
  1042  	 * registers, so we have complete register usage information.
  1043  	 */
  1044  	var nreg int
  1045  	regnames := Thearch.Regnames(&nreg)
  1046  
  1047  	nvar = nreg
  1048  	for i := 0; i < nreg; i++ {
  1049  		vars[i] = Var{}
  1050  	}
  1051  	for i := 0; i < nreg; i++ {
  1052  		if regnodes[i] == nil {
  1053  			regnodes[i] = newname(Lookup(regnames[i]))
  1054  		}
  1055  		vars[i].node = regnodes[i]
  1056  	}
  1057  
  1058  	regbits = Thearch.Excludedregs()
  1059  	externs = zbits
  1060  	params = zbits
  1061  	consts = zbits
  1062  	addrs = zbits
  1063  	ivar = zbits
  1064  	ovar = zbits
  1065  
  1066  	/*
  1067  	 * pass 1
  1068  	 * build aux data structure
  1069  	 * allocate pcs
  1070  	 * find use and set of variables
  1071  	 */
  1072  	g := Flowstart(firstp, func() interface{} { return new(Reg) })
  1073  	if g == nil {
  1074  		for i := 0; i < nvar; i++ {
  1075  			vars[i].node.SetOpt(nil)
  1076  		}
  1077  		return
  1078  	}
  1079  
  1080  	firstf := g.Start
  1081  
  1082  	for f := firstf; f != nil; f = f.Link {
  1083  		p := f.Prog
  1084  		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
  1085  			continue
  1086  		}
  1087  
  1088  		// Avoid making variables for direct-called functions.
  1089  		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
  1090  			continue
  1091  		}
  1092  
  1093  		// from vs to doesn't matter for registers.
  1094  		r := f.Data.(*Reg)
  1095  		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
  1096  		r.set.b[0] |= p.Info.Regset
  1097  
  1098  		bit := mkvar(f, &p.From)
  1099  		if bany(&bit) {
  1100  			if p.Info.Flags&LeftAddr != 0 {
  1101  				setaddrs(bit)
  1102  			}
  1103  			if p.Info.Flags&LeftRead != 0 {
  1104  				for z := 0; z < BITS; z++ {
  1105  					r.use1.b[z] |= bit.b[z]
  1106  				}
  1107  			}
  1108  			if p.Info.Flags&LeftWrite != 0 {
  1109  				for z := 0; z < BITS; z++ {
  1110  					r.set.b[z] |= bit.b[z]
  1111  				}
  1112  			}
  1113  		}
  1114  
  1115  		// Compute used register for reg
  1116  		if p.Info.Flags&RegRead != 0 {
  1117  			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
  1118  		}
  1119  
  1120  		// Currently we never generate three register forms.
  1121  		// If we do, this will need to change.
  1122  		if p.From3Type() != obj.TYPE_NONE {
  1123  			Fatalf("regopt not implemented for from3")
  1124  		}
  1125  
  1126  		bit = mkvar(f, &p.To)
  1127  		if bany(&bit) {
  1128  			if p.Info.Flags&RightAddr != 0 {
  1129  				setaddrs(bit)
  1130  			}
  1131  			if p.Info.Flags&RightRead != 0 {
  1132  				for z := 0; z < BITS; z++ {
  1133  					r.use2.b[z] |= bit.b[z]
  1134  				}
  1135  			}
  1136  			if p.Info.Flags&RightWrite != 0 {
  1137  				for z := 0; z < BITS; z++ {
  1138  					r.set.b[z] |= bit.b[z]
  1139  				}
  1140  			}
  1141  		}
  1142  	}
  1143  
  1144  	for i := 0; i < nvar; i++ {
  1145  		v := &vars[i]
  1146  		if v.addr != 0 {
  1147  			bit := blsh(uint(i))
  1148  			for z := 0; z < BITS; z++ {
  1149  				addrs.b[z] |= bit.b[z]
  1150  			}
  1151  		}
  1152  
  1153  		if Debug['R'] != 0 && Debug['v'] != 0 {
  1154  			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(int(v.etype), 0), v.width, v.node, v.offset)
  1155  		}
  1156  	}
  1157  
  1158  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1159  		Dumpit("pass1", firstf, 1)
  1160  	}
  1161  
  1162  	/*
  1163  	 * pass 2
  1164  	 * find looping structure
  1165  	 */
  1166  	flowrpo(g)
  1167  
  1168  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1169  		Dumpit("pass2", firstf, 1)
  1170  	}
  1171  
  1172  	/*
  1173  	 * pass 2.5
  1174  	 * iterate propagating fat vardef covering forward
  1175  	 * r->act records vars with a VARDEF since the last CALL.
  1176  	 * (r->act will be reused in pass 5 for something else,
  1177  	 * but we'll be done with it by then.)
  1178  	 */
  1179  	active := 0
  1180  
  1181  	for f := firstf; f != nil; f = f.Link {
  1182  		f.Active = 0
  1183  		r := f.Data.(*Reg)
  1184  		r.act = zbits
  1185  	}
  1186  
  1187  	for f := firstf; f != nil; f = f.Link {
  1188  		p := f.Prog
  1189  		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
  1190  			active++
  1191  			walkvardef(p.To.Node.(*Node), f, active)
  1192  		}
  1193  	}
  1194  
  1195  	/*
  1196  	 * pass 3
  1197  	 * iterate propagating usage
  1198  	 * 	back until flow graph is complete
  1199  	 */
  1200  	var f1 *Flow
  1201  	var i int
  1202  	var f *Flow
  1203  loop1:
  1204  	change = 0
  1205  
  1206  	for f = firstf; f != nil; f = f.Link {
  1207  		f.Active = 0
  1208  	}
  1209  	for f = firstf; f != nil; f = f.Link {
  1210  		if f.Prog.As == obj.ARET {
  1211  			prop(f, zbits, zbits)
  1212  		}
  1213  	}
  1214  
  1215  	/* pick up unreachable code */
  1216  loop11:
  1217  	i = 0
  1218  
  1219  	for f = firstf; f != nil; f = f1 {
  1220  		f1 = f.Link
  1221  		if f1 != nil && f1.Active != 0 && f.Active == 0 {
  1222  			prop(f, zbits, zbits)
  1223  			i = 1
  1224  		}
  1225  	}
  1226  
  1227  	if i != 0 {
  1228  		goto loop11
  1229  	}
  1230  	if change != 0 {
  1231  		goto loop1
  1232  	}
  1233  
  1234  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1235  		Dumpit("pass3", firstf, 1)
  1236  	}
  1237  
  1238  	/*
  1239  	 * pass 4
  1240  	 * iterate propagating register/variable synchrony
  1241  	 * 	forward until graph is complete
  1242  	 */
  1243  loop2:
  1244  	change = 0
  1245  
  1246  	for f = firstf; f != nil; f = f.Link {
  1247  		f.Active = 0
  1248  	}
  1249  	synch(firstf, zbits)
  1250  	if change != 0 {
  1251  		goto loop2
  1252  	}
  1253  
  1254  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1255  		Dumpit("pass4", firstf, 1)
  1256  	}
  1257  
  1258  	/*
  1259  	 * pass 4.5
  1260  	 * move register pseudo-variables into regu.
  1261  	 */
  1262  	mask := uint64((1 << uint(nreg)) - 1)
  1263  	for f := firstf; f != nil; f = f.Link {
  1264  		r := f.Data.(*Reg)
  1265  		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
  1266  		r.set.b[0] &^= mask
  1267  		r.use1.b[0] &^= mask
  1268  		r.use2.b[0] &^= mask
  1269  		r.refbehind.b[0] &^= mask
  1270  		r.refahead.b[0] &^= mask
  1271  		r.calbehind.b[0] &^= mask
  1272  		r.calahead.b[0] &^= mask
  1273  		r.regdiff.b[0] &^= mask
  1274  		r.act.b[0] &^= mask
  1275  	}
  1276  
  1277  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1278  		Dumpit("pass4.5", firstf, 1)
  1279  	}
  1280  
  1281  	/*
  1282  	 * pass 5
  1283  	 * isolate regions
  1284  	 * calculate costs (paint1)
  1285  	 */
  1286  	var bit Bits
  1287  	if f := firstf; f != nil {
  1288  		r := f.Data.(*Reg)
  1289  		for z := 0; z < BITS; z++ {
  1290  			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
  1291  		}
  1292  		if bany(&bit) && !f.Refset {
  1293  			// should never happen - all variables are preset
  1294  			if Debug['w'] != 0 {
  1295  				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
  1296  			}
  1297  			f.Refset = true
  1298  		}
  1299  	}
  1300  
  1301  	for f := firstf; f != nil; f = f.Link {
  1302  		(f.Data.(*Reg)).act = zbits
  1303  	}
  1304  	nregion = 0
  1305  	region = region[:0]
  1306  	var rgp *Rgn
  1307  	for f := firstf; f != nil; f = f.Link {
  1308  		r := f.Data.(*Reg)
  1309  		for z := 0; z < BITS; z++ {
  1310  			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
  1311  		}
  1312  		if bany(&bit) && !f.Refset {
  1313  			if Debug['w'] != 0 {
  1314  				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
  1315  			}
  1316  			f.Refset = true
  1317  			Thearch.Excise(f)
  1318  		}
  1319  
  1320  		for z := 0; z < BITS; z++ {
  1321  			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
  1322  		}
  1323  		for bany(&bit) {
  1324  			i = bnum(&bit)
  1325  			change = 0
  1326  			paint1(f, i)
  1327  			biclr(&bit, uint(i))
  1328  			if change <= 0 {
  1329  				continue
  1330  			}
  1331  			if nregion >= MaxRgn {
  1332  				nregion++
  1333  				continue
  1334  			}
  1335  
  1336  			region = append(region, Rgn{
  1337  				enter: f,
  1338  				cost:  int16(change),
  1339  				varno: int16(i),
  1340  			})
  1341  			nregion++
  1342  		}
  1343  	}
  1344  
  1345  	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
  1346  		Warn("regions: %d\n", nregion)
  1347  	}
  1348  	if nregion >= MaxRgn {
  1349  		if Debug['v'] != 0 {
  1350  			Warn("too many regions: %d\n", nregion)
  1351  		}
  1352  		nregion = MaxRgn
  1353  	}
  1354  
  1355  	sort.Sort(rcmp(region[:nregion]))
  1356  
  1357  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1358  		Dumpit("pass5", firstf, 1)
  1359  	}
  1360  
  1361  	/*
  1362  	 * pass 6
  1363  	 * determine used registers (paint2)
  1364  	 * replace code (paint3)
  1365  	 */
  1366  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1367  		fmt.Printf("\nregisterizing\n")
  1368  	}
  1369  	var usedreg uint64
  1370  	var vreg uint64
  1371  	for i := 0; i < nregion; i++ {
  1372  		rgp = &region[i]
  1373  		if Debug['R'] != 0 && Debug['v'] != 0 {
  1374  			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
  1375  		}
  1376  		bit = blsh(uint(rgp.varno))
  1377  		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
  1378  		vreg = allreg(usedreg, rgp)
  1379  		if rgp.regno != 0 {
  1380  			if Debug['R'] != 0 && Debug['v'] != 0 {
  1381  				v := &vars[rgp.varno]
  1382  				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(int(v.etype), 0), obj.Rconv(int(rgp.regno)), usedreg, vreg)
  1383  			}
  1384  
  1385  			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
  1386  		}
  1387  	}
  1388  
  1389  	/*
  1390  	 * free aux structures. peep allocates new ones.
  1391  	 */
  1392  	for i := 0; i < nvar; i++ {
  1393  		vars[i].node.SetOpt(nil)
  1394  	}
  1395  	Flowend(g)
  1396  	firstf = nil
  1397  
  1398  	if Debug['R'] != 0 && Debug['v'] != 0 {
  1399  		// Rebuild flow graph, since we inserted instructions
  1400  		g := Flowstart(firstp, nil)
  1401  		firstf = g.Start
  1402  		Dumpit("pass6", firstf, 0)
  1403  		Flowend(g)
  1404  		firstf = nil
  1405  	}
  1406  
  1407  	/*
  1408  	 * pass 7
  1409  	 * peep-hole on basic block
  1410  	 */
  1411  	if Debug['R'] == 0 || Debug['P'] != 0 {
  1412  		Thearch.Peep(firstp)
  1413  	}
  1414  
  1415  	/*
  1416  	 * eliminate nops
  1417  	 */
  1418  	for p := firstp; p != nil; p = p.Link {
  1419  		for p.Link != nil && p.Link.As == obj.ANOP {
  1420  			p.Link = p.Link.Link
  1421  		}
  1422  		if p.To.Type == obj.TYPE_BRANCH {
  1423  			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
  1424  				p.To.Val = p.To.Val.(*obj.Prog).Link
  1425  			}
  1426  		}
  1427  	}
  1428  
  1429  	if Debug['R'] != 0 {
  1430  		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
  1431  			fmt.Printf("\nstats\n")
  1432  		}
  1433  
  1434  		if Ostats.Ncvtreg != 0 {
  1435  			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
  1436  		}
  1437  		if Ostats.Nspill != 0 {
  1438  			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
  1439  		}
  1440  		if Ostats.Nreload != 0 {
  1441  			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
  1442  		}
  1443  		if Ostats.Ndelmov != 0 {
  1444  			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
  1445  		}
  1446  		if Ostats.Nvar != 0 {
  1447  			fmt.Printf("\t%4d var\n", Ostats.Nvar)
  1448  		}
  1449  		if Ostats.Naddr != 0 {
  1450  			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
  1451  		}
  1452  
  1453  		Ostats = OptStats{}
  1454  	}
  1455  }
  1456  
  1457  // bany reports whether any bits in a are set.
  1458  func bany(a *Bits) bool {
  1459  	for _, x := range &a.b { // & to avoid making a copy of a.b
  1460  		if x != 0 {
  1461  			return true
  1462  		}
  1463  	}
  1464  	return false
  1465  }
  1466  
  1467  // bnum reports the lowest index of a 1 bit in a.
  1468  func bnum(a *Bits) int {
  1469  	for i, x := range &a.b { // & to avoid making a copy of a.b
  1470  		if x != 0 {
  1471  			return 64*i + Bitno(x)
  1472  		}
  1473  	}
  1474  
  1475  	Fatalf("bad in bnum")
  1476  	return 0
  1477  }
  1478  
  1479  // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
  1480  func blsh(n uint) Bits {
  1481  	c := zbits
  1482  	c.b[n/64] = 1 << (n % 64)
  1483  	return c
  1484  }
  1485  
  1486  // btest reports whether bit n is 1.
  1487  func btest(a *Bits, n uint) bool {
  1488  	return a.b[n/64]&(1<<(n%64)) != 0
  1489  }
  1490  
  1491  // biset sets bit n to 1.
  1492  func biset(a *Bits, n uint) {
  1493  	a.b[n/64] |= 1 << (n % 64)
  1494  }
  1495  
  1496  // biclr sets bit n to 0.
  1497  func biclr(a *Bits, n uint) {
  1498  	a.b[n/64] &^= (1 << (n % 64))
  1499  }
  1500  
  1501  // Bitno reports the lowest index of a 1 bit in b.
  1502  // It calls Fatalf if there is no 1 bit.
  1503  func Bitno(b uint64) int {
  1504  	if b == 0 {
  1505  		Fatalf("bad in bitno")
  1506  	}
  1507  	n := 0
  1508  	if b&(1<<32-1) == 0 {
  1509  		n += 32
  1510  		b >>= 32
  1511  	}
  1512  	if b&(1<<16-1) == 0 {
  1513  		n += 16
  1514  		b >>= 16
  1515  	}
  1516  	if b&(1<<8-1) == 0 {
  1517  		n += 8
  1518  		b >>= 8
  1519  	}
  1520  	if b&(1<<4-1) == 0 {
  1521  		n += 4
  1522  		b >>= 4
  1523  	}
  1524  	if b&(1<<2-1) == 0 {
  1525  		n += 2
  1526  		b >>= 2
  1527  	}
  1528  	if b&1 == 0 {
  1529  		n++
  1530  	}
  1531  	return n
  1532  }
  1533  
  1534  // String returns a space-separated list of the variables represented by bits.
  1535  func (bits Bits) String() string {
  1536  	// Note: This method takes a value receiver, both for convenience
  1537  	// and to make it safe to modify the bits as we process them.
  1538  	// Even so, most prints above use &bits, because then the value
  1539  	// being stored in the interface{} is a pointer and does not require
  1540  	// an allocation and copy to create the interface{}.
  1541  	var buf bytes.Buffer
  1542  	sep := ""
  1543  	for bany(&bits) {
  1544  		i := bnum(&bits)
  1545  		buf.WriteString(sep)
  1546  		sep = " "
  1547  		v := &vars[i]
  1548  		if v.node == nil || v.node.Sym == nil {
  1549  			fmt.Fprintf(&buf, "$%d", i)
  1550  		} else {
  1551  			fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
  1552  			if v.offset != 0 {
  1553  				fmt.Fprintf(&buf, "%+d", int64(v.offset))
  1554  			}
  1555  		}
  1556  		biclr(&bits, uint(i))
  1557  	}
  1558  	return buf.String()
  1559  }