github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/gc/reg.go (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package gc
    32  
    33  import (
    34  	"bytes"
    35  	"cmd/internal/obj"
    36  	"cmd/internal/sys"
    37  	"fmt"
    38  	"sort"
    39  	"strings"
    40  )
    41  
// A Var represents a single variable that may be stored in a register.
// That variable may itself correspond to a hardware register,
// to represent the use of registers in the unoptimized instruction stream.
type Var struct {
	offset     int64 // frame offset of this word (from the operand's Offset)
	node       *Node // the Go variable (ONAME node) this word belongs to
	nextinnode *Var  // next tracked word in the same node; list head is node.Opt()
	width      int   // width in bytes of this word
	id         int   // index in vars
	name       int8  // storage class (obj.NAME_AUTO or obj.NAME_PARAM; see mkvar)
	etype      EType // element type of this word
	addr       int8  // nonzero disables registerization (1 = overlap/address taken, 2 = set by setaddrs)
}
    55  
// Bits represents a set of Vars, stored as a bit set of var numbers
// (the index in vars, or equivalently v.id).
type Bits struct {
	b [BITS]uint64 // bit z*64+i corresponds to vars[z*64+i]
}
    61  
const (
	BITS = 3         // number of 64-bit words in a Bits set
	NVAR = BITS * 64 // maximum number of variables tracked per function
)
    66  
// Per-function register-allocation state, reset by regopt.
var (
	vars [NVAR]Var // variables under consideration
	nvar int       // number of vars

	regbits uint64 // bits for hardware registers

	zbits   Bits // zero
	externs Bits // global variables
	params  Bits // function parameters and results
	ivar    Bits // function parameters (inputs)
	ovar    Bits // function results (outputs)
	consts  Bits // constant values
	addrs   Bits // variables with address taken
)
    81  
// A Reg is a wrapper around a single Prog (one instruction) that holds
// register optimization information while the optimizer runs.
// r->prog is the instruction.
type Reg struct {
	set  Bits // regopt variables written by this instruction.
	use1 Bits // regopt variables read by prog->from.
	use2 Bits // regopt variables read by prog->to.

	// refahead/refbehind are the regopt variables whose current
	// value may be used in the following/preceding instructions
	// up to a CALL (or the value is clobbered).
	refbehind Bits
	refahead  Bits

	// calahead/calbehind are similar, but for variables in
	// instructions that are reachable after hitting at least one
	// CALL.
	calbehind Bits
	calahead  Bits

	regdiff Bits   // variables that may need a store back to memory here (computed by synch)
	act     Bits   // scratch set of active/visited marks, used by walkvardef and the paint passes
	regu    uint64 // register used bitmap
}
   106  
// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches.
// A single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
type Rgn struct {
	enter *Flow // first instruction of the region
	cost  int16 // estimated benefit of registerizing the region
	varno int16 // index into vars of the variable
	regno int16 // hardware register chosen by allreg (0 if none)
}
   122  
// The Plan 9 C compilers used a limit of 600 regions,
// but the yacc-generated parser in y.go has 3100 regions.
// We set MaxRgn large enough to handle that.
// There's not a huge cost to having too many regions:
// the main processing traces the live area for each variable,
// which is limited by the number of variables times the area,
// not the raw region count. If there are many regions, they
// are almost certainly small and easy to trace.
// The only operation that scales with region count is the
// sorting by cost, which uses sort.Sort and is therefore
// guaranteed n log n.
const MaxRgn = 6000 // capacity of the region table
   135  
var (
	region  []Rgn // candidate regions, sorted by descending cost (see rcmp)
	nregion int   // number of valid entries in region
)
   140  
   141  type rcmp []Rgn
   142  
// Len implements sort.Interface.
func (x rcmp) Len() int {
	return len(x)
}
   146  
// Swap implements sort.Interface.
func (x rcmp) Swap(i, j int) {
	x[i], x[j] = x[j], x[i]
}
   150  
   151  func (x rcmp) Less(i, j int) bool {
   152  	p1 := &x[i]
   153  	p2 := &x[j]
   154  	if p1.cost != p2.cost {
   155  		return int(p2.cost)-int(p1.cost) < 0
   156  	}
   157  	if p1.varno != p2.varno {
   158  		return int(p2.varno)-int(p1.varno) < 0
   159  	}
   160  	if p1.enter != p2.enter {
   161  		return int(p2.enter.Id-p1.enter.Id) < 0
   162  	}
   163  	return false
   164  }
   165  
   166  func setaddrs(bit Bits) {
   167  	var i int
   168  	var n int
   169  	var v *Var
   170  	var node *Node
   171  
   172  	for bany(&bit) {
   173  		// convert each bit to a variable
   174  		i = bnum(&bit)
   175  
   176  		node = vars[i].node
   177  		n = int(vars[i].name)
   178  		biclr(&bit, uint(i))
   179  
   180  		// disable all pieces of that variable
   181  		for i = 0; i < nvar; i++ {
   182  			v = &vars[i]
   183  			if v.node == node && int(v.name) == n {
   184  				v.addr = 2
   185  			}
   186  		}
   187  	}
   188  }
   189  
   190  var regnodes [64]*Node
   191  
   192  func walkvardef(n *Node, f *Flow, active int) {
   193  	var f1 *Flow
   194  	var bn int
   195  	var v *Var
   196  
   197  	for f1 = f; f1 != nil; f1 = f1.S1 {
   198  		if f1.Active == int32(active) {
   199  			break
   200  		}
   201  		f1.Active = int32(active)
   202  		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
   203  			break
   204  		}
   205  		for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
   206  			bn = v.id
   207  			biset(&(f1.Data.(*Reg)).act, uint(bn))
   208  		}
   209  
   210  		if f1.Prog.As == obj.ACALL {
   211  			break
   212  		}
   213  	}
   214  
   215  	for f2 := f; f2 != f1; f2 = f2.S1 {
   216  		if f2.S2 != nil {
   217  			walkvardef(n, f2.S2, active)
   218  		}
   219  	}
   220  }
   221  
// addmove inserts a move between variable bn and register rn just
// after instruction r. With f != 0 the new instruction stores the
// register into the variable's memory; with f == 0 the operands are
// swapped so it loads the variable into the register.
func addmove(r *Flow, bn int, rn int, f int) {
	// Allocate a fresh Prog and splice it into the list after r.Prog.
	p1 := Ctxt.NewProg()
	Clearp(p1)
	p1.Pc = 9999 // placeholder pc

	p := r.Prog
	p1.Link = p.Link
	p.Link = p1
	p1.Lineno = p.Lineno

	v := &vars[bn]

	// Build the memory operand describing variable bn.
	a := &p1.To
	a.Offset = v.offset
	a.Etype = uint8(v.etype)
	a.Type = obj.TYPE_MEM
	a.Name = v.name
	a.Node = v.node
	a.Sym = Linksym(v.node.Sym)

	/* NOTE(rsc): 9g did
	if(a->etype == TARRAY)
		a->type = TYPE_ADDR;
	else if(a->sym == nil)
		a->type = TYPE_CONST;
	*/
	p1.As = Thearch.Optoas(OAS, Types[uint8(v.etype)])

	// TODO(rsc): Remove special case here.
	if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) && v.etype == TBOOL {
		p1.As = Thearch.Optoas(OAS, Types[TUINT8])
	}
	p1.From.Type = obj.TYPE_REG
	p1.From.Reg = int16(rn)
	p1.From.Name = obj.NAME_NONE
	if f == 0 {
		// Load instead of store: swap the operands so the memory
		// reference is the source and the register is the destination.
		p1.From = *a
		*a = obj.Addr{}
		a.Type = obj.TYPE_REG
		a.Reg = int16(rn)
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("%v ===add=== %v\n", p, p1)
	}
	Ostats.Nspill++
}
   271  
   272  func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
   273  	t1 := o1 + int64(w1)
   274  	t2 := o2 + int64(w2)
   275  
   276  	if t1 <= o2 || t2 <= o1 {
   277  		return false
   278  	}
   279  
   280  	return true
   281  }
   282  
// mkvar returns the Bits naming the variable (or register) referenced
// by operand a of instruction f, allocating a new Var in vars the
// first time a given word is seen. As a side effect it records
// register usage in f's Reg and classifies new variables into the
// externs/params/ivar/ovar sets. It returns zbits when the operand
// cannot be tracked as a registerization candidate.
func mkvar(f *Flow, a *obj.Addr) Bits {
	// mark registers used
	if a.Type == obj.TYPE_NONE {
		return zbits
	}

	r := f.Data.(*Reg)
	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB

	var n int
	switch a.Type {
	default:
		// Not a memory reference: at most a plain register use.
		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
		if regu == 0 {
			return zbits
		}
		bit := zbits
		bit.b[0] = regu
		return bit

		// TODO(rsc): Remove special case here.
	case obj.TYPE_ADDR:
		var bit Bits
		if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) {
			goto memcase
		}
		// Address taken: analyze the operand as a memory reference to
		// find its Var bits, mark the whole variable address-taken,
		// and report no candidate.
		a.Type = obj.TYPE_MEM
		bit = mkvar(f, a)
		setaddrs(bit)
		a.Type = obj.TYPE_ADDR
		Ostats.Naddr++
		return zbits

	memcase:
		fallthrough

	case obj.TYPE_MEM:
		if r != nil {
			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
		}

		/* NOTE: 5g did
		if(r->f.prog->scond & (C_PBIT|C_WBIT))
			r->set.b[0] |= RtoB(a->reg);
		*/
		switch a.Name {
		default:
			// Note: This case handles NAME_EXTERN and NAME_STATIC.
			// We treat these as requiring eager writes to memory, due to
			// the possibility of a fault handler looking at them, so there is
			// not much point in registerizing the loads.
			// If we later choose the set of candidate variables from a
			// larger list, these cases could be deprioritized instead of
			// removed entirely.
			return zbits

		case obj.NAME_PARAM,
			obj.NAME_AUTO:
			n = int(a.Name)
		}
	}

	// Only named, non-blank Go variables can be tracked.
	node, _ := a.Node.(*Node)
	if node == nil || node.Op != ONAME || node.Orig == nil {
		return zbits
	}
	node = node.Orig
	if node.Orig != node {
		Fatalf("%v: bad node", Ctxt.Dconv(a))
	}
	if node.Sym == nil || node.Sym.Name[0] == '.' {
		return zbits
	}
	et := EType(a.Etype)
	o := a.Offset
	w := a.Width
	if w < 0 {
		Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
	}

	// Look for an existing Var describing this exact word, disabling
	// any distinct words it overlaps along the way.
	flag := 0
	var v *Var
	for i := 0; i < nvar; i++ {
		v = &vars[i]
		if v.node == node && int(v.name) == n {
			if v.offset == o {
				if v.etype == et {
					if int64(v.width) == w {
						// TODO(rsc): Remove special case for arm here.
						if flag == 0 || Thearch.LinkArch.Family != sys.ARM {
							return blsh(uint(i))
						}
					}
				}
			}

			// if they overlap, disable both
			if overlap_reg(v.offset, v.width, o, int(w)) {
				//				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
				v.addr = 1

				flag = 1
			}
		}
	}

	switch et {
	case 0, TFUNC:
		return zbits
	}

	if nvar >= NVAR {
		// Out of tracking slots; this word cannot be registerized.
		if Debug['w'] > 1 && node != nil {
			Fatalf("variable not optimized: %v", Nconv(node, FmtSharp))
		}
		if Debug['v'] > 0 {
			Warn("variable not optimized: %v", Nconv(node, FmtSharp))
		}

		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		var v *Var
		for i := 0; i < nvar; i++ {
			v = &vars[i]
			if v.node == node {
				v.addr = 1
			}
		}

		return zbits
	}

	// Allocate and initialize a new Var for this word.
	i := nvar
	nvar++
	v = &vars[i]
	v.id = i
	v.offset = o
	v.name = int8(n)
	v.etype = et
	v.width = int(w)
	v.addr = int8(flag) // funny punning
	v.node = node

	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v.nextinnode, _ = node.Opt().(*Var)

	node.SetOpt(v)

	// Classify the new variable into the global sets.
	bit := blsh(uint(i))
	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
		for z := 0; z < BITS; z++ {
			externs.b[z] |= bit.b[z]
		}
	}
	if n == obj.NAME_PARAM {
		for z := 0; z < BITS; z++ {
			params.b[z] |= bit.b[z]
		}
	}

	if node.Class == PPARAM {
		for z := 0; z < BITS; z++ {
			ivar.b[z] |= bit.b[z]
		}
	}
	if node.Class == PPARAMOUT {
		for z := 0; z < BITS; z++ {
			ovar.b[z] |= bit.b[z]
		}
	}

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in plive.go does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.go:/^typecheckclosure and dcl.go:/^oldname.
	if node.Addrtaken {
		v.addr = 1
	}

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
		v.addr = 1
	}

	if Debug['R'] != 0 {
		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, et, o, w, Nconv(node, FmtSharp), Ctxt.Dconv(a), v.addr)
	}
	Ostats.Nvar++

	return bit
}
   497  
   498  var change int
   499  
// prop propagates use information backward from f along its P1
// predecessor chain, recursing into P2 predecessors of the walked
// path. ref accumulates variables whose value may be used before the
// next CALL; cal accumulates variables live across at least one CALL.
// The sets are merged into each instruction's refahead/calahead and
// recorded as its refbehind/calbehind; change is set to 1 whenever a
// set grows so the caller can iterate to a fixed point.
func prop(f *Flow, ref Bits, cal Bits) {
	var f1 *Flow
	var r1 *Reg
	var z int
	var i int
	var v *Var
	var v1 *Var

	for f1 = f; f1 != nil; f1 = f1.P1 {
		r1 = f1.Data.(*Reg)
		for z = 0; z < BITS; z++ {
			// Merge the accumulated sets into this instruction's
			// "ahead" sets, noting whether anything grew.
			ref.b[z] |= r1.refahead.b[z]
			if ref.b[z] != r1.refahead.b[z] {
				r1.refahead.b[z] = ref.b[z]
				change = 1
			}

			cal.b[z] |= r1.calahead.b[z]
			if cal.b[z] != r1.calahead.b[z] {
				r1.calahead.b[z] = cal.b[z]
				change = 1
			}
		}

		switch f1.Prog.As {
		case obj.ACALL:
			if Noreturn(f1.Prog) {
				break
			}

			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for z = 0; z < BITS; z++ {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
				ref.b[z] = 0
			}

			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for z = 0; z < BITS; z++ {
				if cal.b[z] == 0 {
					continue
				}
				for i = 0; i < 64; i++ {
					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
						continue
					}
					v = &vars[z*64+i]
					if v.node.Opt() == nil { // v represents fixed register, not Go variable
						continue
					}

					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1, _ = v.node.Opt().(*Var)

					if v == v1 || !btest(&cal, uint(v1.id)) {
						for ; v1 != nil; v1 = v1.nextinnode {
							biset(&cal, uint(v1.id))
						}
					}
				}
			}

		case obj.ATEXT:
			// Function entry: nothing is live before it.
			for z = 0; z < BITS; z++ {
				cal.b[z] = 0
				ref.b[z] = 0
			}

		case obj.ARET:
			// Function return: globals and results are live.
			for z = 0; z < BITS; z++ {
				cal.b[z] = externs.b[z] | ovar.b[z]
				ref.b[z] = 0
			}
		}

		// Step the sets backward across this instruction: values
		// written here stop being needed behind it, and values read
		// here become needed behind it.
		for z = 0; z < BITS; z++ {
			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
			r1.refbehind.b[z] = ref.b[z]
			r1.calbehind.b[z] = cal.b[z]
		}

		if f1.Active != 0 {
			break
		}
		f1.Active = 1
	}

	// Recurse into the other predecessors (P2 chains) of every node on
	// the path just walked.
	var r *Reg
	var f2 *Flow
	for ; f != f1; f = f.P1 {
		r = f.Data.(*Reg)
		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
			prop(f2, r.refbehind, r.calbehind)
		}
	}
}
   622  
// synch propagates dif — the set of variables that may need a store
// back to memory (presumably: register and memory copies may differ;
// regdiff is consumed that way by paint1/paint3) — forward from f
// along S1 links, recursing into S2 successors. The result accumulates
// into each instruction's regdiff; change is set to 1 when any regdiff
// grows, so the caller can iterate to a fixed point.
func synch(f *Flow, dif Bits) {
	var r1 *Reg
	var z int

	for f1 := f; f1 != nil; f1 = f1.S1 {
		r1 = f1.Data.(*Reg)
		for z = 0; z < BITS; z++ {
			// Drop bits freshly referenced ahead but not behind,
			// add bits set here, and merge with the recorded diff.
			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
			if dif.b[z] != r1.regdiff.b[z] {
				r1.regdiff.b[z] = dif.b[z]
				change = 1
			}
		}

		// Stop at a node already visited, but only after merging into
		// its regdiff above.
		if f1.Active != 0 {
			break
		}
		f1.Active = 1
		for z = 0; z < BITS; z++ {
			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
		}
		if f1.S2 != nil {
			synch(f1.S2, dif)
		}
	}
}
   649  
   650  func allreg(b uint64, r *Rgn) uint64 {
   651  	v := &vars[r.varno]
   652  	r.regno = 0
   653  	switch v.etype {
   654  	default:
   655  		Fatalf("unknown etype %d/%v", Bitno(b), v.etype)
   656  
   657  	case TINT8,
   658  		TUINT8,
   659  		TINT16,
   660  		TUINT16,
   661  		TINT32,
   662  		TUINT32,
   663  		TINT64,
   664  		TUINT64,
   665  		TINT,
   666  		TUINT,
   667  		TUINTPTR,
   668  		TBOOL,
   669  		TPTR32,
   670  		TPTR64:
   671  		i := Thearch.BtoR(^b)
   672  		if i != 0 && r.cost > 0 {
   673  			r.regno = int16(i)
   674  			return Thearch.RtoB(i)
   675  		}
   676  
   677  	case TFLOAT32, TFLOAT64:
   678  		i := Thearch.BtoF(^b)
   679  		if i != 0 && r.cost > 0 {
   680  			r.regno = int16(i)
   681  			return Thearch.FtoB(i)
   682  		}
   683  	}
   684  
   685  	return 0
   686  }
   687  
   688  func LOAD(r *Reg, z int) uint64 {
   689  	return ^r.refbehind.b[z] & r.refahead.b[z]
   690  }
   691  
   692  func STORE(r *Reg, z int) uint64 {
   693  	return ^r.calbehind.b[z] & r.calahead.b[z]
   694  }
   695  
// Cost parameters used by paint1 when estimating the benefit of
// keeping a variable in a register.
const (
	CLOAD = 5 // cost of load
	CREF  = 5 // cost of reference if not registerized
	LOOP  = 3 // loop execution count (applied in popt.go)
)
   702  
// paint1 is the first paint pass over the region containing f for
// variable bn (word z, bit bb). It rewinds along P1 links to the
// earliest instruction of the region, then walks the region forward,
// setting the act bit on each instruction and accumulating into the
// global change an estimate of the benefit of registerizing bn:
// +CREF per reference (weighted by loop depth), -CLOAD wherever a load
// or store would be required. P2 predecessors and S2 successors inside
// the region are visited recursively.
func paint1(f *Flow, bn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	var f1 *Flow
	var r1 *Reg
	// Rewind to the start of the region: keep stepping to the P1
	// predecessor while bn stays referenced and unvisited.
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	// Entering the region costs a load, unless the first reference
	// only writes the variable (set without use).
	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		change -= CLOAD * int(f.Loop)
	}

	for {
		r.act.b[z] |= bb

		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
			if r.use1.b[z]&bb != 0 {
				change += CREF * int(f.Loop)
			}
			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
				change += CREF * int(f.Loop)
			}
		}

		// A store back to memory would be required here.
		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			change -= CLOAD * int(f.Loop)
		}

		// Visit other predecessors that flow into this point.
		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint1(f1, bn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		// Visit the branch successor, then continue along the
		// fallthrough successor.
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint1(f1, bn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}
   781  
// paint2 is the second paint pass over the region for variable bn: it
// clears the act bits that paint1 set (walking the same region shape,
// but keyed on act bits being set) and returns the union of regbits
// with every regu bitmap in the region — the hardware registers that
// are NOT available for this region. depth is used only for debug
// output.
func paint2(f *Flow, bn int, depth int) uint64 {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	vreg := regbits
	r := f.Data.(*Reg)
	if r.act.b[z]&bb == 0 {
		return vreg
	}
	var r1 *Reg
	var f1 *Flow
	// Rewind to the start of the region (mirrors paint1, but the
	// region is now identified by act bits being set).
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb == 0 {
			break
		}
		f = f1
		r = r1
	}

	for {
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("  paint2 %d %v\n", depth, f.Prog)
		}

		// Clear the visit mark and collect registers used here.
		r.act.b[z] &^= bb

		vreg |= r.regu

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					vreg |= paint2(f1, bn, depth+1)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				vreg |= paint2(f1, bn, depth+1)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb == 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}

	return vreg
}
   852  
// paint3 is the final paint pass over the region for variable bn: it
// rewrites the region to keep bn in register rn (bitmap rb). A load is
// inserted where the region needs the value on entry (addmove, f=0),
// operands referring to bn are replaced with the register, a store back
// to memory is inserted wherever regdiff requires it (addmove, f=1),
// and rb is recorded in each instruction's regu.
func paint3(f *Flow, bn int, rb uint64, rn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	var r1 *Reg
	var f1 *Flow
	// Rewind to the start of the region (mirrors paint1).
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	// The value is needed on entry: insert a load into the register.
	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		addmove(f, bn, rn, 0)
	}
	var p *obj.Prog
	for {
		r.act.b[z] |= bb
		p = f.Prog

		// Substitute the register for the variable in the operands
		// that reference it.
		if r.use1.b[z]&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.From, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.To, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		// The memory copy must be refreshed here: insert a store.
		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			addmove(f, bn, rn, 1)
		}
		r.regu |= rb

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint3(f1, bn, rb, rn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint3(f1, bn, rb, rn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}
   944  
   945  func addreg(a *obj.Addr, rn int) {
   946  	a.Sym = nil
   947  	a.Node = nil
   948  	a.Offset = 0
   949  	a.Type = obj.TYPE_REG
   950  	a.Reg = int16(rn)
   951  	a.Name = 0
   952  
   953  	Ostats.Ncvtreg++
   954  }
   955  
   956  func dumpone(f *Flow, isreg int) {
   957  	fmt.Printf("%d:%v", f.Loop, f.Prog)
   958  	if isreg != 0 {
   959  		r := f.Data.(*Reg)
   960  		var bit Bits
   961  		for z := 0; z < BITS; z++ {
   962  			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
   963  		}
   964  		if bany(&bit) {
   965  			fmt.Printf("\t")
   966  			if bany(&r.set) {
   967  				fmt.Printf(" s:%v", &r.set)
   968  			}
   969  			if bany(&r.use1) {
   970  				fmt.Printf(" u1:%v", &r.use1)
   971  			}
   972  			if bany(&r.use2) {
   973  				fmt.Printf(" u2:%v", &r.use2)
   974  			}
   975  			if bany(&r.refbehind) {
   976  				fmt.Printf(" rb:%v ", &r.refbehind)
   977  			}
   978  			if bany(&r.refahead) {
   979  				fmt.Printf(" ra:%v ", &r.refahead)
   980  			}
   981  			if bany(&r.calbehind) {
   982  				fmt.Printf(" cb:%v ", &r.calbehind)
   983  			}
   984  			if bany(&r.calahead) {
   985  				fmt.Printf(" ca:%v ", &r.calahead)
   986  			}
   987  			if bany(&r.regdiff) {
   988  				fmt.Printf(" d:%v ", &r.regdiff)
   989  			}
   990  			if bany(&r.act) {
   991  				fmt.Printf(" a:%v ", &r.act)
   992  			}
   993  		}
   994  	}
   995  
   996  	fmt.Printf("\n")
   997  }
   998  
   999  func Dumpit(str string, r0 *Flow, isreg int) {
  1000  	var r1 *Flow
  1001  
  1002  	fmt.Printf("\n%s\n", str)
  1003  	for r := r0; r != nil; r = r.Link {
  1004  		dumpone(r, isreg)
  1005  		r1 = r.P2
  1006  		if r1 != nil {
  1007  			fmt.Printf("\tpred:")
  1008  			for ; r1 != nil; r1 = r1.P2link {
  1009  				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
  1010  			}
  1011  			if r.P1 != nil {
  1012  				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
  1013  			} else {
  1014  				fmt.Printf(" (only)")
  1015  			}
  1016  			fmt.Printf("\n")
  1017  		}
  1018  
  1019  		// Print successors if it's not just the next one
  1020  		if r.S1 != r.Link || r.S2 != nil {
  1021  			fmt.Printf("\tsucc:")
  1022  			if r.S1 != nil {
  1023  				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
  1024  			}
  1025  			if r.S2 != nil {
  1026  				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
  1027  			}
  1028  			fmt.Printf("\n")
  1029  		}
  1030  	}
  1031  }
  1032  
// regopt performs classic register allocation on the instruction list
// starting at firstp. It builds a flow graph, computes per-instruction
// use/set bit vectors for variables and registers, propagates liveness,
// isolates profitable regions, and rewrites instructions to keep
// variables in registers, finishing with nop elimination and an
// architecture-specific peephole pass.
func regopt(firstp *obj.Prog) {
	mergetemp(firstp)

	// control flow is more complicated in generated go code
	// than in generated c code.  define pseudo-variables for
	// registers, so we have complete register usage information.
	var nreg int
	regnames := Thearch.Regnames(&nreg)

	// The first nreg entries of vars are the register pseudo-variables.
	nvar = nreg
	for i := 0; i < nreg; i++ {
		vars[i] = Var{}
	}
	for i := 0; i < nreg; i++ {
		if regnodes[i] == nil {
			regnodes[i] = newname(Lookup(regnames[i]))
		}
		vars[i].node = regnodes[i]
	}

	regbits = Thearch.Excludedregs()
	externs = zbits
	params = zbits
	consts = zbits
	addrs = zbits
	ivar = zbits
	ovar = zbits

	// pass 1
	// build aux data structure
	// allocate pcs
	// find use and set of variables
	g := Flowstart(firstp, func() interface{} { return new(Reg) })
	if g == nil {
		// No flow graph could be built; release the Opt links set up
		// by earlier phases before bailing out.
		for i := 0; i < nvar; i++ {
			vars[i].node.SetOpt(nil)
		}
		return
	}

	firstf := g.Start

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		// AVARLIVE must be considered a use, do not skip it.
		// Otherwise the variable will be optimized away,
		// and the whole point of AVARLIVE is to keep it on the stack.
		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
			continue
		}

		// Avoid making variables for direct-called functions.
		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
			continue
		}

		// from vs to doesn't matter for registers.
		r := f.Data.(*Reg)
		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
		r.set.b[0] |= p.Info.Regset

		// Record use/set bits for the From operand according to the
		// instruction's declared read/write/address behavior.
		bit := mkvar(f, &p.From)
		if bany(&bit) {
			if p.Info.Flags&LeftAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&LeftRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use1.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&LeftWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}

		// Compute used register for reg
		if p.Info.Flags&RegRead != 0 {
			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
		}

		// Currently we never generate three register forms.
		// If we do, this will need to change.
		if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST {
			Fatalf("regopt not implemented for from3")
		}

		// Same treatment for the To operand, recorded in use2/set.
		bit = mkvar(f, &p.To)
		if bany(&bit) {
			if p.Info.Flags&RightAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&RightRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use2.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&RightWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}
	}

	// Collect into addrs the bits of every variable whose address was
	// taken; those can never live in a register.
	for i := 0; i < nvar; i++ {
		v := &vars[i]
		if v.addr != 0 {
			bit := blsh(uint(i))
			for z := 0; z < BITS; z++ {
				addrs.b[z] |= bit.b[z]
			}
		}

		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, v.etype, v.width, v.node, v.offset)
		}
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass1", firstf, 1)
	}

	// pass 2
	// find looping structure
	flowrpo(g)

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass2", firstf, 1)
	}

	// pass 2.5
	// iterate propagating fat vardef covering forward
	// r->act records vars with a VARDEF since the last CALL.
	// (r->act will be reused in pass 5 for something else,
	// but we'll be done with it by then.)
	active := 0

	for f := firstf; f != nil; f = f.Link {
		f.Active = 0
		r := f.Data.(*Reg)
		r.act = zbits
	}

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
			active++
			walkvardef(p.To.Node.(*Node), f, active)
		}
	}

	// pass 3
	// iterate propagating usage
	// 	back until flow graph is complete
	var f1 *Flow
	var i int
	var f *Flow
loop1:
	// Repeat backward propagation until no prop call reports a change.
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	for f = firstf; f != nil; f = f.Link {
		if f.Prog.As == obj.ARET {
			prop(f, zbits, zbits)
		}
	}

	// pick up unreachable code
loop11:
	// i flags whether this sweep found any inactive node whose successor
	// is active; keep sweeping until none remain.
	i = 0

	for f = firstf; f != nil; f = f1 {
		f1 = f.Link
		if f1 != nil && f1.Active != 0 && f.Active == 0 {
			prop(f, zbits, zbits)
			i = 1
		}
	}

	if i != 0 {
		goto loop11
	}
	if change != 0 {
		goto loop1
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass3", firstf, 1)
	}

	// pass 4
	// iterate propagating register/variable synchrony
	// 	forward until graph is complete
loop2:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	synch(firstf, zbits)
	if change != 0 {
		goto loop2
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4", firstf, 1)
	}

	// pass 4.5
	// move register pseudo-variables into regu.
	// mask selects the low nreg bits, which hold the register
	// pseudo-variables created above; strip them from every bit vector.
	mask := uint64((1 << uint(nreg)) - 1)
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
		r.set.b[0] &^= mask
		r.use1.b[0] &^= mask
		r.use2.b[0] &^= mask
		r.refbehind.b[0] &^= mask
		r.refahead.b[0] &^= mask
		r.calbehind.b[0] &^= mask
		r.calahead.b[0] &^= mask
		r.regdiff.b[0] &^= mask
		r.act.b[0] &^= mask
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4.5", firstf, 1)
	}

	// pass 5
	// isolate regions
	// calculate costs (paint1)
	var bit Bits
	if f := firstf; f != nil {
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
		}
		if bany(&bit) && !f.Refset {
			// should never happen - all variables are preset
			if Debug['w'] != 0 {
				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
		}
	}

	for f := firstf; f != nil; f = f.Link {
		(f.Data.(*Reg)).act = zbits
	}
	nregion = 0
	region = region[:0]
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		// A variable set here but never referenced afterward is dead;
		// the instruction can be excised.
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
		}
		if bany(&bit) && !f.Refset {
			if Debug['w'] != 0 {
				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
			Thearch.Excise(f)
		}

		// For each candidate variable becoming live here, let paint1
		// compute the benefit (change) of registerizing it over the
		// region starting at f.
		for z := 0; z < BITS; z++ {
			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
		}
		for bany(&bit) {
			i = bnum(&bit)
			change = 0
			paint1(f, i)
			biclr(&bit, uint(i))
			if change <= 0 {
				continue
			}
			if nregion >= MaxRgn {
				nregion++
				continue
			}

			region = append(region, Rgn{
				enter: f,
				cost:  int16(change),
				varno: int16(i),
			})
			nregion++
		}
	}

	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
		Warn("regions: %d\n", nregion)
	}
	if nregion >= MaxRgn {
		if Debug['v'] != 0 {
			Warn("too many regions: %d\n", nregion)
		}
		nregion = MaxRgn
	}

	// Allocate the most profitable regions first.
	sort.Sort(rcmp(region[:nregion]))

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass5", firstf, 1)
	}

	// pass 6
	// determine used registers (paint2)
	// replace code (paint3)
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nregisterizing\n")
	}
	for i := 0; i < nregion; i++ {
		rgp := &region[i]
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
		}
		bit = blsh(uint(rgp.varno))
		usedreg := paint2(rgp.enter, int(rgp.varno), 0)
		vreg := allreg(usedreg, rgp)
		if rgp.regno != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				v := &vars[rgp.varno]
				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, v.etype, obj.Rconv(int(rgp.regno)), usedreg, vreg)
			}

			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
		}
	}

	// free aux structures. peep allocates new ones.
	for i := 0; i < nvar; i++ {
		vars[i].node.SetOpt(nil)
	}
	Flowend(g)
	firstf = nil

	if Debug['R'] != 0 && Debug['v'] != 0 {
		// Rebuild flow graph, since we inserted instructions
		g := Flowstart(firstp, nil)
		firstf = g.Start
		Dumpit("pass6", firstf, 0)
		Flowend(g)
		firstf = nil
	}

	// pass 7
	// peep-hole on basic block
	// Runs unless -R is set without -P (debugging escape hatch).
	if Debug['R'] == 0 || Debug['P'] != 0 {
		Thearch.Peep(firstp)
	}

	// eliminate nops
	// Splice ANOP instructions out of the list and retarget branches
	// that point at them.
	for p := firstp; p != nil; p = p.Link {
		for p.Link != nil && p.Link.As == obj.ANOP {
			p.Link = p.Link.Link
		}
		if p.To.Type == obj.TYPE_BRANCH {
			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
				p.To.Val = p.To.Val.(*obj.Prog).Link
			}
		}
	}

	// Report and reset optimizer statistics under -R.
	if Debug['R'] != 0 {
		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
			fmt.Printf("\nstats\n")
		}

		if Ostats.Ncvtreg != 0 {
			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
		}
		if Ostats.Nspill != 0 {
			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
		}
		if Ostats.Nreload != 0 {
			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
		}
		if Ostats.Ndelmov != 0 {
			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
		}
		if Ostats.Nvar != 0 {
			fmt.Printf("\t%4d var\n", Ostats.Nvar)
		}
		if Ostats.Naddr != 0 {
			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
		}

		Ostats = OptStats{}
	}
}
  1429  
  1430  // bany reports whether any bits in a are set.
  1431  func bany(a *Bits) bool {
  1432  	for _, x := range &a.b { // & to avoid making a copy of a.b
  1433  		if x != 0 {
  1434  			return true
  1435  		}
  1436  	}
  1437  	return false
  1438  }
  1439  
  1440  // bnum reports the lowest index of a 1 bit in a.
  1441  func bnum(a *Bits) int {
  1442  	for i, x := range &a.b { // & to avoid making a copy of a.b
  1443  		if x != 0 {
  1444  			return 64*i + Bitno(x)
  1445  		}
  1446  	}
  1447  
  1448  	Fatalf("bad in bnum")
  1449  	return 0
  1450  }
  1451  
  1452  // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
  1453  func blsh(n uint) Bits {
  1454  	c := zbits
  1455  	c.b[n/64] = 1 << (n % 64)
  1456  	return c
  1457  }
  1458  
  1459  // btest reports whether bit n is 1.
  1460  func btest(a *Bits, n uint) bool {
  1461  	return a.b[n/64]&(1<<(n%64)) != 0
  1462  }
  1463  
  1464  // biset sets bit n to 1.
  1465  func biset(a *Bits, n uint) {
  1466  	a.b[n/64] |= 1 << (n % 64)
  1467  }
  1468  
  1469  // biclr sets bit n to 0.
  1470  func biclr(a *Bits, n uint) {
  1471  	a.b[n/64] &^= (1 << (n % 64))
  1472  }
  1473  
  1474  // Bitno reports the lowest index of a 1 bit in b.
  1475  // It calls Fatalf if there is no 1 bit.
  1476  func Bitno(b uint64) int {
  1477  	if b == 0 {
  1478  		Fatalf("bad in bitno")
  1479  	}
  1480  	n := 0
  1481  	if b&(1<<32-1) == 0 {
  1482  		n += 32
  1483  		b >>= 32
  1484  	}
  1485  	if b&(1<<16-1) == 0 {
  1486  		n += 16
  1487  		b >>= 16
  1488  	}
  1489  	if b&(1<<8-1) == 0 {
  1490  		n += 8
  1491  		b >>= 8
  1492  	}
  1493  	if b&(1<<4-1) == 0 {
  1494  		n += 4
  1495  		b >>= 4
  1496  	}
  1497  	if b&(1<<2-1) == 0 {
  1498  		n += 2
  1499  		b >>= 2
  1500  	}
  1501  	if b&1 == 0 {
  1502  		n++
  1503  	}
  1504  	return n
  1505  }
  1506  
  1507  // String returns a space-separated list of the variables represented by bits.
  1508  func (bits Bits) String() string {
  1509  	// Note: This method takes a value receiver, both for convenience
  1510  	// and to make it safe to modify the bits as we process them.
  1511  	// Even so, most prints above use &bits, because then the value
  1512  	// being stored in the interface{} is a pointer and does not require
  1513  	// an allocation and copy to create the interface{}.
  1514  	var buf bytes.Buffer
  1515  	sep := ""
  1516  	for bany(&bits) {
  1517  		i := bnum(&bits)
  1518  		buf.WriteString(sep)
  1519  		sep = " "
  1520  		v := &vars[i]
  1521  		if v.node == nil || v.node.Sym == nil {
  1522  			fmt.Fprintf(&buf, "$%d", i)
  1523  		} else {
  1524  			fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
  1525  			if v.offset != 0 {
  1526  				fmt.Fprintf(&buf, "%+d", v.offset)
  1527  			}
  1528  		}
  1529  		biclr(&bits, uint(i))
  1530  	}
  1531  	return buf.String()
  1532  }