github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/gc/reg.go (about)

     1  // Derived from Inferno utils/6c/reg.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package gc
    32  
    33  import (
    34  	"bytes"
    35  	"cmd/internal/obj"
    36  	"fmt"
    37  	"sort"
    38  	"strings"
    39  )
    40  
    41  // A Var represents a single variable that may be stored in a register:
    42  // one tracked piece of a Go variable, or a pseudo-variable standing for a
    43  // hardware register (so register use in the unoptimized code is tracked too).
    44  type Var struct {
    45  	offset     int64 // offset of this piece, copied from the operand's Addr.Offset by mkvar
    46  	node       *Node // owning variable node (mkvar stores the operand node's Orig)
    47  	nextinnode *Var // next Var belonging to the same node; node.Opt() holds the list head
    48  	width      int // size of this piece, copied from Addr.Width
    49  	id         int // index in vars
    50  	name       int8 // operand name class; mkvar only tracks obj.NAME_PARAM and obj.NAME_AUTO
    51  	etype      EType // element type, copied from Addr.Etype
    52  	addr       int8 // nonzero marks the piece as address-taken / not registerizable (mkvar sets 1, setaddrs sets 2)
    53  }
    54  
    55  // Bits represents a set of Vars, stored as a bit set of var numbers
    56  // (the index in vars, or equivalently v.id).
    57  type Bits struct {
    58  	b [BITS]uint64 // var number n is bit n%64 of word b[n/64]
    59  }
    60  
    61  const (
    62  	BITS = 3 // number of 64-bit words in a Bits
    63  	NVAR = BITS * 64 // capacity of vars: the most Vars a Bits can index
    64  )
    65  
    66  var (
    67  	vars [NVAR]Var // variables under consideration; regopt reserves the first entries for register pseudo-variables
    68  	nvar int       // number of vars
    69  
    70  	regbits uint64 // bits for hardware registers; regopt seeds it from Thearch.Excludedregs
    71  
    72  	zbits   Bits // zero: the empty set
    73  	externs Bits // global variables
    74  	params  Bits // function parameters and results (operands named obj.NAME_PARAM)
    75  	ivar    Bits // function parameters (inputs): nodes of class PPARAM
    76  	ovar    Bits // function results (outputs): nodes of class PPARAMOUT
    77  	consts  Bits // constant values
    78  	addrs   Bits // variables with address taken
    79  )
    80  
    81  // A Reg is a wrapper around a single Prog (one instruction) that holds
    82  // register optimization information while the optimizer runs.
    83  // It is stored as the Data of the instruction's Flow; the Flow's Prog is the instruction.
    84  type Reg struct {
    85  	set  Bits // regopt variables written by this instruction.
    86  	use1 Bits // regopt variables read by Prog.From (mkvar also records operand register bits here).
    87  	use2 Bits // regopt variables read by Prog.To.
    88  
    89  	// refahead/refbehind are the regopt variables whose current
    90  	// value may be used in the following/preceding instructions
    91  	// up to a CALL (or the value is clobbered).
    92  	refbehind Bits
    93  	refahead  Bits
    94  
    95  	// calahead/calbehind are similar, but for variables in
    96  	// instructions that are reachable after hitting at least one
    97  	// CALL.
    98  	calbehind Bits
    99  	calahead  Bits
   100  
   101  	regdiff Bits // maintained by synch; paint1/paint3 test it together with STORE to decide on a store
   102  	act     Bits // scratch marks: filled by walkvardef, set by paint1/paint3, cleared by paint2
   103  	regu    uint64 // register used bitmap
   104  }
   105  
   106  // A Rgn represents a single regopt variable over a region of code
   107  // where a register could potentially be dedicated to that variable.
   108  // The code encompassed by a Rgn is defined by the flow graph,
   109  // starting at enter, flood-filling forward while varno is refahead
   110  // and backward while varno is refbehind, and following branches.
   111  // A single variable may be represented by multiple disjoint Rgns and
   112  // each Rgn may choose a different register for that variable.
   113  // Registers are allocated to regions greedily in order of descending
   114  // cost.
   115  type Rgn struct {
   116  	enter *Flow // instruction at which the region's flood fill starts
   117  	cost  int16 // estimated benefit; allreg assigns a register only when cost > 0
   118  	varno int16 // index into vars of the variable
   119  	regno int16 // register picked by allreg, 0 if none
   120  }
   121  
   122  // MaxRgn is the limit on the number of regions.
   123  // The Plan 9 C compilers used a limit of 600 regions, but the
   124  // yacc-generated parser in y.go has 3100 regions, so MaxRgn is
   125  // set large enough to handle that.
   126  // There's not a huge cost to having too many regions:
   127  // the main processing traces the live area for each variable,
   128  // which is limited by the number of variables times the area,
   129  // not the raw region count. If there are many regions, they
   130  // are almost certainly small and easy to trace.
   131  // The only operation that scales with region count is the
   132  // sorting by cost, which uses sort.Sort and is therefore O(n log n).
   133  const MaxRgn = 6000
   134  
   135  var (
   136  	region  []Rgn // NOTE(review): presumably the candidate regions built by regopt; its use lies outside this excerpt
   137  	nregion int // NOTE(review): presumably the number of entries of region in use; confirm in regopt
   138  )
   139  
   140  type rcmp []Rgn
   141  
   142  func (x rcmp) Len() int {
   143  	return len(x)
   144  }
   145  
   146  func (x rcmp) Swap(i, j int) {
   147  	x[i], x[j] = x[j], x[i]
   148  }
   149  
   150  func (x rcmp) Less(i, j int) bool {
   151  	p1 := &x[i]
   152  	p2 := &x[j]
   153  	if p1.cost != p2.cost {
   154  		return int(p2.cost)-int(p1.cost) < 0
   155  	}
   156  	if p1.varno != p2.varno {
   157  		return int(p2.varno)-int(p1.varno) < 0
   158  	}
   159  	if p1.enter != p2.enter {
   160  		return int(p2.enter.Id-p1.enter.Id) < 0
   161  	}
   162  	return false
   163  }
   164  
   165  func setaddrs(bit Bits) {
   166  	var i int
   167  	var n int
   168  	var v *Var
   169  	var node *Node
   170  
   171  	for bany(&bit) {
   172  		// convert each bit to a variable
   173  		i = bnum(&bit)
   174  
   175  		node = vars[i].node
   176  		n = int(vars[i].name)
   177  		biclr(&bit, uint(i))
   178  
   179  		// disable all pieces of that variable
   180  		for i = 0; i < nvar; i++ {
   181  			v = &vars[i]
   182  			if v.node == node && int(v.name) == n {
   183  				v.addr = 2
   184  			}
   185  		}
   186  	}
   187  }
   188  
   189  var regnodes [64]*Node // name nodes for the register pseudo-variables; regopt creates each on first use and reuses it afterwards
   190  
   191  func walkvardef(n *Node, f *Flow, active int) {
   192  	var f1 *Flow
   193  	var bn int
   194  	var v *Var
   195  
   196  	for f1 = f; f1 != nil; f1 = f1.S1 {
   197  		if f1.Active == int32(active) {
   198  			break
   199  		}
   200  		f1.Active = int32(active)
   201  		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
   202  			break
   203  		}
   204  		for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
   205  			bn = v.id
   206  			biset(&(f1.Data.(*Reg)).act, uint(bn))
   207  		}
   208  
   209  		if f1.Prog.As == obj.ACALL {
   210  			break
   211  		}
   212  	}
   213  
   214  	for f2 := f; f2 != f1; f2 = f2.S1 {
   215  		if f2.S2 != nil {
   216  			walkvardef(n, f2.S2, active)
   217  		}
   218  	}
   219  }
   220  
   221  // add mov b,rn
   222  // just after r
   223  func addmove(r *Flow, bn int, rn int, f int) {
   224  	p1 := Ctxt.NewProg()
   225  	Clearp(p1)
   226  	p1.Pc = 9999
   227  
   228  	p := r.Prog
   229  	p1.Link = p.Link
   230  	p.Link = p1
   231  	p1.Lineno = p.Lineno
   232  
   233  	v := &vars[bn]
   234  
   235  	a := &p1.To
   236  	a.Offset = v.offset
   237  	a.Etype = uint8(v.etype)
   238  	a.Type = obj.TYPE_MEM
   239  	a.Name = v.name
   240  	a.Node = v.node
   241  	a.Sym = Linksym(v.node.Sym)
   242  
   243  	/* NOTE(rsc): 9g did
   244  	if(a->etype == TARRAY)
   245  		a->type = TYPE_ADDR;
   246  	else if(a->sym == nil)
   247  		a->type = TYPE_CONST;
   248  	*/
   249  	p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)]))
   250  
   251  	// TODO(rsc): Remove special case here.
   252  	if (Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL {
   253  		p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8]))
   254  	}
   255  	p1.From.Type = obj.TYPE_REG
   256  	p1.From.Reg = int16(rn)
   257  	p1.From.Name = obj.NAME_NONE
   258  	if f == 0 {
   259  		p1.From = *a
   260  		*a = obj.Addr{}
   261  		a.Type = obj.TYPE_REG
   262  		a.Reg = int16(rn)
   263  	}
   264  
   265  	if Debug['R'] != 0 && Debug['v'] != 0 {
   266  		fmt.Printf("%v ===add=== %v\n", p, p1)
   267  	}
   268  	Ostats.Nspill++
   269  }
   270  
   271  func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
   272  	t1 := o1 + int64(w1)
   273  	t2 := o2 + int64(w2)
   274  
   275  	if t1 <= o2 || t2 <= o1 {
   276  		return false
   277  	}
   278  
   279  	return true
   280  }
   281  
        // mkvar returns the set of regopt variables named by operand a of the
        // instruction f.
        //
        // Operands other than TYPE_ADDR and TYPE_MEM yield the register bits of
        // a.Reg. TYPE_MEM operands named NAME_PARAM or NAME_AUTO yield the bit of
        // the Var matching (node, name, offset, etype, width); that Var is created
        // in vars the first time it is seen. Everything else yields zbits.
        //
        // Side effects: operand register bits are ORed into word 0 of f's use1;
        // existing Vars that overlap the operand get addr = 1; a TYPE_ADDR operand
        // (outside the Thechar '5'/'7'/'9' special case) passes its Vars to setaddrs;
        // a new Var is pushed on the node.Opt() list and recorded in
        // params/ivar/ovar as applicable.
   282  func mkvar(f *Flow, a *obj.Addr) Bits {
   283  	// mark registers used
   284  	if a.Type == obj.TYPE_NONE {
   285  		return zbits
   286  	}
   287  
   288  	r := f.Data.(*Reg)
   289  	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB
   290  
   291  	var n int // name class of the operand; only assigned for NAME_PARAM/NAME_AUTO memory operands
   292  	switch a.Type {
   293  	default:
   294  		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
   295  		if regu == 0 {
   296  			return zbits
   297  		}
   298  		bit := zbits
   299  		bit.b[0] = regu
   300  		return bit
   301  
   302  		// TODO(rsc): Remove special case here.
   303  	case obj.TYPE_ADDR:
   304  		var bit Bits
   305  		if Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' {
   306  			goto memcase
   307  		}
   308  		a.Type = obj.TYPE_MEM // temporarily treat the operand as memory to look up its Vars
   309  		bit = mkvar(f, a)
   310  		setaddrs(bit)
   311  		a.Type = obj.TYPE_ADDR // restore the operand
   312  		Ostats.Naddr++
   313  		return zbits
   314  
   315  	memcase:
   316  		fallthrough
   317  
   318  	case obj.TYPE_MEM:
   319  		if r != nil { // NOTE(review): r was already dereferenced above, so this check looks redundant
   320  			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
   321  		}
   322  
   323  		/* NOTE: 5g did
   324  		if(r->f.prog->scond & (C_PBIT|C_WBIT))
   325  			r->set.b[0] |= RtoB(a->reg);
   326  		*/
   327  		switch a.Name {
   328  		default:
   329  			// Note: This case handles NAME_EXTERN and NAME_STATIC.
   330  			// We treat these as requiring eager writes to memory, due to
   331  			// the possibility of a fault handler looking at them, so there is
   332  			// not much point in registerizing the loads.
   333  			// If we later choose the set of candidate variables from a
   334  			// larger list, these cases could be deprioritized instead of
   335  			// removed entirely.
   336  			return zbits
   337  
   338  		case obj.NAME_PARAM,
   339  			obj.NAME_AUTO:
   340  			n = int(a.Name)
   341  		}
   342  	}
   343  
        	// Only named variables are tracked: the operand must carry an ONAME
        	// node whose Orig is its own original and whose symbol name does not
        	// start with '.'.
   344  	node, _ := a.Node.(*Node)
   345  	if node == nil || node.Op != ONAME || node.Orig == nil {
   346  		return zbits
   347  	}
   348  	node = node.Orig
   349  	if node.Orig != node {
   350  		Fatalf("%v: bad node", Ctxt.Dconv(a))
   351  	}
   352  	if node.Sym == nil || node.Sym.Name[0] == '.' {
   353  		return zbits
   354  	}
   355  	et := EType(a.Etype)
   356  	o := a.Offset
   357  	w := a.Width
   358  	if w < 0 {
   359  		Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
   360  	}
   361  
        	// Look for an existing Var describing exactly this piece. flag records
        	// whether an overlapping, non-identical piece was seen on the way.
   362  	flag := 0
   363  	var v *Var
   364  	for i := 0; i < nvar; i++ {
   365  		v = &vars[i]
   366  		if v.node == node && int(v.name) == n {
   367  			if v.offset == o {
   368  				if v.etype == et {
   369  					if int64(v.width) == w {
   370  						// TODO(rsc): Remove special case for arm here.
   371  						if flag == 0 || Thearch.Thechar != '5' {
   372  							return blsh(uint(i))
   373  						}
   374  					}
   375  				}
   376  			}
   377  
   378  			// if they overlap, disable both
   379  			if overlap_reg(v.offset, v.width, o, int(w)) {
   380  				//				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
   381  				v.addr = 1
   382  
   383  				flag = 1
   384  			}
   385  		}
   386  	}
   387  
   388  	switch et {
   389  	case 0, TFUNC:
   390  		return zbits
   391  	}
   392  
   393  	if nvar >= NVAR {
   394  		if Debug['w'] > 1 && node != nil {
   395  			Fatalf("variable not optimized: %v", Nconv(node, obj.FmtSharp))
   396  		}
   397  		if Debug['v'] > 0 {
   398  			Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp))
   399  		}
   400  
   401  		// If we're not tracking a word in a variable, mark the rest as
   402  		// having its address taken, so that we keep the whole thing
   403  		// live at all calls. otherwise we might optimize away part of
   404  		// a variable but not all of it.
   405  		var v *Var
   406  		for i := 0; i < nvar; i++ {
   407  			v = &vars[i]
   408  			if v.node == node {
   409  				v.addr = 1
   410  			}
   411  		}
   412  
   413  		return zbits
   414  	}
   415  
        	// Allocate the next slot of vars for this piece.
   416  	i := nvar
   417  	nvar++
   418  	v = &vars[i]
   419  	v.id = i
   420  	v.offset = o
   421  	v.name = int8(n)
   422  	v.etype = et
   423  	v.width = int(w)
   424  	v.addr = int8(flag) // funny punning
   425  	v.node = node
   426  
   427  	// node.Opt() is the head of a linked list
   428  	// of Vars within the given Node, so that
   429  	// we can start at a Var and find all the other
   430  	// Vars in the same Go variable.
   431  	v.nextinnode, _ = node.Opt().(*Var)
   432  
   433  	node.SetOpt(v)
   434  
   435  	bit := blsh(uint(i))
   436  	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC { // NOTE(review): n is only assigned for NAME_PARAM/NAME_AUTO above, so this looks dead unless one of these constants is 0; confirm
   437  		for z := 0; z < BITS; z++ {
   438  			externs.b[z] |= bit.b[z]
   439  		}
   440  	}
   441  	if n == obj.NAME_PARAM {
   442  		for z := 0; z < BITS; z++ {
   443  			params.b[z] |= bit.b[z]
   444  		}
   445  	}
   446  
   447  	if node.Class == PPARAM {
   448  		for z := 0; z < BITS; z++ {
   449  			ivar.b[z] |= bit.b[z]
   450  		}
   451  	}
   452  	if node.Class == PPARAMOUT {
   453  		for z := 0; z < BITS; z++ {
   454  			ovar.b[z] |= bit.b[z]
   455  		}
   456  	}
   457  
   458  	// Treat values with their address taken as live at calls,
   459  	// because the garbage collector's liveness analysis in plive.go does.
   460  	// These must be consistent or else we will elide stores and the garbage
   461  	// collector will see uninitialized data.
   462  	// The typical case where our own analysis is out of sync is when the
   463  	// node appears to have its address taken but that code doesn't actually
   464  	// get generated and therefore doesn't show up as an address being
   465  	// taken when we analyze the instruction stream.
   466  	// One instance of this case is when a closure uses the same name as
   467  	// an outer variable for one of its own variables declared with :=.
   468  	// The parser flags the outer variable as possibly shared, and therefore
   469  	// sets addrtaken, even though it ends up not being actually shared.
   470  	// If we were better about _ elision, _ = &x would suffice too.
   471  	// The broader := in a closure problem is mentioned in a comment in
   472  	// closure.go:/^typecheckclosure and dcl.go:/^oldname.
   473  	if node.Addrtaken {
   474  		v.addr = 1
   475  	}
   476  
   477  	// Disable registerization for globals, because:
   478  	// (1) we might panic at any time and we want the recovery code
   479  	// to see the latest values (issue 1304).
   480  	// (2) we don't know what pointers might point at them and we want
   481  	// loads via those pointers to see updated values and vice versa (issue 7995).
   482  	//
   483  	// Disable registerization for results if using defer, because the deferred func
   484  	// might recover and return, causing the current values to be used.
   485  	if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
   486  		v.addr = 1
   487  	}
   488  
   489  	if Debug['R'] != 0 {
   490  		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(et), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr)
   491  	}
   492  	Ostats.Nvar++
   493  
   494  	return bit
   495  }
   496  
   497  var change int // shared scratch: prop and synch set it to 1 when a set changes; paint1 accumulates its cost estimate in it
   498  
        // prop propagates the ref and cal sets backward from f.
        //
        // Walking the P1 links, it merges ref/cal into each instruction's
        // refahead/calahead (setting change to 1 whenever one of them grows),
        // adjusts the sets for CALL, TEXT and RET and for the instruction's own
        // set/use bits, and stores the result in refbehind/calbehind. The walk ends
        // after processing an instruction whose Active flag was already set.
        // Each instruction walked before that one then has its other predecessors
        // (the P2/P2link list) processed recursively with its behind sets.
        // ref and cal are passed by value, so each path works on its own copy.
   499  func prop(f *Flow, ref Bits, cal Bits) {
   500  	var f1 *Flow
   501  	var r1 *Reg
   502  	var z int
   503  	var i int
   504  	var v *Var
   505  	var v1 *Var
   506  
   507  	for f1 = f; f1 != nil; f1 = f1.P1 {
   508  		r1 = f1.Data.(*Reg)
   509  		for z = 0; z < BITS; z++ {
   510  			ref.b[z] |= r1.refahead.b[z]
   511  			if ref.b[z] != r1.refahead.b[z] {
   512  				r1.refahead.b[z] = ref.b[z]
   513  				change = 1
   514  			}
   515  
   516  			cal.b[z] |= r1.calahead.b[z]
   517  			if cal.b[z] != r1.calahead.b[z] {
   518  				r1.calahead.b[z] = cal.b[z]
   519  				change = 1
   520  			}
   521  		}
   522  
   523  		switch f1.Prog.As {
   524  		case obj.ACALL:
   525  			if Noreturn(f1.Prog) {
   526  				break
   527  			}
   528  
   529  			// Mark all input variables (ivar) as used, because that's what the
   530  			// liveness bitmaps say. The liveness bitmaps say that so that a
   531  			// panic will not show stale values in the parameter dump.
   532  			// Mark variables with a recent VARDEF (r1.act) as used,
   533  			// so that the optimizer flushes initializations to memory,
   534  			// so that if a garbage collection happens during this CALL,
   535  			// the collector will see initialized memory. Again this is to
   536  			// match what the liveness bitmaps say.
   537  			for z = 0; z < BITS; z++ {
   538  				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
   539  				ref.b[z] = 0
   540  			}
   541  
   542  			// cal.b is the current approximation of what's live across the call.
   543  			// Every bit in cal.b is a single stack word. For each such word,
   544  			// find all the other tracked stack words in the same Go variable
   545  			// (struct/slice/string/interface) and mark them live too.
   546  			// This is necessary because the liveness analysis for the garbage
   547  			// collector works at variable granularity, not at word granularity.
   548  			// It is fundamental for slice/string/interface: the garbage collector
   549  			// needs the whole value, not just some of the words, in order to
   550  			// interpret the other bits correctly. Specifically, slice needs a consistent
   551  			// ptr and cap, string needs a consistent ptr and len, and interface
   552  			// needs a consistent type word and data word.
   553  			for z = 0; z < BITS; z++ {
   554  				if cal.b[z] == 0 {
   555  					continue
   556  				}
   557  				for i = 0; i < 64; i++ {
   558  					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
   559  						continue
   560  					}
   561  					v = &vars[z*64+i]
   562  					if v.node.Opt() == nil { // v represents fixed register, not Go variable
   563  						continue
   564  					}
   565  
   566  					// v.node.Opt() is the head of a linked list of Vars
   567  					// corresponding to tracked words from the Go variable v.node.
   568  					// Walk the list and set all the bits.
   569  					// For a large struct this could end up being quadratic:
   570  					// after the first setting, the outer loop (for z, i) would see a 1 bit
   571  					// for all of the remaining words in the struct, and for each such
   572  					// word would go through and turn on all the bits again.
   573  					// To avoid the quadratic behavior, we only turn on the bits if
   574  					// v is the head of the list or if the head's bit is not yet turned on.
   575  					// This will set the bits at most twice, keeping the overall loop linear.
   576  					v1, _ = v.node.Opt().(*Var)
   577  
   578  					if v == v1 || !btest(&cal, uint(v1.id)) {
   579  						for ; v1 != nil; v1 = v1.nextinnode {
   580  							biset(&cal, uint(v1.id))
   581  						}
   582  					}
   583  				}
   584  			}
   585  
   586  		case obj.ATEXT:
   587  			for z = 0; z < BITS; z++ {
   588  				cal.b[z] = 0
   589  				ref.b[z] = 0
   590  			}
   591  
   592  		case obj.ARET:
   593  			for z = 0; z < BITS; z++ {
   594  				cal.b[z] = externs.b[z] | ovar.b[z]
   595  				ref.b[z] = 0
   596  			}
   597  		}
   598  
   599  		for z = 0; z < BITS; z++ {
   600  			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z] // drop what this instruction sets, add what it uses
   601  			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]) // anything set or used here leaves the cal set
   602  			r1.refbehind.b[z] = ref.b[z]
   603  			r1.calbehind.b[z] = cal.b[z]
   604  		}
   605  
   606  		if f1.Active != 0 { // already visited: its sets were refreshed above, but the walk stops here
   607  			break
   608  		}
   609  		f1.Active = 1
   610  	}
   611  
        	// Continue through the P2 predecessors of every instruction walked
        	// above, excluding the one the walk stopped at (f1).
   612  	var r *Reg
   613  	var f2 *Flow
   614  	for ; f != f1; f = f.P1 {
   615  		r = f.Data.(*Reg)
   616  		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
   617  			prop(f2, r.refbehind, r.calbehind)
   618  		}
   619  	}
   620  }
   621  
   622  func synch(f *Flow, dif Bits) {
   623  	var r1 *Reg
   624  	var z int
   625  
   626  	for f1 := f; f1 != nil; f1 = f1.S1 {
   627  		r1 = f1.Data.(*Reg)
   628  		for z = 0; z < BITS; z++ {
   629  			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
   630  			if dif.b[z] != r1.regdiff.b[z] {
   631  				r1.regdiff.b[z] = dif.b[z]
   632  				change = 1
   633  			}
   634  		}
   635  
   636  		if f1.Active != 0 {
   637  			break
   638  		}
   639  		f1.Active = 1
   640  		for z = 0; z < BITS; z++ {
   641  			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
   642  		}
   643  		if f1.S2 != nil {
   644  			synch(f1.S2, dif)
   645  		}
   646  	}
   647  }
   648  
   649  func allreg(b uint64, r *Rgn) uint64 {
   650  	v := &vars[r.varno]
   651  	r.regno = 0
   652  	switch v.etype {
   653  	default:
   654  		Fatalf("unknown etype %d/%v", Bitno(b), Econv(v.etype))
   655  
   656  	case TINT8,
   657  		TUINT8,
   658  		TINT16,
   659  		TUINT16,
   660  		TINT32,
   661  		TUINT32,
   662  		TINT64,
   663  		TUINT64,
   664  		TINT,
   665  		TUINT,
   666  		TUINTPTR,
   667  		TBOOL,
   668  		TPTR32,
   669  		TPTR64:
   670  		i := Thearch.BtoR(^b)
   671  		if i != 0 && r.cost > 0 {
   672  			r.regno = int16(i)
   673  			return Thearch.RtoB(i)
   674  		}
   675  
   676  	case TFLOAT32, TFLOAT64:
   677  		i := Thearch.BtoF(^b)
   678  		if i != 0 && r.cost > 0 {
   679  			r.regno = int16(i)
   680  			return Thearch.FtoB(i)
   681  		}
   682  	}
   683  
   684  	return 0
   685  }
   686  
   687  func LOAD(r *Reg, z int) uint64 {
   688  	return ^r.refbehind.b[z] & r.refahead.b[z]
   689  }
   690  
   691  func STORE(r *Reg, z int) uint64 {
   692  	return ^r.calbehind.b[z] & r.calahead.b[z]
   693  }
   694  
   695  // Cost parameters used by paint1, each scaled by the instruction's Loop count.
   696  const (
   697  	CLOAD = 5 // cost of load; paint1 subtracts it where a load or store would be needed
   698  	CREF  = 5 // cost of reference if not registerized; paint1 adds it per reference
   699  	LOOP  = 3 // loop execution count (applied in popt.go)
   700  )
   701  
   702  func paint1(f *Flow, bn int) {
   703  	z := bn / 64
   704  	bb := uint64(1 << uint(bn%64))
   705  	r := f.Data.(*Reg)
   706  	if r.act.b[z]&bb != 0 {
   707  		return
   708  	}
   709  	var f1 *Flow
   710  	var r1 *Reg
   711  	for {
   712  		if r.refbehind.b[z]&bb == 0 {
   713  			break
   714  		}
   715  		f1 = f.P1
   716  		if f1 == nil {
   717  			break
   718  		}
   719  		r1 = f1.Data.(*Reg)
   720  		if r1.refahead.b[z]&bb == 0 {
   721  			break
   722  		}
   723  		if r1.act.b[z]&bb != 0 {
   724  			break
   725  		}
   726  		f = f1
   727  		r = r1
   728  	}
   729  
   730  	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
   731  		change -= CLOAD * int(f.Loop)
   732  	}
   733  
   734  	for {
   735  		r.act.b[z] |= bb
   736  
   737  		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
   738  			if r.use1.b[z]&bb != 0 {
   739  				change += CREF * int(f.Loop)
   740  			}
   741  			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
   742  				change += CREF * int(f.Loop)
   743  			}
   744  		}
   745  
   746  		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
   747  			change -= CLOAD * int(f.Loop)
   748  		}
   749  
   750  		if r.refbehind.b[z]&bb != 0 {
   751  			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
   752  				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
   753  					paint1(f1, bn)
   754  				}
   755  			}
   756  		}
   757  
   758  		if r.refahead.b[z]&bb == 0 {
   759  			break
   760  		}
   761  		f1 = f.S2
   762  		if f1 != nil {
   763  			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
   764  				paint1(f1, bn)
   765  			}
   766  		}
   767  		f = f.S1
   768  		if f == nil {
   769  			break
   770  		}
   771  		r = f.Data.(*Reg)
   772  		if r.act.b[z]&bb != 0 {
   773  			break
   774  		}
   775  		if r.refbehind.b[z]&bb == 0 {
   776  			break
   777  		}
   778  	}
   779  }
   780  
   781  func paint2(f *Flow, bn int, depth int) uint64 {
   782  	z := bn / 64
   783  	bb := uint64(1 << uint(bn%64))
   784  	vreg := regbits
   785  	r := f.Data.(*Reg)
   786  	if r.act.b[z]&bb == 0 {
   787  		return vreg
   788  	}
   789  	var r1 *Reg
   790  	var f1 *Flow
   791  	for {
   792  		if r.refbehind.b[z]&bb == 0 {
   793  			break
   794  		}
   795  		f1 = f.P1
   796  		if f1 == nil {
   797  			break
   798  		}
   799  		r1 = f1.Data.(*Reg)
   800  		if r1.refahead.b[z]&bb == 0 {
   801  			break
   802  		}
   803  		if r1.act.b[z]&bb == 0 {
   804  			break
   805  		}
   806  		f = f1
   807  		r = r1
   808  	}
   809  
   810  	for {
   811  		if Debug['R'] != 0 && Debug['v'] != 0 {
   812  			fmt.Printf("  paint2 %d %v\n", depth, f.Prog)
   813  		}
   814  
   815  		r.act.b[z] &^= bb
   816  
   817  		vreg |= r.regu
   818  
   819  		if r.refbehind.b[z]&bb != 0 {
   820  			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
   821  				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
   822  					vreg |= paint2(f1, bn, depth+1)
   823  				}
   824  			}
   825  		}
   826  
   827  		if r.refahead.b[z]&bb == 0 {
   828  			break
   829  		}
   830  		f1 = f.S2
   831  		if f1 != nil {
   832  			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
   833  				vreg |= paint2(f1, bn, depth+1)
   834  			}
   835  		}
   836  		f = f.S1
   837  		if f == nil {
   838  			break
   839  		}
   840  		r = f.Data.(*Reg)
   841  		if r.act.b[z]&bb == 0 {
   842  			break
   843  		}
   844  		if r.refbehind.b[z]&bb == 0 {
   845  			break
   846  		}
   847  	}
   848  
   849  	return vreg
   850  }
   851  
   852  func paint3(f *Flow, bn int, rb uint64, rn int) {
   853  	z := bn / 64
   854  	bb := uint64(1 << uint(bn%64))
   855  	r := f.Data.(*Reg)
   856  	if r.act.b[z]&bb != 0 {
   857  		return
   858  	}
   859  	var r1 *Reg
   860  	var f1 *Flow
   861  	for {
   862  		if r.refbehind.b[z]&bb == 0 {
   863  			break
   864  		}
   865  		f1 = f.P1
   866  		if f1 == nil {
   867  			break
   868  		}
   869  		r1 = f1.Data.(*Reg)
   870  		if r1.refahead.b[z]&bb == 0 {
   871  			break
   872  		}
   873  		if r1.act.b[z]&bb != 0 {
   874  			break
   875  		}
   876  		f = f1
   877  		r = r1
   878  	}
   879  
   880  	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
   881  		addmove(f, bn, rn, 0)
   882  	}
   883  	var p *obj.Prog
   884  	for {
   885  		r.act.b[z] |= bb
   886  		p = f.Prog
   887  
   888  		if r.use1.b[z]&bb != 0 {
   889  			if Debug['R'] != 0 && Debug['v'] != 0 {
   890  				fmt.Printf("%v", p)
   891  			}
   892  			addreg(&p.From, rn)
   893  			if Debug['R'] != 0 && Debug['v'] != 0 {
   894  				fmt.Printf(" ===change== %v\n", p)
   895  			}
   896  		}
   897  
   898  		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
   899  			if Debug['R'] != 0 && Debug['v'] != 0 {
   900  				fmt.Printf("%v", p)
   901  			}
   902  			addreg(&p.To, rn)
   903  			if Debug['R'] != 0 && Debug['v'] != 0 {
   904  				fmt.Printf(" ===change== %v\n", p)
   905  			}
   906  		}
   907  
   908  		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
   909  			addmove(f, bn, rn, 1)
   910  		}
   911  		r.regu |= rb
   912  
   913  		if r.refbehind.b[z]&bb != 0 {
   914  			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
   915  				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
   916  					paint3(f1, bn, rb, rn)
   917  				}
   918  			}
   919  		}
   920  
   921  		if r.refahead.b[z]&bb == 0 {
   922  			break
   923  		}
   924  		f1 = f.S2
   925  		if f1 != nil {
   926  			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
   927  				paint3(f1, bn, rb, rn)
   928  			}
   929  		}
   930  		f = f.S1
   931  		if f == nil {
   932  			break
   933  		}
   934  		r = f.Data.(*Reg)
   935  		if r.act.b[z]&bb != 0 {
   936  			break
   937  		}
   938  		if r.refbehind.b[z]&bb == 0 {
   939  			break
   940  		}
   941  	}
   942  }
   943  
   944  func addreg(a *obj.Addr, rn int) {
   945  	a.Sym = nil
   946  	a.Node = nil
   947  	a.Offset = 0
   948  	a.Type = obj.TYPE_REG
   949  	a.Reg = int16(rn)
   950  	a.Name = 0
   951  
   952  	Ostats.Ncvtreg++
   953  }
   954  
   955  func dumpone(f *Flow, isreg int) {
   956  	fmt.Printf("%d:%v", f.Loop, f.Prog)
   957  	if isreg != 0 {
   958  		r := f.Data.(*Reg)
   959  		var bit Bits
   960  		for z := 0; z < BITS; z++ {
   961  			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
   962  		}
   963  		if bany(&bit) {
   964  			fmt.Printf("\t")
   965  			if bany(&r.set) {
   966  				fmt.Printf(" s:%v", &r.set)
   967  			}
   968  			if bany(&r.use1) {
   969  				fmt.Printf(" u1:%v", &r.use1)
   970  			}
   971  			if bany(&r.use2) {
   972  				fmt.Printf(" u2:%v", &r.use2)
   973  			}
   974  			if bany(&r.refbehind) {
   975  				fmt.Printf(" rb:%v ", &r.refbehind)
   976  			}
   977  			if bany(&r.refahead) {
   978  				fmt.Printf(" ra:%v ", &r.refahead)
   979  			}
   980  			if bany(&r.calbehind) {
   981  				fmt.Printf(" cb:%v ", &r.calbehind)
   982  			}
   983  			if bany(&r.calahead) {
   984  				fmt.Printf(" ca:%v ", &r.calahead)
   985  			}
   986  			if bany(&r.regdiff) {
   987  				fmt.Printf(" d:%v ", &r.regdiff)
   988  			}
   989  			if bany(&r.act) {
   990  				fmt.Printf(" a:%v ", &r.act)
   991  			}
   992  		}
   993  	}
   994  
   995  	fmt.Printf("\n")
   996  }
   997  
   998  func Dumpit(str string, r0 *Flow, isreg int) {
   999  	var r1 *Flow
  1000  
  1001  	fmt.Printf("\n%s\n", str)
  1002  	for r := r0; r != nil; r = r.Link {
  1003  		dumpone(r, isreg)
  1004  		r1 = r.P2
  1005  		if r1 != nil {
  1006  			fmt.Printf("\tpred:")
  1007  			for ; r1 != nil; r1 = r1.P2link {
  1008  				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
  1009  			}
  1010  			if r.P1 != nil {
  1011  				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
  1012  			} else {
  1013  				fmt.Printf(" (only)")
  1014  			}
  1015  			fmt.Printf("\n")
  1016  		}
  1017  
  1018  		// Print successors if it's not just the next one
  1019  		if r.S1 != r.Link || r.S2 != nil {
  1020  			fmt.Printf("\tsucc:")
  1021  			if r.S1 != nil {
  1022  				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
  1023  			}
  1024  			if r.S2 != nil {
  1025  				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
  1026  			}
  1027  			fmt.Printf("\n")
  1028  		}
  1029  	}
  1030  }
  1031  
// regopt is the register optimizer entry point for the instruction list
// that begins at firstp. It works in the numbered passes marked in the body:
//
//	pass 1:   build the flow graph and record, per instruction, which
//	          variables and registers are used and set
//	pass 2:   find looping structure (flowrpo)
//	pass 2.5: propagate fat-variable VARDEF coverage forward (walkvardef)
//	pass 3:   propagate usage backward (prop), repeated while change != 0
//	pass 4:   propagate register/variable synchrony forward (synch),
//	          repeated while change != 0
//	pass 4.5: move the register pseudo-variable bits into regu
//	pass 5:   isolate regions and compute their costs (paint1)
//	pass 6:   determine used registers (paint2, allreg) and rewrite the
//	          code (paint3)
//	pass 7:   peephole optimization (Thearch.Peep), then ANOP removal
//
// If Flowstart yields no graph, regopt clears the Opt field of every variable
// node and returns without optimizing. When both Debug['R'] and Debug['v'] are
// set, the graph is dumped after each pass; when Debug['R'] is set, the Ostats
// counters are printed and reset at the end.
func regopt(firstp *obj.Prog) {
	mergetemp(firstp)

	// control flow is more complicated in generated go code
	// than in generated c code.  define pseudo-variables for
	// registers, so we have complete register usage information.
	var nreg int
	regnames := Thearch.Regnames(&nreg)

	// The first nreg entries of vars are the register pseudo-variables:
	// reset them, then attach a name node per register, creating and
	// caching it in regnodes the first time it is needed.
	nvar = nreg
	for i := 0; i < nreg; i++ {
		vars[i] = Var{}
	}
	for i := 0; i < nreg; i++ {
		if regnodes[i] == nil {
			regnodes[i] = newname(Lookup(regnames[i]))
		}
		vars[i].node = regnodes[i]
	}

	// Reset the package-level bit sets for this run.
	regbits = Thearch.Excludedregs()
	externs = zbits
	params = zbits
	consts = zbits
	addrs = zbits
	ivar = zbits
	ovar = zbits

	// pass 1
	// build aux data structure
	// allocate pcs
	// find use and set of variables
	g := Flowstart(firstp, func() interface{} { return new(Reg) })
	if g == nil {
		// No flow graph: clear the Opt field of every variable node
		// and give up on optimizing.
		for i := 0; i < nvar; i++ {
			vars[i].node.SetOpt(nil)
		}
		return
	}

	firstf := g.Start

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
			continue
		}

		// Avoid making variables for direct-called functions.
		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
			continue
		}

		// from vs to doesn't matter for registers.
		r := f.Data.(*Reg)
		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
		r.set.b[0] |= p.Info.Regset

		// Left operand: record address-taken, read and write according
		// to the instruction's Info.Flags.
		bit := mkvar(f, &p.From)
		if bany(&bit) {
			if p.Info.Flags&LeftAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&LeftRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use1.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&LeftWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}

		// Compute used register for reg
		if p.Info.Flags&RegRead != 0 {
			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
		}

		// Currently we never generate three register forms.
		// If we do, this will need to change.
		if p.From3Type() != obj.TYPE_NONE {
			Fatalf("regopt not implemented for from3")
		}

		// Right operand: same treatment, with reads going to use2.
		bit = mkvar(f, &p.To)
		if bany(&bit) {
			if p.Info.Flags&RightAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&RightRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use2.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&RightWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}
	}

	// Add every variable whose addr field is nonzero to addrs, and
	// optionally print one debug line per variable.
	for i := 0; i < nvar; i++ {
		v := &vars[i]
		if v.addr != 0 {
			bit := blsh(uint(i))
			for z := 0; z < BITS; z++ {
				addrs.b[z] |= bit.b[z]
			}
		}

		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(v.etype), v.width, v.node, v.offset)
		}
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass1", firstf, 1)
	}

	// pass 2
	// find looping structure
	flowrpo(g)

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass2", firstf, 1)
	}

	// pass 2.5
	// iterate propagating fat vardef covering forward
	// r->act records vars with a VARDEF since the last CALL.
	// (r->act will be reused in pass 5 for something else,
	// but we'll be done with it by then.)
	active := 0

	for f := firstf; f != nil; f = f.Link {
		f.Active = 0
		r := f.Data.(*Reg)
		r.act = zbits
	}

	// Each qualifying VARDEF gets its own walk, tagged with a fresh
	// value of active.
	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
			active++
			walkvardef(p.To.Node.(*Node), f, active)
		}
	}

	// pass 3
	// iterate propagating usage
	// 	back until flow graph is complete
	var f1 *Flow
	var i int
	var f *Flow
	// Restart point: clear change and the Active marks, then propagate
	// from every RET instruction.
loop1:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	for f = firstf; f != nil; f = f.Link {
		if f.Prog.As == obj.ARET {
			prop(f, zbits, zbits)
		}
	}

	// pick up unreachable code
	// A node whose Active is still zero while its Link successor's is not
	// gets its own prop call; repeat until a full scan finds no such node.
loop11:
	i = 0

	for f = firstf; f != nil; f = f1 {
		f1 = f.Link
		if f1 != nil && f1.Active != 0 && f.Active == 0 {
			prop(f, zbits, zbits)
			i = 1
		}
	}

	if i != 0 {
		goto loop11
	}
	if change != 0 {
		goto loop1
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass3", firstf, 1)
	}

	// pass 4
	// iterate propagating register/variable synchrony
	// 	forward until graph is complete
loop2:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	synch(firstf, zbits)
	if change != 0 {
		goto loop2
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4", firstf, 1)
	}

	// pass 4.5
	// move register pseudo-variables into regu.
	// mask has the low nreg bits set: the bits of word 0 that belong to
	// the register pseudo-variables occupying vars[0:nreg]. They are
	// saved in regu and then cleared from every other per-node bit set.
	mask := uint64((1 << uint(nreg)) - 1)
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
		r.set.b[0] &^= mask
		r.use1.b[0] &^= mask
		r.use2.b[0] &^= mask
		r.refbehind.b[0] &^= mask
		r.refahead.b[0] &^= mask
		r.calbehind.b[0] &^= mask
		r.calahead.b[0] &^= mask
		r.regdiff.b[0] &^= mask
		r.act.b[0] &^= mask
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4.5", firstf, 1)
	}

	// pass 5
	// isolate regions
	// calculate costs (paint1)
	var bit Bits
	// At the first node, look for variables referenced ahead that are not
	// in externs, params, addrs or consts: these are used without being set.
	if f := firstf; f != nil {
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
		}
		if bany(&bit) && !f.Refset {
			// should never happen - all variables are preset
			if Debug['w'] != 0 {
				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
		}
	}

	// act is reused from here on (see the pass 2.5 comment), so clear it.
	for f := firstf; f != nil; f = f.Link {
		(f.Data.(*Reg)).act = zbits
	}
	nregion = 0
	region = region[:0]
	var rgp *Rgn
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		// Variables set here that are neither referenced ahead nor
		// address-taken: the instruction is handed to Thearch.Excise.
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
		}
		if bany(&bit) && !f.Refset {
			if Debug['w'] != 0 {
				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = true
			Thearch.Excise(f)
		}

		// Every bit produced by LOAD (defined elsewhere) that is not
		// already in act or addrs is tried as the seed of a region;
		// paint1 reports the region's cost through change.
		for z := 0; z < BITS; z++ {
			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
		}
		for bany(&bit) {
			i = bnum(&bit)
			change = 0
			paint1(f, i)
			biclr(&bit, uint(i))
			if change <= 0 {
				continue
			}
			// Over the limit: keep counting so the total can be
			// reported below, but do not record the region.
			if nregion >= MaxRgn {
				nregion++
				continue
			}

			region = append(region, Rgn{
				enter: f,
				cost:  int16(change),
				varno: int16(i),
			})
			nregion++
		}
	}

	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
		Warn("regions: %d\n", nregion)
	}
	// At most MaxRgn regions were recorded above; clamp the count to match.
	if nregion >= MaxRgn {
		if Debug['v'] != 0 {
			Warn("too many regions: %d\n", nregion)
		}
		nregion = MaxRgn
	}

	sort.Sort(rcmp(region[:nregion]))

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass5", firstf, 1)
	}

	// pass 6
	// determine used registers (paint2)
	// replace code (paint3)
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nregisterizing\n")
	}
	var usedreg uint64
	var vreg uint64
	// Regions are visited in the order produced by the sort above.
	for i := 0; i < nregion; i++ {
		rgp = &region[i]
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
		}
		bit = blsh(uint(rgp.varno))
		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
		vreg = allreg(usedreg, rgp)
		// NOTE(review): rgp.regno is presumably filled in by allreg when
		// a register is available; only then is the code rewritten.
		if rgp.regno != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				v := &vars[rgp.varno]
				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(v.etype), obj.Rconv(int(rgp.regno)), usedreg, vreg)
			}

			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
		}
	}

	// free aux structures. peep allocates new ones.
	for i := 0; i < nvar; i++ {
		vars[i].node.SetOpt(nil)
	}
	Flowend(g)
	firstf = nil

	if Debug['R'] != 0 && Debug['v'] != 0 {
		// Rebuild flow graph, since we inserted instructions
		g := Flowstart(firstp, nil)
		firstf = g.Start
		Dumpit("pass6", firstf, 0)
		Flowend(g)
		firstf = nil
	}

	// pass 7
	// peep-hole on basic block
	// Skipped only when Debug['R'] is set and Debug['P'] is not.
	if Debug['R'] == 0 || Debug['P'] != 0 {
		Thearch.Peep(firstp)
	}

	// eliminate nops
	// Unlink ANOP instructions from the list, and advance any branch
	// target that is an ANOP to the first following non-ANOP instruction.
	for p := firstp; p != nil; p = p.Link {
		for p.Link != nil && p.Link.As == obj.ANOP {
			p.Link = p.Link.Link
		}
		if p.To.Type == obj.TYPE_BRANCH {
			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
				p.To.Val = p.To.Val.(*obj.Prog).Link
			}
		}
	}

	// Print the nonzero optimizer counters, then reset them.
	if Debug['R'] != 0 {
		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
			fmt.Printf("\nstats\n")
		}

		if Ostats.Ncvtreg != 0 {
			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
		}
		if Ostats.Nspill != 0 {
			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
		}
		if Ostats.Nreload != 0 {
			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
		}
		if Ostats.Ndelmov != 0 {
			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
		}
		if Ostats.Nvar != 0 {
			fmt.Printf("\t%4d var\n", Ostats.Nvar)
		}
		if Ostats.Naddr != 0 {
			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
		}

		Ostats = OptStats{}
	}
}
  1428  
  1429  // bany reports whether any bits in a are set.
  1430  func bany(a *Bits) bool {
  1431  	for _, x := range &a.b { // & to avoid making a copy of a.b
  1432  		if x != 0 {
  1433  			return true
  1434  		}
  1435  	}
  1436  	return false
  1437  }
  1438  
  1439  // bnum reports the lowest index of a 1 bit in a.
  1440  func bnum(a *Bits) int {
  1441  	for i, x := range &a.b { // & to avoid making a copy of a.b
  1442  		if x != 0 {
  1443  			return 64*i + Bitno(x)
  1444  		}
  1445  	}
  1446  
  1447  	Fatalf("bad in bnum")
  1448  	return 0
  1449  }
  1450  
  1451  // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
  1452  func blsh(n uint) Bits {
  1453  	c := zbits
  1454  	c.b[n/64] = 1 << (n % 64)
  1455  	return c
  1456  }
  1457  
  1458  // btest reports whether bit n is 1.
  1459  func btest(a *Bits, n uint) bool {
  1460  	return a.b[n/64]&(1<<(n%64)) != 0
  1461  }
  1462  
  1463  // biset sets bit n to 1.
  1464  func biset(a *Bits, n uint) {
  1465  	a.b[n/64] |= 1 << (n % 64)
  1466  }
  1467  
  1468  // biclr sets bit n to 0.
  1469  func biclr(a *Bits, n uint) {
  1470  	a.b[n/64] &^= (1 << (n % 64))
  1471  }
  1472  
  1473  // Bitno reports the lowest index of a 1 bit in b.
  1474  // It calls Fatalf if there is no 1 bit.
  1475  func Bitno(b uint64) int {
  1476  	if b == 0 {
  1477  		Fatalf("bad in bitno")
  1478  	}
  1479  	n := 0
  1480  	if b&(1<<32-1) == 0 {
  1481  		n += 32
  1482  		b >>= 32
  1483  	}
  1484  	if b&(1<<16-1) == 0 {
  1485  		n += 16
  1486  		b >>= 16
  1487  	}
  1488  	if b&(1<<8-1) == 0 {
  1489  		n += 8
  1490  		b >>= 8
  1491  	}
  1492  	if b&(1<<4-1) == 0 {
  1493  		n += 4
  1494  		b >>= 4
  1495  	}
  1496  	if b&(1<<2-1) == 0 {
  1497  		n += 2
  1498  		b >>= 2
  1499  	}
  1500  	if b&1 == 0 {
  1501  		n++
  1502  	}
  1503  	return n
  1504  }
  1505  
  1506  // String returns a space-separated list of the variables represented by bits.
  1507  func (bits Bits) String() string {
  1508  	// Note: This method takes a value receiver, both for convenience
  1509  	// and to make it safe to modify the bits as we process them.
  1510  	// Even so, most prints above use &bits, because then the value
  1511  	// being stored in the interface{} is a pointer and does not require
  1512  	// an allocation and copy to create the interface{}.
  1513  	var buf bytes.Buffer
  1514  	sep := ""
  1515  	for bany(&bits) {
  1516  		i := bnum(&bits)
  1517  		buf.WriteString(sep)
  1518  		sep = " "
  1519  		v := &vars[i]
  1520  		if v.node == nil || v.node.Sym == nil {
  1521  			fmt.Fprintf(&buf, "$%d", i)
  1522  		} else {
  1523  			fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
  1524  			if v.offset != 0 {
  1525  				fmt.Fprintf(&buf, "%+d", int64(v.offset))
  1526  			}
  1527  		}
  1528  		biclr(&bits, uint(i))
  1529  	}
  1530  	return buf.String()
  1531  }