github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/runtime/parfor.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Parallel for algorithm.
     6  
     7  package runtime
     8  
     9  import "unsafe"
    10  
    11  type parforthread struct {
    12  	// the thread's iteration space [32lsb, 32msb)
    13  	pos uint64
    14  	// stats
    15  	nsteal     uint64
    16  	nstealcnt  uint64
    17  	nprocyield uint64
    18  	nosyield   uint64
    19  	nsleep     uint64
    20  	pad        [_CacheLineSize]byte
    21  }
    22  
    23  func desc_thr_index(desc *parfor, i uint32) *parforthread {
    24  	return (*parforthread)(add(unsafe.Pointer(desc.thr), uintptr(i)*unsafe.Sizeof(*desc.thr)))
    25  }
    26  
    27  func parforsetup(desc *parfor, nthr, n uint32, ctx unsafe.Pointer, wait bool, body func(*parfor, uint32)) {
    28  	if desc == nil || nthr == 0 || nthr > desc.nthrmax || body == nil {
    29  		print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n")
    30  		throw("parfor: invalid args")
    31  	}
    32  
    33  	desc.body = *(*unsafe.Pointer)(unsafe.Pointer(&body))
    34  	desc.done = 0
    35  	desc.nthr = nthr
    36  	desc.thrseq = 0
    37  	desc.cnt = n
    38  	desc.ctx = ctx
    39  	desc.wait = wait
    40  	desc.nsteal = 0
    41  	desc.nstealcnt = 0
    42  	desc.nprocyield = 0
    43  	desc.nosyield = 0
    44  	desc.nsleep = 0
    45  
    46  	for i := uint32(0); i < nthr; i++ {
    47  		begin := uint32(uint64(n) * uint64(i) / uint64(nthr))
    48  		end := uint32(uint64(n) * uint64(i+1) / uint64(nthr))
    49  		pos := &desc_thr_index(desc, i).pos
    50  		if uintptr(unsafe.Pointer(pos))&7 != 0 {
    51  			throw("parforsetup: pos is not aligned")
    52  		}
    53  		*pos = uint64(begin) | uint64(end)<<32
    54  	}
    55  }
    56  
// parfordo runs the calling thread's share of the parallel for described by
// desc. Each caller claims the next thread index from desc.thrseq, executes
// the iterations in its own slot, and then repeatedly tries to steal half of
// another slot's remaining range until either every thread has reported
// itself idle or, when desc.wait is false, its own backoff budget runs out.
// Before returning from the multi-threaded path it folds its per-thread
// statistics into desc and zeroes them.
//
// It throws if more callers arrive than desc.nthr allows.
func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	// xadd yields the incremented value, hence the -1.
	tid := xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	// desc.body holds the function value as a raw pointer; reinterpret it
	// back into a callable func.
	body := *(*func(*parfor, uint32))(unsafe.Pointer(&desc.body))
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		// Note: this path returns without touching the statistics.
		return
	}

	me := desc_thr_index(desc, tid)
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump low index and execute the iteration.
			// pos is the value after the increment, so the iteration just
			// claimed is the low word minus one; the high word is the
			// exclusive end of the range.
			pos := xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		// idle records whether this thread is currently counted in
		// desc.done.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done.
			// extra accounts for this thread when it has not yet been
			// counted in desc.done itself.
			extra := uint32(0)
			if !idle {
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			// The draw is over nthr-1 values and then shifted past tid, so
			// a thread never picks itself.
			var begin, end uint32
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc_thr_index(desc, victim).pos
			for {
				// See if it has any work.
				pos := atomicload64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				// Fewer than two iterations left: nothing worth stealing.
				// Clear both bounds so the begin < end test below fails.
				if begin+1 >= end {
					end = 0
					begin = end
					break
				}
				// Work has been observed, so withdraw from the done count
				// before attempting to take it.
				if idle {
					xadd(&desc.done, -1)
					idle = false
				}
				// Split the victim's range at its midpoint: the victim
				// keeps [begin, begin2) and this thread takes [begin2, end).
				// If the CAS fails the victim's pos changed in the
				// meantime, so reload and retry.
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
			}
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				// Install the stolen range as our own and go back to the
				// local-work loop.
				atomicstore64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff.
			// Escalates with the number of failed attempts: retry
			// immediately, then procyield, then osyield, then usleep.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}

exit:
	// Fold this thread's counters into the shared totals in desc, then reset
	// them.
	xadd64(&desc.nsteal, int64(me.nsteal))
	xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	xadd64(&desc.nprocyield, int64(me.nprocyield))
	xadd64(&desc.nosyield, int64(me.nosyield))
	xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}