github.com/aloncn/graphics-go@v0.0.1/src/runtime/parfor.go

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Parallel for algorithm.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
)

// A parfor holds state for the parallel for operation.
type parfor struct {
	body   func(*parfor, uint32) // executed for each element
	done   uint32                // number of idle threads
	nthr   uint32                // total number of threads
	thrseq uint32                // thread id sequencer
	cnt    uint32                // iteration space [0, cnt)
	wait   bool                  // if true, wait until all threads have finished processing,
	// otherwise parfor may return while other threads are still working

	thr []parforthread // thread descriptors

	// stats; per-thread counts are flushed into these totals when parfordo exits
	nsteal     uint64
	nstealcnt  uint64
	nprocyield uint64
	nosyield   uint64
	nsleep     uint64
}

// A parforthread holds state for a single thread in the parallel for.
type parforthread struct {
	// the thread's remaining iteration space, packed as a half-open
	// interval: begin in the low 32 bits, end in the high 32 bits
	pos uint64
	// stats
	nsteal     uint64
	nstealcnt  uint64
	nprocyield uint64
	nosyield   uint64
	nsleep     uint64
	pad        [sys.CacheLineSize]byte // keep descriptors on separate cache lines to avoid false sharing
}

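// The packing of pos can be sketched with two tiny helpers (illustrative
// sketch, not in the original source; nothing else in this file calls them):

// packpos encodes the half-open interval [begin, end) into a single word
// that can be read and updated with one 64-bit atomic operation.
func packpos(begin, end uint32) uint64 {
	return uint64(begin) | uint64(end)<<32
}

// unpackpos decodes a packed interval back into its bounds.
func unpackpos(pos uint64) (begin, end uint32) {
	return uint32(pos), uint32(pos >> 32)
}
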
// parforalloc allocates a parfor descriptor that can be used by up to
// nthrmax threads.
func parforalloc(nthrmax uint32) *parfor {
	return &parfor{
		thr: make([]parforthread, nthrmax),
	}
}

// parforsetup initializes desc for a parallel for operation with nthr
// threads executing n jobs.
//
// On return the nthr threads are each expected to call parfordo(desc)
// to run the operation. During those calls, for each i in [0, n), one
// thread will be used to invoke body(desc, i).
// If wait is true, no parfordo will return until all work has been completed.
// If wait is false, parfordo may return when there is a small amount
// of work left, under the assumption that another thread has that
// work well in hand.
func parforsetup(desc *parfor, nthr, n uint32, wait bool, body func(*parfor, uint32)) {
	if desc == nil || nthr == 0 || nthr > uint32(len(desc.thr)) || body == nil {
		print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n")
		throw("parfor: invalid args")
	}

	desc.body = body
	desc.done = 0
	desc.nthr = nthr
	desc.thrseq = 0
	desc.cnt = n
	desc.wait = wait
	desc.nsteal = 0
	desc.nstealcnt = 0
	desc.nprocyield = 0
	desc.nosyield = 0
	desc.nsleep = 0

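	// Statically split the iteration space [0, n) into contiguous chunks,
	// packed as begin | end<<32; only the first nthr descriptors are ever
	// used by parfordo. For example (illustrative numbers), n=10 and nthr=4
	// gives the initial chunks [0,2), [2,5), [5,7) and [7,10).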
	for i := range desc.thr {
		begin := uint32(uint64(n) * uint64(i) / uint64(nthr))
		end := uint32(uint64(n) * uint64(i+1) / uint64(nthr))
		desc.thr[i].pos = uint64(begin) | uint64(end)<<32
	}
}
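
// parforExample is an illustrative sketch, not part of the original file:
// it shows the alloc/setup/do sequence in the degenerate single-threaded
// case, where parfordo runs the whole loop serially. With nthr > 1, each
// of the nthr participating threads would have to call parfordo(desc).
// The function name and the doubling body are hypothetical.
func parforExample(data []uint32) {
	desc := parforalloc(1)
	parforsetup(desc, 1, uint32(len(data)), true, func(_ *parfor, i uint32) {
		data[i] *= 2 // body is invoked exactly once for each i in [0, len(data))
	})
	parfordo(desc)
}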

func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	tid := atomic.Xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	body := desc.body
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		return
	}

	me := &desc.thr[tid]
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump the low index and execute the iteration.
			// Adding 1 to the packed word increments begin, which claims
			// index begin for this thread.
			pos := atomic.Xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				atomic.Xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done.
			extra := uint32(0)
			if !idle {
				// extra stands in for this thread, which has not yet
				// added itself to the done count.
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			var begin, end uint32
			// Pick uniformly from the other nthr-1 threads, skipping our own tid.
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc.thr[victim].pos
			for {
				// See if it has any work.
				pos := atomic.Load64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				if begin+1 >= end {
					// The victim has at most one iteration left; not worth stealing.
					begin, end = 0, 0
					break
				}
				if idle {
					atomic.Xadd(&desc.done, -1)
					idle = false
				}
				// Try to take the upper half of the victim's range:
				// the victim keeps [begin, begin2), we take [begin2, end).
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if atomic.Cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
			}
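			// Example with illustrative numbers: if the victim's range was
			// [4, 12), the successful CAS above leaves it with [4, 8) and
			// this thread proceeds with begin=8, end=12.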
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				atomic.Store64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff, escalating with the number of fruitless tries:
			// spin, then processor yield, then OS yield, then sleep.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}

exit:
	// Flush this thread's stats into the descriptor totals and reset them
	// so the thread descriptor can be reused.
	atomic.Xadd64(&desc.nsteal, int64(me.nsteal))
	atomic.Xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	atomic.Xadd64(&desc.nprocyield, int64(me.nprocyield))
	atomic.Xadd64(&desc.nosyield, int64(me.nosyield))
	atomic.Xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}
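
// Usage note (illustrative summary, not from the original source): after
// parforsetup(desc, nthr, n, wait, body), each of the nthr worker threads
// calls parfordo(desc). A thread that drains its own chunk steals half of a
// random victim's remaining range, so the static split in parforsetup only
// fixes the starting distribution of work, not the final one.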