// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Parallel for algorithm.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
)

// A parfor holds state for the parallel for operation.
// One descriptor is shared by all participating threads; per-thread
// state lives in the thr slice.
type parfor struct {
	body   func(*parfor, uint32) // executed for each element
	done   uint32                // number of idle threads
	nthr   uint32                // total number of threads
	thrseq uint32                // thread id sequencer
	cnt    uint32                // iteration space [0, cnt)
	wait   bool                  // if true, wait while all threads finish processing,
	// otherwise parfor may return while other threads are still working

	thr []parforthread // thread descriptors

	// stats: totals accumulated from the per-thread counters
	// when each thread exits parfordo.
	nsteal     uint64 // number of successful steals
	nstealcnt  uint64 // number of iterations acquired by stealing
	nprocyield uint64 // procyield backoff calls
	nosyield   uint64 // osyield backoff calls
	nsleep     uint64 // usleep backoff calls
}

// A parforthread holds state for a single thread in the parallel for.
type parforthread struct {
	// the thread's iteration space [32lsb, 32msb)
	pos uint64
	// stats (local counters, flushed into parfor on exit)
	nsteal     uint64
	nstealcnt  uint64
	nprocyield uint64
	nosyield   uint64
	nsleep     uint64
	// pad keeps descriptors apart so that concurrent updates to
	// neighboring threads' pos/counters do not share a cache line.
	pad [sys.CacheLineSize]byte
}

// parforalloc creates a parfor descriptor that can serve up to
// nthrmax threads in later parforsetup/parfordo calls.
func parforalloc(nthrmax uint32) *parfor {
	return &parfor{
		thr: make([]parforthread, nthrmax),
	}
}

// Parforsetup initializes desc for a parallel for operation with nthr
// threads executing n jobs.
//
// On return the nthr threads are each expected to call parfordo(desc)
// to run the operation. During those calls, for each i in [0, n), one
// thread will be used invoke body(desc, i).
// If wait is true, no parfordo will return until all work has been completed.
// If wait is false, parfordo may return when there is a small amount
// of work left, under the assumption that another thread has that
// work well in hand.
func parforsetup(desc *parfor, nthr, n uint32, wait bool, body func(*parfor, uint32)) {
	if desc == nil || nthr == 0 || nthr > uint32(len(desc.thr)) || body == nil {
		print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n")
		throw("parfor: invalid args")
	}

	desc.body = body
	desc.done = 0
	desc.nthr = nthr
	desc.thrseq = 0
	desc.cnt = n
	desc.wait = wait
	desc.nsteal = 0
	desc.nstealcnt = 0
	desc.nprocyield = 0
	desc.nosyield = 0
	desc.nsleep = 0

	// Partition [0, n) into nthr contiguous, near-equal chunks,
	// one per thread descriptor. Each pos packs begin in the low
	// 32 bits and end in the high 32 bits. 64-bit math avoids
	// overflow of n*i for large n.
	for i := range desc.thr {
		begin := uint32(uint64(n) * uint64(i) / uint64(nthr))
		end := uint32(uint64(n) * uint64(i+1) / uint64(nthr))
		desc.thr[i].pos = uint64(begin) | uint64(end)<<32
	}
}

// parfordo runs one thread's share of the parallel for described by desc.
// It first drains its own iteration range, then steals work from other
// threads, and returns once all threads agree the work is done (or, when
// desc.wait is false, once little work appears to remain).
// Each of the desc.nthr participating threads must call it exactly once.
func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	tid := atomic.Xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	body := desc.body
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		return
	}

	me := &desc.thr[tid]
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump low index and execute the iteration.
			// The Xadd64 increments only the packed begin half;
			// NOTE(review): this relies on begin never wrapping into
			// the end half, which holds while begin stays below 2^32.
			pos := atomic.Xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				atomic.Xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done. A thread that has not yet declared
			// itself idle counts itself via extra.
			extra := uint32(0)
			if !idle {
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			// The modulus over nthr-1 plus the shift past tid
			// picks a uniformly random thread other than ourselves.
			var begin, end uint32
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc.thr[victim].pos
			for {
				// See if it has any work.
				pos := atomic.Load64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				if begin+1 >= end {
					// At most one iteration left (the victim may be
					// claiming it concurrently) — not worth stealing.
					end = 0
					begin = end
					break
				}
				// We saw work: retract our idle declaration before
				// attempting the steal, so termination is not
				// signaled while work remains.
				if idle {
					atomic.Xadd(&desc.done, -1)
					idle = false
				}
				// Try to take the upper half [begin2, end) of the
				// victim's remaining range; CAS failure means the
				// victim (or another thief) raced us — reload and retry.
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if atomic.Cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
			}
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				atomic.Store64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff, increasingly aggressive as the failed
			// attempts accumulate: spin, then yield the processor,
			// then yield the OS thread, then sleep.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}

exit:
	// Flush this thread's counters into the shared totals and reset
	// them so the descriptor can be reused by a later parforsetup.
	atomic.Xadd64(&desc.nsteal, int64(me.nsteal))
	atomic.Xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	atomic.Xadd64(&desc.nprocyield, int64(me.nprocyield))
	atomic.Xadd64(&desc.nosyield, int64(me.nosyield))
	atomic.Xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}