github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/runtime/parfor.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Parallel for algorithm. 6 7 package runtime 8 9 import "unsafe" 10 11 type parforthread struct { 12 // the thread's iteration space [32lsb, 32msb) 13 pos uint64 14 // stats 15 nsteal uint64 16 nstealcnt uint64 17 nprocyield uint64 18 nosyield uint64 19 nsleep uint64 20 pad [_CacheLineSize]byte 21 } 22 23 func desc_thr_index(desc *parfor, i uint32) *parforthread { 24 return (*parforthread)(add(unsafe.Pointer(desc.thr), uintptr(i)*unsafe.Sizeof(*desc.thr))) 25 } 26 27 func parforsetup(desc *parfor, nthr, n uint32, ctx unsafe.Pointer, wait bool, body func(*parfor, uint32)) { 28 if desc == nil || nthr == 0 || nthr > desc.nthrmax || body == nil { 29 print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n") 30 throw("parfor: invalid args") 31 } 32 33 desc.body = *(*unsafe.Pointer)(unsafe.Pointer(&body)) 34 desc.done = 0 35 desc.nthr = nthr 36 desc.thrseq = 0 37 desc.cnt = n 38 desc.ctx = ctx 39 desc.wait = wait 40 desc.nsteal = 0 41 desc.nstealcnt = 0 42 desc.nprocyield = 0 43 desc.nosyield = 0 44 desc.nsleep = 0 45 46 for i := uint32(0); i < nthr; i++ { 47 begin := uint32(uint64(n) * uint64(i) / uint64(nthr)) 48 end := uint32(uint64(n) * uint64(i+1) / uint64(nthr)) 49 pos := &desc_thr_index(desc, i).pos 50 if uintptr(unsafe.Pointer(pos))&7 != 0 { 51 throw("parforsetup: pos is not aligned") 52 } 53 *pos = uint64(begin) | uint64(end)<<32 54 } 55 } 56 57 func parfordo(desc *parfor) { 58 // Obtain 0-based thread index. 59 tid := xadd(&desc.thrseq, 1) - 1 60 if tid >= desc.nthr { 61 print("tid=", tid, " nthr=", desc.nthr, "\n") 62 throw("parfor: invalid tid") 63 } 64 65 // If single-threaded, just execute the for serially. 66 body := *(*func(*parfor, uint32))(unsafe.Pointer(&desc.body)) 67 if desc.nthr == 1 { 68 for i := uint32(0); i < desc.cnt; i++ { 69 body(desc, i) 70 } 71 return 72 } 73 74 me := desc_thr_index(desc, tid) 75 mypos := &me.pos 76 for { 77 for { 78 // While there is local work, 79 // bump low index and execute the iteration. 80 pos := xadd64(mypos, 1) 81 begin := uint32(pos) - 1 82 end := uint32(pos >> 32) 83 if begin < end { 84 body(desc, begin) 85 continue 86 } 87 break 88 } 89 90 // Out of work, need to steal something. 91 idle := false 92 for try := uint32(0); ; try++ { 93 // If we don't see any work for long enough, 94 // increment the done counter... 95 if try > desc.nthr*4 && !idle { 96 idle = true 97 xadd(&desc.done, 1) 98 } 99 100 // ...if all threads have incremented the counter, 101 // we are done. 102 extra := uint32(0) 103 if !idle { 104 extra = 1 105 } 106 if desc.done+extra == desc.nthr { 107 if !idle { 108 xadd(&desc.done, 1) 109 } 110 goto exit 111 } 112 113 // Choose a random victim for stealing. 114 var begin, end uint32 115 victim := fastrand1() % (desc.nthr - 1) 116 if victim >= tid { 117 victim++ 118 } 119 victimpos := &desc_thr_index(desc, victim).pos 120 for { 121 // See if it has any work. 122 pos := atomicload64(victimpos) 123 begin = uint32(pos) 124 end = uint32(pos >> 32) 125 if begin+1 >= end { 126 end = 0 127 begin = end 128 break 129 } 130 if idle { 131 xadd(&desc.done, -1) 132 idle = false 133 } 134 begin2 := begin + (end-begin)/2 135 newpos := uint64(begin) | uint64(begin2)<<32 136 if cas64(victimpos, pos, newpos) { 137 begin = begin2 138 break 139 } 140 } 141 if begin < end { 142 // Has successfully stolen some work. 143 if idle { 144 throw("parfor: should not be idle") 145 } 146 atomicstore64(mypos, uint64(begin)|uint64(end)<<32) 147 me.nsteal++ 148 me.nstealcnt += uint64(end) - uint64(begin) 149 break 150 } 151 152 // Backoff. 153 if try < desc.nthr { 154 // nothing 155 } else if try < 4*desc.nthr { 156 me.nprocyield++ 157 procyield(20) 158 } else if !desc.wait { 159 // If a caller asked not to wait for the others, exit now 160 // (assume that most work is already done at this point). 161 if !idle { 162 xadd(&desc.done, 1) 163 } 164 goto exit 165 } else if try < 6*desc.nthr { 166 me.nosyield++ 167 osyield() 168 } else { 169 me.nsleep++ 170 usleep(1) 171 } 172 } 173 } 174 175 exit: 176 xadd64(&desc.nsteal, int64(me.nsteal)) 177 xadd64(&desc.nstealcnt, int64(me.nstealcnt)) 178 xadd64(&desc.nprocyield, int64(me.nprocyield)) 179 xadd64(&desc.nosyield, int64(me.nosyield)) 180 xadd64(&desc.nsleep, int64(me.nsleep)) 181 me.nsteal = 0 182 me.nstealcnt = 0 183 me.nprocyield = 0 184 me.nosyield = 0 185 me.nsleep = 0 186 }