github.com/llvm-mirror/llgo@v0.0.0-20190322182713-bf6f0a60fce1/third_party/gofrontend/libgo/runtime/parfor.c (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Parallel for algorithm. 6 7 #include "runtime.h" 8 #include "arch.h" 9 10 struct ParForThread 11 { 12 // the thread's iteration space [32lsb, 32msb) 13 uint64 pos; 14 // stats 15 uint64 nsteal; 16 uint64 nstealcnt; 17 uint64 nprocyield; 18 uint64 nosyield; 19 uint64 nsleep; 20 byte pad[CacheLineSize]; 21 }; 22 23 ParFor* 24 runtime_parforalloc(uint32 nthrmax) 25 { 26 ParFor *desc; 27 28 // The ParFor object is followed by CacheLineSize padding 29 // and then nthrmax ParForThread. 30 desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread)); 31 desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize); 32 desc->nthrmax = nthrmax; 33 return desc; 34 } 35 36 void 37 runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, bool wait, const FuncVal *body) 38 { 39 uint32 i, begin, end; 40 uint64 *pos; 41 42 if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) { 43 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body); 44 runtime_throw("parfor: invalid args"); 45 } 46 47 desc->body = body; 48 desc->done = 0; 49 desc->nthr = nthr; 50 desc->thrseq = 0; 51 desc->cnt = n; 52 desc->wait = wait; 53 desc->nsteal = 0; 54 desc->nstealcnt = 0; 55 desc->nprocyield = 0; 56 desc->nosyield = 0; 57 desc->nsleep = 0; 58 for(i=0; i<nthr; i++) { 59 begin = (uint64)n*i / nthr; 60 end = (uint64)n*(i+1) / nthr; 61 pos = &desc->thr[i].pos; 62 if(((uintptr)pos & 7) != 0) 63 runtime_throw("parforsetup: pos is not aligned"); 64 *pos = (uint64)begin | (((uint64)end)<<32); 65 } 66 } 67 68 void 69 runtime_parfordo(ParFor *desc) 70 { 71 ParForThread *me; 72 uint32 tid, begin, end, begin2, try, victim, i; 73 uint64 *mypos, *victimpos, pos, newpos; 74 const FuncVal *body; 75 void (*bodyfn)(ParFor*, uint32); 76 bool idle; 77 78 // Obtain 0-based thread index. 79 tid = runtime_xadd(&desc->thrseq, 1) - 1; 80 if(tid >= desc->nthr) { 81 runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr); 82 runtime_throw("parfor: invalid tid"); 83 } 84 85 body = desc->body; 86 bodyfn = (void (*)(ParFor*, uint32))(void*)body->fn; 87 88 // If single-threaded, just execute the for serially. 89 if(desc->nthr==1) { 90 for(i=0; i<desc->cnt; i++) 91 __builtin_call_with_static_chain (bodyfn(desc, i), body); 92 return; 93 } 94 95 me = &desc->thr[tid]; 96 mypos = &me->pos; 97 for(;;) { 98 for(;;) { 99 // While there is local work, 100 // bump low index and execute the iteration. 101 pos = runtime_xadd64(mypos, 1); 102 begin = (uint32)pos-1; 103 end = (uint32)(pos>>32); 104 if(begin < end) { 105 __builtin_call_with_static_chain(bodyfn(desc, begin), body); 106 continue; 107 } 108 break; 109 } 110 111 // Out of work, need to steal something. 112 idle = false; 113 for(try=0;; try++) { 114 // If we don't see any work for long enough, 115 // increment the done counter... 116 if(try > desc->nthr*4 && !idle) { 117 idle = true; 118 runtime_xadd(&desc->done, 1); 119 } 120 // ...if all threads have incremented the counter, 121 // we are done. 122 if(desc->done + !idle == desc->nthr) { 123 if(!idle) 124 runtime_xadd(&desc->done, 1); 125 goto exit; 126 } 127 // Choose a random victim for stealing. 128 victim = runtime_fastrand1() % (desc->nthr-1); 129 if(victim >= tid) 130 victim++; 131 victimpos = &desc->thr[victim].pos; 132 for(;;) { 133 // See if it has any work. 134 pos = runtime_atomicload64(victimpos); 135 begin = (uint32)pos; 136 end = (uint32)(pos>>32); 137 if(begin+1 >= end) { 138 begin = end = 0; 139 break; 140 } 141 if(idle) { 142 runtime_xadd(&desc->done, -1); 143 idle = false; 144 } 145 begin2 = begin + (end-begin)/2; 146 newpos = (uint64)begin | (uint64)begin2<<32; 147 if(runtime_cas64(victimpos, pos, newpos)) { 148 begin = begin2; 149 break; 150 } 151 } 152 if(begin < end) { 153 // Has successfully stolen some work. 154 if(idle) 155 runtime_throw("parfor: should not be idle"); 156 runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32); 157 me->nsteal++; 158 me->nstealcnt += end-begin; 159 break; 160 } 161 // Backoff. 162 if(try < desc->nthr) { 163 // nothing 164 } else if (try < 4*desc->nthr) { 165 me->nprocyield++; 166 runtime_procyield(20); 167 // If a caller asked not to wait for the others, exit now 168 // (assume that most work is already done at this point). 169 } else if (!desc->wait) { 170 if(!idle) 171 runtime_xadd(&desc->done, 1); 172 goto exit; 173 } else if (try < 6*desc->nthr) { 174 me->nosyield++; 175 runtime_osyield(); 176 } else { 177 me->nsleep++; 178 runtime_usleep(1); 179 } 180 } 181 } 182 exit: 183 runtime_xadd64(&desc->nsteal, me->nsteal); 184 runtime_xadd64(&desc->nstealcnt, me->nstealcnt); 185 runtime_xadd64(&desc->nprocyield, me->nprocyield); 186 runtime_xadd64(&desc->nosyield, me->nosyield); 187 runtime_xadd64(&desc->nsleep, me->nsleep); 188 me->nsteal = 0; 189 me->nstealcnt = 0; 190 me->nprocyield = 0; 191 me->nosyield = 0; 192 me->nsleep = 0; 193 } 194 195 // For testing from Go. 196 void 197 runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end) 198 { 199 *start = (uint32)desc->thr[tid].pos; 200 *end = (uint32)(desc->thr[tid].pos>>32); 201 }