github.com/llvm-mirror/llgo@v0.0.0-20190322182713-bf6f0a60fce1/third_party/gofrontend/libgo/runtime/parfor.c (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Parallel for algorithm.
     6  
     7  #include "runtime.h"
     8  #include "arch.h"
     9  
    10  struct ParForThread
    11  {
    12  	// the thread's iteration space [32lsb, 32msb)
    13  	uint64 pos;
    14  	// stats
    15  	uint64 nsteal;
    16  	uint64 nstealcnt;
    17  	uint64 nprocyield;
    18  	uint64 nosyield;
    19  	uint64 nsleep;
    20  	byte pad[CacheLineSize];
    21  };
    22  
    23  ParFor*
    24  runtime_parforalloc(uint32 nthrmax)
    25  {
    26  	ParFor *desc;
    27  
    28  	// The ParFor object is followed by CacheLineSize padding
    29  	// and then nthrmax ParForThread.
    30  	desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
    31  	desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
    32  	desc->nthrmax = nthrmax;
    33  	return desc;
    34  }
    35  
    36  void
    37  runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, bool wait, const FuncVal *body)
    38  {
    39  	uint32 i, begin, end;
    40  	uint64 *pos;
    41  
    42  	if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
    43  		runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
    44  		runtime_throw("parfor: invalid args");
    45  	}
    46  
    47  	desc->body = body;
    48  	desc->done = 0;
    49  	desc->nthr = nthr;
    50  	desc->thrseq = 0;
    51  	desc->cnt = n;
    52  	desc->wait = wait;
    53  	desc->nsteal = 0;
    54  	desc->nstealcnt = 0;
    55  	desc->nprocyield = 0;
    56  	desc->nosyield = 0;
    57  	desc->nsleep = 0;
    58  	for(i=0; i<nthr; i++) {
    59  		begin = (uint64)n*i / nthr;
    60  		end = (uint64)n*(i+1) / nthr;
    61  		pos = &desc->thr[i].pos;
    62  		if(((uintptr)pos & 7) != 0)
    63  			runtime_throw("parforsetup: pos is not aligned");
    64  		*pos = (uint64)begin | (((uint64)end)<<32);
    65  	}
    66  }
    67  
    68  void
    69  runtime_parfordo(ParFor *desc)
    70  {
    71  	ParForThread *me;
    72  	uint32 tid, begin, end, begin2, try, victim, i;
    73  	uint64 *mypos, *victimpos, pos, newpos;
    74  	const FuncVal *body;
    75  	void (*bodyfn)(ParFor*, uint32);
    76  	bool idle;
    77  
    78  	// Obtain 0-based thread index.
    79  	tid = runtime_xadd(&desc->thrseq, 1) - 1;
    80  	if(tid >= desc->nthr) {
    81  		runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
    82  		runtime_throw("parfor: invalid tid");
    83  	}
    84  
    85  	body = desc->body;
    86  	bodyfn = (void (*)(ParFor*, uint32))(void*)body->fn;
    87  
    88  	// If single-threaded, just execute the for serially.
    89  	if(desc->nthr==1) {
    90  		for(i=0; i<desc->cnt; i++)
    91  		  __builtin_call_with_static_chain (bodyfn(desc, i), body);
    92  		return;
    93  	}
    94  
    95  	me = &desc->thr[tid];
    96  	mypos = &me->pos;
    97  	for(;;) {
    98  		for(;;) {
    99  			// While there is local work,
   100  			// bump low index and execute the iteration.
   101  			pos = runtime_xadd64(mypos, 1);
   102  			begin = (uint32)pos-1;
   103  			end = (uint32)(pos>>32);
   104  			if(begin < end) {
   105  				__builtin_call_with_static_chain(bodyfn(desc, begin), body);
   106  				continue;
   107  			}
   108  			break;
   109  		}
   110  
   111  		// Out of work, need to steal something.
   112  		idle = false;
   113  		for(try=0;; try++) {
   114  			// If we don't see any work for long enough,
   115  			// increment the done counter...
   116  			if(try > desc->nthr*4 && !idle) {
   117  				idle = true;
   118  				runtime_xadd(&desc->done, 1);
   119  			}
   120  			// ...if all threads have incremented the counter,
   121  			// we are done.
   122  			if(desc->done + !idle == desc->nthr) {
   123  				if(!idle)
   124  					runtime_xadd(&desc->done, 1);
   125  				goto exit;
   126  			}
   127  			// Choose a random victim for stealing.
   128  			victim = runtime_fastrand1() % (desc->nthr-1);
   129  			if(victim >= tid)
   130  				victim++;
   131  			victimpos = &desc->thr[victim].pos;
   132  			for(;;) {
   133  				// See if it has any work.
   134  				pos = runtime_atomicload64(victimpos);
   135  				begin = (uint32)pos;
   136  				end = (uint32)(pos>>32);
   137  				if(begin+1 >= end) {
   138  					begin = end = 0;
   139  					break;
   140  				}
   141  				if(idle) {
   142  					runtime_xadd(&desc->done, -1);
   143  					idle = false;
   144  				}
   145  				begin2 = begin + (end-begin)/2;
   146  				newpos = (uint64)begin | (uint64)begin2<<32;
   147  				if(runtime_cas64(victimpos, pos, newpos)) {
   148  					begin = begin2;
   149  					break;
   150  				}
   151  			}
   152  			if(begin < end) {
   153  				// Has successfully stolen some work.
   154  				if(idle)
   155  					runtime_throw("parfor: should not be idle");
   156  				runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
   157  				me->nsteal++;
   158  				me->nstealcnt += end-begin;
   159  				break;
   160  			}
   161  			// Backoff.
   162  			if(try < desc->nthr) {
   163  				// nothing
   164  			} else if (try < 4*desc->nthr) {
   165  				me->nprocyield++;
   166  				runtime_procyield(20);
   167  			// If a caller asked not to wait for the others, exit now
   168  			// (assume that most work is already done at this point).
   169  			} else if (!desc->wait) {
   170  				if(!idle)
   171  					runtime_xadd(&desc->done, 1);
   172  				goto exit;
   173  			} else if (try < 6*desc->nthr) {
   174  				me->nosyield++;
   175  				runtime_osyield();
   176  			} else {
   177  				me->nsleep++;
   178  				runtime_usleep(1);
   179  			}
   180  		}
   181  	}
   182  exit:
   183  	runtime_xadd64(&desc->nsteal, me->nsteal);
   184  	runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
   185  	runtime_xadd64(&desc->nprocyield, me->nprocyield);
   186  	runtime_xadd64(&desc->nosyield, me->nosyield);
   187  	runtime_xadd64(&desc->nsleep, me->nsleep);
   188  	me->nsteal = 0;
   189  	me->nstealcnt = 0;
   190  	me->nprocyield = 0;
   191  	me->nosyield = 0;
   192  	me->nsleep = 0;
   193  }
   194  
   195  // For testing from Go.
   196  void
   197  runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
   198  {
   199  	*start = (uint32)desc->thr[tid].pos;
   200  	*end = (uint32)(desc->thr[tid].pos>>32);
   201  }