github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libgo/runtime/parfor.c (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Parallel for algorithm.
     6  
     7  #include "runtime.h"
     8  #include "arch.h"
     9  
    10  struct ParForThread
    11  {
    12  	// the thread's iteration space [32lsb, 32msb)
    13  	uint64 pos;
    14  	// stats
    15  	uint64 nsteal;
    16  	uint64 nstealcnt;
    17  	uint64 nprocyield;
    18  	uint64 nosyield;
    19  	uint64 nsleep;
    20  	byte pad[CacheLineSize];
    21  };
    22  
    23  ParFor*
    24  runtime_parforalloc(uint32 nthrmax)
    25  {
    26  	ParFor *desc;
    27  
    28  	// The ParFor object is followed by CacheLineSize padding
    29  	// and then nthrmax ParForThread.
    30  	desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
    31  	desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
    32  	desc->nthrmax = nthrmax;
    33  	return desc;
    34  }
    35  
    36  void
    37  runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
    38  {
    39  	uint32 i, begin, end;
    40  	uint64 *pos;
    41  
    42  	if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
    43  		runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
    44  		runtime_throw("parfor: invalid args");
    45  	}
    46  
    47  	desc->body = body;
    48  	desc->done = 0;
    49  	desc->nthr = nthr;
    50  	desc->thrseq = 0;
    51  	desc->cnt = n;
    52  	desc->ctx = ctx;
    53  	desc->wait = wait;
    54  	desc->nsteal = 0;
    55  	desc->nstealcnt = 0;
    56  	desc->nprocyield = 0;
    57  	desc->nosyield = 0;
    58  	desc->nsleep = 0;
    59  	for(i=0; i<nthr; i++) {
    60  		begin = (uint64)n*i / nthr;
    61  		end = (uint64)n*(i+1) / nthr;
    62  		pos = &desc->thr[i].pos;
    63  		if(((uintptr)pos & 7) != 0)
    64  			runtime_throw("parforsetup: pos is not aligned");
    65  		*pos = (uint64)begin | (((uint64)end)<<32);
    66  	}
    67  }
    68  
    69  void
    70  runtime_parfordo(ParFor *desc)
    71  {
    72  	ParForThread *me;
    73  	uint32 tid, begin, end, begin2, try, victim, i;
    74  	uint64 *mypos, *victimpos, pos, newpos;
    75  	void (*body)(ParFor*, uint32);
    76  	bool idle;
    77  
    78  	// Obtain 0-based thread index.
    79  	tid = runtime_xadd(&desc->thrseq, 1) - 1;
    80  	if(tid >= desc->nthr) {
    81  		runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
    82  		runtime_throw("parfor: invalid tid");
    83  	}
    84  
    85  	// If single-threaded, just execute the for serially.
    86  	if(desc->nthr==1) {
    87  		for(i=0; i<desc->cnt; i++)
    88  			desc->body(desc, i);
    89  		return;
    90  	}
    91  
    92  	body = desc->body;
    93  	me = &desc->thr[tid];
    94  	mypos = &me->pos;
    95  	for(;;) {
    96  		for(;;) {
    97  			// While there is local work,
    98  			// bump low index and execute the iteration.
    99  			pos = runtime_xadd64(mypos, 1);
   100  			begin = (uint32)pos-1;
   101  			end = (uint32)(pos>>32);
   102  			if(begin < end) {
   103  				body(desc, begin);
   104  				continue;
   105  			}
   106  			break;
   107  		}
   108  
   109  		// Out of work, need to steal something.
   110  		idle = false;
   111  		for(try=0;; try++) {
   112  			// If we don't see any work for long enough,
   113  			// increment the done counter...
   114  			if(try > desc->nthr*4 && !idle) {
   115  				idle = true;
   116  				runtime_xadd(&desc->done, 1);
   117  			}
   118  			// ...if all threads have incremented the counter,
   119  			// we are done.
   120  			if(desc->done + !idle == desc->nthr) {
   121  				if(!idle)
   122  					runtime_xadd(&desc->done, 1);
   123  				goto exit;
   124  			}
   125  			// Choose a random victim for stealing.
   126  			victim = runtime_fastrand1() % (desc->nthr-1);
   127  			if(victim >= tid)
   128  				victim++;
   129  			victimpos = &desc->thr[victim].pos;
   130  			for(;;) {
   131  				// See if it has any work.
   132  				pos = runtime_atomicload64(victimpos);
   133  				begin = (uint32)pos;
   134  				end = (uint32)(pos>>32);
   135  				if(begin+1 >= end) {
   136  					begin = end = 0;
   137  					break;
   138  				}
   139  				if(idle) {
   140  					runtime_xadd(&desc->done, -1);
   141  					idle = false;
   142  				}
   143  				begin2 = begin + (end-begin)/2;
   144  				newpos = (uint64)begin | (uint64)begin2<<32;
   145  				if(runtime_cas64(victimpos, pos, newpos)) {
   146  					begin = begin2;
   147  					break;
   148  				}
   149  			}
   150  			if(begin < end) {
   151  				// Has successfully stolen some work.
   152  				if(idle)
   153  					runtime_throw("parfor: should not be idle");
   154  				runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
   155  				me->nsteal++;
   156  				me->nstealcnt += end-begin;
   157  				break;
   158  			}
   159  			// Backoff.
   160  			if(try < desc->nthr) {
   161  				// nothing
   162  			} else if (try < 4*desc->nthr) {
   163  				me->nprocyield++;
   164  				runtime_procyield(20);
   165  			// If a caller asked not to wait for the others, exit now
   166  			// (assume that most work is already done at this point).
   167  			} else if (!desc->wait) {
   168  				if(!idle)
   169  					runtime_xadd(&desc->done, 1);
   170  				goto exit;
   171  			} else if (try < 6*desc->nthr) {
   172  				me->nosyield++;
   173  				runtime_osyield();
   174  			} else {
   175  				me->nsleep++;
   176  				runtime_usleep(1);
   177  			}
   178  		}
   179  	}
   180  exit:
   181  	runtime_xadd64(&desc->nsteal, me->nsteal);
   182  	runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
   183  	runtime_xadd64(&desc->nprocyield, me->nprocyield);
   184  	runtime_xadd64(&desc->nosyield, me->nosyield);
   185  	runtime_xadd64(&desc->nsleep, me->nsleep);
   186  	me->nsteal = 0;
   187  	me->nstealcnt = 0;
   188  	me->nprocyield = 0;
   189  	me->nosyield = 0;
   190  	me->nsleep = 0;
   191  }
   192  
   193  // For testing from Go.
   194  void
   195  runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
   196  {
   197  	*start = (uint32)desc->thr[tid].pos;
   198  	*end = (uint32)(desc->thr[tid].pos>>32);
   199  }