github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/parfor.c (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Parallel for algorithm.
     6  
     7  #include "runtime.h"
     8  #include "arch_GOARCH.h"
     9  
    10  struct ParForThread
    11  {
    12  	// the thread's iteration space [32lsb, 32msb)
    13  	uint64 pos;
    14  	// stats
    15  	uint64 nsteal;
    16  	uint64 nstealcnt;
    17  	uint64 nprocyield;
    18  	uint64 nosyield;
    19  	uint64 nsleep;
    20  	byte pad[CacheLineSize];
    21  };
    22  
    23  ParFor*
    24  runtime·parforalloc(uint32 nthrmax)
    25  {
    26  	ParFor *desc;
    27  
    28  	// The ParFor object is followed by CacheLineSize padding
    29  	// and then nthrmax ParForThread.
    30  	desc = (ParFor*)runtime·malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
    31  	desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
    32  	desc->nthrmax = nthrmax;
    33  	return desc;
    34  }
    35  
    36  // For testing from Go
    37  // func parforalloc2(nthrmax uint32) *ParFor
    38  void
    39  runtime·parforalloc2(uint32 nthrmax, ParFor *desc)
    40  {
    41  	desc = runtime·parforalloc(nthrmax);
    42  	FLUSH(&desc);
    43  }
    44  
    45  void
    46  runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
    47  {
    48  	uint32 i, begin, end;
    49  	uint64 *pos;
    50  
    51  	if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
    52  		runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
    53  		runtime·throw("parfor: invalid args");
    54  	}
    55  
    56  	desc->body = body;
    57  	desc->done = 0;
    58  	desc->nthr = nthr;
    59  	desc->thrseq = 0;
    60  	desc->cnt = n;
    61  	desc->ctx = ctx;
    62  	desc->wait = wait;
    63  	desc->nsteal = 0;
    64  	desc->nstealcnt = 0;
    65  	desc->nprocyield = 0;
    66  	desc->nosyield = 0;
    67  	desc->nsleep = 0;
    68  	for(i=0; i<nthr; i++) {
    69  		begin = (uint64)n*i / nthr;
    70  		end = (uint64)n*(i+1) / nthr;
    71  		pos = &desc->thr[i].pos;
    72  		if(((uintptr)pos & 7) != 0)
    73  			runtime·throw("parforsetup: pos is not aligned");
    74  		*pos = (uint64)begin | (((uint64)end)<<32);
    75  	}
    76  }
    77  
    78  // For testing from Go
    79  // func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))
    80  void
    81  runtime·parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body)
    82  {
    83  	runtime·parforsetup(desc, nthr, n, ctx, wait, *(void(**)(ParFor*, uint32))body);
    84  }
    85  
    86  void
    87  runtime·parfordo(ParFor *desc)
    88  {
    89  	ParForThread *me;
    90  	uint32 tid, begin, end, begin2, try, victim, i;
    91  	uint64 *mypos, *victimpos, pos, newpos;
    92  	void (*body)(ParFor*, uint32);
    93  	bool idle;
    94  
    95  	// Obtain 0-based thread index.
    96  	tid = runtime·xadd(&desc->thrseq, 1) - 1;
    97  	if(tid >= desc->nthr) {
    98  		runtime·printf("tid=%d nthr=%d\n", tid, desc->nthr);
    99  		runtime·throw("parfor: invalid tid");
   100  	}
   101  
   102  	// If single-threaded, just execute the for serially.
   103  	if(desc->nthr==1) {
   104  		for(i=0; i<desc->cnt; i++)
   105  			desc->body(desc, i);
   106  		return;
   107  	}
   108  
   109  	body = desc->body;
   110  	me = &desc->thr[tid];
   111  	mypos = &me->pos;
   112  	for(;;) {
   113  		for(;;) {
   114  			// While there is local work,
   115  			// bump low index and execute the iteration.
   116  			pos = runtime·xadd64(mypos, 1);
   117  			begin = (uint32)pos-1;
   118  			end = (uint32)(pos>>32);
   119  			if(begin < end) {
   120  				body(desc, begin);
   121  				continue;
   122  			}
   123  			break;
   124  		}
   125  
   126  		// Out of work, need to steal something.
   127  		idle = false;
   128  		for(try=0;; try++) {
   129  			// If we don't see any work for long enough,
   130  			// increment the done counter...
   131  			if(try > desc->nthr*4 && !idle) {
   132  				idle = true;
   133  				runtime·xadd(&desc->done, 1);
   134  			}
   135  			// ...if all threads have incremented the counter,
   136  			// we are done.
   137  			if(desc->done + !idle == desc->nthr) {
   138  				if(!idle)
   139  					runtime·xadd(&desc->done, 1);
   140  				goto exit;
   141  			}
   142  			// Choose a random victim for stealing.
   143  			victim = runtime·fastrand1() % (desc->nthr-1);
   144  			if(victim >= tid)
   145  				victim++;
   146  			victimpos = &desc->thr[victim].pos;
   147  			for(;;) {
   148  				// See if it has any work.
   149  				pos = runtime·atomicload64(victimpos);
   150  				begin = (uint32)pos;
   151  				end = (uint32)(pos>>32);
   152  				if(begin+1 >= end) {
   153  					begin = end = 0;
   154  					break;
   155  				}
   156  				if(idle) {
   157  					runtime·xadd(&desc->done, -1);
   158  					idle = false;
   159  				}
   160  				begin2 = begin + (end-begin)/2;
   161  				newpos = (uint64)begin | (uint64)begin2<<32;
   162  				if(runtime·cas64(victimpos, pos, newpos)) {
   163  					begin = begin2;
   164  					break;
   165  				}
   166  			}
   167  			if(begin < end) {
   168  				// Has successfully stolen some work.
   169  				if(idle)
   170  					runtime·throw("parfor: should not be idle");
   171  				runtime·atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
   172  				me->nsteal++;
   173  				me->nstealcnt += end-begin;
   174  				break;
   175  			}
   176  			// Backoff.
   177  			if(try < desc->nthr) {
   178  				// nothing
   179  			} else if (try < 4*desc->nthr) {
   180  				me->nprocyield++;
   181  				runtime·procyield(20);
   182  			// If a caller asked not to wait for the others, exit now
   183  			// (assume that most work is already done at this point).
   184  			} else if (!desc->wait) {
   185  				if(!idle)
   186  					runtime·xadd(&desc->done, 1);
   187  				goto exit;
   188  			} else if (try < 6*desc->nthr) {
   189  				me->nosyield++;
   190  				runtime·osyield();
   191  			} else {
   192  				me->nsleep++;
   193  				runtime·usleep(1);
   194  			}
   195  		}
   196  	}
   197  exit:
   198  	runtime·xadd64(&desc->nsteal, me->nsteal);
   199  	runtime·xadd64(&desc->nstealcnt, me->nstealcnt);
   200  	runtime·xadd64(&desc->nprocyield, me->nprocyield);
   201  	runtime·xadd64(&desc->nosyield, me->nosyield);
   202  	runtime·xadd64(&desc->nsleep, me->nsleep);
   203  	me->nsteal = 0;
   204  	me->nstealcnt = 0;
   205  	me->nprocyield = 0;
   206  	me->nosyield = 0;
   207  	me->nsleep = 0;
   208  }
   209  
   210  // For testing from Go
   211  // func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)
   212  void
   213  runtime·parforiters(ParFor *desc, uintptr tid, uintptr start, uintptr end)
   214  {
   215  	start = (uint32)desc->thr[tid].pos;
   216  	end = (uint32)(desc->thr[tid].pos>>32);
   217  	FLUSH(&start);
   218  	FLUSH(&end);
   219  }