github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/os_linux.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "runtime.h"
     6  #include "defs_GOOS_GOARCH.h"
     7  #include "os_GOOS.h"
     8  #include "signal_unix.h"
     9  #include "stack.h"
    10  
// Signal table defined outside this file; indexed by signal number below.
extern SigTab runtime·sigtab[];

// Empty signal mask (static storage, so zero-initialized).
// Installed by runtime·minit to unblock signals on a new thread.
static Sigset sigset_none;
// Signal mask with every bit set in both 32-bit words.
// Installed by runtime·newosproc around the clone call.
static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 };
    15  
    16  // Linux futex.
    17  //
//	futexsleep(uint32 *addr, uint32 val, int64 ns)
//	futexwakeup(uint32 *addr, uint32 cnt)
    20  //
    21  // Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
    22  // Futexwakeup wakes up threads sleeping on addr.
    23  // Futexsleep is allowed to wake up spuriously.
    24  
// Operation codes passed as the second argument of runtime·futex.
enum
{
	FUTEX_WAIT = 0,	// used by runtime·futexsleep
	FUTEX_WAKE = 1,	// used by runtime·futexwakeup
};
    30  
    31  // Atomically,
    32  //	if(*addr == val) sleep
    33  // Might be woken up spuriously; that's allowed.
    34  // Don't sleep longer than ns; ns < 0 means forever.
    35  void
    36  runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
    37  {
    38  	Timespec ts, *tsp;
    39  	int64 secs;
    40  
    41  	if(ns < 0)
    42  		tsp = nil;
    43  	else {
    44  		secs = ns/1000000000LL;
    45  		// Avoid overflow
    46  		if(secs > 1LL<<30)
    47  			secs = 1LL<<30;
    48  		ts.tv_sec = secs;
    49  		ts.tv_nsec = ns%1000000000LL;
    50  		tsp = &ts;
    51  	}
    52  
    53  	// Some Linux kernels have a bug where futex of
    54  	// FUTEX_WAIT returns an internal error code
    55  	// as an errno.  Libpthread ignores the return value
    56  	// here, and so can we: as it says a few lines up,
    57  	// spurious wakeups are allowed.
    58  	runtime·futex(addr, FUTEX_WAIT, val, tsp, nil, 0);
    59  }
    60  
    61  // If any procs are sleeping on addr, wake up at most cnt.
    62  void
    63  runtime·futexwakeup(uint32 *addr, uint32 cnt)
    64  {
    65  	int64 ret;
    66  
    67  	ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0);
    68  
    69  	if(ret >= 0)
    70  		return;
    71  
    72  	// I don't know that futex wakeup can return
    73  	// EAGAIN or EINTR, but if it does, it would be
    74  	// safe to loop and call futex again.
    75  	runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret);
    76  	*(int32*)0x1006 = 0x1006;
    77  }
    78  
    79  extern runtime·sched_getaffinity(uintptr pid, uintptr len, uintptr *buf);
    80  static int32
    81  getproccount(void)
    82  {
    83  	uintptr buf[16], t;
    84  	int32 r, cnt, i;
    85  
    86  	cnt = 0;
    87  	r = runtime·sched_getaffinity(0, sizeof(buf), buf);
    88  	if(r > 0)
    89  	for(i = 0; i < r/sizeof(buf[0]); i++) {
    90  		t = buf[i];
    91  		t = t - ((t >> 1) & 0x5555555555555555ULL);
    92  		t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL);
    93  		cnt += (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
    94  	}
    95  
    96  	return cnt ? cnt : 1;
    97  }
    98  
// Clone, the Linux rfork.
// Flag bits for the first argument of runtime·clone.
// runtime·newosproc combines CLONE_VM, CLONE_FS, CLONE_FILES,
// CLONE_SIGHAND and CLONE_THREAD; the remaining values are not
// referenced in the visible part of this file.
enum
{
	CLONE_VM = 0x100,
	CLONE_FS = 0x200,
	CLONE_FILES = 0x400,
	CLONE_SIGHAND = 0x800,
	CLONE_PTRACE = 0x2000,
	CLONE_VFORK = 0x4000,
	CLONE_PARENT = 0x8000,
	CLONE_THREAD = 0x10000,
	CLONE_NEWNS = 0x20000,
	CLONE_SYSVSEM = 0x40000,
	CLONE_SETTLS = 0x80000,
	CLONE_PARENT_SETTID = 0x100000,
	CLONE_CHILD_CLEARTID = 0x200000,
	CLONE_UNTRACED = 0x800000,
	CLONE_CHILD_SETTID = 0x1000000,
	CLONE_STOPPED = 0x2000000,
	CLONE_NEWUTS = 0x4000000,
	CLONE_NEWIPC = 0x8000000,
};
   121  
   122  void
   123  runtime·newosproc(M *mp, void *stk)
   124  {
   125  	int32 ret;
   126  	int32 flags;
   127  	Sigset oset;
   128  
   129  	/*
   130  	 * note: strace gets confused if we use CLONE_PTRACE here.
   131  	 */
   132  	flags = CLONE_VM	/* share memory */
   133  		| CLONE_FS	/* share cwd, etc */
   134  		| CLONE_FILES	/* share fd table */
   135  		| CLONE_SIGHAND	/* share sig handler table */
   136  		| CLONE_THREAD	/* revisit - okay for now */
   137  		;
   138  
   139  	mp->tls[0] = mp->id;	// so 386 asm can find it
   140  	if(0){
   141  		runtime·printf("newosproc stk=%p m=%p g=%p clone=%p id=%d/%d ostk=%p\n",
   142  			stk, mp, mp->g0, runtime·clone, mp->id, (int32)mp->tls[0], &mp);
   143  	}
   144  
   145  	// Disable signals during clone, so that the new thread starts
   146  	// with signals disabled.  It will enable them in minit.
   147  	runtime·rtsigprocmask(SIG_SETMASK, &sigset_all, &oset, sizeof oset);
   148  	ret = runtime·clone(flags, stk, mp, mp->g0, runtime·mstart);
   149  	runtime·rtsigprocmask(SIG_SETMASK, &oset, nil, sizeof oset);
   150  
   151  	if(ret < 0) {
   152  		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret);
   153  		runtime·throw("runtime.newosproc");
   154  	}
   155  }
   156  
// OS-specific initialization: records the CPU count reported by
// getproccount in runtime·ncpu.
void
runtime·osinit(void)
{
	runtime·ncpu = getproccount();
}
   162  
   163  // Random bytes initialized at startup.  These come
   164  // from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c).
   165  byte*	runtime·startup_random_data;
   166  uint32	runtime·startup_random_data_len;
   167  
   168  void
   169  runtime·get_random_data(byte **rnd, int32 *rnd_len)
   170  {
   171  	if(runtime·startup_random_data != nil) {
   172  		*rnd = runtime·startup_random_data;
   173  		*rnd_len = runtime·startup_random_data_len;
   174  	} else {
   175  		static byte urandom_data[HashRandomBytes];
   176  		int32 fd;
   177  		fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
   178  		if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
   179  			*rnd = urandom_data;
   180  			*rnd_len = HashRandomBytes;
   181  		} else {
   182  			*rnd = nil;
   183  			*rnd_len = 0;
   184  		}
   185  		runtime·close(fd);
   186  	}
   187  }
   188  
// Environment setup for Linux: delegates entirely to runtime·goenvs_unix.
void
runtime·goenvs(void)
{
	runtime·goenvs_unix();
}
   194  
// Called to initialize a new m (including the bootstrap m).
// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
// Allocates the signal-handling goroutine mp->gsignal with a 32K stack;
// runtime·minit later registers a 32K alternate signal stack derived from it.
void
runtime·mpreinit(M *mp)
{
	mp->gsignal = runtime·malg(32*1024);	// OS X wants >=8K, Linux >=2K
}
   202  
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
void
runtime·minit(void)
{
	// Initialize signal handling.
	// Register a 32K alternate signal stack starting at
	// gsignal's stackguard minus StackGuard.
	// NOTE(review): presumably that address is the base of the stack
	// allocated in runtime·mpreinit — confirm against runtime·malg.
	runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024);
	// Install the empty mask, unblocking the signals that
	// runtime·newosproc blocked before clone.
	runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof(Sigset));
}
   212  
// Called from dropm to undo the effect of an minit.
// Passing nil makes runtime·signalstack set SS_DISABLE, turning the
// alternate signal stack off for this thread.
void
runtime·unminit(void)
{
	runtime·signalstack(nil, 0);
}
   219  
// Converts the signal recorded in g (g->sig, g->sigcode0, g->sigcode1,
// g->sigpc) into a Go panic or a fatal throw.
// NOTE(review): the switch cases have no break statements; this relies on
// runtime·panicstring and runtime·throw not returning — confirm.
void
runtime·sigpanic(void)
{
	switch(g->sig) {
	case SIGBUS:
		// A bus address error with sigcode1 below 0x1000 is reported
		// as a nil dereference (or a nil func call when sigpc is 0).
		if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000) {
			if(g->sigpc == 0)
				runtime·panicstring("call of nil func value");
			runtime·panicstring("invalid memory address or nil pointer dereference");
		}
		// Anything else is fatal; sigcode1 is printed as the fault address.
		runtime·printf("unexpected fault address %p\n", g->sigcode1);
		runtime·throw("fault");
	case SIGSEGV:
		// Same treatment as SIGBUS, accepting code 0, SEGV_MAPERR or SEGV_ACCERR.
		if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000) {
			if(g->sigpc == 0)
				runtime·panicstring("call of nil func value");
			runtime·panicstring("invalid memory address or nil pointer dereference");
		}
		runtime·printf("unexpected fault address %p\n", g->sigcode1);
		runtime·throw("fault");
	case SIGFPE:
		// Integer divide and overflow get specific messages;
		// every other SIGFPE code gets the generic one.
		switch(g->sigcode0) {
		case FPE_INTDIV:
			runtime·panicstring("integer divide by zero");
		case FPE_INTOVF:
			runtime·panicstring("integer overflow");
		}
		runtime·panicstring("floating point error");
	}
	// Any other signal: panic with its name from the signal table.
	runtime·panicstring(runtime·sigtab[g->sig].name);
}
   251  
   252  uintptr
   253  runtime·memlimit(void)
   254  {
   255  	Rlimit rl;
   256  	extern byte text[], end[];
   257  	uintptr used;
   258  
   259  	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
   260  		return 0;
   261  	if(rl.rlim_cur >= 0x7fffffff)
   262  		return 0;
   263  
   264  	// Estimate our VM footprint excluding the heap.
   265  	// Not an exact science: use size of binary plus
   266  	// some room for thread stacks.
   267  	used = end - text + (64<<20);
   268  	if(used >= rl.rlim_cur)
   269  		return 0;
   270  
   271  	// If there's not at least 16 MB left, we're probably
   272  	// not going to be able to do much.  Treat as no limit.
   273  	rl.rlim_cur -= used;
   274  	if(rl.rlim_cur < (16<<20))
   275  		return 0;
   276  
   277  	return rl.rlim_cur - used;
   278  }
   279  
// No-op in this file: the argument is only marked as used.
void
runtime·setprof(bool on)
{
	USED(on);
}
   285  
// Message written by runtime·badcallback.
static int8 badcallback[] = "runtime: cgo callback on thread not created by Go.\n";

// Writes the badcallback message to file descriptor 2.
// This runs on a foreign stack, without an m or a g.  No stack split.
#pragma textflag 7
void
runtime·badcallback(void)
{
	// sizeof - 1 leaves out the terminating NUL.
	runtime·write(2, badcallback, sizeof badcallback - 1);
}
   295  
// Message prefix written by runtime·badsignal.
static int8 badsignal[] = "runtime: signal received on thread not created by Go: ";

// Reports signal sig on file descriptor 2 and exits with status 1.
// SIGPROF is ignored and the function simply returns.
// This runs on a foreign stack, without an m or a g.  No stack split.
#pragma textflag 7
void
runtime·badsignal(int32 sig)
{
	int32 len;

	if (sig == SIGPROF) {
		return;  // Ignore SIGPROFs intended for a non-Go thread.
	}
	runtime·write(2, badsignal, sizeof badsignal - 1);
	// Append the signal's name only when sig is a valid table index.
	if (0 <= sig && sig < NSIG) {
		// Can't call findnull() because it will split stack.
		// Measure the name by hand instead.
		for(len = 0; runtime·sigtab[sig].name[len]; len++)
			;
		runtime·write(2, runtime·sigtab[sig].name, len);
	}
	runtime·write(2, "\n", 1);
	runtime·exit(1);
}
   318  
// On 386, alias sa_handler to k_sa_handler so the code below can use
// one field name on every architecture.
// NOTE(review): presumably the 386 Sigaction definition names the field
// k_sa_handler — confirm in the generated defs header.
#ifdef GOARCH_386
#define sa_handler k_sa_handler
#endif
   322  
/*
 * This assembler routine takes the args from registers, puts them on the stack,
 * and calls sighandler().
 */
extern void runtime·sigtramp(void);
// Installed as sa_restorer by runtime·setsig.
// NOTE(review): presumably issues the rt_sigreturn system call — confirm in the assembly.
extern void runtime·sigreturn(void);
   329  
   330  void
   331  runtime·setsig(int32 i, GoSighandler *fn, bool restart)
   332  {
   333  	Sigaction sa;
   334  
   335  	runtime·memclr((byte*)&sa, sizeof sa);
   336  	sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER;
   337  	if(restart)
   338  		sa.sa_flags |= SA_RESTART;
   339  	sa.sa_mask = ~0ULL;
   340  	// TODO(adonovan): Linux manpage says "sa_restorer element is
   341  	// obsolete and should not be used".  Avoid it here, and test.
   342  	sa.sa_restorer = (void*)runtime·sigreturn;
   343  	if(fn == runtime·sighandler)
   344  		fn = (void*)runtime·sigtramp;
   345  	sa.sa_handler = fn;
   346  	if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0)
   347  		runtime·throw("rt_sigaction failure");
   348  }
   349  
   350  GoSighandler*
   351  runtime·getsig(int32 i)
   352  {
   353  	Sigaction sa;
   354  
   355  	runtime·memclr((byte*)&sa, sizeof sa);
   356  	if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0)
   357  		runtime·throw("rt_sigaction read failure");
   358  	if((void*)sa.sa_handler == runtime·sigtramp)
   359  		return runtime·sighandler;
   360  	return (void*)sa.sa_handler;
   361  }
   362  
   363  void
   364  runtime·signalstack(byte *p, int32 n)
   365  {
   366  	Sigaltstack st;
   367  
   368  	st.ss_sp = p;
   369  	st.ss_size = n;
   370  	st.ss_flags = 0;
   371  	if(p == nil)
   372  		st.ss_flags = SS_DISABLE;
   373  	runtime·sigaltstack(&st, nil);
   374  }