github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/os_linux.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "runtime.h"
     6  #include "defs_GOOS_GOARCH.h"
     7  #include "os_GOOS.h"
     8  #include "signal_unix.h"
     9  #include "stack.h"
    10  #include "../../cmd/ld/textflag.h"
    11  
// Signal table, defined outside this file.  runtime·sigpanic indexes it
// by signal number to obtain the signal's name.
extern SigTab runtime·sigtab[];

// Signal mask with no bits set (static storage, never assigned here).
// Installed by runtime·minit.
static Sigset sigset_none;
// Signal mask with every bit set.  Installed by runtime·newosproc
// around the clone call.
static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 };
    16  
// Linux futex.
//
//	futexsleep(uint32 *addr, uint32 val, int64 ns)
//	futexwakeup(uint32 *addr, uint32 cnt)
//
// Futexsleep atomically checks if *addr == val and if so, sleeps on addr
// for at most ns nanoseconds (ns < 0 means no timeout).
// Futexwakeup wakes up at most cnt threads sleeping on addr.
// Futexsleep is allowed to wake up spuriously.

// Operation codes passed as the second argument of runtime·futex.
enum
{
	FUTEX_WAIT = 0,	// used by runtime·futexsleep
	FUTEX_WAKE = 1,	// used by runtime·futexwakeup
};
    31  
    32  // Atomically,
    33  //	if(*addr == val) sleep
    34  // Might be woken up spuriously; that's allowed.
    35  // Don't sleep longer than ns; ns < 0 means forever.
    36  #pragma textflag NOSPLIT
    37  void
    38  runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
    39  {
    40  	Timespec ts;
    41  
    42  	// Some Linux kernels have a bug where futex of
    43  	// FUTEX_WAIT returns an internal error code
    44  	// as an errno.  Libpthread ignores the return value
    45  	// here, and so can we: as it says a few lines up,
    46  	// spurious wakeups are allowed.
    47  
    48  	if(ns < 0) {
    49  		runtime·futex(addr, FUTEX_WAIT, val, nil, nil, 0);
    50  		return;
    51  	}
    52  	// NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
    53  	ts.tv_nsec = 0;
    54  	ts.tv_sec = runtime·timediv(ns, 1000000000LL, (int32*)&ts.tv_nsec);
    55  	runtime·futex(addr, FUTEX_WAIT, val, &ts, nil, 0);
    56  }
    57  
    58  // If any procs are sleeping on addr, wake up at most cnt.
    59  void
    60  runtime·futexwakeup(uint32 *addr, uint32 cnt)
    61  {
    62  	int64 ret;
    63  
    64  	ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0);
    65  
    66  	if(ret >= 0)
    67  		return;
    68  
    69  	// I don't know that futex wakeup can return
    70  	// EAGAIN or EINTR, but if it does, it would be
    71  	// safe to loop and call futex again.
    72  	runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret);
    73  	*(int32*)0x1006 = 0x1006;
    74  }
    75  
    76  extern runtime·sched_getaffinity(uintptr pid, uintptr len, uintptr *buf);
    77  static int32
    78  getproccount(void)
    79  {
    80  	uintptr buf[16], t;
    81  	int32 r, cnt, i;
    82  
    83  	cnt = 0;
    84  	r = runtime·sched_getaffinity(0, sizeof(buf), buf);
    85  	if(r > 0)
    86  	for(i = 0; i < r/sizeof(buf[0]); i++) {
    87  		t = buf[i];
    88  		t = t - ((t >> 1) & 0x5555555555555555ULL);
    89  		t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL);
    90  		cnt += (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
    91  	}
    92  
    93  	return cnt ? cnt : 1;
    94  }
    95  
// Clone, the Linux rfork.
// Flag bits for the first argument of runtime·clone.  In this file only
// CLONE_VM, CLONE_FS, CLONE_FILES, CLONE_SIGHAND and CLONE_THREAD are
// used (by runtime·newosproc); the others are defined but unused here.
enum
{
	CLONE_VM = 0x100,
	CLONE_FS = 0x200,
	CLONE_FILES = 0x400,
	CLONE_SIGHAND = 0x800,
	CLONE_PTRACE = 0x2000,
	CLONE_VFORK = 0x4000,
	CLONE_PARENT = 0x8000,
	CLONE_THREAD = 0x10000,
	CLONE_NEWNS = 0x20000,
	CLONE_SYSVSEM = 0x40000,
	CLONE_SETTLS = 0x80000,
	CLONE_PARENT_SETTID = 0x100000,
	CLONE_CHILD_CLEARTID = 0x200000,
	CLONE_UNTRACED = 0x800000,
	CLONE_CHILD_SETTID = 0x1000000,
	CLONE_STOPPED = 0x2000000,
	CLONE_NEWUTS = 0x4000000,
	CLONE_NEWIPC = 0x8000000,
};
   118  
   119  void
   120  runtime·newosproc(M *mp, void *stk)
   121  {
   122  	int32 ret;
   123  	int32 flags;
   124  	Sigset oset;
   125  
   126  	/*
   127  	 * note: strace gets confused if we use CLONE_PTRACE here.
   128  	 */
   129  	flags = CLONE_VM	/* share memory */
   130  		| CLONE_FS	/* share cwd, etc */
   131  		| CLONE_FILES	/* share fd table */
   132  		| CLONE_SIGHAND	/* share sig handler table */
   133  		| CLONE_THREAD	/* revisit - okay for now */
   134  		;
   135  
   136  	mp->tls[0] = mp->id;	// so 386 asm can find it
   137  	if(0){
   138  		runtime·printf("newosproc stk=%p m=%p g=%p clone=%p id=%d/%d ostk=%p\n",
   139  			stk, mp, mp->g0, runtime·clone, mp->id, (int32)mp->tls[0], &mp);
   140  	}
   141  
   142  	// Disable signals during clone, so that the new thread starts
   143  	// with signals disabled.  It will enable them in minit.
   144  	runtime·rtsigprocmask(SIG_SETMASK, &sigset_all, &oset, sizeof oset);
   145  	ret = runtime·clone(flags, stk, mp, mp->g0, runtime·mstart);
   146  	runtime·rtsigprocmask(SIG_SETMASK, &oset, nil, sizeof oset);
   147  
   148  	if(ret < 0) {
   149  		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret);
   150  		runtime·throw("runtime.newosproc");
   151  	}
   152  }
   153  
// OS-specific initialization: records in runtime·ncpu the processor
// count computed by getproccount (always at least 1).
void
runtime·osinit(void)
{
	runtime·ncpu = getproccount();
}
   159  
// Random bytes initialized at startup.  These come
// from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c).
// When the pointer is non-nil, runtime·get_random_data returns this
// buffer and length instead of reading /dev/urandom.
byte*	runtime·startup_random_data;
uint32	runtime·startup_random_data_len;
   164  
   165  void
   166  runtime·get_random_data(byte **rnd, int32 *rnd_len)
   167  {
   168  	if(runtime·startup_random_data != nil) {
   169  		*rnd = runtime·startup_random_data;
   170  		*rnd_len = runtime·startup_random_data_len;
   171  	} else {
   172  		#pragma dataflag NOPTR
   173  		static byte urandom_data[HashRandomBytes];
   174  		int32 fd;
   175  		fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
   176  		if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
   177  			*rnd = urandom_data;
   178  			*rnd_len = HashRandomBytes;
   179  		} else {
   180  			*rnd = nil;
   181  			*rnd_len = 0;
   182  		}
   183  		runtime·close(fd);
   184  	}
   185  }
   186  
// Environment setup for Linux: delegates entirely to the shared
// runtime·goenvs_unix implementation.
void
runtime·goenvs(void)
{
	runtime·goenvs_unix();
}
   192  
// Called to initialize a new m (including the bootstrap m).
// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
// Allocates mp->gsignal with a 32 KB stack via runtime·malg; runtime·minit
// later registers a signal stack of the same size (32*1024) from
// m->gsignal, so the two constants must stay in sync.
void
runtime·mpreinit(M *mp)
{
	mp->gsignal = runtime·malg(32*1024);	// OS X wants >=8K, Linux >=2K
}
   200  
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
// Registers the gsignal stack as the signal stack and then installs the
// empty signal mask (sigset_none) on this thread.
void
runtime·minit(void)
{
	// Initialize signal handling.
	// stackguard - StackGuard is presumably the low end of the gsignal
	// stack (TODO confirm against stack.h); 32*1024 matches the size
	// passed to runtime·malg in runtime·mpreinit.
	runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024);
	runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof(Sigset));
}
   210  
// Called from dropm to undo the effect of an minit.
// Passing a nil pointer makes runtime·signalstack submit the stack
// description with SS_DISABLE set.
void
runtime·unminit(void)
{
	runtime·signalstack(nil, 0);
}
   217  
   218  void
   219  runtime·sigpanic(void)
   220  {
   221  	switch(g->sig) {
   222  	case SIGBUS:
   223  		if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000) {
   224  			if(g->sigpc == 0)
   225  				runtime·panicstring("call of nil func value");
   226  			runtime·panicstring("invalid memory address or nil pointer dereference");
   227  		}
   228  		runtime·printf("unexpected fault address %p\n", g->sigcode1);
   229  		runtime·throw("fault");
   230  	case SIGSEGV:
   231  		if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000) {
   232  			if(g->sigpc == 0)
   233  				runtime·panicstring("call of nil func value");
   234  			runtime·panicstring("invalid memory address or nil pointer dereference");
   235  		}
   236  		runtime·printf("unexpected fault address %p\n", g->sigcode1);
   237  		runtime·throw("fault");
   238  	case SIGFPE:
   239  		switch(g->sigcode0) {
   240  		case FPE_INTDIV:
   241  			runtime·panicstring("integer divide by zero");
   242  		case FPE_INTOVF:
   243  			runtime·panicstring("integer overflow");
   244  		}
   245  		runtime·panicstring("floating point error");
   246  	}
   247  	runtime·panicstring(runtime·sigtab[g->sig].name);
   248  }
   249  
   250  uintptr
   251  runtime·memlimit(void)
   252  {
   253  	Rlimit rl;
   254  	extern byte text[], end[];
   255  	uintptr used;
   256  
   257  	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
   258  		return 0;
   259  	if(rl.rlim_cur >= 0x7fffffff)
   260  		return 0;
   261  
   262  	// Estimate our VM footprint excluding the heap.
   263  	// Not an exact science: use size of binary plus
   264  	// some room for thread stacks.
   265  	used = end - text + (64<<20);
   266  	if(used >= rl.rlim_cur)
   267  		return 0;
   268  
   269  	// If there's not at least 16 MB left, we're probably
   270  	// not going to be able to do much.  Treat as no limit.
   271  	rl.rlim_cur -= used;
   272  	if(rl.rlim_cur < (16<<20))
   273  		return 0;
   274  
   275  	return rl.rlim_cur - used;
   276  }
   277  
// On 386, references to sa_handler are redirected to k_sa_handler,
// presumably because the 386 Sigaction definition names the field
// that way (confirm in the 386 defs header).
#ifdef GOARCH_386
#define sa_handler k_sa_handler
#endif

/*
 * This assembler routine takes the args from registers, puts them on the stack,
 * and calls sighandler().
 * runtime·setsig installs it in place of runtime·sighandler, and
 * runtime·getsig maps it back.
 */
extern void runtime·sigtramp(void);
// Installed as sa_restorer by runtime·setsig; presumably issues the
// rt_sigreturn system call (confirm in the assembly).
extern void runtime·sigreturn(void);
   288  
   289  void
   290  runtime·setsig(int32 i, GoSighandler *fn, bool restart)
   291  {
   292  	Sigaction sa;
   293  
   294  	runtime·memclr((byte*)&sa, sizeof sa);
   295  	sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER;
   296  	if(restart)
   297  		sa.sa_flags |= SA_RESTART;
   298  	sa.sa_mask = ~0ULL;
   299  	// TODO(adonovan): Linux manpage says "sa_restorer element is
   300  	// obsolete and should not be used".  Avoid it here, and test.
   301  	sa.sa_restorer = (void*)runtime·sigreturn;
   302  	if(fn == runtime·sighandler)
   303  		fn = (void*)runtime·sigtramp;
   304  	sa.sa_handler = fn;
   305  	if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0)
   306  		runtime·throw("rt_sigaction failure");
   307  }
   308  
   309  GoSighandler*
   310  runtime·getsig(int32 i)
   311  {
   312  	Sigaction sa;
   313  
   314  	runtime·memclr((byte*)&sa, sizeof sa);
   315  	if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0)
   316  		runtime·throw("rt_sigaction read failure");
   317  	if((void*)sa.sa_handler == runtime·sigtramp)
   318  		return runtime·sighandler;
   319  	return (void*)sa.sa_handler;
   320  }
   321  
   322  void
   323  runtime·signalstack(byte *p, int32 n)
   324  {
   325  	Sigaltstack st;
   326  
   327  	st.ss_sp = p;
   328  	st.ss_size = n;
   329  	st.ss_flags = 0;
   330  	if(p == nil)
   331  		st.ss_flags = SS_DISABLE;
   332  	runtime·sigaltstack(&st, nil);
   333  }