github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/os_linux.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "runtime.h" 6 #include "defs_GOOS_GOARCH.h" 7 #include "os_GOOS.h" 8 #include "signal_unix.h" 9 #include "stack.h" 10 #include "../../cmd/ld/textflag.h" 11 12 extern SigTab runtime·sigtab[]; 13 14 static Sigset sigset_none; 15 static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 }; 16 17 // Linux futex. 18 // 19 // futexsleep(uint32 *addr, uint32 val) 20 // futexwakeup(uint32 *addr) 21 // 22 // Futexsleep atomically checks if *addr == val and if so, sleeps on addr. 23 // Futexwakeup wakes up threads sleeping on addr. 24 // Futexsleep is allowed to wake up spuriously. 25 26 enum 27 { 28 FUTEX_WAIT = 0, 29 FUTEX_WAKE = 1, 30 }; 31 32 // Atomically, 33 // if(*addr == val) sleep 34 // Might be woken up spuriously; that's allowed. 35 // Don't sleep longer than ns; ns < 0 means forever. 36 #pragma textflag NOSPLIT 37 void 38 runtime·futexsleep(uint32 *addr, uint32 val, int64 ns) 39 { 40 Timespec ts; 41 42 // Some Linux kernels have a bug where futex of 43 // FUTEX_WAIT returns an internal error code 44 // as an errno. Libpthread ignores the return value 45 // here, and so can we: as it says a few lines up, 46 // spurious wakeups are allowed. 47 48 if(ns < 0) { 49 runtime·futex(addr, FUTEX_WAIT, val, nil, nil, 0); 50 return; 51 } 52 // NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system. 53 ts.tv_nsec = 0; 54 ts.tv_sec = runtime·timediv(ns, 1000000000LL, (int32*)&ts.tv_nsec); 55 runtime·futex(addr, FUTEX_WAIT, val, &ts, nil, 0); 56 } 57 58 // If any procs are sleeping on addr, wake up at most cnt. 59 void 60 runtime·futexwakeup(uint32 *addr, uint32 cnt) 61 { 62 int64 ret; 63 64 ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0); 65 66 if(ret >= 0) 67 return; 68 69 // I don't know that futex wakeup can return 70 // EAGAIN or EINTR, but if it does, it would be 71 // safe to loop and call futex again. 72 runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret); 73 *(int32*)0x1006 = 0x1006; 74 } 75 76 extern runtime·sched_getaffinity(uintptr pid, uintptr len, uintptr *buf); 77 static int32 78 getproccount(void) 79 { 80 uintptr buf[16], t; 81 int32 r, cnt, i; 82 83 cnt = 0; 84 r = runtime·sched_getaffinity(0, sizeof(buf), buf); 85 if(r > 0) 86 for(i = 0; i < r/sizeof(buf[0]); i++) { 87 t = buf[i]; 88 t = t - ((t >> 1) & 0x5555555555555555ULL); 89 t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL); 90 cnt += (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); 91 } 92 93 return cnt ? cnt : 1; 94 } 95 96 // Clone, the Linux rfork. 97 enum 98 { 99 CLONE_VM = 0x100, 100 CLONE_FS = 0x200, 101 CLONE_FILES = 0x400, 102 CLONE_SIGHAND = 0x800, 103 CLONE_PTRACE = 0x2000, 104 CLONE_VFORK = 0x4000, 105 CLONE_PARENT = 0x8000, 106 CLONE_THREAD = 0x10000, 107 CLONE_NEWNS = 0x20000, 108 CLONE_SYSVSEM = 0x40000, 109 CLONE_SETTLS = 0x80000, 110 CLONE_PARENT_SETTID = 0x100000, 111 CLONE_CHILD_CLEARTID = 0x200000, 112 CLONE_UNTRACED = 0x800000, 113 CLONE_CHILD_SETTID = 0x1000000, 114 CLONE_STOPPED = 0x2000000, 115 CLONE_NEWUTS = 0x4000000, 116 CLONE_NEWIPC = 0x8000000, 117 }; 118 119 void 120 runtime·newosproc(M *mp, void *stk) 121 { 122 int32 ret; 123 int32 flags; 124 Sigset oset; 125 126 /* 127 * note: strace gets confused if we use CLONE_PTRACE here. 128 */ 129 flags = CLONE_VM /* share memory */ 130 | CLONE_FS /* share cwd, etc */ 131 | CLONE_FILES /* share fd table */ 132 | CLONE_SIGHAND /* share sig handler table */ 133 | CLONE_THREAD /* revisit - okay for now */ 134 ; 135 136 mp->tls[0] = mp->id; // so 386 asm can find it 137 if(0){ 138 runtime·printf("newosproc stk=%p m=%p g=%p clone=%p id=%d/%d ostk=%p\n", 139 stk, mp, mp->g0, runtime·clone, mp->id, (int32)mp->tls[0], &mp); 140 } 141 142 // Disable signals during clone, so that the new thread starts 143 // with signals disabled. It will enable them in minit. 144 runtime·rtsigprocmask(SIG_SETMASK, &sigset_all, &oset, sizeof oset); 145 ret = runtime·clone(flags, stk, mp, mp->g0, runtime·mstart); 146 runtime·rtsigprocmask(SIG_SETMASK, &oset, nil, sizeof oset); 147 148 if(ret < 0) { 149 runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret); 150 runtime·throw("runtime.newosproc"); 151 } 152 } 153 154 void 155 runtime·osinit(void) 156 { 157 runtime·ncpu = getproccount(); 158 } 159 160 // Random bytes initialized at startup. These come 161 // from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c). 162 byte* runtime·startup_random_data; 163 uint32 runtime·startup_random_data_len; 164 165 void 166 runtime·get_random_data(byte **rnd, int32 *rnd_len) 167 { 168 if(runtime·startup_random_data != nil) { 169 *rnd = runtime·startup_random_data; 170 *rnd_len = runtime·startup_random_data_len; 171 } else { 172 #pragma dataflag NOPTR 173 static byte urandom_data[HashRandomBytes]; 174 int32 fd; 175 fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0); 176 if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) { 177 *rnd = urandom_data; 178 *rnd_len = HashRandomBytes; 179 } else { 180 *rnd = nil; 181 *rnd_len = 0; 182 } 183 runtime·close(fd); 184 } 185 } 186 187 void 188 runtime·goenvs(void) 189 { 190 runtime·goenvs_unix(); 191 } 192 193 // Called to initialize a new m (including the bootstrap m). 194 // Called on the parent thread (main thread in case of bootstrap), can allocate memory. 195 void 196 runtime·mpreinit(M *mp) 197 { 198 mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K 199 } 200 201 // Called to initialize a new m (including the bootstrap m). 202 // Called on the new thread, can not allocate memory. 203 void 204 runtime·minit(void) 205 { 206 // Initialize signal handling. 207 runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024); 208 runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof(Sigset)); 209 } 210 211 // Called from dropm to undo the effect of an minit. 212 void 213 runtime·unminit(void) 214 { 215 runtime·signalstack(nil, 0); 216 } 217 218 void 219 runtime·sigpanic(void) 220 { 221 switch(g->sig) { 222 case SIGBUS: 223 if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000) { 224 if(g->sigpc == 0) 225 runtime·panicstring("call of nil func value"); 226 runtime·panicstring("invalid memory address or nil pointer dereference"); 227 } 228 runtime·printf("unexpected fault address %p\n", g->sigcode1); 229 runtime·throw("fault"); 230 case SIGSEGV: 231 if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000) { 232 if(g->sigpc == 0) 233 runtime·panicstring("call of nil func value"); 234 runtime·panicstring("invalid memory address or nil pointer dereference"); 235 } 236 runtime·printf("unexpected fault address %p\n", g->sigcode1); 237 runtime·throw("fault"); 238 case SIGFPE: 239 switch(g->sigcode0) { 240 case FPE_INTDIV: 241 runtime·panicstring("integer divide by zero"); 242 case FPE_INTOVF: 243 runtime·panicstring("integer overflow"); 244 } 245 runtime·panicstring("floating point error"); 246 } 247 runtime·panicstring(runtime·sigtab[g->sig].name); 248 } 249 250 uintptr 251 runtime·memlimit(void) 252 { 253 Rlimit rl; 254 extern byte text[], end[]; 255 uintptr used; 256 257 if(runtime·getrlimit(RLIMIT_AS, &rl) != 0) 258 return 0; 259 if(rl.rlim_cur >= 0x7fffffff) 260 return 0; 261 262 // Estimate our VM footprint excluding the heap. 263 // Not an exact science: use size of binary plus 264 // some room for thread stacks. 265 used = end - text + (64<<20); 266 if(used >= rl.rlim_cur) 267 return 0; 268 269 // If there's not at least 16 MB left, we're probably 270 // not going to be able to do much. Treat as no limit. 271 rl.rlim_cur -= used; 272 if(rl.rlim_cur < (16<<20)) 273 return 0; 274 275 return rl.rlim_cur - used; 276 } 277 278 #ifdef GOARCH_386 279 #define sa_handler k_sa_handler 280 #endif 281 282 /* 283 * This assembler routine takes the args from registers, puts them on the stack, 284 * and calls sighandler(). 285 */ 286 extern void runtime·sigtramp(void); 287 extern void runtime·sigreturn(void); // calls runtime·sigreturn 288 289 void 290 runtime·setsig(int32 i, GoSighandler *fn, bool restart) 291 { 292 Sigaction sa; 293 294 runtime·memclr((byte*)&sa, sizeof sa); 295 sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; 296 if(restart) 297 sa.sa_flags |= SA_RESTART; 298 sa.sa_mask = ~0ULL; 299 // TODO(adonovan): Linux manpage says "sa_restorer element is 300 // obsolete and should not be used". Avoid it here, and test. 301 sa.sa_restorer = (void*)runtime·sigreturn; 302 if(fn == runtime·sighandler) 303 fn = (void*)runtime·sigtramp; 304 sa.sa_handler = fn; 305 if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0) 306 runtime·throw("rt_sigaction failure"); 307 } 308 309 GoSighandler* 310 runtime·getsig(int32 i) 311 { 312 Sigaction sa; 313 314 runtime·memclr((byte*)&sa, sizeof sa); 315 if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0) 316 runtime·throw("rt_sigaction read failure"); 317 if((void*)sa.sa_handler == runtime·sigtramp) 318 return runtime·sighandler; 319 return (void*)sa.sa_handler; 320 } 321 322 void 323 runtime·signalstack(byte *p, int32 n) 324 { 325 Sigaltstack st; 326 327 st.ss_sp = p; 328 st.ss_size = n; 329 st.ss_flags = 0; 330 if(p == nil) 331 st.ss_flags = SS_DISABLE; 332 runtime·sigaltstack(&st, nil); 333 }