/*
 * Copyright (c) 2016--2021  Wu, Xingbo <wuxb45@gmail.com>
 *
 * All rights reserved. No warranty, explicit or implicit, provided.
 */
#pragma once

// includes {{{
// C headers
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

// POSIX headers
#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>

// Linux headers
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>

// SIMD
#if defined(__x86_64__)
#include <x86intrin.h>
#elif defined(__aarch64__)
#include <arm_acle.h>
#include <arm_neon.h>
#endif
// }}} includes

#ifdef __cplusplus
extern "C" {
#endif

// types {{{
// Short aliases for fixed-width integers. Each alias is checked with a
// static_assert, so a build on an ABI with different type sizes fails at
// compile time instead of silently misbehaving.
//
// s8 is spelled 'signed char' on purpose: the signedness of plain 'char' is
// implementation-defined, and it is unsigned under the AArch64 ABI, which this
// header supports.
typedef signed char s8;
typedef short s16;
typedef int s32;
typedef long s64;
typedef __int128_t s128;
static_assert(sizeof(s8) == 1, "sizeof(s8)");
static_assert((s8)-1 < 0, "s8 must be signed");
static_assert(sizeof(s16) == 2, "sizeof(s16)");
static_assert(sizeof(s32) == 4, "sizeof(s32)");
static_assert(sizeof(s64) == 8, "sizeof(s64)");
static_assert(sizeof(s128) == 16, "sizeof(s128)");

typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long u64;
typedef __uint128_t u128;
static_assert(sizeof(u8) == 1, "sizeof(u8)");
static_assert(sizeof(u16) == 2, "sizeof(u16)");
static_assert(sizeof(u32) == 4, "sizeof(u32)");
static_assert(sizeof(u64) == 8, "sizeof(u64)");
static_assert(sizeof(u128) == 16, "sizeof(u128)");

// SIMD vector aliases. m256 and m512 exist only when the compiler targets
// AVX2 / AVX-512F; on AArch64 only m128 is defined. Any other architecture is
// rejected at compile time.
#if defined(__x86_64__)
typedef __m128i m128;
#if defined(__AVX2__)
typedef __m256i m256;
#endif // __AVX2__
#if defined(__AVX512F__)
typedef __m512i m512;
#endif // __AVX512F__
#elif defined(__aarch64__)
typedef uint8x16_t m128;
#else
#error Need x86_64 or AArch64.
#endif
// }}} types

// defs {{{
// Branch-prediction hints. The operand is normalized with !! because
// __builtin_expect() takes a long: a raw pointer operand would need an
// implicit pointer-to-integer conversion, and an operand wider than long
// could be truncated to 0. The macros therefore evaluate to 0 or 1.
#define likely(____x____)   __builtin_expect(!!(____x____), 1)
#define unlikely(____x____) __builtin_expect(!!(____x____), 0)

// ansi colors
// 3X:fg; 4X:bg; 9X:light fg; 10X:light bg;
// X can be one of the following colors:
// 0:black;   1:red;     2:green;  3:yellow;
// 4:blue;    5:magenta; 6:cyan;   7:white;
// TERMCLR(code) builds a string literal, e.g. TERMCLR(31) is "\x1b[31m".
#define TERMCLR(____code____) "\x1b[" #____code____ "m"
// }}} defs

// const {{{
// 4096 as an unsigned long. NOTE(review): presumably the base page size.
#define PGSZ ((4096lu))
// }}} const

// math {{{
// NOTE(review): only prototypes are visible in this header; by their names
// these are integer hash functions and a 64-bit GCD -- confirm in the .c file.
extern u64 mhash64(const u64 v);
extern u32 mhash32(const u32 v);
extern u64 gcd64(u64 a, u64 b);
// }}} math

// random {{{
extern u64 random_u64(void);
extern void srandom_u64(const u64 seed);
extern double random_double(void);
// }}} random

// timing {{{
// Two parallel families: u64 nanoseconds and double seconds.
extern u64 time_nsec(void);
extern double time_sec(void);
extern u64 time_diff_nsec(const u64 last);
extern double time_diff_sec(const double last);

// NOTE(review): presumably these format a timestamp into str, whose capacity
// is size bytes; the difference between the two is not visible here.
extern void time_stamp(char * str, const size_t size);
extern void time_stamp2(char * str, const size_t size);
// }}} timing

// cpucache {{{
extern void cpu_pause(void);
extern void cpu_mfence(void);
extern void cpu_cfence(void);

extern void cpu_prefetch0(const void * const ptr);
extern void cpu_prefetch1(const void * const ptr);
extern void cpu_prefetch2(const void * const ptr);
extern void cpu_prefetch3(const void * const ptr);
extern void cpu_prefetchw(const void * const ptr);
// }}} cpucache

// crc32c {{{
// Each call takes the running crc and one value of the given width, and
// returns a u32.
extern u32 crc32c_u8(const u32 crc, const u8 v);
extern u32 crc32c_u16(const u32 crc, const u16 v);
extern u32 crc32c_u32(const u32 crc, const u32 v);
extern u32 crc32c_u64(const u32 crc, const u64 v);

// 1 <= nr <= 3
extern u32 181 crc32c_inc_123(const u8 * buf, u32 nr, u32 crc); 182 183 // nr % 4 == 0 184 extern u32 185 crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc); 186 187 extern u32 188 crc32c_inc(const u8 * buf, u32 nr, u32 crc); 189 // }}} crc32c 190 191 // debug {{{ 192 extern void 193 debug_break(void); 194 195 extern void 196 debug_backtrace(void); 197 198 extern void 199 watch_u64_usr1(u64 * const ptr); 200 201 #ifndef NDEBUG 202 extern void 203 debug_assert(const bool v); 204 #else 205 #define debug_assert(expr) ((void)0) 206 #endif 207 208 __attribute__((noreturn)) 209 extern void 210 debug_die(void); 211 212 __attribute__((noreturn)) 213 extern void 214 debug_die_perror(void); 215 216 extern void 217 debug_dump_maps(FILE * const out); 218 219 extern bool 220 debug_perf_switch(void); 221 // }}} debug 222 223 // mm {{{ 224 #ifdef ALLOCFAIL 225 extern bool 226 alloc_fail(void); 227 #endif 228 229 extern void * 230 xalloc(const size_t align, const size_t size); 231 232 extern void * 233 yalloc(const size_t size); 234 235 extern void ** 236 malloc_2d(const size_t nr, const size_t size); 237 238 extern void ** 239 calloc_2d(const size_t nr, const size_t size); 240 241 extern void 242 pages_unmap(void * const ptr, const size_t size); 243 244 extern void 245 pages_lock(void * const ptr, const size_t size); 246 247 /* hugepages */ 248 // force posix allocators: -DVALGRIND_MEMCHECK 249 extern void * 250 pages_alloc_4kb(const size_t nr_4kb); 251 252 extern void * 253 pages_alloc_2mb(const size_t nr_2mb); 254 255 extern void * 256 pages_alloc_1gb(const size_t nr_1gb); 257 258 extern void * 259 pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out); 260 // }}} mm 261 262 // process/thread {{{ 263 extern void 264 thread_get_name(const pthread_t pt, char * const name, const size_t len); 265 266 extern void 267 thread_set_name(const pthread_t pt, const char * const name); 268 269 extern long 270 process_get_rss(void); 271 272 extern u32 273 
process_affinity_count(void); 274 275 extern u32 276 process_getaffinity_list(const u32 max, u32 * const cores); 277 278 extern void 279 thread_setaffinity_list(const u32 nr, const u32 * const list); 280 281 extern void 282 thread_pin(const u32 cpu); 283 284 extern u64 285 process_cpu_time_usec(void); 286 287 // if args == true, argx is void ** 288 // if args == false, argx is void * 289 extern u64 290 thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx); 291 292 extern int 293 thread_create_at(const u32 cpu, pthread_t * const thread, void *(*start_routine) (void *), void * const arg); 294 // }}} process/thread 295 296 // locking {{{ 297 typedef union { 298 u32 opaque; 299 } spinlock; 300 301 extern void 302 spinlock_init(spinlock * const lock); 303 304 extern void 305 spinlock_lock(spinlock * const lock); 306 307 extern bool 308 spinlock_trylock(spinlock * const lock); 309 310 extern void 311 spinlock_unlock(spinlock * const lock); 312 313 typedef union { 314 u32 opaque; 315 } rwlock; 316 317 extern void 318 rwlock_init(rwlock * const lock); 319 320 extern bool 321 rwlock_trylock_read(rwlock * const lock); 322 323 // low-priority reader-lock; use with trylock_write_hp 324 extern bool 325 rwlock_trylock_read_lp(rwlock * const lock); 326 327 extern bool 328 rwlock_trylock_read_nr(rwlock * const lock, u16 nr); 329 330 extern void 331 rwlock_lock_read(rwlock * const lock); 332 333 extern void 334 rwlock_unlock_read(rwlock * const lock); 335 336 extern bool 337 rwlock_trylock_write(rwlock * const lock); 338 339 extern bool 340 rwlock_trylock_write_nr(rwlock * const lock, u16 nr); 341 342 extern void 343 rwlock_lock_write(rwlock * const lock); 344 345 // writer has higher priority; new readers are blocked 346 extern bool 347 rwlock_trylock_write_hp(rwlock * const lock); 348 349 extern bool 350 rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr); 351 352 extern void 353 rwlock_lock_write_hp(rwlock * const lock); 354 355 extern void 356 
rwlock_unlock_write(rwlock * const lock); 357 358 extern void 359 rwlock_write_to_read(rwlock * const lock); 360 361 typedef union { 362 u64 opqaue[8]; 363 } mutex; 364 365 extern void 366 mutex_init(mutex * const lock); 367 368 extern void 369 mutex_lock(mutex * const lock); 370 371 extern bool 372 mutex_trylock(mutex * const lock); 373 374 extern void 375 mutex_unlock(mutex * const lock); 376 377 extern void 378 mutex_deinit(mutex * const lock); 379 // }}} locking 380 381 // coroutine {{{ 382 extern u64 co_switch_stack(u64 * const saversp, const u64 newrsp, const u64 retval); 383 384 struct co; 385 386 extern struct co * 387 co_create(const u64 stacksize, void * func, void * priv, u64 * const host); 388 389 extern void 390 co_reuse(struct co * const co, void * func, void * priv, u64 * const host); 391 392 extern struct co * 393 co_fork(void * func, void * priv); 394 395 extern void * 396 co_priv(void); 397 398 extern u64 399 co_enter(struct co * const to, const u64 retval); 400 401 extern u64 402 co_switch_to(struct co * const to, const u64 retval); 403 404 extern u64 405 co_back(const u64 retval); 406 407 extern void 408 co_exit(const u64 retval); 409 410 extern bool 411 co_valid(struct co * const co); 412 413 extern struct co * 414 co_self(void); 415 416 extern void 417 co_destroy(struct co * const co); 418 419 struct corr; 420 421 extern struct corr * 422 corr_create(const u64 stacksize, void * func, void * priv, u64 * const host); 423 424 extern struct corr * 425 corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev); 426 427 extern void 428 corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host); 429 430 extern void 431 corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev); 432 433 extern void 434 corr_enter(struct corr * const co); 435 436 extern void 437 corr_yield(void); 438 439 extern void 440 corr_exit(void); 441 442 extern void 443 corr_destroy(struct corr * const co); 
444 // }}} coroutine 445 446 // bits {{{ 447 extern u32 448 bits_reverse_u32(const u32 v); 449 450 extern u64 451 bits_reverse_u64(const u64 v); 452 453 extern u64 454 bits_rotl_u64(const u64 v, const u8 n); 455 456 extern u64 457 bits_rotr_u64(const u64 v, const u8 n); 458 459 extern u32 460 bits_rotl_u32(const u32 v, const u8 n); 461 462 extern u32 463 bits_rotr_u32(const u32 v, const u8 n); 464 465 extern u64 466 bits_p2_up_u64(const u64 v); 467 468 extern u32 469 bits_p2_up_u32(const u32 v); 470 471 extern u64 472 bits_p2_down_u64(const u64 v); 473 474 extern u32 475 bits_p2_down_u32(const u32 v); 476 477 extern u64 478 bits_round_up(const u64 v, const u8 power); 479 480 extern u64 481 bits_round_up_a(const u64 v, const u64 a); 482 483 extern u64 484 bits_round_down(const u64 v, const u8 power); 485 486 extern u64 487 bits_round_down_a(const u64 v, const u64 a); 488 // }}} bits 489 490 // vi128 {{{ 491 extern u32 492 vi128_estimate_u32(const u32 v); 493 494 extern u8 * 495 vi128_encode_u32(u8 * dst, u32 v); 496 497 extern const u8 * 498 vi128_decode_u32(const u8 * src, u32 * const out); 499 500 extern u32 501 vi128_estimate_u64(const u64 v); 502 503 extern u8 * 504 vi128_encode_u64(u8 * dst, u64 v); 505 506 extern const u8 * 507 vi128_decode_u64(const u8 * src, u64 * const out); 508 // }}} vi128 509 510 // misc {{{ 511 // TODO: only works on little endian? 
512 struct entry13 { // what a beautiful name 513 union { 514 u16 e1; 515 struct { // easy for debugging 516 u64 e1_64:16; 517 u64 e3:48; 518 }; 519 u64 v64; 520 void * ptr; 521 }; 522 }; 523 524 static_assert(sizeof(struct entry13) == 8, "sizeof(entry13) != 8"); 525 526 // directly access read .e1 and .e3 527 // directly write .e1 528 // use entry13_update() to update the entire entry 529 530 extern struct entry13 531 entry13(const u16 e1, const u64 e3); 532 533 extern void 534 entry13_update_e3(struct entry13 * const e, const u64 e3); 535 536 extern void * 537 u64_to_ptr(const u64 v); 538 539 extern u64 540 ptr_to_u64(const void * const ptr); 541 542 extern size_t 543 m_usable_size(void * const ptr); 544 545 extern size_t 546 fdsize(const int fd); 547 548 extern u32 549 memlcp(const u8 * const p1, const u8 * const p2, const u32 max); 550 551 __attribute__ ((format (printf, 2, 3))) 552 extern void 553 logger_printf(const int fd, const char * const fmt, ...); 554 // }}} misc 555 556 // slab {{{ 557 struct slab; 558 559 extern struct slab * 560 slab_create(const u64 obj_size, const u64 blk_size); 561 562 extern bool 563 slab_reserve_unsafe(struct slab * const slab, const u64 nr); 564 565 extern void * 566 slab_alloc_unsafe(struct slab * const slab); 567 568 extern void * 569 slab_alloc_safe(struct slab * const slab); 570 571 extern void 572 slab_free_unsafe(struct slab * const slab, void * const ptr); 573 574 extern void 575 slab_free_safe(struct slab * const slab, void * const ptr); 576 577 extern void 578 slab_free_all(struct slab * const slab); 579 580 extern u64 581 slab_get_nalloc(struct slab * const slab); 582 583 extern void 584 slab_destroy(struct slab * const slab); 585 // }}} slab 586 587 // string {{{ 588 // XXX strdec_ and strhex_ functions does not append the trailing '\0' to the output string 589 // size of out should be >= 10 590 extern void 591 strdec_32(void * const out, const u32 v); 592 593 // size of out should be >= 20 594 extern void 595 
strdec_64(void * const out, const u64 v); 596 597 // size of out should be >= 8 598 extern void 599 strhex_32(void * const out, const u32 v); 600 601 // size of out should be >= 16 602 extern void 603 strhex_64(void * const out, const u64 v); 604 605 extern u64 606 a2u64(const void * const str); 607 608 extern u32 609 a2u32(const void * const str); 610 611 extern s64 612 a2s64(const void * const str); 613 614 extern s32 615 a2s32(const void * const str); 616 617 extern void 618 str_print_hex(FILE * const out, const void * const data, const u32 len); 619 620 extern void 621 str_print_dec(FILE * const out, const void * const data, const u32 len); 622 623 // user should free returned ptr (and nothing else) after use 624 extern char ** 625 strtoks(const char * const str, const char * const delim); 626 627 extern u32 628 strtoks_count(const char * const * const toks); 629 // }}} string 630 631 // qsbr {{{ 632 // QSBR vs EBR (Quiescent-State vs Epoch Based Reclaimation) 633 // QSBR: readers just use qsbr_update -> qsbr_update -> ... repeatedly 634 // EBR: readers use qsbr_update -> qsbr_park -> qsbr_resume -> qsbr_update -> ... 
635 // The advantage of EBR is qsbr_park can happen much earlier than the next qsbr_update 636 // The disadvantage is the extra cost, a pair of park/resume is used in every iteration 637 struct qsbr; 638 struct qsbr_ref { 639 #ifdef QSBR_DEBUG 640 u64 debug[16]; 641 #endif 642 u64 opaque[3]; 643 }; 644 645 extern struct qsbr * 646 qsbr_create(void); 647 648 // every READER accessing the shared data must first register itself with the qsbr 649 extern bool 650 qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref); 651 652 extern void 653 qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref); 654 655 // For READER: mark the beginning of critical section; like rcu_read_lock() 656 extern void 657 qsbr_update(struct qsbr_ref * const qref, const u64 v); 658 659 // temporarily stop access the shared data to avoid blocking writers 660 // READER can use qsbr_park (like rcu_read_unlock()) in conjunction with qsbr_update 661 // qsbr_park is roughly equivalent to qsbr_unregister, but faster 662 extern void 663 qsbr_park(struct qsbr_ref * const qref); 664 665 // undo the effect of qsbr_park; must use it between qsbr_park and qsbr_update 666 // qsbr_resume is roughly equivalent to qsbr_register, but faster 667 extern void 668 qsbr_resume(struct qsbr_ref * const qref); 669 670 // WRITER: wait until all the readers have announced v=target with qsbr_update 671 extern void 672 qsbr_wait(struct qsbr * const q, const u64 target); 673 674 extern void 675 qsbr_destroy(struct qsbr * const q); 676 // }}} qsbr 677 678 #ifdef __cplusplus 679 } 680 #endif 681 // vim:fdm=marker