github.com/moontrade/unsafe@v0.9.1/memory/rpmalloc/rpmalloc.c (about) 1 /* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias Jansson 2 * 3 * This library provides a cross-platform lock free thread caching malloc implementation in C11. 4 * The latest source code is always available at 5 * 6 * https://github.com/mjansson/rpmalloc 7 * 8 * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. 9 * 10 */ 11 12 #include "rpmalloc.h" 13 14 //////////// 15 /// 16 /// Build time configurable limits 17 /// 18 ////// 19 20 #if defined(__clang__) 21 #pragma clang diagnostic ignored "-Wunused-macros" 22 #pragma clang diagnostic ignored "-Wunused-function" 23 #if __has_warning("-Wreserved-identifier") 24 #pragma clang diagnostic ignored "-Wreserved-identifier" 25 #endif 26 #if __has_warning("-Wstatic-in-inline") 27 #pragma clang diagnostic ignored "-Wstatic-in-inline" 28 #endif 29 #elif defined(__GNUC__) 30 #pragma GCC diagnostic ignored "-Wunused-macros" 31 #pragma GCC diagnostic ignored "-Wunused-function" 32 #endif 33 34 #if !defined(__has_builtin) 35 #define __has_builtin(b) 0 36 #endif 37 38 #if defined(__GNUC__) || defined(__clang__) 39 40 #if __has_builtin(__builtin_memcpy_inline) 41 #define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s) 42 #else 43 #define _rpmalloc_memcpy_const(x, y, s) \ 44 do { \ 45 _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \ 46 memcpy(x, y, s); \ 47 } while (0) 48 #endif 49 50 #if __has_builtin(__builtin_memset_inline) 51 #define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s) 52 #else 53 #define _rpmalloc_memset_const(x, y, s) \ 54 do { \ 55 _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \ 56 memset(x, y, s); \ 57 } while (0) 58 #endif 59 #else 60 #define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s) 61 #define _rpmalloc_memset_const(x, y, s) memset(x, y, s) 62 #endif 63 64 #if __has_builtin(__builtin_assume) 65 #define rpmalloc_assume(cond) __builtin_assume(cond) 66 #elif defined(__GNUC__) 67 #define rpmalloc_assume(cond) \ 68 do { \ 69 if (!__builtin_expect(cond, 0)) \ 70 __builtin_unreachable(); \ 71 } while (0) 72 #elif defined(_MSC_VER) 73 #define rpmalloc_assume(cond) __assume(cond) 74 #else 75 #define rpmalloc_assume(cond) 0 76 #endif 77 78 #ifndef HEAP_ARRAY_SIZE 79 //! Size of heap hashmap 80 #define HEAP_ARRAY_SIZE 47 81 #endif 82 #ifndef ENABLE_THREAD_CACHE 83 //! Enable per-thread cache 84 #define ENABLE_THREAD_CACHE 1 85 #endif 86 #ifndef ENABLE_GLOBAL_CACHE 87 //! Enable global cache shared between all threads, requires thread cache 88 #define ENABLE_GLOBAL_CACHE 1 89 #endif 90 #ifndef ENABLE_VALIDATE_ARGS 91 //! Enable validation of args to public entry points 92 #define ENABLE_VALIDATE_ARGS 0 93 #endif 94 #ifndef ENABLE_STATISTICS 95 //! Enable statistics collection 96 #define ENABLE_STATISTICS 0 97 #endif 98 #ifndef ENABLE_ASSERTS 99 //! Enable asserts 100 #define ENABLE_ASSERTS 0 101 #endif 102 #ifndef ENABLE_OVERRIDE 103 //! Override standard library malloc/free and new/delete entry points 104 #define ENABLE_OVERRIDE 1 105 #endif 106 #ifndef ENABLE_PRELOAD 107 //! Support preloading 108 #define ENABLE_PRELOAD 1 109 #endif 110 #ifndef DISABLE_UNMAP 111 //! Disable unmapping memory pages (also enables unlimited cache) 112 #define DISABLE_UNMAP 0 113 #endif 114 #ifndef ENABLE_UNLIMITED_CACHE 115 //! 
Enable unlimited global cache (no unmapping until finalization) 116 #define ENABLE_UNLIMITED_CACHE 0 117 #endif 118 #ifndef ENABLE_ADAPTIVE_THREAD_CACHE 119 //! Enable adaptive thread cache size based on use heuristics 120 #define ENABLE_ADAPTIVE_THREAD_CACHE 0 121 #endif 122 #ifndef DEFAULT_SPAN_MAP_COUNT 123 //! Default number of spans to map in call to map more virtual memory (default values yield 4MiB here) 124 #define DEFAULT_SPAN_MAP_COUNT 64 125 #endif 126 #ifndef GLOBAL_CACHE_MULTIPLIER 127 //! Multiplier for global cache 128 #define GLOBAL_CACHE_MULTIPLIER 8 129 #endif 130 131 #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE 132 #error Must use global cache if unmap is disabled 133 #endif 134 135 #if DISABLE_UNMAP 136 #undef ENABLE_UNLIMITED_CACHE 137 #define ENABLE_UNLIMITED_CACHE 1 138 #endif 139 140 #if !ENABLE_GLOBAL_CACHE 141 #undef ENABLE_UNLIMITED_CACHE 142 #define ENABLE_UNLIMITED_CACHE 0 143 #endif 144 145 #if !ENABLE_THREAD_CACHE 146 #undef ENABLE_ADAPTIVE_THREAD_CACHE 147 #define ENABLE_ADAPTIVE_THREAD_CACHE 0 148 #endif 149 150 #if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) 151 # define PLATFORM_WINDOWS 1 152 # define PLATFORM_POSIX 0 153 #else 154 # define PLATFORM_WINDOWS 0 155 # define PLATFORM_POSIX 1 156 #endif 157 158 /// Platform and arch specifics 159 #if defined(_MSC_VER) && !defined(__clang__) 160 # pragma warning (disable: 5105) 161 # ifndef FORCEINLINE 162 # define FORCEINLINE inline __forceinline 163 # endif 164 # define _Static_assert static_assert 165 #else 166 # ifndef FORCEINLINE 167 # define FORCEINLINE inline __attribute__((__always_inline__)) 168 # endif 169 #endif 170 #if PLATFORM_WINDOWS 171 # ifndef WIN32_LEAN_AND_MEAN 172 # define WIN32_LEAN_AND_MEAN 173 # endif 174 # include <windows.h> 175 # if ENABLE_VALIDATE_ARGS 176 # include <intsafe.h> 177 # endif 178 #else 179 # include <unistd.h> 180 # include <stdio.h> 181 # include <stdlib.h> 182 # include <time.h> 183 # if defined(__linux__) || defined(__ANDROID__) 184 # include <sys/prctl.h> 185 # if !defined(PR_SET_VMA) 186 # define PR_SET_VMA 0x53564d41 187 # define PR_SET_VMA_ANON_NAME 0 188 # endif 189 # endif 190 # if defined(__APPLE__) 191 # include <TargetConditionals.h> 192 # if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 193 # include <mach/mach_vm.h> 194 # include <mach/vm_statistics.h> 195 # endif 196 # include <pthread.h> 197 # endif 198 # if defined(__HAIKU__) || defined(__TINYC__) 199 # include <pthread.h> 200 # endif 201 #endif 202 203 #include <stdint.h> 204 #include <string.h> 205 #include <errno.h> 206 207 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 208 #include <fibersapi.h> 209 static DWORD fls_key; 210 #endif 211 212 #if PLATFORM_POSIX 213 # include <sys/mman.h> 214 # include <sched.h> 215 # ifdef __FreeBSD__ 216 # include <sys/sysctl.h> 217 # define MAP_HUGETLB MAP_ALIGNED_SUPER 218 # ifndef PROT_MAX 219 # define PROT_MAX(f) 0 220 # endif 221 # else 222 # define PROT_MAX(f) 0 223 # endif 224 # ifdef __sun 225 extern int madvise(caddr_t, size_t, int); 226 # endif 227 # ifndef MAP_UNINITIALIZED 228 # define MAP_UNINITIALIZED 0 229 # endif 230 #endif 231 #include <errno.h> 232 233 #if ENABLE_ASSERTS 234 # undef NDEBUG 235 # if defined(_MSC_VER) && !defined(_DEBUG) 236 # define _DEBUG 237 # endif 238 # include <assert.h> 239 #define RPMALLOC_TOSTRING_M(x) #x 240 #define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) 241 #define rpmalloc_assert(truth, message) \ 242 do { \ 243 if (!(truth)) { \ 244 if (_memory_config.error_callback) { \ 245 
_memory_config.error_callback( \ 246 message " (" RPMALLOC_TOSTRING(truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ 247 } else { \ 248 assert((truth) && message); \ 249 } \ 250 } \ 251 } while (0) 252 #else 253 # define rpmalloc_assert(truth, message) do {} while(0) 254 #endif 255 #if ENABLE_STATISTICS 256 # include <stdio.h> 257 #endif 258 259 ////// 260 /// 261 /// Atomic access abstraction (since MSVC does not do C11 yet) 262 /// 263 ////// 264 265 #if defined(_MSC_VER) && !defined(__clang__) 266 267 typedef volatile long atomic32_t; 268 typedef volatile long long atomic64_t; 269 typedef volatile void* atomicptr_t; 270 271 static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return *src; } 272 static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { *dst = val; } 273 static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return (int32_t)InterlockedIncrement(val); } 274 static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return (int32_t)InterlockedDecrement(val); } 275 static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; } 276 static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; } 277 static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { *dst = val; } 278 static FORCEINLINE int64_t atomic_load64(atomic64_t* src) { return *src; } 279 static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; } 280 static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return (void*)*src; } 281 static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { *dst = val; } 282 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { *dst = val; } 283 static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return (void*)InterlockedExchangePointer((void* volatile*)dst, val); } 284 static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return (InterlockedCompareExchangePointer((void* volatile*)dst, val, ref) == ref) ? 
1 : 0; } 285 286 #define EXPECTED(x) (x) 287 #define UNEXPECTED(x) (x) 288 289 #else 290 291 #include <stdatomic.h> 292 293 typedef volatile _Atomic(int32_t) atomic32_t; 294 typedef volatile _Atomic(int64_t) atomic64_t; 295 typedef volatile _Atomic(void*) atomicptr_t; 296 297 static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return atomic_load_explicit(src, memory_order_relaxed); } 298 static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); } 299 static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; } 300 static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; } 301 static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; } 302 static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); } 303 static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_release); } 304 static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return atomic_load_explicit(val, memory_order_relaxed); } 305 static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; } 306 static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return atomic_load_explicit(src, memory_order_relaxed); } 307 static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_relaxed); } 308 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_release); } 309 static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return atomic_exchange_explicit(dst, val, memory_order_acquire); } 310 static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); } 311 312 #define EXPECTED(x) __builtin_expect((x), 1) 313 #define UNEXPECTED(x) __builtin_expect((x), 0) 314 315 #endif 316 317 //////////// 318 /// 319 /// Statistics related functions (evaluate to nothing when statistics not enabled) 320 /// 321 ////// 322 323 #if ENABLE_STATISTICS 324 # define _rpmalloc_stat_inc(counter) atomic_incr32(counter) 325 # define _rpmalloc_stat_dec(counter) atomic_decr32(counter) 326 # define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value)) 327 # define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value)) 328 # define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0) 329 # define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value)) 330 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \ 331 int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ 332 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ 333 heap->size_class_use[class_idx].alloc_peak = alloc_current; \ 334 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ 335 } while(0) 336 # define 
_rpmalloc_stat_inc_free(heap, class_idx) do { \ 337 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ 338 atomic_incr32(&heap->size_class_use[class_idx].free_total); \ 339 } while(0) 340 #else 341 # define _rpmalloc_stat_inc(counter) do {} while(0) 342 # define _rpmalloc_stat_dec(counter) do {} while(0) 343 # define _rpmalloc_stat_add(counter, value) do {} while(0) 344 # define _rpmalloc_stat_add64(counter, value) do {} while(0) 345 # define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0) 346 # define _rpmalloc_stat_sub(counter, value) do {} while(0) 347 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0) 348 # define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0) 349 #endif 350 351 352 /// 353 /// Preconfigured limits and sizes 354 /// 355 356 //! Granularity of a small allocation block (must be power of two) 357 #define SMALL_GRANULARITY 16 358 //! Small granularity shift count 359 #define SMALL_GRANULARITY_SHIFT 4 360 //! Number of small block size classes 361 #define SMALL_CLASS_COUNT 65 362 //! Maximum size of a small block 363 #define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) 364 //! Granularity of a medium allocation block 365 #define MEDIUM_GRANULARITY 512 366 //! Medium granularity shift count 367 #define MEDIUM_GRANULARITY_SHIFT 9 368 //! Number of medium block size classes 369 #define MEDIUM_CLASS_COUNT 61 370 //! Total number of small + medium size classes 371 #define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) 372 //! Number of large block size classes 373 #define LARGE_CLASS_COUNT 63 374 //! Maximum size of a medium block 375 #define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) 376 //! Maximum size of a large block 377 #define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) 378 //! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two) 379 #define SPAN_HEADER_SIZE 128 380 //! Number of spans in thread cache 381 #define MAX_THREAD_SPAN_CACHE 400 382 //! Number of spans to transfer between thread and global cache 383 #define THREAD_SPAN_CACHE_TRANSFER 64 384 //! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2) 385 #define MAX_THREAD_SPAN_LARGE_CACHE 100 386 //! Number of spans to transfer between thread and global cache for large spans 387 #define THREAD_SPAN_LARGE_CACHE_TRANSFER 6 388 389 _Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two"); 390 _Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two"); 391 392 #if ENABLE_VALIDATE_ARGS 393 //! Maximum allocation size to avoid integer overflow 394 #undef MAX_ALLOC_SIZE 395 #define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size) 396 #endif 397 398 #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs)) 399 #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second)) 400 401 #define INVALID_POINTER ((void*)((uintptr_t)-1)) 402 403 #define SIZE_CLASS_LARGE SIZE_CLASS_COUNT 404 #define SIZE_CLASS_HUGE ((uint32_t)-1) 405 406 //////////// 407 /// 408 /// Data types 409 /// 410 ////// 411 412 //! A memory heap, per thread 413 typedef struct heap_t heap_t; 414 //! Span of memory pages 415 typedef struct span_t span_t; 416 //! Span list 417 typedef struct span_list_t span_list_t; 418 //! Span active data 419 typedef struct span_active_t span_active_t; 420 //! 
Size class definition 421 typedef struct size_class_t size_class_t; 422 //! Global cache 423 typedef struct global_cache_t global_cache_t; 424 425 //! Flag indicating span is the first (master) span of a split superspan 426 #define SPAN_FLAG_MASTER 1U 427 //! Flag indicating span is a secondary (sub) span of a split superspan 428 #define SPAN_FLAG_SUBSPAN 2U 429 //! Flag indicating span has blocks with increased alignment 430 #define SPAN_FLAG_ALIGNED_BLOCKS 4U 431 //! Flag indicating an unmapped master span 432 #define SPAN_FLAG_UNMAPPED_MASTER 8U 433 434 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 435 struct span_use_t { 436 //! Current number of spans used (actually used, not in cache) 437 atomic32_t current; 438 //! High water mark of spans used 439 atomic32_t high; 440 #if ENABLE_STATISTICS 441 //! Number of spans in deferred list 442 atomic32_t spans_deferred; 443 //! Number of spans transitioned to global cache 444 atomic32_t spans_to_global; 445 //! Number of spans transitioned from global cache 446 atomic32_t spans_from_global; 447 //! Number of spans transitioned to thread cache 448 atomic32_t spans_to_cache; 449 //! Number of spans transitioned from thread cache 450 atomic32_t spans_from_cache; 451 //! Number of spans transitioned to reserved state 452 atomic32_t spans_to_reserved; 453 //! Number of spans transitioned from reserved state 454 atomic32_t spans_from_reserved; 455 //! Number of raw memory map calls 456 atomic32_t spans_map_calls; 457 #endif 458 }; 459 typedef struct span_use_t span_use_t; 460 #endif 461 462 #if ENABLE_STATISTICS 463 struct size_class_use_t { 464 //! Current number of allocations 465 atomic32_t alloc_current; 466 //! Peak number of allocations 467 int32_t alloc_peak; 468 //! Total number of allocations 469 atomic32_t alloc_total; 470 //! Total number of frees 471 atomic32_t free_total; 472 //! Number of spans in use 473 atomic32_t spans_current; 474 //! Peak number of spans in use 475 int32_t spans_peak; 476 //! Number of spans transitioned to cache 477 atomic32_t spans_to_cache; 478 //! Number of spans transitioned from cache 479 atomic32_t spans_from_cache; 480 //! Number of spans transitioned from reserved state 481 atomic32_t spans_from_reserved; 482 //! Number of spans mapped 483 atomic32_t spans_map_calls; 484 int32_t unused; 485 }; 486 typedef struct size_class_use_t size_class_use_t; 487 #endif 488 489 // A span can either represent a single span of memory pages with size declared by the span_map_count configuration variable, 490 // or a set of spans in a contiguous region, a super span. Any reference to the term "span" usually refers to either a single 491 // span or a super span. A super span can further be divided into multiple spans (or smaller super spans), where the first 492 // (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans 493 // are still alive and mapped in virtual memory, and once all subspans and the master have been unmapped the entire 494 // superspan region is released and unmapped (on Windows for example, the entire superspan range has to be released 495 // in the same call to release the virtual memory range, but individual subranges can be decommitted individually 496 // to reduce physical memory use). 497 struct span_t { 498 //! Free list 499 void* free_list; 500 //! Total block count of size class 501 uint32_t block_count; 502 //! Size class 503 uint32_t size_class; 504 //!
Index of last block initialized in free list 505 uint32_t free_list_limit; 506 //! Number of used blocks remaining when in partial state 507 uint32_t used_count; 508 //! Deferred free list 509 atomicptr_t free_list_deferred; 510 //! Size of deferred free list, or list of spans when part of a cache list 511 uint32_t list_size; 512 //! Size of a block 513 uint32_t block_size; 514 //! Flags and counters 515 uint32_t flags; 516 //! Number of spans 517 uint32_t span_count; 518 //! Total span counter for master spans 519 uint32_t total_spans; 520 //! Offset from master span for subspans 521 uint32_t offset_from_master; 522 //! Remaining span counter, for master spans 523 atomic32_t remaining_spans; 524 //! Alignment offset 525 uint32_t align_offset; 526 //! Owning heap 527 heap_t* heap; 528 //! Next span 529 span_t* next; 530 //! Previous span 531 span_t* prev; 532 }; 533 _Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch"); 534 535 struct span_cache_t { 536 size_t count; 537 span_t* span[MAX_THREAD_SPAN_CACHE]; 538 }; 539 typedef struct span_cache_t span_cache_t; 540 541 struct span_large_cache_t { 542 size_t count; 543 span_t* span[MAX_THREAD_SPAN_LARGE_CACHE]; 544 }; 545 typedef struct span_large_cache_t span_large_cache_t; 546 547 struct heap_size_class_t { 548 //! Free list of active span 549 void* free_list; 550 //! Double linked list of partially used spans with free blocks. 551 // Previous span pointer in head points to tail span of list. 552 span_t* partial_span; 553 //! Early level cache of fully free spans 554 span_t* cache; 555 }; 556 typedef struct heap_size_class_t heap_size_class_t; 557 558 // Control structure for a heap, either a thread heap or a first class heap if enabled 559 struct heap_t { 560 //! Owning thread ID 561 uintptr_t owner_thread; 562 //! Free lists for each size class 563 heap_size_class_t size_class[SIZE_CLASS_COUNT]; 564 #if ENABLE_THREAD_CACHE 565 //! Arrays of fully freed spans, single span 566 span_cache_t span_cache; 567 #endif 568 //! List of deferred free spans (single linked list) 569 atomicptr_t span_free_deferred; 570 //! Number of full spans 571 size_t full_span_count; 572 //! Mapped but unused spans 573 span_t* span_reserve; 574 //! Master span for mapped but unused spans 575 span_t* span_reserve_master; 576 //! Number of mapped but unused spans 577 uint32_t spans_reserved; 578 //! Child count 579 atomic32_t child_count; 580 //! Next heap in id list 581 heap_t* next_heap; 582 //! Next heap in orphan list 583 heap_t* next_orphan; 584 //! Heap ID 585 int32_t id; 586 //! Finalization state flag 587 int finalize; 588 //! Master heap owning the memory pages 589 heap_t* master_heap; 590 #if ENABLE_THREAD_CACHE 591 //! Arrays of fully freed spans, large spans with > 1 span count 592 span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; 593 #endif 594 #if RPMALLOC_FIRST_CLASS_HEAPS 595 //! Double linked list of fully utilized spans for each size class. 596 // Previous span pointer in head points to tail span of list. 597 span_t* full_span[SIZE_CLASS_COUNT]; 598 //! Double linked list of large and huge spans allocated by this heap 599 span_t* large_huge_span; 600 #endif 601 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 602 //! Current and high water mark of spans used per span count 603 span_use_t span_use[LARGE_CLASS_COUNT]; 604 #endif 605 #if ENABLE_STATISTICS 606 //! Allocation stats per size class 607 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; 608 //!
Number of bytes transitioned thread -> global 609 atomic64_t thread_to_global; 610 //! Number of bytes transitioned global -> thread 611 atomic64_t global_to_thread; 612 #endif 613 }; 614 615 // Size class for defining a block size bucket 616 struct size_class_t { 617 //! Size of blocks in this class 618 uint32_t block_size; 619 //! Number of blocks in each chunk 620 uint16_t block_count; 621 //! Class index this class is merged with 622 uint16_t class_idx; 623 }; 624 _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); 625 626 struct global_cache_t { 627 //! Cache lock 628 atomic32_t lock; 629 //! Cache count 630 uint32_t count; 631 #if ENABLE_STATISTICS 632 //! Insert count 633 size_t insert_count; 634 //! Extract count 635 size_t extract_count; 636 #endif 637 //! Cached spans 638 span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; 639 //! Unlimited cache overflow 640 span_t* overflow; 641 }; 642 643 //////////// 644 /// 645 /// Global data 646 /// 647 ////// 648 649 //! Default span size (64KiB) 650 #define _memory_default_span_size (64 * 1024) 651 #define _memory_default_span_size_shift 16 652 #define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) 653 654 //! Initialized flag 655 static int _rpmalloc_initialized; 656 //! Main thread ID 657 static uintptr_t _rpmalloc_main_thread_id; 658 //! Configuration 659 static rpmalloc_config_t _memory_config; 660 //! Memory page size 661 static size_t _memory_page_size; 662 //! Shift to divide by page size 663 static size_t _memory_page_size_shift; 664 //! Granularity at which memory pages are mapped by OS 665 static size_t _memory_map_granularity; 666 #if RPMALLOC_CONFIGURABLE 667 //! Size of a span of memory pages 668 static size_t _memory_span_size; 669 //! Shift to divide by span size 670 static size_t _memory_span_size_shift; 671 //! Mask to get to start of a memory span 672 static uintptr_t _memory_span_mask; 673 #else 674 //! Hardwired span size 675 #define _memory_span_size _memory_default_span_size 676 #define _memory_span_size_shift _memory_default_span_size_shift 677 #define _memory_span_mask _memory_default_span_mask 678 #endif 679 //! Number of spans to map in each map call 680 static size_t _memory_span_map_count; 681 //! Number of spans to keep reserved in each heap 682 static size_t _memory_heap_reserve_count; 683 //! Global size classes 684 static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; 685 //! Run-time size limit of medium blocks 686 static size_t _memory_medium_size_limit; 687 //! Heap ID counter 688 static atomic32_t _memory_heap_id; 689 //! Huge page support 690 static int _memory_huge_pages; 691 #if ENABLE_GLOBAL_CACHE 692 //! Global span cache 693 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; 694 #endif 695 //! Global reserved spans 696 static span_t* _memory_global_reserve; 697 //! Global reserved count 698 static size_t _memory_global_reserve_count; 699 //! Global reserved master 700 static span_t* _memory_global_reserve_master; 701 //! All heaps 702 static heap_t* _memory_heaps[HEAP_ARRAY_SIZE]; 703 //! Used to restrict access to mapping memory for huge pages 704 static atomic32_t _memory_global_lock; 705 //! Orphaned heaps 706 static heap_t* _memory_orphan_heaps; 707 #if RPMALLOC_FIRST_CLASS_HEAPS 708 //! Orphaned heaps (first class heaps) 709 static heap_t* _memory_first_class_orphan_heaps; 710 #endif 711 #if ENABLE_STATISTICS 712 //! Allocations counter 713 static atomic64_t _allocation_counter; 714 //! 
Deallocations counter 715 static atomic64_t _deallocation_counter; 716 //! Active heap count 717 static atomic32_t _memory_active_heaps; 718 //! Number of currently mapped memory pages 719 static atomic32_t _mapped_pages; 720 //! Peak number of concurrently mapped memory pages 721 static int32_t _mapped_pages_peak; 722 //! Number of mapped master spans 723 static atomic32_t _master_spans; 724 //! Number of unmapped dangling master spans 725 static atomic32_t _unmapped_master_spans; 726 //! Running counter of total number of mapped memory pages since start 727 static atomic32_t _mapped_total; 728 //! Running counter of total number of unmapped memory pages since start 729 static atomic32_t _unmapped_total; 730 //! Number of currently mapped memory pages in OS calls 731 static atomic32_t _mapped_pages_os; 732 //! Number of currently allocated pages in huge allocations 733 static atomic32_t _huge_pages_current; 734 //! Peak number of currently allocated pages in huge allocations 735 static int32_t _huge_pages_peak; 736 #endif 737 738 //////////// 739 /// 740 /// Thread local heap and ID 741 /// 742 ////// 743 744 //! Current thread heap 745 #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 746 static pthread_key_t _memory_thread_heap; 747 #else 748 # ifdef _MSC_VER 749 # define _Thread_local __declspec(thread) 750 # define TLS_MODEL 751 # else 752 # ifndef __HAIKU__ 753 # define TLS_MODEL __attribute__((tls_model("initial-exec"))) 754 # else 755 # define TLS_MODEL 756 # endif 757 # if !defined(__clang__) && defined(__GNUC__) 758 # define _Thread_local __thread 759 # endif 760 # endif 761 static _Thread_local heap_t* _memory_thread_heap TLS_MODEL; 762 #endif 763 764 static inline heap_t* 765 get_thread_heap_raw(void) { 766 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 767 return pthread_getspecific(_memory_thread_heap); 768 #else 769 return _memory_thread_heap; 770 #endif 771 } 772 773 //! Get the current thread heap 774 static inline heap_t* 775 get_thread_heap(void) { 776 heap_t* heap = get_thread_heap_raw(); 777 #if ENABLE_PRELOAD 778 if (EXPECTED(heap != 0)) 779 return heap; 780 rpmalloc_initialize(); 781 return get_thread_heap_raw(); 782 #else 783 return heap; 784 #endif 785 } 786 787 //! Fast thread ID 788 static inline uintptr_t 789 get_thread_id(void) { 790 #if defined(_WIN32) 791 return (uintptr_t)((void*)NtCurrentTeb()); 792 #elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) 793 uintptr_t tid; 794 # if defined(__i386__) 795 __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); 796 # elif defined(__x86_64__) 797 # if defined(__MACH__) 798 __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); 799 # else 800 __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); 801 # endif 802 # elif defined(__arm__) 803 __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); 804 # elif defined(__aarch64__) 805 # if defined(__MACH__) 806 // tpidr_el0 likely unused, always return 0 on iOS 807 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tid)); 808 # else 809 __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tid)); 810 # endif 811 # else 812 # error This platform needs implementation of get_thread_id() 813 # endif 814 return tid; 815 #else 816 # error This platform needs implementation of get_thread_id() 817 #endif 818 } 819 820 //! 
Set the current thread heap 821 static void 822 set_thread_heap(heap_t* heap) { 823 #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 824 pthread_setspecific(_memory_thread_heap, heap); 825 #else 826 _memory_thread_heap = heap; 827 #endif 828 if (heap) 829 heap->owner_thread = get_thread_id(); 830 } 831 832 //! Set main thread ID 833 extern void 834 rpmalloc_set_main_thread(void); 835 836 void 837 rpmalloc_set_main_thread(void) { 838 _rpmalloc_main_thread_id = get_thread_id(); 839 } 840 841 static void 842 _rpmalloc_spin(void) { 843 #if defined(_MSC_VER) 844 _mm_pause(); 845 #elif defined(__x86_64__) || defined(__i386__) 846 __asm__ volatile("pause" ::: "memory"); 847 #elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) 848 __asm__ volatile("yield" ::: "memory"); 849 #elif defined(__powerpc__) || defined(__powerpc64__) 850 // No idea if ever been compiled in such archs but ... as precaution 851 __asm__ volatile("or 27,27,27"); 852 #elif defined(__sparc__) 853 __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); 854 #else 855 struct timespec ts = {0}; 856 nanosleep(&ts, 0); 857 #endif 858 } 859 860 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 861 static void NTAPI 862 _rpmalloc_thread_destructor(void* value) { 863 #if ENABLE_OVERRIDE 864 // If this is called on main thread it means rpmalloc_finalize 865 // has not been called and shutdown is forced (through _exit) or unclean 866 if (get_thread_id() == _rpmalloc_main_thread_id) 867 return; 868 #endif 869 if (value) 870 rpmalloc_thread_finalize(1); 871 } 872 #endif 873 874 875 //////////// 876 /// 877 /// Low level memory map/unmap 878 /// 879 ////// 880 881 static void 882 _rpmalloc_set_name(void* address, size_t size) { 883 #if defined(__linux__) || defined(__ANDROID__) 884 const char *name = _memory_huge_pages ? _memory_config.huge_page_name : _memory_config.page_name; 885 if (address == MAP_FAILED || !name) 886 return; 887 // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails 888 // (e.g. invalid name) it is a no-op basically. 889 (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name); 890 #else 891 (void)sizeof(size); 892 (void)sizeof(address); 893 #endif 894 } 895 896 897 //! Map more virtual memory 898 // size is number of bytes to map 899 // offset receives the offset in bytes from start of mapped region 900 // returns address to start of mapped region to use 901 static void* 902 _rpmalloc_mmap(size_t size, size_t* offset) { 903 rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); 904 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 905 void* address = _memory_config.memory_map(size, offset); 906 if (EXPECTED(address != 0)) { 907 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak); 908 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); 909 } 910 return address; 911 } 912 913 //! 
Unmap virtual memory 914 // address is the memory address to unmap, as returned from _memory_map 915 // size is the number of bytes to unmap, which might be less than full region for a partial unmap 916 // offset is the offset in bytes to the actual mapped region, as set by _memory_map 917 // release is set to 0 for partial unmap, or size of entire range for a full unmap 918 static void 919 _rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) { 920 rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); 921 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 922 if (release) { 923 rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); 924 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); 925 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); 926 } 927 _memory_config.memory_unmap(address, size, offset, release); 928 } 929 930 //! Default implementation to map new pages to virtual memory 931 static void* 932 _rpmalloc_mmap_os(size_t size, size_t* offset) { 933 //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity 934 size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0; 935 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 936 #if PLATFORM_WINDOWS 937 //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed" 938 void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); 939 if (!ptr) { 940 if (_memory_config.map_fail_callback) { 941 if (_memory_config.map_fail_callback(size + padding)) 942 return _rpmalloc_mmap_os(size, offset); 943 } else { 944 rpmalloc_assert(ptr, "Failed to map virtual memory block"); 945 } 946 return 0; 947 } 948 #else 949 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; 950 # if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 951 int fd = (int)VM_MAKE_TAG(240U); 952 if (_memory_huge_pages) 953 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; 954 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); 955 # elif defined(MAP_HUGETLB) 956 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0); 957 # if defined(MADV_HUGEPAGE) 958 // In some configurations, huge pages allocations might fail thus 959 // we fallback to normal allocations and promote the region as transparent huge page 960 if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) { 961 ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 962 if (ptr && ptr != MAP_FAILED) { 963 int prm = madvise(ptr, size + padding, MADV_HUGEPAGE); 964 (void)prm; 965 rpmalloc_assert((prm == 0), "Failed to promote the page to THP"); 966 } 967 } 968 # endif 969 _rpmalloc_set_name(ptr, size + padding); 970 # elif defined(MAP_ALIGNED) 971 const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); 972 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); 973 # elif defined(MAP_ALIGN) 974 caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0); 975 void* ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? 
MAP_ALIGN : 0) | flags, -1, 0); 976 # else 977 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 978 # endif 979 if ((ptr == MAP_FAILED) || !ptr) { 980 if (_memory_config.map_fail_callback) { 981 if (_memory_config.map_fail_callback(size + padding)) 982 return _rpmalloc_mmap_os(size, offset); 983 } else if (errno != ENOMEM) { 984 rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block"); 985 } 986 return 0; 987 } 988 #endif 989 _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift)); 990 if (padding) { 991 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); 992 rpmalloc_assert(final_padding <= _memory_span_size, "Internal failure in padding"); 993 rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); 994 rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); 995 ptr = pointer_offset(ptr, final_padding); 996 *offset = final_padding >> 3; 997 } 998 rpmalloc_assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask), "Internal failure in padding"); 999 return ptr; 1000 } 1001 1002 //! Default implementation to unmap pages from virtual memory 1003 static void 1004 _rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) { 1005 rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); 1006 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 1007 rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); 1008 if (release && offset) { 1009 offset <<= 3; 1010 address = pointer_offset(address, -(int32_t)offset); 1011 if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) { 1012 //Padding is always one span size 1013 release += _memory_span_size; 1014 } 1015 } 1016 #if !DISABLE_UNMAP 1017 #if PLATFORM_WINDOWS 1018 if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) { 1019 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 1020 } 1021 #else 1022 if (release) { 1023 if (munmap(address, release)) { 1024 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 1025 } 1026 } else { 1027 #if defined(MADV_FREE_REUSABLE) 1028 int ret; 1029 while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN)) 1030 errno = 0; 1031 if ((ret == -1) && (errno != 0)) { 1032 #elif defined(MADV_DONTNEED) 1033 if (madvise(address, size, MADV_DONTNEED)) { 1034 #elif defined(MADV_PAGEOUT) 1035 if (madvise(address, size, MADV_PAGEOUT)) { 1036 #elif defined(MADV_FREE) 1037 if (madvise(address, size, MADV_FREE)) { 1038 #else 1039 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { 1040 #endif 1041 rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); 1042 } 1043 } 1044 #endif 1045 #endif 1046 if (release) 1047 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); 1048 } 1049 1050 static void 1051 _rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count); 1052 1053 //! 
Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller) 1054 static span_t* 1055 _rpmalloc_global_get_reserved_spans(size_t span_count) { 1056 span_t* span = _memory_global_reserve; 1057 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, span, span_count); 1058 _memory_global_reserve_count -= span_count; 1059 if (_memory_global_reserve_count) 1060 _memory_global_reserve = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 1061 else 1062 _memory_global_reserve = 0; 1063 return span; 1064 } 1065 1066 //! Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe) 1067 static void 1068 _rpmalloc_global_set_reserved_spans(span_t* master, span_t* reserve, size_t reserve_span_count) { 1069 _memory_global_reserve_master = master; 1070 _memory_global_reserve_count = reserve_span_count; 1071 _memory_global_reserve = reserve; 1072 } 1073 1074 1075 //////////// 1076 /// 1077 /// Span linked list management 1078 /// 1079 ////// 1080 1081 //! Add a span to double linked list at the head 1082 static void 1083 _rpmalloc_span_double_link_list_add(span_t** head, span_t* span) { 1084 if (*head) 1085 (*head)->prev = span; 1086 span->next = *head; 1087 *head = span; 1088 } 1089 1090 //! Pop head span from double linked list 1091 static void 1092 _rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) { 1093 rpmalloc_assert(*head == span, "Linked list corrupted"); 1094 span = *head; 1095 *head = span->next; 1096 } 1097 1098 //! Remove a span from double linked list 1099 static void 1100 _rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) { 1101 rpmalloc_assert(*head, "Linked list corrupted"); 1102 if (*head == span) { 1103 *head = span->next; 1104 } else { 1105 span_t* next_span = span->next; 1106 span_t* prev_span = span->prev; 1107 prev_span->next = next_span; 1108 if (EXPECTED(next_span != 0)) 1109 next_span->prev = prev_span; 1110 } 1111 } 1112 1113 1114 //////////// 1115 /// 1116 /// Span control 1117 /// 1118 ////// 1119 1120 static void 1121 _rpmalloc_heap_cache_insert(heap_t* heap, span_t* span); 1122 1123 static void 1124 _rpmalloc_heap_finalize(heap_t* heap); 1125 1126 static void 1127 _rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count); 1128 1129 //! Declare the span to be a subspan and store distance from master span and span count 1130 static void 1131 _rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) { 1132 rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), "Span master pointer and/or flag mismatch"); 1133 if (subspan != master) { 1134 subspan->flags = SPAN_FLAG_SUBSPAN; 1135 subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift); 1136 subspan->align_offset = 0; 1137 } 1138 subspan->span_count = (uint32_t)span_count; 1139 } 1140 1141 //! 
Use reserved spans to fulfill a memory map request (reserve size must be checked by caller) 1142 static span_t* 1143 _rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) { 1144 //Update the heap span reserve 1145 span_t* span = heap->span_reserve; 1146 heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size); 1147 heap->spans_reserved -= (uint32_t)span_count; 1148 1149 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count); 1150 if (span_count <= LARGE_CLASS_COUNT) 1151 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved); 1152 1153 return span; 1154 } 1155 1156 //! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size 1157 static size_t 1158 _rpmalloc_span_align_count(size_t span_count) { 1159 size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count; 1160 if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size)) 1161 request_count += _memory_span_map_count - (request_count % _memory_span_map_count); 1162 return request_count; 1163 } 1164 1165 //! Setup a newly mapped span 1166 static void 1167 _rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) { 1168 span->total_spans = (uint32_t)total_span_count; 1169 span->span_count = (uint32_t)span_count; 1170 span->align_offset = (uint32_t)align_offset; 1171 span->flags = SPAN_FLAG_MASTER; 1172 atomic_store32(&span->remaining_spans, (int32_t)total_span_count); 1173 } 1174 1175 static void 1176 _rpmalloc_span_unmap(span_t* span); 1177 1178 //! Map an aligned set of spans, taking configured mapping granularity and the page size into account 1179 static span_t* 1180 _rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) { 1181 //If we already have some, but not enough, reserved spans, release those to heap cache and map a new 1182 //full set of spans. 
Otherwise we would waste memory if page size > span size (huge pages) 1183 size_t aligned_span_count = _rpmalloc_span_align_count(span_count); 1184 size_t align_offset = 0; 1185 span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset); 1186 if (!span) 1187 return 0; 1188 _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset); 1189 _rpmalloc_stat_inc(&_master_spans); 1190 if (span_count <= LARGE_CLASS_COUNT) 1191 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls); 1192 if (aligned_span_count > span_count) { 1193 span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size); 1194 size_t reserved_count = aligned_span_count - span_count; 1195 if (heap->spans_reserved) { 1196 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved); 1197 _rpmalloc_heap_cache_insert(heap, heap->span_reserve); 1198 } 1199 if (reserved_count > _memory_heap_reserve_count) { 1200 // If huge pages or an eager span map count is used, the global reserve spin lock is held by the caller, _rpmalloc_span_map 1201 rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, "Global spin lock not held as expected"); 1202 size_t remain_count = reserved_count - _memory_heap_reserve_count; 1203 reserved_count = _memory_heap_reserve_count; 1204 span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size); 1205 if (_memory_global_reserve) { 1206 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count); 1207 _rpmalloc_span_unmap(_memory_global_reserve); 1208 } 1209 _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count); 1210 } 1211 _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count); 1212 } 1213 return span; 1214 } 1215 1216 //! Map in memory pages for the given number of spans (or use previously reserved pages) 1217 static span_t* 1218 _rpmalloc_span_map(heap_t* heap, size_t span_count) { 1219 if (span_count <= heap->spans_reserved) 1220 return _rpmalloc_span_map_from_reserve(heap, span_count); 1221 span_t* span = 0; 1222 int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count); 1223 if (use_global_reserve) { 1224 // If huge pages are used, make sure only one thread maps more memory to avoid bloat 1225 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1226 _rpmalloc_spin(); 1227 if (_memory_global_reserve_count >= span_count) { 1228 size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count); 1229 if (_memory_global_reserve_count < reserve_count) 1230 reserve_count = _memory_global_reserve_count; 1231 span = _rpmalloc_global_get_reserved_spans(reserve_count); 1232 if (span) { 1233 if (reserve_count > span_count) { 1234 span_t* reserved_span = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 1235 _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, reserved_span, reserve_count - span_count); 1236 } 1237 // Already marked as subspan in _rpmalloc_global_get_reserved_spans 1238 span->span_count = (uint32_t)span_count; 1239 } 1240 } 1241 } 1242 if (!span) 1243 span = _rpmalloc_span_map_aligned_count(heap, span_count); 1244 if (use_global_reserve) 1245 atomic_store32_release(&_memory_global_lock, 0); 1246 return span; 1247 } 1248 1249 //!
Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings) 1250 static void 1251 _rpmalloc_span_unmap(span_t* span) { 1252 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1253 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1254 1255 int is_master = !!(span->flags & SPAN_FLAG_MASTER); 1256 span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size))); 1257 rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1258 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 1259 1260 size_t span_count = span->span_count; 1261 if (!is_master) { 1262 //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master) 1263 rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); 1264 if (_memory_span_size >= _memory_page_size) 1265 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); 1266 } else { 1267 //Special double flag to denote an unmapped master 1268 //It must be kept in memory since span header must be used 1269 span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; 1270 _rpmalloc_stat_add(&_unmapped_master_spans, 1); 1271 } 1272 1273 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { 1274 //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span 1275 rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1276 size_t unmap_count = master->span_count; 1277 if (_memory_span_size < _memory_page_size) 1278 unmap_count = master->total_spans; 1279 _rpmalloc_stat_sub(&_master_spans, 1); 1280 _rpmalloc_stat_sub(&_unmapped_master_spans, 1); 1281 _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size); 1282 } 1283 } 1284 1285 //! Move the span (used for small or medium allocations) to the heap thread cache 1286 static void 1287 _rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) { 1288 rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); 1289 rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class"); 1290 rpmalloc_assert(span->span_count == 1, "Invalid span count"); 1291 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1292 atomic_decr32(&heap->span_use[0].current); 1293 #endif 1294 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1295 if (!heap->finalize) { 1296 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); 1297 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); 1298 if (heap->size_class[span->size_class].cache) 1299 _rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache); 1300 heap->size_class[span->size_class].cache = span; 1301 } else { 1302 _rpmalloc_span_unmap(span); 1303 } 1304 } 1305 1306 //! Initialize a (partial) free list up to next system memory page, while reserving the first block 1307 //! 
as allocated, returning number of blocks in list 1308 static uint32_t 1309 free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start, uint32_t block_count, uint32_t block_size) { 1310 rpmalloc_assert(block_count, "Internal failure"); 1311 *first_block = block_start; 1312 if (block_count > 1) { 1313 void* free_block = pointer_offset(block_start, block_size); 1314 void* block_end = pointer_offset(block_start, (size_t)block_size * block_count); 1315 //If block size is less than half a memory page, bound init to next memory page boundary 1316 if (block_size < (_memory_page_size >> 1)) { 1317 void* page_end = pointer_offset(page_start, _memory_page_size); 1318 if (page_end < block_end) 1319 block_end = page_end; 1320 } 1321 *list = free_block; 1322 block_count = 2; 1323 void* next_block = pointer_offset(free_block, block_size); 1324 while (next_block < block_end) { 1325 *((void**)free_block) = next_block; 1326 free_block = next_block; 1327 ++block_count; 1328 next_block = pointer_offset(next_block, block_size); 1329 } 1330 *((void**)free_block) = 0; 1331 } else { 1332 *list = 0; 1333 } 1334 return block_count; 1335 } 1336 1337 //! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list 1338 static void* 1339 _rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) { 1340 rpmalloc_assert(span->span_count == 1, "Internal failure"); 1341 size_class_t* size_class = _memory_size_class + class_idx; 1342 span->size_class = class_idx; 1343 span->heap = heap; 1344 span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; 1345 span->block_size = size_class->block_size; 1346 span->block_count = size_class->block_count; 1347 span->free_list = 0; 1348 span->list_size = 0; 1349 atomic_store_ptr_release(&span->free_list_deferred, 0); 1350 1351 //Setup free list. 
Only initialize one system page worth of free blocks in list 1352 void* block; 1353 span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block, 1354 span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size); 1355 //Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized 1356 if (span->free_list_limit < span->block_count) { 1357 _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span); 1358 span->used_count = span->free_list_limit; 1359 } else { 1360 #if RPMALLOC_FIRST_CLASS_HEAPS 1361 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 1362 #endif 1363 ++heap->full_span_count; 1364 span->used_count = span->block_count; 1365 } 1366 return block; 1367 } 1368 1369 static void 1370 _rpmalloc_span_extract_free_list_deferred(span_t* span) { 1371 // We need acquire semantics on the CAS operation since we are interested in the list size 1372 // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency 1373 do { 1374 span->free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 1375 } while (span->free_list == INVALID_POINTER); 1376 span->used_count -= span->list_size; 1377 span->list_size = 0; 1378 atomic_store_ptr_release(&span->free_list_deferred, 0); 1379 } 1380 1381 static int 1382 _rpmalloc_span_is_fully_utilized(span_t* span) { 1383 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span free list corrupted"); 1384 return !span->free_list && (span->free_list_limit >= span->block_count); 1385 } 1386 1387 static int 1388 _rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) { 1389 void* free_list = heap->size_class[iclass].free_list; 1390 span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask); 1391 if (span == class_span) { 1392 // Adopt the heap class free list back into the span free list 1393 void* block = span->free_list; 1394 void* last_block = 0; 1395 while (block) { 1396 last_block = block; 1397 block = *((void**)block); 1398 } 1399 uint32_t free_count = 0; 1400 block = free_list; 1401 while (block) { 1402 ++free_count; 1403 block = *((void**)block); 1404 } 1405 if (last_block) { 1406 *((void**)last_block) = free_list; 1407 } else { 1408 span->free_list = free_list; 1409 } 1410 heap->size_class[iclass].free_list = 0; 1411 span->used_count -= free_count; 1412 } 1413 //If this assert triggers you have memory leaks 1414 rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected"); 1415 if (span->list_size == span->used_count) { 1416 _rpmalloc_stat_dec(&heap->span_use[0].current); 1417 _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current); 1418 // This function only used for spans in double linked lists 1419 if (list_head) 1420 _rpmalloc_span_double_link_list_remove(list_head, span); 1421 _rpmalloc_span_unmap(span); 1422 return 1; 1423 } 1424 return 0; 1425 } 1426 1427 1428 //////////// 1429 /// 1430 /// Global cache 1431 /// 1432 ////// 1433 1434 #if ENABLE_GLOBAL_CACHE 1435 1436 //! 
Finalize a global cache 1437 static void 1438 _rpmalloc_global_cache_finalize(global_cache_t* cache) { 1439 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1440 _rpmalloc_spin(); 1441 1442 for (size_t ispan = 0; ispan < cache->count; ++ispan) 1443 _rpmalloc_span_unmap(cache->span[ispan]); 1444 cache->count = 0; 1445 1446 while (cache->overflow) { 1447 span_t* span = cache->overflow; 1448 cache->overflow = span->next; 1449 _rpmalloc_span_unmap(span); 1450 } 1451 1452 atomic_store32_release(&cache->lock, 0); 1453 } 1454 1455 static void 1456 _rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) { 1457 const size_t cache_limit = (span_count == 1) ? 1458 GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE : 1459 GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1460 1461 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1462 1463 size_t insert_count = count; 1464 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1465 _rpmalloc_spin(); 1466 1467 #if ENABLE_STATISTICS 1468 cache->insert_count += count; 1469 #endif 1470 if ((cache->count + insert_count) > cache_limit) 1471 insert_count = cache_limit - cache->count; 1472 1473 memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count); 1474 cache->count += (uint32_t)insert_count; 1475 1476 #if ENABLE_UNLIMITED_CACHE 1477 while (insert_count < count) { 1478 #else 1479 // Enable unlimited cache if huge pages, or we will leak since it is unlikely that an entire huge page 1480 // will be unmapped, and we're unable to partially decommit a huge page 1481 while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { 1482 #endif 1483 span_t* current_span = span[insert_count++]; 1484 current_span->next = cache->overflow; 1485 cache->overflow = current_span; 1486 } 1487 atomic_store32_release(&cache->lock, 0); 1488 1489 span_t* keep = 0; 1490 for (size_t ispan = insert_count; ispan < count; ++ispan) { 1491 span_t* current_span = span[ispan]; 1492 // Keep master spans that has remaining subspans to avoid dangling them 1493 if ((current_span->flags & SPAN_FLAG_MASTER) && 1494 (atomic_load32(¤t_span->remaining_spans) > (int32_t)current_span->span_count)) { 1495 current_span->next = keep; 1496 keep = current_span; 1497 } else { 1498 _rpmalloc_span_unmap(current_span); 1499 } 1500 } 1501 1502 if (keep) { 1503 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1504 _rpmalloc_spin(); 1505 1506 size_t islot = 0; 1507 while (keep) { 1508 for (; islot < cache->count; ++islot) { 1509 span_t* current_span = cache->span[islot]; 1510 if (!(current_span->flags & SPAN_FLAG_MASTER) || ((current_span->flags & SPAN_FLAG_MASTER) && 1511 (atomic_load32(¤t_span->remaining_spans) <= (int32_t)current_span->span_count))) { 1512 _rpmalloc_span_unmap(current_span); 1513 cache->span[islot] = keep; 1514 break; 1515 } 1516 } 1517 if (islot == cache->count) 1518 break; 1519 keep = keep->next; 1520 } 1521 1522 if (keep) { 1523 span_t* tail = keep; 1524 while (tail->next) 1525 tail = tail->next; 1526 tail->next = cache->overflow; 1527 cache->overflow = keep; 1528 } 1529 1530 atomic_store32_release(&cache->lock, 0); 1531 } 1532 } 1533 1534 static size_t 1535 _rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) { 1536 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1537 1538 size_t extract_count = 0; 1539 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1540 _rpmalloc_spin(); 1541 1542 #if ENABLE_STATISTICS 1543 cache->extract_count += count; 1544 #endif 
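// Editor's note, not part of the upstream rpmalloc source: an illustrative walk-through of the extraction logic that follows.
// Assuming, for example, a request of count == 64 spans while cache->count == 40 and some spans sit in cache->overflow:
// 'want' is first computed as 64 and then clamped to 40, the memcpy lifts those 40 entries off the top of the bounded
// cache array, and the loop after it pops up to the remaining 24 spans from the unbounded overflow list.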
1545 size_t want = count - extract_count; 1546 if (want > cache->count) 1547 want = cache->count; 1548 1549 memcpy(span + extract_count, cache->span + (cache->count - want), sizeof(span_t*) * want); 1550 cache->count -= (uint32_t)want; 1551 extract_count += want; 1552 1553 while ((extract_count < count) && cache->overflow) { 1554 span_t* current_span = cache->overflow; 1555 span[extract_count++] = current_span; 1556 cache->overflow = current_span->next; 1557 } 1558 1559 #if ENABLE_ASSERTS 1560 for (size_t ispan = 0; ispan < extract_count; ++ispan) { 1561 rpmalloc_assert(span[ispan]->span_count == span_count, "Global cache span count mismatch"); 1562 } 1563 #endif 1564 1565 atomic_store32_release(&cache->lock, 0); 1566 1567 return extract_count; 1568 } 1569 1570 #endif 1571 1572 //////////// 1573 /// 1574 /// Heap control 1575 /// 1576 ////// 1577 1578 static void _rpmalloc_deallocate_huge(span_t*); 1579 1580 //! Store the given spans as reserve in the given heap 1581 static void 1582 _rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) { 1583 heap->span_reserve_master = master; 1584 heap->span_reserve = reserve; 1585 heap->spans_reserved = (uint32_t)reserve_span_count; 1586 } 1587 1588 //! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use 1589 static void 1590 _rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) { 1591 span_t* span = (span_t*)((void*)atomic_exchange_ptr_acquire(&heap->span_free_deferred, 0)); 1592 while (span) { 1593 span_t* next_span = (span_t*)span->free_list; 1594 rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); 1595 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 1596 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1597 --heap->full_span_count; 1598 _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); 1599 #if RPMALLOC_FIRST_CLASS_HEAPS 1600 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 1601 #endif 1602 _rpmalloc_stat_dec(&heap->span_use[0].current); 1603 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1604 if (single_span && !*single_span) 1605 *single_span = span; 1606 else 1607 _rpmalloc_heap_cache_insert(heap, span); 1608 } else { 1609 if (span->size_class == SIZE_CLASS_HUGE) { 1610 _rpmalloc_deallocate_huge(span); 1611 } else { 1612 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Span size class invalid"); 1613 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1614 --heap->full_span_count; 1615 #if RPMALLOC_FIRST_CLASS_HEAPS 1616 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); 1617 #endif 1618 uint32_t idx = span->span_count - 1; 1619 _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); 1620 _rpmalloc_stat_dec(&heap->span_use[idx].current); 1621 if (!idx && single_span && !*single_span) 1622 *single_span = span; 1623 else 1624 _rpmalloc_heap_cache_insert(heap, span); 1625 } 1626 } 1627 span = next_span; 1628 } 1629 } 1630 1631 static void 1632 _rpmalloc_heap_unmap(heap_t* heap) { 1633 if (!heap->master_heap) { 1634 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { 1635 span_t* span = (span_t*)((uintptr_t)heap & _memory_span_mask); 1636 _rpmalloc_span_unmap(span); 1637 } 1638 } else { 1639 if (atomic_decr32(&heap->master_heap->child_count) == 0) { 1640 _rpmalloc_heap_unmap(heap->master_heap); 1641 } 1642 } 1643 } 1644 1645 static void 1646 
_rpmalloc_heap_global_finalize(heap_t* heap) { 1647 if (heap->finalize++ > 1) { 1648 --heap->finalize; 1649 return; 1650 } 1651 1652 _rpmalloc_heap_finalize(heap); 1653 1654 #if ENABLE_THREAD_CACHE 1655 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1656 span_cache_t* span_cache; 1657 if (!iclass) 1658 span_cache = &heap->span_cache; 1659 else 1660 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1661 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1662 _rpmalloc_span_unmap(span_cache->span[ispan]); 1663 span_cache->count = 0; 1664 } 1665 #endif 1666 1667 if (heap->full_span_count) { 1668 --heap->finalize; 1669 return; 1670 } 1671 1672 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 1673 if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) { 1674 --heap->finalize; 1675 return; 1676 } 1677 } 1678 //Heap is now completely free, unmap and remove from heap list 1679 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1680 heap_t* list_heap = _memory_heaps[list_idx]; 1681 if (list_heap == heap) { 1682 _memory_heaps[list_idx] = heap->next_heap; 1683 } else { 1684 while (list_heap->next_heap != heap) 1685 list_heap = list_heap->next_heap; 1686 list_heap->next_heap = heap->next_heap; 1687 } 1688 1689 _rpmalloc_heap_unmap(heap); 1690 } 1691 1692 //! Insert a single span into thread heap cache, releasing to global cache if overflow 1693 static void 1694 _rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) { 1695 if (UNEXPECTED(heap->finalize != 0)) { 1696 _rpmalloc_span_unmap(span); 1697 _rpmalloc_heap_global_finalize(heap); 1698 return; 1699 } 1700 #if ENABLE_THREAD_CACHE 1701 size_t span_count = span->span_count; 1702 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); 1703 if (span_count == 1) { 1704 span_cache_t* span_cache = &heap->span_cache; 1705 span_cache->span[span_cache->count++] = span; 1706 if (span_cache->count == MAX_THREAD_SPAN_CACHE) { 1707 const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; 1708 #if ENABLE_GLOBAL_CACHE 1709 _rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); 1710 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER); 1711 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER); 1712 #else 1713 for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) 1714 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1715 #endif 1716 span_cache->count = remain_count; 1717 } 1718 } else { 1719 size_t cache_idx = span_count - 2; 1720 span_large_cache_t* span_cache = heap->span_large_cache + cache_idx; 1721 span_cache->span[span_cache->count++] = span; 1722 const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1723 if (span_cache->count == cache_limit) { 1724 const size_t transfer_limit = 2 + (cache_limit >> 2); 1725 const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? 
THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit); 1726 const size_t remain_count = cache_limit - transfer_count; 1727 #if ENABLE_GLOBAL_CACHE 1728 _rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size); 1729 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count); 1730 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count); 1731 #else 1732 for (size_t ispan = 0; ispan < transfer_count; ++ispan) 1733 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1734 #endif 1735 span_cache->count = remain_count; 1736 } 1737 } 1738 #else 1739 (void)sizeof(heap); 1740 _rpmalloc_span_unmap(span); 1741 #endif 1742 } 1743 1744 //! Extract the given number of spans from the different cache levels 1745 static span_t* 1746 _rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) { 1747 span_t* span = 0; 1748 #if ENABLE_THREAD_CACHE 1749 span_cache_t* span_cache; 1750 if (span_count == 1) 1751 span_cache = &heap->span_cache; 1752 else 1753 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1754 if (span_cache->count) { 1755 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); 1756 return span_cache->span[--span_cache->count]; 1757 } 1758 #endif 1759 return span; 1760 } 1761 1762 static span_t* 1763 _rpmalloc_heap_thread_cache_deferred_extract(heap_t* heap, size_t span_count) { 1764 span_t* span = 0; 1765 if (span_count == 1) { 1766 _rpmalloc_heap_cache_adopt_deferred(heap, &span); 1767 } else { 1768 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1769 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1770 } 1771 return span; 1772 } 1773 1774 static span_t* 1775 _rpmalloc_heap_reserved_extract(heap_t* heap, size_t span_count) { 1776 if (heap->spans_reserved >= span_count) 1777 return _rpmalloc_span_map(heap, span_count); 1778 return 0; 1779 } 1780 1781 //! 
Extract a span from the global cache 1782 static span_t* 1783 _rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) { 1784 #if ENABLE_GLOBAL_CACHE 1785 #if ENABLE_THREAD_CACHE 1786 span_cache_t* span_cache; 1787 size_t wanted_count; 1788 if (span_count == 1) { 1789 span_cache = &heap->span_cache; 1790 wanted_count = THREAD_SPAN_CACHE_TRANSFER; 1791 } else { 1792 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1793 wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; 1794 } 1795 span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count); 1796 if (span_cache->count) { 1797 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size); 1798 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count); 1799 return span_cache->span[--span_cache->count]; 1800 } 1801 #else 1802 span_t* span = 0; 1803 size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); 1804 if (count) { 1805 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size); 1806 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count); 1807 return span; 1808 } 1809 #endif 1810 #endif 1811 (void)sizeof(heap); 1812 (void)sizeof(span_count); 1813 return 0; 1814 } 1815 1816 static void 1817 _rpmalloc_inc_span_statistics(heap_t* heap, size_t span_count, uint32_t class_idx) { 1818 (void)sizeof(heap); 1819 (void)sizeof(span_count); 1820 (void)sizeof(class_idx); 1821 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1822 uint32_t idx = (uint32_t)span_count - 1; 1823 uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current); 1824 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) 1825 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); 1826 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak); 1827 #endif 1828 } 1829 1830 //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory 1831 static span_t* 1832 _rpmalloc_heap_extract_new_span(heap_t* heap, heap_size_class_t* heap_size_class, size_t span_count, uint32_t class_idx) { 1833 span_t* span; 1834 #if ENABLE_THREAD_CACHE 1835 if (heap_size_class && heap_size_class->cache) { 1836 span = heap_size_class->cache; 1837 heap_size_class->cache = (heap->span_cache.count ? heap->span_cache.span[--heap->span_cache.count] : 0); 1838 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1839 return span; 1840 } 1841 #endif 1842 (void)sizeof(class_idx); 1843 // Allow 50% overhead to increase cache hits 1844 size_t base_span_count = span_count; 1845 size_t limit_span_count = (span_count > 2) ? 
(span_count + (span_count >> 1)) : span_count; 1846 if (limit_span_count > LARGE_CLASS_COUNT) 1847 limit_span_count = LARGE_CLASS_COUNT; 1848 do { 1849 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1850 if (EXPECTED(span != 0)) { 1851 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1852 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1853 return span; 1854 } 1855 span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); 1856 if (EXPECTED(span != 0)) { 1857 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1858 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1859 return span; 1860 } 1861 span = _rpmalloc_heap_reserved_extract(heap, span_count); 1862 if (EXPECTED(span != 0)) { 1863 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); 1864 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1865 return span; 1866 } 1867 span = _rpmalloc_heap_global_cache_extract(heap, span_count); 1868 if (EXPECTED(span != 0)) { 1869 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1870 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1871 return span; 1872 } 1873 ++span_count; 1874 } while (span_count <= limit_span_count); 1875 //Final fallback, map in more virtual memory 1876 span = _rpmalloc_span_map(heap, base_span_count); 1877 _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); 1878 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); 1879 return span; 1880 } 1881 1882 static void 1883 _rpmalloc_heap_initialize(heap_t* heap) { 1884 _rpmalloc_memset_const(heap, 0, sizeof(heap_t)); 1885 //Get a new heap ID 1886 heap->id = 1 + atomic_incr32(&_memory_heap_id); 1887 1888 //Link heap into heap ID map 1889 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1890 heap->next_heap = _memory_heaps[list_idx]; 1891 _memory_heaps[list_idx] = heap; 1892 } 1893 1894 static void 1895 _rpmalloc_heap_orphan(heap_t* heap, int first_class) { 1896 heap->owner_thread = (uintptr_t)-1; 1897 #if RPMALLOC_FIRST_CLASS_HEAPS 1898 heap_t** heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); 1899 #else 1900 (void)sizeof(first_class); 1901 heap_t** heap_list = &_memory_orphan_heaps; 1902 #endif 1903 heap->next_orphan = *heap_list; 1904 *heap_list = heap; 1905 } 1906 1907 //! Allocate a new heap from newly mapped memory pages 1908 static heap_t* 1909 _rpmalloc_heap_allocate_new(void) { 1910 // Map in pages for 16 heaps. If page size is greater than required size for this, map a page and 1911 // use first part for heaps and remaining part for spans for allocations.
Adds a lot of complexity, 1912 // but saves a lot of memory on systems where page size > 64 spans (4MiB) 1913 size_t heap_size = sizeof(heap_t); 1914 size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); 1915 size_t request_heap_count = 16; 1916 size_t heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1917 size_t block_size = _memory_span_size * heap_span_count; 1918 size_t span_count = heap_span_count; 1919 span_t* span = 0; 1920 // If there are global reserved spans, use these first 1921 if (_memory_global_reserve_count >= heap_span_count) { 1922 span = _rpmalloc_global_get_reserved_spans(heap_span_count); 1923 } 1924 if (!span) { 1925 if (_memory_page_size > block_size) { 1926 span_count = _memory_page_size / _memory_span_size; 1927 block_size = _memory_page_size; 1928 // If using huge pages, make sure to grab enough heaps to avoid reallocating a huge page just to serve new heaps 1929 size_t possible_heap_count = (block_size - sizeof(span_t)) / aligned_heap_size; 1930 if (possible_heap_count >= (request_heap_count * 16)) 1931 request_heap_count *= 16; 1932 else if (possible_heap_count < request_heap_count) 1933 request_heap_count = possible_heap_count; 1934 heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1935 } 1936 1937 size_t align_offset = 0; 1938 span = (span_t*)_rpmalloc_mmap(block_size, &align_offset); 1939 if (!span) 1940 return 0; 1941 1942 // Master span will contain the heaps 1943 _rpmalloc_stat_inc(&_master_spans); 1944 _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); 1945 } 1946 1947 size_t remain_size = _memory_span_size - sizeof(span_t); 1948 heap_t* heap = (heap_t*)pointer_offset(span, sizeof(span_t)); 1949 _rpmalloc_heap_initialize(heap); 1950 1951 // Put extra heaps as orphans 1952 size_t num_heaps = remain_size / aligned_heap_size; 1953 if (num_heaps < request_heap_count) 1954 num_heaps = request_heap_count; 1955 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); 1956 heap_t* extra_heap = (heap_t*)pointer_offset(heap, aligned_heap_size); 1957 while (num_heaps > 1) { 1958 _rpmalloc_heap_initialize(extra_heap); 1959 extra_heap->master_heap = heap; 1960 _rpmalloc_heap_orphan(extra_heap, 1); 1961 extra_heap = (heap_t*)pointer_offset(extra_heap, aligned_heap_size); 1962 --num_heaps; 1963 } 1964 1965 if (span_count > heap_span_count) { 1966 // Cap reserved spans 1967 size_t remain_count = span_count - heap_span_count; 1968 size_t reserve_count = (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count : remain_count); 1969 span_t* remain_span = (span_t*)pointer_offset(span, heap_span_count * _memory_span_size); 1970 _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); 1971 1972 if (remain_count > reserve_count) { 1973 // Set to global reserved spans 1974 remain_span = (span_t*)pointer_offset(remain_span, reserve_count * _memory_span_size); 1975 reserve_count = remain_count - reserve_count; 1976 _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); 1977 } 1978 } 1979 1980 return heap; 1981 } 1982 1983 static heap_t* 1984 _rpmalloc_heap_extract_orphan(heap_t** heap_list) { 1985 heap_t* heap = *heap_list; 1986 *heap_list = (heap ? heap->next_orphan : 0); 1987 return heap; 1988 } 1989 1990 //! 
Allocate a new heap, potentially reusing a previously orphaned heap 1991 static heap_t* 1992 _rpmalloc_heap_allocate(int first_class) { 1993 heap_t* heap = 0; 1994 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1995 _rpmalloc_spin(); 1996 if (first_class == 0) 1997 heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); 1998 #if RPMALLOC_FIRST_CLASS_HEAPS 1999 if (!heap) 2000 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); 2001 #endif 2002 if (!heap) 2003 heap = _rpmalloc_heap_allocate_new(); 2004 atomic_store32_release(&_memory_global_lock, 0); 2005 if (heap) 2006 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2007 return heap; 2008 } 2009 2010 static void 2011 _rpmalloc_heap_release(void* heapptr, int first_class, int release_cache) { 2012 heap_t* heap = (heap_t*)heapptr; 2013 if (!heap) 2014 return; 2015 //Release thread cache spans back to global cache 2016 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2017 if (release_cache || heap->finalize) { 2018 #if ENABLE_THREAD_CACHE 2019 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 2020 span_cache_t* span_cache; 2021 if (!iclass) 2022 span_cache = &heap->span_cache; 2023 else 2024 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 2025 if (!span_cache->count) 2026 continue; 2027 #if ENABLE_GLOBAL_CACHE 2028 if (heap->finalize) { 2029 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 2030 _rpmalloc_span_unmap(span_cache->span[ispan]); 2031 } else { 2032 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); 2033 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); 2034 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); 2035 } 2036 #else 2037 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 2038 _rpmalloc_span_unmap(span_cache->span[ispan]); 2039 #endif 2040 span_cache->count = 0; 2041 } 2042 #endif 2043 } 2044 2045 if (get_thread_heap_raw() == heap) 2046 set_thread_heap(0); 2047 2048 #if ENABLE_STATISTICS 2049 atomic_decr32(&_memory_active_heaps); 2050 rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, "Still active heaps during finalization"); 2051 #endif 2052 2053 // If we are forcibly terminating with _exit the state of the 2054 // lock atomic is unknown and it's best to just go ahead and exit 2055 if (get_thread_id() != _rpmalloc_main_thread_id) { 2056 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 2057 _rpmalloc_spin(); 2058 } 2059 _rpmalloc_heap_orphan(heap, first_class); 2060 atomic_store32_release(&_memory_global_lock, 0); 2061 } 2062 2063 static void 2064 _rpmalloc_heap_release_raw(void* heapptr, int release_cache) { 2065 _rpmalloc_heap_release(heapptr, 0, release_cache); 2066 } 2067 2068 static void 2069 _rpmalloc_heap_release_raw_fc(void* heapptr) { 2070 _rpmalloc_heap_release_raw(heapptr, 1); 2071 } 2072 2073 static void 2074 _rpmalloc_heap_finalize(heap_t* heap) { 2075 if (heap->spans_reserved) { 2076 span_t* span = _rpmalloc_span_map(heap, heap->spans_reserved); 2077 _rpmalloc_span_unmap(span); 2078 heap->spans_reserved = 0; 2079 } 2080 2081 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2082 2083 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 2084 if (heap->size_class[iclass].cache) 2085 _rpmalloc_span_unmap(heap->size_class[iclass].cache); 2086 heap->size_class[iclass].cache = 0; 2087 span_t* span = heap->size_class[iclass].partial_span; 2088 while (span) { 2089 span_t* next = span->next; 2090 
_rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span); 2091 span = next; 2092 } 2093 // If class still has a free list it must be a full span 2094 if (heap->size_class[iclass].free_list) { 2095 span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask); 2096 span_t** list = 0; 2097 #if RPMALLOC_FIRST_CLASS_HEAPS 2098 list = &heap->full_span[iclass]; 2099 #endif 2100 --heap->full_span_count; 2101 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { 2102 if (list) 2103 _rpmalloc_span_double_link_list_remove(list, class_span); 2104 _rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span); 2105 } 2106 } 2107 } 2108 2109 #if ENABLE_THREAD_CACHE 2110 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 2111 span_cache_t* span_cache; 2112 if (!iclass) 2113 span_cache = &heap->span_cache; 2114 else 2115 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 2116 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 2117 _rpmalloc_span_unmap(span_cache->span[ispan]); 2118 span_cache->count = 0; 2119 } 2120 #endif 2121 rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), "Heaps still active during finalization"); 2122 } 2123 2124 2125 //////////// 2126 /// 2127 /// Allocation entry points 2128 /// 2129 ////// 2130 2131 //! Pop first block from a free list 2132 static void* 2133 free_list_pop(void** list) { 2134 void* block = *list; 2135 *list = *((void**)block); 2136 return block; 2137 } 2138 2139 //! Allocate a small/medium sized memory block from the given heap 2140 static void* 2141 _rpmalloc_allocate_from_heap_fallback(heap_t* heap, heap_size_class_t* heap_size_class, uint32_t class_idx) { 2142 span_t* span = heap_size_class->partial_span; 2143 rpmalloc_assume(heap != 0); 2144 if (EXPECTED(span != 0)) { 2145 rpmalloc_assert(span->block_count == _memory_size_class[span->size_class].block_count, "Span block count corrupted"); 2146 rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), "Internal failure"); 2147 void* block; 2148 if (span->free_list) { 2149 //Span local free list is not empty, swap to size class free list 2150 block = free_list_pop(&span->free_list); 2151 heap_size_class->free_list = span->free_list; 2152 span->free_list = 0; 2153 } else { 2154 //If the span did not fully initialize free list, link up another page worth of blocks 2155 void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size)); 2156 span->free_list_limit += free_list_partial_init(&heap_size_class->free_list, &block, 2157 (void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start, 2158 span->block_count - span->free_list_limit, span->block_size); 2159 } 2160 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span block count corrupted"); 2161 span->used_count = span->free_list_limit; 2162 2163 //Swap in deferred free list if present 2164 if (atomic_load_ptr(&span->free_list_deferred)) 2165 _rpmalloc_span_extract_free_list_deferred(span); 2166 2167 //If span is still not fully utilized keep it in partial list and early return block 2168 if (!_rpmalloc_span_is_fully_utilized(span)) 2169 return block; 2170 2171 //The span is fully utilized, unlink from partial list and add to fully utilized list 2172 _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, span); 2173 #if RPMALLOC_FIRST_CLASS_HEAPS 2174 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 2175 
#endif 2176 ++heap->full_span_count; 2177 return block; 2178 } 2179 2180 //Find a span in one of the cache levels 2181 span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); 2182 if (EXPECTED(span != 0)) { 2183 //Mark span as owned by this heap and set base data, return first block 2184 return _rpmalloc_span_initialize_new(heap, heap_size_class, span, class_idx); 2185 } 2186 2187 return 0; 2188 } 2189 2190 //! Allocate a small sized memory block from the given heap 2191 static void* 2192 _rpmalloc_allocate_small(heap_t* heap, size_t size) { 2193 rpmalloc_assert(heap, "No thread heap"); 2194 //Small sizes have unique size classes 2195 const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); 2196 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2197 _rpmalloc_stat_inc_alloc(heap, class_idx); 2198 if (EXPECTED(heap_size_class->free_list != 0)) 2199 return free_list_pop(&heap_size_class->free_list); 2200 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2201 } 2202 2203 //! Allocate a medium sized memory block from the given heap 2204 static void* 2205 _rpmalloc_allocate_medium(heap_t* heap, size_t size) { 2206 rpmalloc_assert(heap, "No thread heap"); 2207 //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes) 2208 const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); 2209 const uint32_t class_idx = _memory_size_class[base_idx].class_idx; 2210 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2211 _rpmalloc_stat_inc_alloc(heap, class_idx); 2212 if (EXPECTED(heap_size_class->free_list != 0)) 2213 return free_list_pop(&heap_size_class->free_list); 2214 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2215 } 2216 2217 //! Allocate a large sized memory block from the given heap 2218 static void* 2219 _rpmalloc_allocate_large(heap_t* heap, size_t size) { 2220 rpmalloc_assert(heap, "No thread heap"); 2221 //Calculate number of needed max sized spans (including header) 2222 //Since this function is never called if size > LARGE_SIZE_LIMIT 2223 //the span_count is guaranteed to be <= LARGE_CLASS_COUNT 2224 size += SPAN_HEADER_SIZE; 2225 size_t span_count = size >> _memory_span_size_shift; 2226 if (size & (_memory_span_size - 1)) 2227 ++span_count; 2228 2229 //Find a span in one of the cache levels 2230 span_t* span = _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); 2231 if (!span) 2232 return span; 2233 2234 //Mark span as owned by this heap and set base data 2235 rpmalloc_assert(span->span_count >= span_count, "Internal failure"); 2236 span->size_class = SIZE_CLASS_LARGE; 2237 span->heap = heap; 2238 2239 #if RPMALLOC_FIRST_CLASS_HEAPS 2240 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2241 #endif 2242 ++heap->full_span_count; 2243 2244 return pointer_offset(span, SPAN_HEADER_SIZE); 2245 } 2246 2247 //! 
Allocate a huge block by mapping memory pages directly 2248 static void* 2249 _rpmalloc_allocate_huge(heap_t* heap, size_t size) { 2250 rpmalloc_assert(heap, "No thread heap"); 2251 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2252 size += SPAN_HEADER_SIZE; 2253 size_t num_pages = size >> _memory_page_size_shift; 2254 if (size & (_memory_page_size - 1)) 2255 ++num_pages; 2256 size_t align_offset = 0; 2257 span_t* span = (span_t*)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); 2258 if (!span) 2259 return span; 2260 2261 //Store page count in span_count 2262 span->size_class = SIZE_CLASS_HUGE; 2263 span->span_count = (uint32_t)num_pages; 2264 span->align_offset = (uint32_t)align_offset; 2265 span->heap = heap; 2266 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2267 2268 #if RPMALLOC_FIRST_CLASS_HEAPS 2269 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2270 #endif 2271 ++heap->full_span_count; 2272 2273 return pointer_offset(span, SPAN_HEADER_SIZE); 2274 } 2275 2276 //! Allocate a block of the given size 2277 static void* 2278 _rpmalloc_allocate(heap_t* heap, size_t size) { 2279 _rpmalloc_stat_add64(&_allocation_counter, 1); 2280 if (EXPECTED(size <= SMALL_SIZE_LIMIT)) 2281 return _rpmalloc_allocate_small(heap, size); 2282 else if (size <= _memory_medium_size_limit) 2283 return _rpmalloc_allocate_medium(heap, size); 2284 else if (size <= LARGE_SIZE_LIMIT) 2285 return _rpmalloc_allocate_large(heap, size); 2286 return _rpmalloc_allocate_huge(heap, size); 2287 } 2288 2289 static void* 2290 _rpmalloc_aligned_allocate(heap_t* heap, size_t alignment, size_t size) { 2291 if (alignment <= SMALL_GRANULARITY) 2292 return _rpmalloc_allocate(heap, size); 2293 2294 #if ENABLE_VALIDATE_ARGS 2295 if ((size + alignment) < size) { 2296 errno = EINVAL; 2297 return 0; 2298 } 2299 if (alignment & (alignment - 1)) { 2300 errno = EINVAL; 2301 return 0; 2302 } 2303 #endif 2304 2305 if ((alignment <= SPAN_HEADER_SIZE) && ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) { 2306 // If alignment is less than or equal to span header size (which is a power of two), 2307 // and size aligned to span header size multiples is less than size + alignment, 2308 // then use natural alignment of blocks to provide alignment 2309 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE; 2310 rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), "Failed alignment calculation"); 2311 if (multiple_size <= (size + alignment)) 2312 return _rpmalloc_allocate(heap, multiple_size); 2313 } 2314 2315 void* ptr = 0; 2316 size_t align_mask = alignment - 1; 2317 if (alignment <= _memory_page_size) { 2318 ptr = _rpmalloc_allocate(heap, size + alignment); 2319 if ((uintptr_t)ptr & align_mask) { 2320 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2321 //Mark as having aligned blocks 2322 span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask); 2323 span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; 2324 } 2325 return ptr; 2326 } 2327 2328 // Fallback to mapping new pages for this request. Since pointers passed 2329 // to rpfree must be able to reach the start of the span by bitmasking of 2330 // the address with the span size, the returned aligned pointer from this 2331 // function must be within a span size of the start of the mapped area. 2332 // In worst case this requires us to loop and map pages until we get a 2333 // suitable memory address.
It also means we can never align to span size 2334 // or greater, since the span header will push alignment more than one 2335 // span size away from span start (thus causing pointer mask to give us 2336 // an invalid span start on free) 2337 if (alignment & align_mask) { 2338 errno = EINVAL; 2339 return 0; 2340 } 2341 if (alignment >= _memory_span_size) { 2342 errno = EINVAL; 2343 return 0; 2344 } 2345 2346 size_t extra_pages = alignment / _memory_page_size; 2347 2348 // Since each span has a header, we will at least need one extra memory page 2349 size_t num_pages = 1 + (size / _memory_page_size); 2350 if (size & (_memory_page_size - 1)) 2351 ++num_pages; 2352 2353 if (extra_pages > num_pages) 2354 num_pages = 1 + extra_pages; 2355 2356 size_t original_pages = num_pages; 2357 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; 2358 if (limit_pages < (original_pages * 2)) 2359 limit_pages = original_pages * 2; 2360 2361 size_t mapped_size, align_offset; 2362 span_t* span; 2363 2364 retry: 2365 align_offset = 0; 2366 mapped_size = num_pages * _memory_page_size; 2367 2368 span = (span_t*)_rpmalloc_mmap(mapped_size, &align_offset); 2369 if (!span) { 2370 errno = ENOMEM; 2371 return 0; 2372 } 2373 ptr = pointer_offset(span, SPAN_HEADER_SIZE); 2374 2375 if ((uintptr_t)ptr & align_mask) 2376 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2377 2378 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || 2379 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || 2380 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { 2381 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); 2382 ++num_pages; 2383 if (num_pages > limit_pages) { 2384 errno = EINVAL; 2385 return 0; 2386 } 2387 goto retry; 2388 } 2389 2390 //Store page count in span_count 2391 span->size_class = SIZE_CLASS_HUGE; 2392 span->span_count = (uint32_t)num_pages; 2393 span->align_offset = (uint32_t)align_offset; 2394 span->heap = heap; 2395 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2396 2397 #if RPMALLOC_FIRST_CLASS_HEAPS 2398 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2399 #endif 2400 ++heap->full_span_count; 2401 2402 _rpmalloc_stat_add64(&_allocation_counter, 1); 2403 2404 return ptr; 2405 } 2406 2407 2408 //////////// 2409 /// 2410 /// Deallocation entry points 2411 /// 2412 ////// 2413 2414 //! 
Deallocate the given small/medium memory block in the current thread local heap 2415 static void 2416 _rpmalloc_deallocate_direct_small_or_medium(span_t* span, void* block) { 2417 heap_t* heap = span->heap; 2418 rpmalloc_assert(heap->owner_thread == get_thread_id() || !heap->owner_thread || heap->finalize, "Internal failure"); 2419 //Add block to free list 2420 if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { 2421 span->used_count = span->block_count; 2422 #if RPMALLOC_FIRST_CLASS_HEAPS 2423 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 2424 #endif 2425 _rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span); 2426 --heap->full_span_count; 2427 } 2428 *((void**)block) = span->free_list; 2429 --span->used_count; 2430 span->free_list = block; 2431 if (UNEXPECTED(span->used_count == span->list_size)) { 2432 // If there are no used blocks it is guaranteed that no other external thread is accessing the span 2433 if (span->used_count) { 2434 // Make sure we have synchronized the deferred list and list size by using acquire semantics 2435 // and guarantee that no external thread is accessing span concurrently 2436 void* free_list; 2437 do { 2438 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2439 } while (free_list == INVALID_POINTER); 2440 atomic_store_ptr_release(&span->free_list_deferred, free_list); 2441 } 2442 _rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span); 2443 _rpmalloc_span_release_to_cache(heap, span); 2444 } 2445 } 2446 2447 static void 2448 _rpmalloc_deallocate_defer_free_span(heap_t* heap, span_t* span) { 2449 if (span->size_class != SIZE_CLASS_HUGE) 2450 _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); 2451 //This list does not need ABA protection, no mutable side state 2452 do { 2453 span->free_list = (void*)atomic_load_ptr(&heap->span_free_deferred); 2454 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); 2455 } 2456 2457 //! Put the block in the deferred free list of the owning span 2458 static void 2459 _rpmalloc_deallocate_defer_small_or_medium(span_t* span, void* block) { 2460 // The memory ordering here is a bit tricky, to avoid having to ABA protect 2461 // the deferred free list to avoid desynchronization of list and list size 2462 // we need to have acquire semantics on successful CAS of the pointer to 2463 // guarantee the list_size variable validity + release semantics on pointer store 2464 void* free_list; 2465 do { 2466 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2467 } while (free_list == INVALID_POINTER); 2468 *((void**)block) = free_list; 2469 uint32_t free_count = ++span->list_size; 2470 int all_deferred_free = (free_count == span->block_count); 2471 atomic_store_ptr_release(&span->free_list_deferred, block); 2472 if (all_deferred_free) { 2473 // Span was completely freed by this block. Due to the INVALID_POINTER spin lock 2474 // no other thread can reach this state simultaneously on this span. 
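// The span is pushed onto the owning heap's span_free_deferred list and reclaimed the next time that heap runs _rpmalloc_heap_cache_adopt_deferred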
2475 // Safe to move to owner heap deferred cache 2476 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2477 } 2478 } 2479 2480 static void 2481 _rpmalloc_deallocate_small_or_medium(span_t* span, void* p) { 2482 _rpmalloc_stat_inc_free(span->heap, span->size_class); 2483 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { 2484 //Realign pointer to block start 2485 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2486 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2487 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); 2488 } 2489 //Check if block belongs to this heap or if deallocation should be deferred 2490 #if RPMALLOC_FIRST_CLASS_HEAPS 2491 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2492 #else 2493 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2494 #endif 2495 if (!defer) 2496 _rpmalloc_deallocate_direct_small_or_medium(span, p); 2497 else 2498 _rpmalloc_deallocate_defer_small_or_medium(span, p); 2499 } 2500 2501 //! Deallocate the given large memory block to the current heap 2502 static void 2503 _rpmalloc_deallocate_large(span_t* span) { 2504 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); 2505 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2506 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2507 //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap 2508 #if RPMALLOC_FIRST_CLASS_HEAPS 2509 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2510 #else 2511 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2512 #endif 2513 if (defer) { 2514 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2515 return; 2516 } 2517 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2518 --span->heap->full_span_count; 2519 #if RPMALLOC_FIRST_CLASS_HEAPS 2520 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2521 #endif 2522 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 2523 //Decrease counter 2524 size_t idx = span->span_count - 1; 2525 atomic_decr32(&span->heap->span_use[idx].current); 2526 #endif 2527 heap_t* heap = span->heap; 2528 rpmalloc_assert(heap, "No thread heap"); 2529 #if ENABLE_THREAD_CACHE 2530 const int set_as_reserved = ((span->span_count > 1) && (heap->span_cache.count == 0) && !heap->finalize && !heap->spans_reserved); 2531 #else 2532 const int set_as_reserved = ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); 2533 #endif 2534 if (set_as_reserved) { 2535 heap->span_reserve = span; 2536 heap->spans_reserved = span->span_count; 2537 if (span->flags & SPAN_FLAG_MASTER) { 2538 heap->span_reserve_master = span; 2539 } else { //SPAN_FLAG_SUBSPAN 2540 span_t* master = (span_t*)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); 2541 heap->span_reserve_master = master; 2542 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 2543 rpmalloc_assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count, "Master span count corrupted"); 2544 } 2545 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); 2546 } else { 2547 //Insert into cache list 2548 
_rpmalloc_heap_cache_insert(heap, span); 2549 } 2550 } 2551 2552 //! Deallocate the given huge span 2553 static void 2554 _rpmalloc_deallocate_huge(span_t* span) { 2555 rpmalloc_assert(span->heap, "No span heap"); 2556 #if RPMALLOC_FIRST_CLASS_HEAPS 2557 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2558 #else 2559 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2560 #endif 2561 if (defer) { 2562 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2563 return; 2564 } 2565 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2566 --span->heap->full_span_count; 2567 #if RPMALLOC_FIRST_CLASS_HEAPS 2568 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2569 #endif 2570 2571 //Oversized allocation, page count is stored in span_count 2572 size_t num_pages = span->span_count; 2573 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size); 2574 _rpmalloc_stat_sub(&_huge_pages_current, num_pages); 2575 } 2576 2577 //! Deallocate the given block 2578 static void 2579 _rpmalloc_deallocate(void* p) { 2580 _rpmalloc_stat_add64(&_deallocation_counter, 1); 2581 //Grab the span (always at start of span, using span alignment) 2582 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2583 if (UNEXPECTED(!span)) 2584 return; 2585 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) 2586 _rpmalloc_deallocate_small_or_medium(span, p); 2587 else if (span->size_class == SIZE_CLASS_LARGE) 2588 _rpmalloc_deallocate_large(span); 2589 else 2590 _rpmalloc_deallocate_huge(span); 2591 } 2592 2593 //////////// 2594 /// 2595 /// Reallocation entry points 2596 /// 2597 ////// 2598 2599 static size_t 2600 _rpmalloc_usable_size(void* p); 2601 2602 //! 
Reallocate the given block to the given size 2603 static void* 2604 _rpmalloc_reallocate(heap_t* heap, void* p, size_t size, size_t oldsize, unsigned int flags) { 2605 if (p) { 2606 //Grab the span using guaranteed span alignment 2607 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2608 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 2609 //Small/medium sized block 2610 rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); 2611 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2612 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2613 uint32_t block_idx = block_offset / span->block_size; 2614 void* block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size); 2615 if (!oldsize) 2616 oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); 2617 if ((size_t)span->block_size >= size) { 2618 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2619 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2620 memmove(block, p, oldsize); 2621 return block; 2622 } 2623 } else if (span->size_class == SIZE_CLASS_LARGE) { 2624 //Large block 2625 size_t total_size = size + SPAN_HEADER_SIZE; 2626 size_t num_spans = total_size >> _memory_span_size_shift; 2627 if (total_size & (_memory_span_mask - 1)) 2628 ++num_spans; 2629 size_t current_spans = span->span_count; 2630 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2631 if (!oldsize) 2632 oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2633 if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { 2634 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2635 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2636 memmove(block, p, oldsize); 2637 return block; 2638 } 2639 } else { 2640 //Oversized block 2641 size_t total_size = size + SPAN_HEADER_SIZE; 2642 size_t num_pages = total_size >> _memory_page_size_shift; 2643 if (total_size & (_memory_page_size - 1)) 2644 ++num_pages; 2645 //Page count is stored in span_count 2646 size_t current_pages = span->span_count; 2647 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2648 if (!oldsize) 2649 oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2650 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { 2651 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2652 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2653 memmove(block, p, oldsize); 2654 return block; 2655 } 2656 } 2657 } else { 2658 oldsize = 0; 2659 } 2660 2661 if (!!(flags & RPMALLOC_GROW_OR_FAIL)) 2662 return 0; 2663 2664 //Size is greater than block size, need to allocate a new block and deallocate the old 2665 //Avoid hysteresis by overallocating if increase is small (below 37%) 2666 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); 2667 size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); 2668 void* block = _rpmalloc_allocate(heap, new_size); 2669 if (p && block) { 2670 if (!(flags & RPMALLOC_NO_PRESERVE)) 2671 memcpy(block, p, oldsize < new_size ? 
oldsize : new_size); 2672 _rpmalloc_deallocate(p); 2673 } 2674 2675 return block; 2676 } 2677 2678 static void* 2679 _rpmalloc_aligned_reallocate(heap_t* heap, void* ptr, size_t alignment, size_t size, size_t oldsize, 2680 unsigned int flags) { 2681 if (alignment <= SMALL_GRANULARITY) 2682 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); 2683 2684 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); 2685 size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); 2686 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { 2687 if (no_alloc || (size >= (usablesize / 2))) 2688 return ptr; 2689 } 2690 // Aligned alloc marks span as having aligned blocks 2691 void* block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); 2692 if (EXPECTED(block != 0)) { 2693 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { 2694 if (!oldsize) 2695 oldsize = usablesize; 2696 memcpy(block, ptr, oldsize < size ? oldsize : size); 2697 } 2698 _rpmalloc_deallocate(ptr); 2699 } 2700 return block; 2701 } 2702 2703 2704 //////////// 2705 /// 2706 /// Initialization, finalization and utility 2707 /// 2708 ////// 2709 2710 //! Get the usable size of the given block 2711 static size_t 2712 _rpmalloc_usable_size(void* p) { 2713 //Grab the span using guaranteed span alignment 2714 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2715 if (span->size_class < SIZE_CLASS_COUNT) { 2716 //Small/medium block 2717 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2718 return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size); 2719 } 2720 if (span->size_class == SIZE_CLASS_LARGE) { 2721 //Large block 2722 size_t current_spans = span->span_count; 2723 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); 2724 } 2725 //Oversized block, page count is stored in span_count 2726 size_t current_pages = span->span_count; 2727 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); 2728 } 2729 2730 //! Adjust and optimize the size class properties for the given class 2731 static void 2732 _rpmalloc_adjust_size_class(size_t iclass) { 2733 size_t block_size = _memory_size_class[iclass].block_size; 2734 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; 2735 2736 _memory_size_class[iclass].block_count = (uint16_t)block_count; 2737 _memory_size_class[iclass].class_idx = (uint16_t)iclass; 2738 2739 //Check if previous size classes can be merged 2740 if (iclass >= SMALL_CLASS_COUNT) { 2741 size_t prevclass = iclass; 2742 while (prevclass > 0) { 2743 --prevclass; 2744 //A class can be merged if number of pages and number of blocks are equal 2745 if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count) 2746 _rpmalloc_memcpy_const(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass])); 2747 else 2748 break; 2749 } 2750 } 2751 } 2752 2753 //! 
Initialize the allocator and setup global data 2754 extern inline int 2755 rpmalloc_initialize(void) { 2756 if (_rpmalloc_initialized) { 2757 rpmalloc_thread_initialize(); 2758 return 0; 2759 } 2760 return rpmalloc_initialize_config(0); 2761 } 2762 2763 int 2764 rpmalloc_initialize_config(const rpmalloc_config_t* config) { 2765 if (_rpmalloc_initialized) { 2766 rpmalloc_thread_initialize(); 2767 return 0; 2768 } 2769 _rpmalloc_initialized = 1; 2770 2771 if (config) 2772 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); 2773 else 2774 _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t)); 2775 2776 if (!_memory_config.memory_map || !_memory_config.memory_unmap) { 2777 _memory_config.memory_map = _rpmalloc_mmap_os; 2778 _memory_config.memory_unmap = _rpmalloc_unmap_os; 2779 } 2780 2781 #if PLATFORM_WINDOWS 2782 SYSTEM_INFO system_info; 2783 memset(&system_info, 0, sizeof(system_info)); 2784 GetSystemInfo(&system_info); 2785 _memory_map_granularity = system_info.dwAllocationGranularity; 2786 #else 2787 _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); 2788 #endif 2789 2790 #if RPMALLOC_CONFIGURABLE 2791 _memory_page_size = _memory_config.page_size; 2792 #else 2793 _memory_page_size = 0; 2794 #endif 2795 _memory_huge_pages = 0; 2796 if (!_memory_page_size) { 2797 #if PLATFORM_WINDOWS 2798 _memory_page_size = system_info.dwPageSize; 2799 #else 2800 _memory_page_size = _memory_map_granularity; 2801 if (_memory_config.enable_huge_pages) { 2802 #if defined(__linux__) 2803 size_t huge_page_size = 0; 2804 FILE* meminfo = fopen("/proc/meminfo", "r"); 2805 if (meminfo) { 2806 char line[128]; 2807 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { 2808 line[sizeof(line) - 1] = 0; 2809 if (strstr(line, "Hugepagesize:")) 2810 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; 2811 } 2812 fclose(meminfo); 2813 } 2814 if (huge_page_size) { 2815 _memory_huge_pages = 1; 2816 _memory_page_size = huge_page_size; 2817 _memory_map_granularity = huge_page_size; 2818 } 2819 #elif defined(__FreeBSD__) 2820 int rc; 2821 size_t sz = sizeof(rc); 2822 2823 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) { 2824 static size_t defsize = 2 * 1024 * 1024; 2825 int nsize = 0; 2826 size_t sizes[4] = {0}; 2827 _memory_huge_pages = 1; 2828 _memory_page_size = defsize; 2829 if ((nsize = getpagesizes(sizes, 4)) >= 2) { 2830 nsize --; 2831 for (size_t csize = sizes[nsize]; nsize >= 0 && csize; --nsize, csize = sizes[nsize]) { 2832 //! Unlikely, but as a precaution.. 
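// (each size reported by getpagesizes() is expected to be a power of two and a multiple of 1024, which is what the assert below verifies)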
2833 rpmalloc_assert(!(csize & (csize -1)) && !(csize % 1024), "Invalid page size"); 2834 if (defsize < csize) { 2835 _memory_page_size = csize; 2836 break; 2837 } 2838 } 2839 } 2840 _memory_map_granularity = _memory_page_size; 2841 } 2842 #elif defined(__APPLE__) || defined(__NetBSD__) 2843 _memory_huge_pages = 1; 2844 _memory_page_size = 2 * 1024 * 1024; 2845 _memory_map_granularity = _memory_page_size; 2846 #endif 2847 } 2848 #endif 2849 } else { 2850 if (_memory_config.enable_huge_pages) 2851 _memory_huge_pages = 1; 2852 } 2853 2854 #if PLATFORM_WINDOWS 2855 if (_memory_config.enable_huge_pages) { 2856 HANDLE token = 0; 2857 size_t large_page_minimum = GetLargePageMinimum(); 2858 if (large_page_minimum) 2859 OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); 2860 if (token) { 2861 LUID luid; 2862 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { 2863 TOKEN_PRIVILEGES token_privileges; 2864 memset(&token_privileges, 0, sizeof(token_privileges)); 2865 token_privileges.PrivilegeCount = 1; 2866 token_privileges.Privileges[0].Luid = luid; 2867 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; 2868 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { 2869 if (GetLastError() == ERROR_SUCCESS) 2870 _memory_huge_pages = 1; 2871 } 2872 } 2873 CloseHandle(token); 2874 } 2875 if (_memory_huge_pages) { 2876 if (large_page_minimum > _memory_page_size) 2877 _memory_page_size = large_page_minimum; 2878 if (large_page_minimum > _memory_map_granularity) 2879 _memory_map_granularity = large_page_minimum; 2880 } 2881 } 2882 #endif 2883 2884 size_t min_span_size = 256; 2885 size_t max_page_size; 2886 #if UINTPTR_MAX > 0xFFFFFFFF 2887 max_page_size = 4096ULL * 1024ULL * 1024ULL; 2888 #else 2889 max_page_size = 4 * 1024 * 1024; 2890 #endif 2891 if (_memory_page_size < min_span_size) 2892 _memory_page_size = min_span_size; 2893 if (_memory_page_size > max_page_size) 2894 _memory_page_size = max_page_size; 2895 _memory_page_size_shift = 0; 2896 size_t page_size_bit = _memory_page_size; 2897 while (page_size_bit != 1) { 2898 ++_memory_page_size_shift; 2899 page_size_bit >>= 1; 2900 } 2901 _memory_page_size = ((size_t)1 << _memory_page_size_shift); 2902 2903 #if RPMALLOC_CONFIGURABLE 2904 if (!_memory_config.span_size) { 2905 _memory_span_size = _memory_default_span_size; 2906 _memory_span_size_shift = _memory_default_span_size_shift; 2907 _memory_span_mask = _memory_default_span_mask; 2908 } else { 2909 size_t span_size = _memory_config.span_size; 2910 if (span_size > (256 * 1024)) 2911 span_size = (256 * 1024); 2912 _memory_span_size = 4096; 2913 _memory_span_size_shift = 12; 2914 while (_memory_span_size < span_size) { 2915 _memory_span_size <<= 1; 2916 ++_memory_span_size_shift; 2917 } 2918 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); 2919 } 2920 #endif 2921 2922 _memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT); 2923 if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) 2924 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2925 if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) 2926 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2927 _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) ? 
DEFAULT_SPAN_MAP_COUNT : _memory_span_map_count; 2928 2929 _memory_config.page_size = _memory_page_size; 2930 _memory_config.span_size = _memory_span_size; 2931 _memory_config.span_map_count = _memory_span_map_count; 2932 _memory_config.enable_huge_pages = _memory_huge_pages; 2933 2934 #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 2935 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) 2936 return -1; 2937 #endif 2938 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2939 fls_key = FlsAlloc(&_rpmalloc_thread_destructor); 2940 #endif 2941 2942 //Setup all small and medium size classes 2943 size_t iclass = 0; 2944 _memory_size_class[iclass].block_size = SMALL_GRANULARITY; 2945 _rpmalloc_adjust_size_class(iclass); 2946 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { 2947 size_t size = iclass * SMALL_GRANULARITY; 2948 _memory_size_class[iclass].block_size = (uint32_t)size; 2949 _rpmalloc_adjust_size_class(iclass); 2950 } 2951 //At least two blocks per span, then fall back to large allocations 2952 _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; 2953 if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) 2954 _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; 2955 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { 2956 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); 2957 if (size > _memory_medium_size_limit) { 2958 _memory_medium_size_limit = SMALL_SIZE_LIMIT + (iclass * MEDIUM_GRANULARITY); 2959 break; 2960 } 2961 _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; 2962 _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); 2963 } 2964 2965 _memory_orphan_heaps = 0; 2966 #if RPMALLOC_FIRST_CLASS_HEAPS 2967 _memory_first_class_orphan_heaps = 0; 2968 #endif 2969 #if ENABLE_STATISTICS 2970 atomic_store32(&_memory_active_heaps, 0); 2971 atomic_store32(&_mapped_pages, 0); 2972 _mapped_pages_peak = 0; 2973 atomic_store32(&_master_spans, 0); 2974 atomic_store32(&_mapped_total, 0); 2975 atomic_store32(&_unmapped_total, 0); 2976 atomic_store32(&_mapped_pages_os, 0); 2977 atomic_store32(&_huge_pages_current, 0); 2978 _huge_pages_peak = 0; 2979 #endif 2980 memset(_memory_heaps, 0, sizeof(_memory_heaps)); 2981 atomic_store32_release(&_memory_global_lock, 0); 2982 2983 rpmalloc_linker_reference(); 2984 2985 //Initialize this thread 2986 rpmalloc_thread_initialize(); 2987 return 0; 2988 } 2989 2990 //! 
Finalize the allocator 2991 void 2992 rpmalloc_finalize(void) { 2993 rpmalloc_thread_finalize(1); 2994 //rpmalloc_dump_statistics(stdout); 2995 2996 if (_memory_global_reserve) { 2997 atomic_add32(&_memory_global_reserve_master->remaining_spans, -(int32_t)_memory_global_reserve_count); 2998 _memory_global_reserve_master = 0; 2999 _memory_global_reserve_count = 0; 3000 _memory_global_reserve = 0; 3001 } 3002 atomic_store32_release(&_memory_global_lock, 0); 3003 3004 //Free all thread caches and fully free spans 3005 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { 3006 heap_t* heap = _memory_heaps[list_idx]; 3007 while (heap) { 3008 heap_t* next_heap = heap->next_heap; 3009 heap->finalize = 1; 3010 _rpmalloc_heap_global_finalize(heap); 3011 heap = next_heap; 3012 } 3013 } 3014 3015 #if ENABLE_GLOBAL_CACHE 3016 //Free global caches 3017 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) 3018 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); 3019 #endif 3020 3021 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 3022 pthread_key_delete(_memory_thread_heap); 3023 #endif 3024 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 3025 FlsFree(fls_key); 3026 fls_key = 0; 3027 #endif 3028 #if ENABLE_STATISTICS 3029 //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code 3030 rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); 3031 rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, "Memory leak detected"); 3032 #endif 3033 3034 _rpmalloc_initialized = 0; 3035 } 3036 3037 //! Initialize thread, assign heap 3038 extern inline void 3039 rpmalloc_thread_initialize(void) { 3040 if (!get_thread_heap_raw()) { 3041 heap_t* heap = _rpmalloc_heap_allocate(0); 3042 if (heap) { 3043 _rpmalloc_stat_inc(&_memory_active_heaps); 3044 set_thread_heap(heap); 3045 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 3046 FlsSetValue(fls_key, heap); 3047 #endif 3048 } 3049 } 3050 } 3051 3052 //! Finalize thread, orphan heap 3053 void 3054 rpmalloc_thread_finalize(int release_caches) { 3055 heap_t* heap = get_thread_heap_raw(); 3056 if (heap) 3057 _rpmalloc_heap_release_raw(heap, release_caches); 3058 set_thread_heap(0); 3059 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 3060 FlsSetValue(fls_key, 0); 3061 #endif 3062 } 3063 3064 int 3065 rpmalloc_is_thread_initialized(void) { 3066 return (get_thread_heap_raw() != 0) ? 
const rpmalloc_config_t*
rpmalloc_config(void) {
	return &_memory_config;
}

// Extern interface

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc(size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	heap_t* heap = get_thread_heap();
	return _rpmalloc_allocate(heap, size);
}

extern inline void
rpfree(void* ptr) {
	_rpmalloc_deallocate(ptr);
}

extern inline RPMALLOC_ALLOCATOR void*
rpcalloc(size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	heap_t* heap = get_thread_heap();
	void* block = _rpmalloc_allocate(heap, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rprealloc(void* ptr, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return ptr;
	}
#endif
	heap_t* heap = get_thread_heap();
	return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
}

extern RPMALLOC_ALLOCATOR void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize,
		unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
		errno = EINVAL;
		return 0;
	}
#endif
	heap_t* heap = get_thread_heap();
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
}

extern RPMALLOC_ALLOCATOR void*
rpaligned_alloc(size_t alignment, size_t size) {
	heap_t* heap = get_thread_heap();
	return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpaligned_calloc(size_t alignment, size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	void* block = rpaligned_alloc(alignment, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rpmemalign(size_t alignment, size_t size) {
	return rpaligned_alloc(alignment, size);
}

extern inline int
rpposix_memalign(void **memptr, size_t alignment, size_t size) {
	if (memptr)
		*memptr = rpaligned_alloc(alignment, size);
	else
		return EINVAL;
	return *memptr ? 0 : ENOMEM;
}

extern inline size_t
rpmalloc_usable_size(void* ptr) {
	return (ptr ? _rpmalloc_usable_size(ptr) : 0);
}
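
// Illustrative sketch (disabled): exercising the extern interface above. Not
// part of the allocator. The sizes and the 64-byte alignment are arbitrary
// examples; overflow checks on rpcalloc only apply when ENABLE_VALIDATE_ARGS
// is enabled.
#if 0
static void
example_extern_interface(void) {
	void* a = rpcalloc(16, sizeof(double));      // zero-initialized block
	void* b = rpaligned_alloc(64, 1024);         // 64-byte aligned block
	void* c = 0;
	if (rpposix_memalign(&c, 64, 256) == 0) {    // posix_memalign-style wrapper over rpaligned_alloc
		size_t usable = rpmalloc_usable_size(c); // may exceed the requested 256 bytes
		(void)usable;
		rpfree(c);
	}
	rpfree(a);
	rpfree(b);
}
#endif
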
extern inline void
rpmalloc_thread_collect(void) {
}

void
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) {
	memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
	heap_t* heap = get_thread_heap_raw();
	if (!heap)
		return;

	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		size_class_t* size_class = _memory_size_class + iclass;
		span_t* span = heap->size_class[iclass].partial_span;
		while (span) {
			size_t free_count = span->list_size;
			size_t block_count = size_class->block_count;
			if (span->free_list_limit < block_count)
				block_count = span->free_list_limit;
			free_count += (block_count - span->used_count);
			stats->sizecache += free_count * size_class->block_size;
			span = span->next;
		}
	}

#if ENABLE_THREAD_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		span_cache_t* span_cache;
		if (!iclass)
			span_cache = &heap->span_cache;
		else
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
		stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size;
	}
#endif

	span_t* deferred = (span_t*)atomic_load_ptr(&heap->span_free_deferred);
	while (deferred) {
		if (deferred->size_class != SIZE_CLASS_HUGE)
			stats->spancache += (size_t)deferred->span_count * _memory_span_size;
		deferred = (span_t*)deferred->free_list;
	}

#if ENABLE_STATISTICS
	stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
	stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);

	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
		stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
		stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
		stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
		stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
		stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
		stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
		stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
		stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
	}
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
		stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
		stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
		stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
		stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
		stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
		stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
		stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
	}
#endif
}

void
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
	memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
#if ENABLE_STATISTICS
	stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
	stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
	stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
	stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
	stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
	stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
#endif
#if ENABLE_GLOBAL_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
		stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size;
#endif
}

#if ENABLE_STATISTICS

static void
_memory_heap_dump_statistics(heap_t* heap, void* file) {
	fprintf(file, "Heap %d stats:\n", heap->id);
	fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
			continue;
		fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
			atomic_load32(&heap->size_class_use[iclass].alloc_current),
			heap->size_class_use[iclass].alloc_peak,
			atomic_load32(&heap->size_class_use[iclass].alloc_total),
			atomic_load32(&heap->size_class_use[iclass].free_total),
			_memory_size_class[iclass].block_size,
			_memory_size_class[iclass].block_count,
			atomic_load32(&heap->size_class_use[iclass].spans_current),
			heap->size_class_use[iclass].spans_peak,
			((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
			atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
	}
	fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n");
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
			continue;
		fprintf(file, "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
			atomic_load32(&heap->span_use[iclass].current),
			atomic_load32(&heap->span_use[iclass].high),
			atomic_load32(&heap->span_use[iclass].spans_deferred),
			((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
#if ENABLE_THREAD_CACHE
			(unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
#else
			0, (size_t)0, (size_t)0,
#endif
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
			atomic_load32(&heap->span_use[iclass].spans_map_calls));
	}
	fprintf(file, "Full spans: %zu\n", heap->full_span_count);
	fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
	fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
}

#endif

void
rpmalloc_dump_statistics(void* file) {
#if ENABLE_STATISTICS
	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
		heap_t* heap = _memory_heaps[list_idx];
		while (heap) {
			int need_dump = 0;
			for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
					rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].free_total), "Heap statistics counter mismatch");
					rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls), "Heap statistics counter mismatch");
					continue;
				}
				need_dump = 1;
			}
			for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
					continue;
				need_dump = 1;
			}
			if (need_dump)
				_memory_heap_dump_statistics(heap, file);
			heap = heap->next_heap;
		}
	}
	fprintf(file, "Global stats:\n");
	size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
	size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
	fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
	fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));

#if ENABLE_GLOBAL_CACHE
	fprintf(file, "GlobalCacheMiB\n");
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		global_cache_t* cache = _memory_span_cache + iclass;
		size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;

		size_t global_overflow_cache = 0;
		span_t* span = cache->overflow;
		while (span) {
			global_overflow_cache += iclass * _memory_span_size;
			span = span->next;
		}
		if (global_cache || global_overflow_cache || cache->insert_count || cache->extract_count)
			fprintf(file, "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", iclass + 1, global_cache / (size_t)(1024 * 1024), global_overflow_cache / (size_t)(1024 * 1024), cache->insert_count, cache->extract_count);
	}
#endif
	size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
	size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
	size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
	size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
	size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
	fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
	fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
		mapped / (size_t)(1024 * 1024),
		mapped_os / (size_t)(1024 * 1024),
		mapped_peak / (size_t)(1024 * 1024),
		mapped_total / (size_t)(1024 * 1024),
		unmapped_total / (size_t)(1024 * 1024));

	fprintf(file, "\n");
#if 0
	int64_t allocated = atomic_load64(&_allocation_counter);
	int64_t deallocated = atomic_load64(&_deallocation_counter);
	fprintf(file, "Allocation count: %lli\n", allocated);
	fprintf(file, "Deallocation count: %lli\n", deallocated);
	fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
	fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
	fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
#endif
#endif
	(void)sizeof(file);
}
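
// Illustrative sketch (disabled): querying the statistics interfaces above. Not
// part of the allocator. Most counters are only populated when the library is
// built with ENABLE_STATISTICS, and rpmalloc_dump_statistics() expects a FILE*
// passed through its void* parameter.
#if 0
#include <stdio.h>
static void
example_report_usage(void) {
	rpmalloc_global_statistics_t global;
	rpmalloc_thread_statistics_t thread;
	rpmalloc_global_statistics(&global);       // mapped/cached byte counts across all heaps
	rpmalloc_thread_statistics(&thread);       // size-class and span cache bytes for this thread
	printf("mapped: %zu bytes, global cache: %zu bytes\n", global.mapped, global.cached);
	printf("thread size cache: %zu bytes, span cache: %zu bytes\n", thread.sizecache, thread.spancache);
	rpmalloc_dump_statistics(stdout);          // human readable dump
}
#endif
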
#if RPMALLOC_FIRST_CLASS_HEAPS

extern inline rpmalloc_heap_t*
rpmalloc_heap_acquire(void) {
	// Must be a pristine heap from newly mapped memory pages, or else memory blocks
	// could already be allocated from the heap which would (wrongly) be released when
	// heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be
	// pristine from the dedicated orphan list can be used.
	heap_t* heap = _rpmalloc_heap_allocate(1);
	rpmalloc_assume(heap != NULL);
	heap->owner_thread = 0;
	_rpmalloc_stat_inc(&_memory_active_heaps);
	return heap;
}

extern inline void
rpmalloc_heap_release(rpmalloc_heap_t* heap) {
	if (heap)
		_rpmalloc_heap_release(heap, 1, 1);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_allocate(heap, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
	return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	void* block = _rpmalloc_aligned_allocate(heap, alignment, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return ptr;
	}
#endif
	return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
}

extern inline void
rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
	(void)sizeof(heap);
	_rpmalloc_deallocate(ptr);
}

extern inline void
rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
	span_t* span;
	span_t* next_span;

	_rpmalloc_heap_cache_adopt_deferred(heap, 0);

	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		span = heap->size_class[iclass].partial_span;
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
		heap->size_class[iclass].partial_span = 0;
		span = heap->full_span[iclass];
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
	}
	memset(heap->size_class, 0, sizeof(heap->size_class));
	memset(heap->full_span, 0, sizeof(heap->full_span));
	span = heap->large_huge_span;
	while (span) {
		next_span = span->next;
		if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
			_rpmalloc_deallocate_huge(span);
		else
			_rpmalloc_heap_cache_insert(heap, span);
		span = next_span;
	}
	heap->large_huge_span = 0;
	heap->full_span_count = 0;

#if ENABLE_THREAD_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		span_cache_t* span_cache;
		if (!iclass)
			span_cache = &heap->span_cache;
		else
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
		if (!span_cache->count)
			continue;
#if ENABLE_GLOBAL_CACHE
		_rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
		_rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
		_rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
#else
		for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
			_rpmalloc_span_unmap(span_cache->span[ispan]);
#endif
		span_cache->count = 0;
	}
#endif

#if ENABLE_STATISTICS
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
		atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
	}
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->span_use[iclass].current, 0);
	}
#endif
}

extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
	heap_t* prev_heap = get_thread_heap_raw();
	if (prev_heap != heap) {
		set_thread_heap(heap);
		if (prev_heap)
			rpmalloc_heap_release(prev_heap);
	}
}

#endif

#if ENABLE_PRELOAD || ENABLE_OVERRIDE

#include "malloc.c"

#endif

void
rpmalloc_linker_reference(void) {
	(void)sizeof(_rpmalloc_initialized);
}
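
// Illustrative sketch (disabled): one way to use the first-class heap interface
// defined above. Not part of the allocator; requires a build with
// RPMALLOC_FIRST_CLASS_HEAPS. The per-subsystem-heap pattern with a bulk release
// via rpmalloc_heap_free_all() is an example, not the only intended use.
#if 0
static void
example_first_class_heap(void) {
	rpmalloc_heap_t* heap = rpmalloc_heap_acquire();   // pristine heap, not bound to a thread
	void* a = rpmalloc_heap_alloc(heap, 256);
	void* b = rpmalloc_heap_aligned_alloc(heap, 64, 512);
	rpmalloc_heap_free(heap, a);                       // individual free (heap argument is unused)
	(void)b;
	rpmalloc_heap_free_all(heap);                      // release every block still owned by the heap
	rpmalloc_heap_release(heap);                       // return spans and retire the heap
}
#endif
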