github.com/pidato/unsafe@v0.1.4/memory/rpmalloc/src/rpmalloc.c (about) 1 /* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias Jansson 2 * 3 * This library provides a cross-platform lock free thread caching malloc implementation in C11. 4 * The latest source code is always available at 5 * 6 * https://github.com/mjansson/rpmalloc 7 * 8 * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. 9 * 10 */ 11 12 #include "rpmalloc.h" 13 14 //////////// 15 /// 16 /// Build time configurable limits 17 /// 18 ////// 19 20 #if defined(__clang__) 21 #pragma clang diagnostic ignored "-Wunused-macros" 22 #pragma clang diagnostic ignored "-Wunused-function" 23 #if __has_warning("-Wreserved-identifier") 24 #pragma clang diagnostic ignored "-Wreserved-identifier" 25 #endif 26 #if __has_warning("-Wstatic-in-inline") 27 #pragma clang diagnostic ignored "-Wstatic-in-inline" 28 #endif 29 #elif defined(__GNUC__) 30 #pragma GCC diagnostic ignored "-Wunused-macros" 31 #pragma GCC diagnostic ignored "-Wunused-function" 32 #endif 33 34 #ifndef HEAP_ARRAY_SIZE 35 //! Size of heap hashmap 36 #define HEAP_ARRAY_SIZE 47 37 #endif 38 #ifndef ENABLE_THREAD_CACHE 39 //! Enable per-thread cache 40 #define ENABLE_THREAD_CACHE 1 41 #endif 42 #ifndef ENABLE_GLOBAL_CACHE 43 //! Enable global cache shared between all threads, requires thread cache 44 #define ENABLE_GLOBAL_CACHE 1 45 #endif 46 #ifndef ENABLE_VALIDATE_ARGS 47 //! Enable validation of args to public entry points 48 #define ENABLE_VALIDATE_ARGS 0 49 #endif 50 #ifndef ENABLE_STATISTICS 51 //! Enable statistics collection 52 #define ENABLE_STATISTICS 0 53 #endif 54 #ifndef ENABLE_ASSERTS 55 //! Enable asserts 56 #define ENABLE_ASSERTS 0 57 #endif 58 #ifndef ENABLE_OVERRIDE 59 //! Override standard library malloc/free and new/delete entry points 60 #define ENABLE_OVERRIDE 1 61 #endif 62 #ifndef ENABLE_PRELOAD 63 //! Support preloading 64 #define ENABLE_PRELOAD 1 65 #endif 66 #ifndef DISABLE_UNMAP 67 //! Disable unmapping memory pages (also enables unlimited cache) 68 #define DISABLE_UNMAP 0 69 #endif 70 #ifndef ENABLE_UNLIMITED_CACHE 71 //! Enable unlimited global cache (no unmapping until finalization) 72 #define ENABLE_UNLIMITED_CACHE 0 73 #endif 74 #ifndef ENABLE_ADAPTIVE_THREAD_CACHE 75 //! Enable adaptive thread cache size based on use heuristics 76 #define ENABLE_ADAPTIVE_THREAD_CACHE 1 77 #endif 78 #ifndef DEFAULT_SPAN_MAP_COUNT 79 //! Default number of spans to map in call to map more virtual memory (default values yield 4MiB here) 80 #define DEFAULT_SPAN_MAP_COUNT 64 81 #endif 82 #ifndef GLOBAL_CACHE_MULTIPLIER 83 //! 
Multiplier for global cache 84 #define GLOBAL_CACHE_MULTIPLIER 8 85 #endif 86 87 #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE 88 #error Must use global cache if unmap is disabled 89 #endif 90 91 #if DISABLE_UNMAP 92 #undef ENABLE_UNLIMITED_CACHE 93 #define ENABLE_UNLIMITED_CACHE 1 94 #endif 95 96 #if !ENABLE_GLOBAL_CACHE 97 #undef ENABLE_UNLIMITED_CACHE 98 #define ENABLE_UNLIMITED_CACHE 0 99 #endif 100 101 #if !ENABLE_THREAD_CACHE 102 #undef ENABLE_ADAPTIVE_THREAD_CACHE 103 #define ENABLE_ADAPTIVE_THREAD_CACHE 0 104 #endif 105 106 #if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) 107 # define PLATFORM_WINDOWS 1 108 # define PLATFORM_POSIX 0 109 #else 110 # define PLATFORM_WINDOWS 0 111 # define PLATFORM_POSIX 1 112 #endif 113 114 /// Platform and arch specifics 115 #if defined(_MSC_VER) && !defined(__clang__) 116 # pragma warning (disable: 5105) 117 # ifndef FORCEINLINE 118 # define FORCEINLINE inline __forceinline 119 # endif 120 # define _Static_assert static_assert 121 #else 122 # ifndef FORCEINLINE 123 # define FORCEINLINE inline __attribute__((__always_inline__)) 124 # endif 125 #endif 126 #if PLATFORM_WINDOWS 127 # ifndef WIN32_LEAN_AND_MEAN 128 # define WIN32_LEAN_AND_MEAN 129 # endif 130 # include <windows.h> 131 # if ENABLE_VALIDATE_ARGS 132 # include <intsafe.h> 133 # endif 134 #else 135 # include <unistd.h> 136 # include <stdio.h> 137 # include <stdlib.h> 138 # include <time.h> 139 # if defined(__linux__) || defined(__ANDROID__) 140 # include <sys/prctl.h> 141 # if !defined(PR_SET_VMA) 142 # define PR_SET_VMA 0x53564d41 143 # define PR_SET_VMA_ANON_NAME 0 144 # endif 145 # endif 146 # if defined(__APPLE__) 147 # include <TargetConditionals.h> 148 # if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 149 # include <mach/mach_vm.h> 150 # include <mach/vm_statistics.h> 151 # endif 152 # include <pthread.h> 153 # endif 154 # if defined(__HAIKU__) || defined(__TINYC__) 155 # include <pthread.h> 156 # endif 157 #endif 158 159 #include <stdint.h> 160 #include <string.h> 161 #include <errno.h> 162 163 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 164 #include <fibersapi.h> 165 static DWORD fls_key; 166 #endif 167 168 #if PLATFORM_POSIX 169 # include <sys/mman.h> 170 # include <sched.h> 171 # ifdef __FreeBSD__ 172 # include <sys/sysctl.h> 173 # define MAP_HUGETLB MAP_ALIGNED_SUPER 174 # ifndef PROT_MAX 175 # define PROT_MAX(f) 0 176 # endif 177 # else 178 # define PROT_MAX(f) 0 179 # endif 180 # ifdef __sun 181 extern int madvise(caddr_t, size_t, int); 182 # endif 183 # ifndef MAP_UNINITIALIZED 184 # define MAP_UNINITIALIZED 0 185 # endif 186 #endif 187 #include <errno.h> 188 189 #if ENABLE_ASSERTS 190 # undef NDEBUG 191 # if defined(_MSC_VER) && !defined(_DEBUG) 192 # define _DEBUG 193 # endif 194 # include <assert.h> 195 #define RPMALLOC_TOSTRING_M(x) #x 196 #define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) 197 #define rpmalloc_assert(truth, message) \ 198 do { \ 199 if (!(truth)) { \ 200 if (_memory_config.error_callback) { \ 201 _memory_config.error_callback( \ 202 message " (" RPMALLOC_TOSTRING(truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ 203 } else { \ 204 assert((truth) && message); \ 205 } \ 206 } \ 207 } while (0) 208 #else 209 # define rpmalloc_assert(truth, message) do {} while(0) 210 #endif 211 #if ENABLE_STATISTICS 212 # include <stdio.h> 213 #endif 214 215 ////// 216 /// 217 /// Atomic access abstraction (since MSVC does not do C11 yet) 218 /// 219 ////// 220 221 #if defined(_MSC_VER) && !defined(__clang__) 222 223 typedef 
volatile long atomic32_t; 224 typedef volatile long long atomic64_t; 225 typedef volatile void* atomicptr_t; 226 227 static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return *src; } 228 static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { *dst = val; } 229 static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return (int32_t)InterlockedIncrement(val); } 230 static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return (int32_t)InterlockedDecrement(val); } 231 static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; } 232 static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; } 233 static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { *dst = val; } 234 static FORCEINLINE int64_t atomic_load64(atomic64_t* src) { return *src; } 235 static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; } 236 static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return (void*)*src; } 237 static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { *dst = val; } 238 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { *dst = val; } 239 static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return (void*)InterlockedExchangePointer((void* volatile*)dst, val); } 240 static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return (InterlockedCompareExchangePointer((void* volatile*)dst, val, ref) == ref) ? 1 : 0; } 241 242 #define EXPECTED(x) (x) 243 #define UNEXPECTED(x) (x) 244 245 #else 246 247 #include <stdatomic.h> 248 249 typedef volatile _Atomic(int32_t) atomic32_t; 250 typedef volatile _Atomic(int64_t) atomic64_t; 251 typedef volatile _Atomic(void*) atomicptr_t; 252 253 static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return atomic_load_explicit(src, memory_order_relaxed); } 254 static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); } 255 static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; } 256 static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; } 257 static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; } 258 static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); } 259 static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_release); } 260 static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return atomic_load_explicit(val, memory_order_relaxed); } 261 static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; } 262 static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return atomic_load_explicit(src, memory_order_relaxed); } 263 static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_relaxed); } 264 static FORCEINLINE void 
atomic_store_ptr_release(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_release); } 265 static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return atomic_exchange_explicit(dst, val, memory_order_acquire); } 266 static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); } 267 268 #define EXPECTED(x) __builtin_expect((x), 1) 269 #define UNEXPECTED(x) __builtin_expect((x), 0) 270 271 #endif 272 273 //////////// 274 /// 275 /// Statistics related functions (evaluate to nothing when statistics not enabled) 276 /// 277 ////// 278 279 #if ENABLE_STATISTICS 280 # define _rpmalloc_stat_inc(counter) atomic_incr32(counter) 281 # define _rpmalloc_stat_dec(counter) atomic_decr32(counter) 282 # define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value)) 283 # define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value)) 284 # define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0) 285 # define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value)) 286 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \ 287 int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ 288 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ 289 heap->size_class_use[class_idx].alloc_peak = alloc_current; \ 290 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ 291 } while(0) 292 # define _rpmalloc_stat_inc_free(heap, class_idx) do { \ 293 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ 294 atomic_incr32(&heap->size_class_use[class_idx].free_total); \ 295 } while(0) 296 #else 297 # define _rpmalloc_stat_inc(counter) do {} while(0) 298 # define _rpmalloc_stat_dec(counter) do {} while(0) 299 # define _rpmalloc_stat_add(counter, value) do {} while(0) 300 # define _rpmalloc_stat_add64(counter, value) do {} while(0) 301 # define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0) 302 # define _rpmalloc_stat_sub(counter, value) do {} while(0) 303 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0) 304 # define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0) 305 #endif 306 307 308 /// 309 /// Preconfigured limits and sizes 310 /// 311 312 //! Granularity of a small allocation block (must be power of two) 313 #define SMALL_GRANULARITY 16 314 //! Small granularity shift count 315 #define SMALL_GRANULARITY_SHIFT 4 316 //! Number of small block size classes 317 #define SMALL_CLASS_COUNT 65 318 //! Maximum size of a small block 319 #define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) 320 //! Granularity of a medium allocation block 321 #define MEDIUM_GRANULARITY 512 322 //! Medium granularity shift count 323 #define MEDIUM_GRANULARITY_SHIFT 9 324 //! Number of medium block size classes 325 #define MEDIUM_CLASS_COUNT 61 326 //! Total number of small + medium size classes 327 #define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) 328 //! Number of large block size classes 329 #define LARGE_CLASS_COUNT 63 330 //! Maximum size of a medium block 331 #define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) 332 //! 
Maximum size of a large block 333 #define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) 334 //! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two) 335 #define SPAN_HEADER_SIZE 128 336 //! Number of spans in thread cache 337 #define MAX_THREAD_SPAN_CACHE 400 338 //! Number of spans to transfer between thread and global cache 339 #define THREAD_SPAN_CACHE_TRANSFER 64 340 //! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2) 341 #define MAX_THREAD_SPAN_LARGE_CACHE 100 342 //! Number of spans to transfer between thread and global cache for large spans 343 #define THREAD_SPAN_LARGE_CACHE_TRANSFER 6 344 345 _Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two"); 346 _Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two"); 347 348 #if ENABLE_VALIDATE_ARGS 349 //! Maximum allocation size to avoid integer overflow 350 #undef MAX_ALLOC_SIZE 351 #define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size) 352 #endif 353 354 #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs)) 355 #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second)) 356 357 #define INVALID_POINTER ((void*)((uintptr_t)-1)) 358 359 #define SIZE_CLASS_LARGE SIZE_CLASS_COUNT 360 #define SIZE_CLASS_HUGE ((uint32_t)-1) 361 362 //////////// 363 /// 364 /// Data types 365 /// 366 ////// 367 368 //! A memory heap, per thread 369 typedef struct heap_t heap_t; 370 //! Span of memory pages 371 typedef struct span_t span_t; 372 //! Span list 373 typedef struct span_list_t span_list_t; 374 //! Span active data 375 typedef struct span_active_t span_active_t; 376 //! Size class definition 377 typedef struct size_class_t size_class_t; 378 //! Global cache 379 typedef struct global_cache_t global_cache_t; 380 381 //! Flag indicating span is the first (master) span of a split superspan 382 #define SPAN_FLAG_MASTER 1U 383 //! Flag indicating span is a secondary (sub) span of a split superspan 384 #define SPAN_FLAG_SUBSPAN 2U 385 //! Flag indicating span has blocks with increased alignment 386 #define SPAN_FLAG_ALIGNED_BLOCKS 4U 387 //! Flag indicating an unmapped master span 388 #define SPAN_FLAG_UNMAPPED_MASTER 8U 389 390 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 391 struct span_use_t { 392 //! Current number of spans used (actually used, not in cache) 393 atomic32_t current; 394 //! High water mark of spans used 395 atomic32_t high; 396 #if ENABLE_STATISTICS 397 //! Number of spans in deferred list 398 atomic32_t spans_deferred; 399 //! Number of spans transitioned to global cache 400 atomic32_t spans_to_global; 401 //! Number of spans transitioned from global cache 402 atomic32_t spans_from_global; 403 //! Number of spans transitioned to thread cache 404 atomic32_t spans_to_cache; 405 //! Number of spans transitioned from thread cache 406 atomic32_t spans_from_cache; 407 //! Number of spans transitioned to reserved state 408 atomic32_t spans_to_reserved; 409 //! Number of spans transitioned from reserved state 410 atomic32_t spans_from_reserved; 411 //! Number of raw memory map calls 412 atomic32_t spans_map_calls; 413 #endif 414 }; 415 typedef struct span_use_t span_use_t; 416 #endif 417 418 #if ENABLE_STATISTICS 419 struct size_class_use_t { 420 //! Current number of allocations 421 atomic32_t alloc_current; 422 //! 
Peak number of allocations 423 int32_t alloc_peak; 424 //! Total number of allocations 425 atomic32_t alloc_total; 426 //! Total number of frees 427 atomic32_t free_total; 428 //! Number of spans in use 429 atomic32_t spans_current; 430 //! Number of spans transitioned to cache 431 int32_t spans_peak; 432 //! Number of spans transitioned to cache 433 atomic32_t spans_to_cache; 434 //! Number of spans transitioned from cache 435 atomic32_t spans_from_cache; 436 //! Number of spans transitioned from reserved state 437 atomic32_t spans_from_reserved; 438 //! Number of spans mapped 439 atomic32_t spans_map_calls; 440 int32_t unused; 441 }; 442 typedef struct size_class_use_t size_class_use_t; 443 #endif 444 445 // A span can either represent a single span of memory pages with size declared by span_map_count configuration variable, 446 // or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single 447 // span or a super span. A super span can further be divided into multiple spans (or this, super spans), where the first 448 // (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans 449 // that are still alive and mapped in virtual memory, and once all subspans and master have been unmapped the entire 450 // superspan region is released and unmapped (on Windows for example, the entire superspan range has to be released 451 // in the same call to release the virtual memory range, but individual subranges can be decommitted individually 452 // to reduce physical memory use). 453 struct span_t { 454 //! Free list 455 void* free_list; 456 //! Total block count of size class 457 uint32_t block_count; 458 //! Size class 459 uint32_t size_class; 460 //! Index of last block initialized in free list 461 uint32_t free_list_limit; 462 //! Number of used blocks remaining when in partial state 463 uint32_t used_count; 464 //! Deferred free list 465 atomicptr_t free_list_deferred; 466 //! Size of deferred free list, or list of spans when part of a cache list 467 uint32_t list_size; 468 //! Size of a block 469 uint32_t block_size; 470 //! Flags and counters 471 uint32_t flags; 472 //! Number of spans 473 uint32_t span_count; 474 //! Total span counter for master spans 475 uint32_t total_spans; 476 //! Offset from master span for subspans 477 uint32_t offset_from_master; 478 //! Remaining span counter, for master spans 479 atomic32_t remaining_spans; 480 //! Alignment offset 481 uint32_t align_offset; 482 //! Owning heap 483 heap_t* heap; 484 //! Next span 485 span_t* next; 486 //! Previous span 487 span_t* prev; 488 }; 489 _Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch"); 490 491 struct span_cache_t { 492 size_t count; 493 span_t* span[MAX_THREAD_SPAN_CACHE]; 494 }; 495 typedef struct span_cache_t span_cache_t; 496 497 struct span_large_cache_t { 498 size_t count; 499 span_t* span[MAX_THREAD_SPAN_LARGE_CACHE]; 500 }; 501 typedef struct span_large_cache_t span_large_cache_t; 502 503 struct heap_size_class_t { 504 //! Free list of active span 505 void* free_list; 506 //! Double linked list of partially used spans with free blocks. 507 // Previous span pointer in head points to tail span of list. 508 span_t* partial_span; 509 //! Early level cache of fully free spans 510 span_t* cache; 511 }; 512 typedef struct heap_size_class_t heap_size_class_t; 513 514 // Control structure for a heap, either a thread heap or a first class heap if enabled 515 struct heap_t { 516 //! 
Owning thread ID 517 uintptr_t owner_thread; 518 //! Free lists for each size class 519 heap_size_class_t size_class[SIZE_CLASS_COUNT]; 520 #if ENABLE_THREAD_CACHE 521 //! Arrays of fully freed spans, single span 522 span_cache_t span_cache; 523 #endif 524 //! List of deferred free spans (single linked list) 525 atomicptr_t span_free_deferred; 526 //! Number of full spans 527 size_t full_span_count; 528 //! Mapped but unused spans 529 span_t* span_reserve; 530 //! Master span for mapped but unused spans 531 span_t* span_reserve_master; 532 //! Number of mapped but unused spans 533 uint32_t spans_reserved; 534 //! Child count 535 atomic32_t child_count; 536 //! Next heap in id list 537 heap_t* next_heap; 538 //! Next heap in orphan list 539 heap_t* next_orphan; 540 //! Heap ID 541 int32_t id; 542 //! Finalization state flag 543 int finalize; 544 //! Master heap owning the memory pages 545 heap_t* master_heap; 546 #if ENABLE_THREAD_CACHE 547 //! Arrays of fully freed spans, large spans with > 1 span count 548 span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; 549 #endif 550 #if RPMALLOC_FIRST_CLASS_HEAPS 551 //! Double linked list of fully utilized spans with free blocks for each size class. 552 // Previous span pointer in head points to tail span of list. 553 span_t* full_span[SIZE_CLASS_COUNT]; 554 //! Double linked list of large and huge spans allocated by this heap 555 span_t* large_huge_span; 556 #endif 557 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 558 //! Current and high water mark of spans used per span count 559 span_use_t span_use[LARGE_CLASS_COUNT]; 560 #endif 561 #if ENABLE_STATISTICS 562 //! Allocation stats per size class 563 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; 564 //! Number of bytes transitioned thread -> global 565 atomic64_t thread_to_global; 566 //! Number of bytes transitioned global -> thread 567 atomic64_t global_to_thread; 568 #endif 569 }; 570 571 // Size class for defining a block size bucket 572 struct size_class_t { 573 //! Size of blocks in this class 574 uint32_t block_size; 575 //! Number of blocks in each chunk 576 uint16_t block_count; 577 //! Class index this class is merged with 578 uint16_t class_idx; 579 }; 580 _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); 581 582 struct global_cache_t { 583 //! Cache lock 584 atomic32_t lock; 585 //! Cache count 586 uint32_t count; 587 #if ENABLE_STATISTICS 588 //! Insert count 589 size_t insert_count; 590 //! Extract count 591 size_t extract_count; 592 #endif 593 //! Cached spans 594 span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; 595 //! Unlimited cache overflow 596 span_t* overflow; 597 }; 598 599 //////////// 600 /// 601 /// Global data 602 /// 603 ////// 604 605 //! Default span size (64KiB) 606 #define _memory_default_span_size (64 * 1024) 607 #define _memory_default_span_size_shift 16 608 #define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) 609 610 //! Initialized flag 611 static int _rpmalloc_initialized; 612 //! Main thread ID 613 static uintptr_t _rpmalloc_main_thread_id; 614 //! Configuration 615 static rpmalloc_config_t _memory_config; 616 //! Memory page size 617 static size_t _memory_page_size; 618 //! Shift to divide by page size 619 static size_t _memory_page_size_shift; 620 //! Granularity at which memory pages are mapped by OS 621 static size_t _memory_map_granularity; 622 #if RPMALLOC_CONFIGURABLE 623 //! Size of a span of memory pages 624 static size_t _memory_span_size; 625 //! 
Shift to divide by span size 626 static size_t _memory_span_size_shift; 627 //! Mask to get to start of a memory span 628 static uintptr_t _memory_span_mask; 629 #else 630 //! Hardwired span size 631 #define _memory_span_size _memory_default_span_size 632 #define _memory_span_size_shift _memory_default_span_size_shift 633 #define _memory_span_mask _memory_default_span_mask 634 #endif 635 //! Number of spans to map in each map call 636 static size_t _memory_span_map_count; 637 //! Number of spans to keep reserved in each heap 638 static size_t _memory_heap_reserve_count; 639 //! Global size classes 640 static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; 641 //! Run-time size limit of medium blocks 642 static size_t _memory_medium_size_limit; 643 //! Heap ID counter 644 static atomic32_t _memory_heap_id; 645 //! Huge page support 646 static int _memory_huge_pages; 647 #if ENABLE_GLOBAL_CACHE 648 //! Global span cache 649 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; 650 #endif 651 //! Global reserved spans 652 static span_t* _memory_global_reserve; 653 //! Global reserved count 654 static size_t _memory_global_reserve_count; 655 //! Global reserved master 656 static span_t* _memory_global_reserve_master; 657 //! All heaps 658 static heap_t* _memory_heaps[HEAP_ARRAY_SIZE]; 659 //! Used to restrict access to mapping memory for huge pages 660 static atomic32_t _memory_global_lock; 661 //! Orphaned heaps 662 static heap_t* _memory_orphan_heaps; 663 #if RPMALLOC_FIRST_CLASS_HEAPS 664 //! Orphaned heaps (first class heaps) 665 static heap_t* _memory_first_class_orphan_heaps; 666 #endif 667 #if ENABLE_STATISTICS 668 //! Allocations counter 669 static atomic64_t _allocation_counter; 670 //! Deallocations counter 671 static atomic64_t _deallocation_counter; 672 //! Active heap count 673 static atomic32_t _memory_active_heaps; 674 //! Number of currently mapped memory pages 675 static atomic32_t _mapped_pages; 676 //! Peak number of concurrently mapped memory pages 677 static int32_t _mapped_pages_peak; 678 //! Number of mapped master spans 679 static atomic32_t _master_spans; 680 //! Number of unmapped dangling master spans 681 static atomic32_t _unmapped_master_spans; 682 //! Running counter of total number of mapped memory pages since start 683 static atomic32_t _mapped_total; 684 //! Running counter of total number of unmapped memory pages since start 685 static atomic32_t _unmapped_total; 686 //! Number of currently mapped memory pages in OS calls 687 static atomic32_t _mapped_pages_os; 688 //! Number of currently allocated pages in huge allocations 689 static atomic32_t _huge_pages_current; 690 //! Peak number of currently allocated pages in huge allocations 691 static int32_t _huge_pages_peak; 692 #endif 693 694 //////////// 695 /// 696 /// Thread local heap and ID 697 /// 698 ////// 699 700 //! 
Current thread heap 701 #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 702 static pthread_key_t _memory_thread_heap; 703 #else 704 # ifdef _MSC_VER 705 # define _Thread_local __declspec(thread) 706 # define TLS_MODEL 707 # else 708 # ifndef __HAIKU__ 709 # define TLS_MODEL __attribute__((tls_model("initial-exec"))) 710 # else 711 # define TLS_MODEL 712 # endif 713 # if !defined(__clang__) && defined(__GNUC__) 714 # define _Thread_local __thread 715 # endif 716 # endif 717 static _Thread_local heap_t* _memory_thread_heap TLS_MODEL; 718 #endif 719 720 static inline heap_t* 721 get_thread_heap_raw(void) { 722 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 723 return pthread_getspecific(_memory_thread_heap); 724 #else 725 return _memory_thread_heap; 726 #endif 727 } 728 729 //! Get the current thread heap 730 static inline heap_t* 731 get_thread_heap(void) { 732 heap_t* heap = get_thread_heap_raw(); 733 #if ENABLE_PRELOAD 734 if (EXPECTED(heap != 0)) 735 return heap; 736 rpmalloc_initialize(); 737 return get_thread_heap_raw(); 738 #else 739 return heap; 740 #endif 741 } 742 743 //! Fast thread ID 744 static inline uintptr_t 745 get_thread_id(void) { 746 #if defined(_WIN32) 747 return (uintptr_t)((void*)NtCurrentTeb()); 748 #elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) 749 uintptr_t tid; 750 # if defined(__i386__) 751 __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); 752 # elif defined(__x86_64__) 753 # if defined(__MACH__) 754 __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); 755 # else 756 __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); 757 # endif 758 # elif defined(__arm__) 759 __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); 760 # elif defined(__aarch64__) 761 # if defined(__MACH__) 762 // tpidr_el0 likely unused, always return 0 on iOS 763 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tid)); 764 # else 765 __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tid)); 766 # endif 767 # else 768 tid = (uintptr_t)((void*)get_thread_heap_raw()); 769 # endif 770 return tid; 771 #else 772 return (uintptr_t)((void*)get_thread_heap_raw()); 773 #endif 774 } 775 776 //! Set the current thread heap 777 static void 778 set_thread_heap(heap_t* heap) { 779 #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 780 pthread_setspecific(_memory_thread_heap, heap); 781 #else 782 _memory_thread_heap = heap; 783 #endif 784 if (heap) 785 heap->owner_thread = get_thread_id(); 786 } 787 788 //! Set main thread ID 789 extern void 790 rpmalloc_set_main_thread(void); 791 792 void 793 rpmalloc_set_main_thread(void) { 794 _rpmalloc_main_thread_id = get_thread_id(); 795 } 796 797 static void 798 _rpmalloc_spin(void) { 799 #if defined(_MSC_VER) 800 _mm_pause(); 801 #elif defined(__x86_64__) || defined(__i386__) 802 __asm__ volatile("pause" ::: "memory"); 803 #elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) 804 __asm__ volatile("yield" ::: "memory"); 805 #elif defined(__powerpc__) || defined(__powerpc64__) 806 // No idea if ever been compiled in such archs but ... 
as precaution 807 __asm__ volatile("or 27,27,27"); 808 #elif defined(__sparc__) 809 __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); 810 #else 811 struct timespec ts = {0}; 812 nanosleep(&ts, 0); 813 #endif 814 } 815 816 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 817 static void NTAPI 818 _rpmalloc_thread_destructor(void* value) { 819 #if ENABLE_OVERRIDE 820 // If this is called on main thread it means rpmalloc_finalize 821 // has not been called and shutdown is forced (through _exit) or unclean 822 if (get_thread_id() == _rpmalloc_main_thread_id) 823 return; 824 #endif 825 if (value) 826 rpmalloc_thread_finalize(1); 827 } 828 #endif 829 830 831 //////////// 832 /// 833 /// Low level memory map/unmap 834 /// 835 ////// 836 837 static void 838 _rpmalloc_set_name(void* address, size_t size) { 839 #if defined(__linux__) || defined(__ANDROID__) 840 const char *name = _memory_huge_pages ? _memory_config.huge_page_name : _memory_config.page_name; 841 if (address == MAP_FAILED || !name) 842 return; 843 // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails 844 // (e.g. invalid name) it is a no-op basically. 845 (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name); 846 #else 847 (void)sizeof(size); 848 (void)sizeof(address); 849 #endif 850 } 851 852 853 //! Map more virtual memory 854 // size is number of bytes to map 855 // offset receives the offset in bytes from start of mapped region 856 // returns address to start of mapped region to use 857 static void* 858 _rpmalloc_mmap(size_t size, size_t* offset) { 859 rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); 860 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 861 void* address = _memory_config.memory_map(size, offset); 862 if (EXPECTED(address != 0)) { 863 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak); 864 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); 865 } 866 return address; 867 } 868 869 //! Unmap virtual memory 870 // address is the memory address to unmap, as returned from _memory_map 871 // size is the number of bytes to unmap, which might be less than full region for a partial unmap 872 // offset is the offset in bytes to the actual mapped region, as set by _memory_map 873 // release is set to 0 for partial unmap, or size of entire range for a full unmap 874 static void 875 _rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) { 876 rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); 877 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 878 if (release) { 879 rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); 880 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); 881 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); 882 } 883 _memory_config.memory_unmap(address, size, offset, release); 884 } 885 886 //! Default implementation to map new pages to virtual memory 887 static void* 888 _rpmalloc_mmap_os(size_t size, size_t* offset) { 889 //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity 890 size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? 
_memory_span_size : 0; 891 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 892 #if PLATFORM_WINDOWS 893 //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed" 894 void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); 895 if (!ptr) { 896 if (_memory_config.map_fail_callback) { 897 if (_memory_config.map_fail_callback(size + padding)) 898 return _rpmalloc_mmap_os(size, offset); 899 } else { 900 rpmalloc_assert(ptr, "Failed to map virtual memory block"); 901 } 902 return 0; 903 } 904 #else 905 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; 906 # if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 907 int fd = (int)VM_MAKE_TAG(240U); 908 if (_memory_huge_pages) 909 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; 910 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); 911 # elif defined(MAP_HUGETLB) 912 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0); 913 # if defined(MADV_HUGEPAGE) 914 // In some configurations, huge pages allocations might fail thus 915 // we fallback to normal allocations and promote the region as transparent huge page 916 if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) { 917 ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 918 if (ptr && ptr != MAP_FAILED) { 919 int prm = madvise(ptr, size + padding, MADV_HUGEPAGE); 920 (void)prm; 921 rpmalloc_assert((prm == 0), "Failed to promote the page to THP"); 922 } 923 } 924 # endif 925 _rpmalloc_set_name(ptr, size + padding); 926 # elif defined(MAP_ALIGNED) 927 const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); 928 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); 929 # elif defined(MAP_ALIGN) 930 caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0); 931 void* ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); 932 # else 933 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 934 # endif 935 if ((ptr == MAP_FAILED) || !ptr) { 936 if (_memory_config.map_fail_callback) { 937 if (_memory_config.map_fail_callback(size + padding)) 938 return _rpmalloc_mmap_os(size, offset); 939 } else if (errno != ENOMEM) { 940 rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block"); 941 } 942 return 0; 943 } 944 #endif 945 _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift)); 946 if (padding) { 947 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); 948 rpmalloc_assert(final_padding <= _memory_span_size, "Internal failure in padding"); 949 rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); 950 rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); 951 ptr = pointer_offset(ptr, final_padding); 952 *offset = final_padding >> 3; 953 } 954 rpmalloc_assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask), "Internal failure in padding"); 955 return ptr; 956 } 957 958 //! 
Default implementation to unmap pages from virtual memory 959 static void 960 _rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) { 961 rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); 962 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 963 rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); 964 if (release && offset) { 965 offset <<= 3; 966 address = pointer_offset(address, -(int32_t)offset); 967 if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) { 968 //Padding is always one span size 969 release += _memory_span_size; 970 } 971 } 972 #if !DISABLE_UNMAP 973 #if PLATFORM_WINDOWS 974 if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) { 975 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 976 } 977 #else 978 if (release) { 979 if (munmap(address, release)) { 980 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 981 } 982 } else { 983 #if defined(MADV_FREE_REUSABLE) 984 int ret; 985 while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN)) 986 errno = 0; 987 if ((ret == -1) && (errno != 0)) { 988 #elif defined(MADV_DONTNEED) 989 if (madvise(address, size, MADV_DONTNEED)) { 990 #elif defined(MADV_PAGEOUT) 991 if (madvise(address, size, MADV_PAGEOUT)) { 992 #elif defined(MADV_FREE) 993 if (madvise(address, size, MADV_FREE)) { 994 #else 995 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { 996 #endif 997 rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); 998 } 999 } 1000 #endif 1001 #endif 1002 if (release) 1003 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); 1004 } 1005 1006 static void 1007 _rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count); 1008 1009 //! Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller) 1010 static span_t* 1011 _rpmalloc_global_get_reserved_spans(size_t span_count) { 1012 span_t* span = _memory_global_reserve; 1013 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, span, span_count); 1014 _memory_global_reserve_count -= span_count; 1015 if (_memory_global_reserve_count) 1016 _memory_global_reserve = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 1017 else 1018 _memory_global_reserve = 0; 1019 return span; 1020 } 1021 1022 //! Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe) 1023 static void 1024 _rpmalloc_global_set_reserved_spans(span_t* master, span_t* reserve, size_t reserve_span_count) { 1025 _memory_global_reserve_master = master; 1026 _memory_global_reserve_count = reserve_span_count; 1027 _memory_global_reserve = reserve; 1028 } 1029 1030 1031 //////////// 1032 /// 1033 /// Span linked list management 1034 /// 1035 ////// 1036 1037 //! Add a span to double linked list at the head 1038 static void 1039 _rpmalloc_span_double_link_list_add(span_t** head, span_t* span) { 1040 if (*head) 1041 (*head)->prev = span; 1042 span->next = *head; 1043 *head = span; 1044 } 1045 1046 //! Pop head span from double linked list 1047 static void 1048 _rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) { 1049 rpmalloc_assert(*head == span, "Linked list corrupted"); 1050 span = *head; 1051 *head = span->next; 1052 } 1053 1054 //! 
Remove a span from double linked list 1055 static void 1056 _rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) { 1057 rpmalloc_assert(*head, "Linked list corrupted"); 1058 if (*head == span) { 1059 *head = span->next; 1060 } else { 1061 span_t* next_span = span->next; 1062 span_t* prev_span = span->prev; 1063 prev_span->next = next_span; 1064 if (EXPECTED(next_span != 0)) 1065 next_span->prev = prev_span; 1066 } 1067 } 1068 1069 1070 //////////// 1071 /// 1072 /// Span control 1073 /// 1074 ////// 1075 1076 static void 1077 _rpmalloc_heap_cache_insert(heap_t* heap, span_t* span); 1078 1079 static void 1080 _rpmalloc_heap_finalize(heap_t* heap); 1081 1082 static void 1083 _rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count); 1084 1085 //! Declare the span to be a subspan and store distance from master span and span count 1086 static void 1087 _rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) { 1088 rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), "Span master pointer and/or flag mismatch"); 1089 if (subspan != master) { 1090 subspan->flags = SPAN_FLAG_SUBSPAN; 1091 subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift); 1092 subspan->align_offset = 0; 1093 } 1094 subspan->span_count = (uint32_t)span_count; 1095 } 1096 1097 //! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller) 1098 static span_t* 1099 _rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) { 1100 //Update the heap span reserve 1101 span_t* span = heap->span_reserve; 1102 heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size); 1103 heap->spans_reserved -= (uint32_t)span_count; 1104 1105 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count); 1106 if (span_count <= LARGE_CLASS_COUNT) 1107 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved); 1108 1109 return span; 1110 } 1111 1112 //! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size 1113 static size_t 1114 _rpmalloc_span_align_count(size_t span_count) { 1115 size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count; 1116 if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size)) 1117 request_count += _memory_span_map_count - (request_count % _memory_span_map_count); 1118 return request_count; 1119 } 1120 1121 //! Setup a newly mapped span 1122 static void 1123 _rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) { 1124 span->total_spans = (uint32_t)total_span_count; 1125 span->span_count = (uint32_t)span_count; 1126 span->align_offset = (uint32_t)align_offset; 1127 span->flags = SPAN_FLAG_MASTER; 1128 atomic_store32(&span->remaining_spans, (int32_t)total_span_count); 1129 } 1130 1131 static void 1132 _rpmalloc_span_unmap(span_t* span); 1133 1134 //! Map an aligned set of spans, taking configured mapping granularity and the page size into account 1135 static span_t* 1136 _rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) { 1137 //If we already have some, but not enough, reserved spans, release those to heap cache and map a new 1138 //full set of spans. 
Otherwise we would waste memory if page size > span size (huge pages) 1139 size_t aligned_span_count = _rpmalloc_span_align_count(span_count); 1140 size_t align_offset = 0; 1141 span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset); 1142 if (!span) 1143 return 0; 1144 _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset); 1145 _rpmalloc_stat_inc(&_master_spans); 1146 if (span_count <= LARGE_CLASS_COUNT) 1147 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls); 1148 if (aligned_span_count > span_count) { 1149 span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size); 1150 size_t reserved_count = aligned_span_count - span_count; 1151 if (heap->spans_reserved) { 1152 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved); 1153 _rpmalloc_heap_cache_insert(heap, heap->span_reserve); 1154 } 1155 if (reserved_count > _memory_heap_reserve_count) { 1156 // If huge pages or eager spam map count, the global reserve spin lock is held by caller, _rpmalloc_span_map 1157 rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, "Global spin lock not held as expected"); 1158 size_t remain_count = reserved_count - _memory_heap_reserve_count; 1159 reserved_count = _memory_heap_reserve_count; 1160 span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size); 1161 if (_memory_global_reserve) { 1162 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count); 1163 _rpmalloc_span_unmap(_memory_global_reserve); 1164 } 1165 _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count); 1166 } 1167 _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count); 1168 } 1169 return span; 1170 } 1171 1172 //! Map in memory pages for the given number of spans (or use previously reserved pages) 1173 static span_t* 1174 _rpmalloc_span_map(heap_t* heap, size_t span_count) { 1175 if (span_count <= heap->spans_reserved) 1176 return _rpmalloc_span_map_from_reserve(heap, span_count); 1177 span_t* span = 0; 1178 int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count); 1179 if (use_global_reserve) { 1180 // If huge pages, make sure only one thread maps more memory to avoid bloat 1181 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1182 _rpmalloc_spin(); 1183 if (_memory_global_reserve_count >= span_count) { 1184 size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count); 1185 if (_memory_global_reserve_count < reserve_count) 1186 reserve_count = _memory_global_reserve_count; 1187 span = _rpmalloc_global_get_reserved_spans(reserve_count); 1188 if (span) { 1189 if (reserve_count > span_count) { 1190 span_t* reserved_span = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 1191 _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, reserved_span, reserve_count - span_count); 1192 } 1193 // Already marked as subspan in _rpmalloc_global_get_reserved_spans 1194 span->span_count = (uint32_t)span_count; 1195 } 1196 } 1197 } 1198 if (!span) 1199 span = _rpmalloc_span_map_aligned_count(heap, span_count); 1200 if (use_global_reserve) 1201 atomic_store32_release(&_memory_global_lock, 0); 1202 return span; 1203 } 1204 1205 //! 
Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings) 1206 static void 1207 _rpmalloc_span_unmap(span_t* span) { 1208 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1209 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1210 1211 int is_master = !!(span->flags & SPAN_FLAG_MASTER); 1212 span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size))); 1213 rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1214 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 1215 1216 size_t span_count = span->span_count; 1217 if (!is_master) { 1218 //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master) 1219 rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); 1220 if (_memory_span_size >= _memory_page_size) 1221 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); 1222 } else { 1223 //Special double flag to denote an unmapped master 1224 //It must be kept in memory since span header must be used 1225 span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; 1226 _rpmalloc_stat_add(&_unmapped_master_spans, 1); 1227 } 1228 1229 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { 1230 //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span 1231 rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1232 size_t unmap_count = master->span_count; 1233 if (_memory_span_size < _memory_page_size) 1234 unmap_count = master->total_spans; 1235 _rpmalloc_stat_sub(&_master_spans, 1); 1236 _rpmalloc_stat_sub(&_unmapped_master_spans, 1); 1237 _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size); 1238 } 1239 } 1240 1241 //! Move the span (used for small or medium allocations) to the heap thread cache 1242 static void 1243 _rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) { 1244 rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); 1245 rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class"); 1246 rpmalloc_assert(span->span_count == 1, "Invalid span count"); 1247 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1248 atomic_decr32(&heap->span_use[0].current); 1249 #endif 1250 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1251 if (!heap->finalize) { 1252 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); 1253 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); 1254 if (heap->size_class[span->size_class].cache) 1255 _rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache); 1256 heap->size_class[span->size_class].cache = span; 1257 } else { 1258 _rpmalloc_span_unmap(span); 1259 } 1260 } 1261 1262 //! Initialize a (partial) free list up to next system memory page, while reserving the first block 1263 //! 
as allocated, returning number of blocks in list 1264 static uint32_t 1265 free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start, uint32_t block_count, uint32_t block_size) { 1266 rpmalloc_assert(block_count, "Internal failure"); 1267 *first_block = block_start; 1268 if (block_count > 1) { 1269 void* free_block = pointer_offset(block_start, block_size); 1270 void* block_end = pointer_offset(block_start, (size_t)block_size * block_count); 1271 //If block size is less than half a memory page, bound init to next memory page boundary 1272 if (block_size < (_memory_page_size >> 1)) { 1273 void* page_end = pointer_offset(page_start, _memory_page_size); 1274 if (page_end < block_end) 1275 block_end = page_end; 1276 } 1277 *list = free_block; 1278 block_count = 2; 1279 void* next_block = pointer_offset(free_block, block_size); 1280 while (next_block < block_end) { 1281 *((void**)free_block) = next_block; 1282 free_block = next_block; 1283 ++block_count; 1284 next_block = pointer_offset(next_block, block_size); 1285 } 1286 *((void**)free_block) = 0; 1287 } else { 1288 *list = 0; 1289 } 1290 return block_count; 1291 } 1292 1293 //! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list 1294 static void* 1295 _rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) { 1296 rpmalloc_assert(span->span_count == 1, "Internal failure"); 1297 size_class_t* size_class = _memory_size_class + class_idx; 1298 span->size_class = class_idx; 1299 span->heap = heap; 1300 span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; 1301 span->block_size = size_class->block_size; 1302 span->block_count = size_class->block_count; 1303 span->free_list = 0; 1304 span->list_size = 0; 1305 atomic_store_ptr_release(&span->free_list_deferred, 0); 1306 1307 //Setup free list. 
Only initialize one system page worth of free blocks in list 1308 void* block; 1309 span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block, 1310 span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size); 1311 //Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized 1312 if (span->free_list_limit < span->block_count) { 1313 _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span); 1314 span->used_count = span->free_list_limit; 1315 } else { 1316 #if RPMALLOC_FIRST_CLASS_HEAPS 1317 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 1318 #endif 1319 ++heap->full_span_count; 1320 span->used_count = span->block_count; 1321 } 1322 return block; 1323 } 1324 1325 static void 1326 _rpmalloc_span_extract_free_list_deferred(span_t* span) { 1327 // We need acquire semantics on the CAS operation since we are interested in the list size 1328 // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency 1329 do { 1330 span->free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 1331 } while (span->free_list == INVALID_POINTER); 1332 span->used_count -= span->list_size; 1333 span->list_size = 0; 1334 atomic_store_ptr_release(&span->free_list_deferred, 0); 1335 } 1336 1337 static int 1338 _rpmalloc_span_is_fully_utilized(span_t* span) { 1339 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span free list corrupted"); 1340 return !span->free_list && (span->free_list_limit >= span->block_count); 1341 } 1342 1343 static int 1344 _rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) { 1345 void* free_list = heap->size_class[iclass].free_list; 1346 span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask); 1347 if (span == class_span) { 1348 // Adopt the heap class free list back into the span free list 1349 void* block = span->free_list; 1350 void* last_block = 0; 1351 while (block) { 1352 last_block = block; 1353 block = *((void**)block); 1354 } 1355 uint32_t free_count = 0; 1356 block = free_list; 1357 while (block) { 1358 ++free_count; 1359 block = *((void**)block); 1360 } 1361 if (last_block) { 1362 *((void**)last_block) = free_list; 1363 } else { 1364 span->free_list = free_list; 1365 } 1366 heap->size_class[iclass].free_list = 0; 1367 span->used_count -= free_count; 1368 } 1369 //If this assert triggers you have memory leaks 1370 rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected"); 1371 if (span->list_size == span->used_count) { 1372 _rpmalloc_stat_dec(&heap->span_use[0].current); 1373 _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current); 1374 // This function only used for spans in double linked lists 1375 if (list_head) 1376 _rpmalloc_span_double_link_list_remove(list_head, span); 1377 _rpmalloc_span_unmap(span); 1378 return 1; 1379 } 1380 return 0; 1381 } 1382 1383 1384 //////////// 1385 /// 1386 /// Global cache 1387 /// 1388 ////// 1389 1390 #if ENABLE_GLOBAL_CACHE 1391 1392 //! 
Finalize a global cache 1393 static void 1394 _rpmalloc_global_cache_finalize(global_cache_t* cache) { 1395 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1396 _rpmalloc_spin(); 1397 1398 for (size_t ispan = 0; ispan < cache->count; ++ispan) 1399 _rpmalloc_span_unmap(cache->span[ispan]); 1400 cache->count = 0; 1401 1402 while (cache->overflow) { 1403 span_t* span = cache->overflow; 1404 cache->overflow = span->next; 1405 _rpmalloc_span_unmap(span); 1406 } 1407 1408 atomic_store32_release(&cache->lock, 0); 1409 } 1410 1411 static void 1412 _rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) { 1413 const size_t cache_limit = (span_count == 1) ? 1414 GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE : 1415 GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1416 1417 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1418 1419 size_t insert_count = count; 1420 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1421 _rpmalloc_spin(); 1422 1423 #if ENABLE_STATISTICS 1424 cache->insert_count += count; 1425 #endif 1426 if ((cache->count + insert_count) > cache_limit) 1427 insert_count = cache_limit - cache->count; 1428 1429 memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count); 1430 cache->count += (uint32_t)insert_count; 1431 1432 #if ENABLE_UNLIMITED_CACHE 1433 while (insert_count < count) { 1434 #else 1435 // Enable unlimited cache if huge pages, or we will leak since it is unlikely that an entire huge page 1436 // will be unmapped, and we're unable to partially decommit a huge page 1437 while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { 1438 #endif 1439 span_t* current_span = span[insert_count++]; 1440 current_span->next = cache->overflow; 1441 cache->overflow = current_span; 1442 } 1443 atomic_store32_release(&cache->lock, 0); 1444 1445 span_t* keep = 0; 1446 for (size_t ispan = insert_count; ispan < count; ++ispan) { 1447 span_t* current_span = span[ispan]; 1448 // Keep master spans that has remaining subspans to avoid dangling them 1449 if ((current_span->flags & SPAN_FLAG_MASTER) && 1450 (atomic_load32(¤t_span->remaining_spans) > (int32_t)current_span->span_count)) { 1451 current_span->next = keep; 1452 keep = current_span; 1453 } else { 1454 _rpmalloc_span_unmap(current_span); 1455 } 1456 } 1457 1458 if (keep) { 1459 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1460 _rpmalloc_spin(); 1461 1462 size_t islot = 0; 1463 while (keep) { 1464 for (; islot < cache->count; ++islot) { 1465 span_t* current_span = cache->span[islot]; 1466 if (!(current_span->flags & SPAN_FLAG_MASTER) || ((current_span->flags & SPAN_FLAG_MASTER) && 1467 (atomic_load32(¤t_span->remaining_spans) <= (int32_t)current_span->span_count))) { 1468 _rpmalloc_span_unmap(current_span); 1469 cache->span[islot] = keep; 1470 break; 1471 } 1472 } 1473 if (islot == cache->count) 1474 break; 1475 keep = keep->next; 1476 } 1477 1478 if (keep) { 1479 span_t* tail = keep; 1480 while (tail->next) 1481 tail = tail->next; 1482 tail->next = cache->overflow; 1483 cache->overflow = keep; 1484 } 1485 1486 atomic_store32_release(&cache->lock, 0); 1487 } 1488 } 1489 1490 static size_t 1491 _rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) { 1492 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1493 1494 size_t extract_count = 0; 1495 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1496 _rpmalloc_spin(); 1497 1498 #if ENABLE_STATISTICS 1499 cache->extract_count += count; 1500 #endif 
1501 size_t want = count - extract_count; 1502 if (want > cache->count) 1503 want = cache->count; 1504 1505 memcpy(span + extract_count, cache->span + (cache->count - want), sizeof(span_t*) * want); 1506 cache->count -= (uint32_t)want; 1507 extract_count += want; 1508 1509 while ((extract_count < count) && cache->overflow) { 1510 span_t* current_span = cache->overflow; 1511 span[extract_count++] = current_span; 1512 cache->overflow = current_span->next; 1513 } 1514 1515 #if ENABLE_ASSERTS 1516 for (size_t ispan = 0; ispan < extract_count; ++ispan) { 1517 assert(span[ispan]->span_count == span_count); 1518 } 1519 #endif 1520 1521 atomic_store32_release(&cache->lock, 0); 1522 1523 return extract_count; 1524 } 1525 1526 #endif 1527 1528 //////////// 1529 /// 1530 /// Heap control 1531 /// 1532 ////// 1533 1534 static void _rpmalloc_deallocate_huge(span_t*); 1535 1536 //! Store the given spans as reserve in the given heap 1537 static void 1538 _rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) { 1539 heap->span_reserve_master = master; 1540 heap->span_reserve = reserve; 1541 heap->spans_reserved = (uint32_t)reserve_span_count; 1542 } 1543 1544 //! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use 1545 static void 1546 _rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) { 1547 span_t* span = (span_t*)((void*)atomic_exchange_ptr_acquire(&heap->span_free_deferred, 0)); 1548 while (span) { 1549 span_t* next_span = (span_t*)span->free_list; 1550 rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); 1551 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 1552 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1553 --heap->full_span_count; 1554 _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); 1555 #if RPMALLOC_FIRST_CLASS_HEAPS 1556 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 1557 #endif 1558 _rpmalloc_stat_dec(&heap->span_use[0].current); 1559 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1560 if (single_span && !*single_span) 1561 *single_span = span; 1562 else 1563 _rpmalloc_heap_cache_insert(heap, span); 1564 } else { 1565 if (span->size_class == SIZE_CLASS_HUGE) { 1566 _rpmalloc_deallocate_huge(span); 1567 } else { 1568 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Span size class invalid"); 1569 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1570 --heap->full_span_count; 1571 #if RPMALLOC_FIRST_CLASS_HEAPS 1572 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); 1573 #endif 1574 uint32_t idx = span->span_count - 1; 1575 _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); 1576 _rpmalloc_stat_dec(&heap->span_use[idx].current); 1577 if (!idx && single_span && !*single_span) 1578 *single_span = span; 1579 else 1580 _rpmalloc_heap_cache_insert(heap, span); 1581 } 1582 } 1583 span = next_span; 1584 } 1585 } 1586 1587 static void 1588 _rpmalloc_heap_unmap(heap_t* heap) { 1589 if (!heap->master_heap) { 1590 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { 1591 span_t* span = (span_t*)((uintptr_t)heap & _memory_span_mask); 1592 _rpmalloc_span_unmap(span); 1593 } 1594 } else { 1595 if (atomic_decr32(&heap->master_heap->child_count) == 0) { 1596 _rpmalloc_heap_unmap(heap->master_heap); 1597 } 1598 } 1599 } 1600 1601 static void 1602 _rpmalloc_heap_global_finalize(heap_t* heap) { 1603 if 
(heap->finalize++ > 1) { 1604 --heap->finalize; 1605 return; 1606 } 1607 1608 _rpmalloc_heap_finalize(heap); 1609 1610 #if ENABLE_THREAD_CACHE 1611 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1612 span_cache_t* span_cache; 1613 if (!iclass) 1614 span_cache = &heap->span_cache; 1615 else 1616 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1617 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1618 _rpmalloc_span_unmap(span_cache->span[ispan]); 1619 span_cache->count = 0; 1620 } 1621 #endif 1622 1623 if (heap->full_span_count) { 1624 --heap->finalize; 1625 return; 1626 } 1627 1628 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 1629 if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) { 1630 --heap->finalize; 1631 return; 1632 } 1633 } 1634 //Heap is now completely free, unmap and remove from heap list 1635 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1636 heap_t* list_heap = _memory_heaps[list_idx]; 1637 if (list_heap == heap) { 1638 _memory_heaps[list_idx] = heap->next_heap; 1639 } else { 1640 while (list_heap->next_heap != heap) 1641 list_heap = list_heap->next_heap; 1642 list_heap->next_heap = heap->next_heap; 1643 } 1644 1645 _rpmalloc_heap_unmap(heap); 1646 } 1647 1648 //! Insert a single span into thread heap cache, releasing to global cache if overflow 1649 static void 1650 _rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) { 1651 if (UNEXPECTED(heap->finalize != 0)) { 1652 _rpmalloc_span_unmap(span); 1653 _rpmalloc_heap_global_finalize(heap); 1654 return; 1655 } 1656 #if ENABLE_THREAD_CACHE 1657 size_t span_count = span->span_count; 1658 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); 1659 if (span_count == 1) { 1660 span_cache_t* span_cache = &heap->span_cache; 1661 span_cache->span[span_cache->count++] = span; 1662 if (span_cache->count == MAX_THREAD_SPAN_CACHE) { 1663 const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; 1664 #if ENABLE_GLOBAL_CACHE 1665 _rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); 1666 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER); 1667 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER); 1668 #else 1669 for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) 1670 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1671 #endif 1672 span_cache->count = remain_count; 1673 } 1674 } else { 1675 size_t cache_idx = span_count - 2; 1676 span_large_cache_t* span_cache = heap->span_large_cache + cache_idx; 1677 span_cache->span[span_cache->count++] = span; 1678 const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1679 if (span_cache->count == cache_limit) { 1680 const size_t transfer_limit = 2 + (cache_limit >> 2); 1681 const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? 
THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit); 1682 const size_t remain_count = cache_limit - transfer_count; 1683 #if ENABLE_GLOBAL_CACHE 1684 _rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size); 1685 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count); 1686 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count); 1687 #else 1688 for (size_t ispan = 0; ispan < transfer_count; ++ispan) 1689 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1690 #endif 1691 span_cache->count = remain_count; 1692 } 1693 } 1694 #else 1695 (void)sizeof(heap); 1696 _rpmalloc_span_unmap(span); 1697 #endif 1698 } 1699 1700 //! Extract the given number of spans from the different cache levels 1701 static span_t* 1702 _rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) { 1703 span_t* span = 0; 1704 #if ENABLE_THREAD_CACHE 1705 span_cache_t* span_cache; 1706 if (span_count == 1) 1707 span_cache = &heap->span_cache; 1708 else 1709 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1710 if (span_cache->count) { 1711 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); 1712 return span_cache->span[--span_cache->count]; 1713 } 1714 #endif 1715 return span; 1716 } 1717 1718 static span_t* 1719 _rpmalloc_heap_thread_cache_deferred_extract(heap_t* heap, size_t span_count) { 1720 span_t* span = 0; 1721 if (span_count == 1) { 1722 _rpmalloc_heap_cache_adopt_deferred(heap, &span); 1723 } else { 1724 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1725 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1726 } 1727 return span; 1728 } 1729 1730 static span_t* 1731 _rpmalloc_heap_reserved_extract(heap_t* heap, size_t span_count) { 1732 if (heap->spans_reserved >= span_count) 1733 return _rpmalloc_span_map(heap, span_count); 1734 return 0; 1735 } 1736 1737 //! 
Extract a span from the global cache 1738 static span_t* 1739 _rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) { 1740 #if ENABLE_GLOBAL_CACHE 1741 #if ENABLE_THREAD_CACHE 1742 span_cache_t* span_cache; 1743 size_t wanted_count; 1744 if (span_count == 1) { 1745 span_cache = &heap->span_cache; 1746 wanted_count = THREAD_SPAN_CACHE_TRANSFER; 1747 } else { 1748 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1749 wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; 1750 } 1751 span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count); 1752 if (span_cache->count) { 1753 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size); 1754 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count); 1755 return span_cache->span[--span_cache->count]; 1756 } 1757 #else 1758 span_t* span = 0; 1759 size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); 1760 if (count) { 1761 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size); 1762 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count); 1763 return span; 1764 } 1765 #endif 1766 #endif 1767 (void)sizeof(heap); 1768 (void)sizeof(span_count); 1769 return 0; 1770 } 1771 1772 static void 1773 _rpmalloc_inc_span_statistics(heap_t* heap, size_t span_count, uint32_t class_idx) { 1774 (void)sizeof(heap); 1775 (void)sizeof(span_count); 1776 (void)sizeof(class_idx); 1777 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1778 uint32_t idx = (uint32_t)span_count - 1; 1779 uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current); 1780 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) 1781 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); 1782 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak); 1783 #endif 1784 } 1785 1786 //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory 1787 static span_t* 1788 _rpmalloc_heap_extract_new_span(heap_t* heap, heap_size_class_t* heap_size_class, size_t span_count, uint32_t class_idx) { 1789 span_t* span; 1790 #if ENABLE_THREAD_CACHE 1791 if (heap_size_class && heap_size_class->cache) { 1792 span = heap_size_class->cache; 1793 heap_size_class->cache = (heap->span_cache.count ? heap->span_cache.span[--heap->span_cache.count] : 0); 1794 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1795 return span; 1796 } 1797 #endif 1798 (void)sizeof(class_idx); 1799 // Allow 50% overhead to increase cache hits 1800 size_t base_span_count = span_count; 1801 size_t limit_span_count = (span_count > 2) ? 
(span_count + (span_count >> 1)) : span_count; 1802 if (limit_span_count > LARGE_CLASS_COUNT) 1803 limit_span_count = LARGE_CLASS_COUNT; 1804 do { 1805 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1806 if (EXPECTED(span != 0)) { 1807 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1808 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1809 return span; 1810 } 1811 span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); 1812 if (EXPECTED(span != 0)) { 1813 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1814 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1815 return span; 1816 } 1817 span = _rpmalloc_heap_reserved_extract(heap, span_count); 1818 if (EXPECTED(span != 0)) { 1819 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); 1820 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1821 return span; 1822 } 1823 span = _rpmalloc_heap_global_cache_extract(heap, span_count); 1824 if (EXPECTED(span != 0)) { 1825 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1826 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1827 return span; 1828 } 1829 ++span_count; 1830 } while (span_count <= limit_span_count); 1831 //Final fallback, map in more virtual memory 1832 span = _rpmalloc_span_map(heap, base_span_count); 1833 _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); 1834 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); 1835 return span; 1836 } 1837 1838 static void 1839 _rpmalloc_heap_initialize(heap_t* heap) { 1840 memset(heap, 0, sizeof(heap_t)); 1841 //Get a new heap ID 1842 heap->id = 1 + atomic_incr32(&_memory_heap_id); 1843 1844 //Link in heap in heap ID map 1845 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1846 heap->next_heap = _memory_heaps[list_idx]; 1847 _memory_heaps[list_idx] = heap; 1848 } 1849 1850 static void 1851 _rpmalloc_heap_orphan(heap_t* heap, int first_class) { 1852 heap->owner_thread = (uintptr_t)-1; 1853 #if RPMALLOC_FIRST_CLASS_HEAPS 1854 heap_t** heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); 1855 #else 1856 (void)sizeof(first_class); 1857 heap_t** heap_list = &_memory_orphan_heaps; 1858 #endif 1859 heap->next_orphan = *heap_list; 1860 *heap_list = heap; 1861 } 1862 1863 //! Allocate a new heap from newly mapped memory pages 1864 static heap_t* 1865 _rpmalloc_heap_allocate_new(void) { 1866 // Map in pages for a 16 heaps. If page size is greater than required size for this, map a page and 1867 // use first part for heaps and remaining part for spans for allocations. 
Adds a lot of complexity, 1868 // but saves a lot of memory on systems where page size > 64 spans (4MiB) 1869 size_t heap_size = sizeof(heap_t); 1870 size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); 1871 size_t request_heap_count = 16; 1872 size_t heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1873 size_t block_size = _memory_span_size * heap_span_count; 1874 size_t span_count = heap_span_count; 1875 span_t* span = 0; 1876 // If there are global reserved spans, use these first 1877 if (_memory_global_reserve_count >= heap_span_count) { 1878 span = _rpmalloc_global_get_reserved_spans(heap_span_count); 1879 } 1880 if (!span) { 1881 if (_memory_page_size > block_size) { 1882 span_count = _memory_page_size / _memory_span_size; 1883 block_size = _memory_page_size; 1884 // If using huge pages, make sure to grab enough heaps to avoid reallocating a huge page just to serve new heaps 1885 size_t possible_heap_count = (block_size - sizeof(span_t)) / aligned_heap_size; 1886 if (possible_heap_count >= (request_heap_count * 16)) 1887 request_heap_count *= 16; 1888 else if (possible_heap_count < request_heap_count) 1889 request_heap_count = possible_heap_count; 1890 heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1891 } 1892 1893 size_t align_offset = 0; 1894 span = (span_t*)_rpmalloc_mmap(block_size, &align_offset); 1895 if (!span) 1896 return 0; 1897 1898 // Master span will contain the heaps 1899 _rpmalloc_stat_inc(&_master_spans); 1900 _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); 1901 } 1902 1903 size_t remain_size = _memory_span_size - sizeof(span_t); 1904 heap_t* heap = (heap_t*)pointer_offset(span, sizeof(span_t)); 1905 _rpmalloc_heap_initialize(heap); 1906 1907 // Put extra heaps as orphans 1908 size_t num_heaps = remain_size / aligned_heap_size; 1909 if (num_heaps < request_heap_count) 1910 num_heaps = request_heap_count; 1911 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); 1912 heap_t* extra_heap = (heap_t*)pointer_offset(heap, aligned_heap_size); 1913 while (num_heaps > 1) { 1914 _rpmalloc_heap_initialize(extra_heap); 1915 extra_heap->master_heap = heap; 1916 _rpmalloc_heap_orphan(extra_heap, 1); 1917 extra_heap = (heap_t*)pointer_offset(extra_heap, aligned_heap_size); 1918 --num_heaps; 1919 } 1920 1921 if (span_count > heap_span_count) { 1922 // Cap reserved spans 1923 size_t remain_count = span_count - heap_span_count; 1924 size_t reserve_count = (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count : remain_count); 1925 span_t* remain_span = (span_t*)pointer_offset(span, heap_span_count * _memory_span_size); 1926 _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); 1927 1928 if (remain_count > reserve_count) { 1929 // Set to global reserved spans 1930 remain_span = (span_t*)pointer_offset(remain_span, reserve_count * _memory_span_size); 1931 reserve_count = remain_count - reserve_count; 1932 _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); 1933 } 1934 } 1935 1936 return heap; 1937 } 1938 1939 static heap_t* 1940 _rpmalloc_heap_extract_orphan(heap_t** heap_list) { 1941 heap_t* heap = *heap_list; 1942 *heap_list = (heap ? heap->next_orphan : 0); 1943 return heap; 1944 } 1945 1946 //! 
Allocate a new heap, potentially reusing a previously orphaned heap 1947 static heap_t* 1948 _rpmalloc_heap_allocate(int first_class) { 1949 heap_t* heap = 0; 1950 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1951 _rpmalloc_spin(); 1952 if (first_class == 0) 1953 heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); 1954 #if RPMALLOC_FIRST_CLASS_HEAPS 1955 if (!heap) 1956 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); 1957 #endif 1958 if (!heap) 1959 heap = _rpmalloc_heap_allocate_new(); 1960 atomic_store32_release(&_memory_global_lock, 0); 1961 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1962 return heap; 1963 } 1964 1965 static void 1966 _rpmalloc_heap_release(void* heapptr, int first_class, int release_cache) { 1967 heap_t* heap = (heap_t*)heapptr; 1968 if (!heap) 1969 return; 1970 //Release thread cache spans back to global cache 1971 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1972 if (release_cache || heap->finalize) { 1973 #if ENABLE_THREAD_CACHE 1974 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1975 span_cache_t* span_cache; 1976 if (!iclass) 1977 span_cache = &heap->span_cache; 1978 else 1979 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1980 if (!span_cache->count) 1981 continue; 1982 #if ENABLE_GLOBAL_CACHE 1983 if (heap->finalize) { 1984 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1985 _rpmalloc_span_unmap(span_cache->span[ispan]); 1986 } else { 1987 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); 1988 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); 1989 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); 1990 } 1991 #else 1992 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1993 _rpmalloc_span_unmap(span_cache->span[ispan]); 1994 #endif 1995 span_cache->count = 0; 1996 } 1997 #endif 1998 } 1999 2000 if (get_thread_heap_raw() == heap) 2001 set_thread_heap(0); 2002 2003 #if ENABLE_STATISTICS 2004 atomic_decr32(&_memory_active_heaps); 2005 rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, "Still active heaps during finalization"); 2006 #endif 2007 2008 // If we are forcibly terminating with _exit the state of the 2009 // lock atomic is unknown and it's best to just go ahead and exit 2010 if (get_thread_id() != _rpmalloc_main_thread_id) { 2011 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 2012 _rpmalloc_spin(); 2013 } 2014 _rpmalloc_heap_orphan(heap, first_class); 2015 atomic_store32_release(&_memory_global_lock, 0); 2016 } 2017 2018 static void 2019 _rpmalloc_heap_release_raw(void* heapptr, int release_cache) { 2020 _rpmalloc_heap_release(heapptr, 0, release_cache); 2021 } 2022 2023 static void 2024 _rpmalloc_heap_release_raw_fc(void* heapptr) { 2025 _rpmalloc_heap_release_raw(heapptr, 1); 2026 } 2027 2028 static void 2029 _rpmalloc_heap_finalize(heap_t* heap) { 2030 if (heap->spans_reserved) { 2031 span_t* span = _rpmalloc_span_map(heap, heap->spans_reserved); 2032 _rpmalloc_span_unmap(span); 2033 heap->spans_reserved = 0; 2034 } 2035 2036 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2037 2038 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 2039 if (heap->size_class[iclass].cache) 2040 _rpmalloc_span_unmap(heap->size_class[iclass].cache); 2041 heap->size_class[iclass].cache = 0; 2042 span_t* span = heap->size_class[iclass].partial_span; 2043 while (span) { 2044 span_t* next = span->next; 2045 
_rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span); 2046 span = next; 2047 } 2048 // If class still has a free list it must be a full span 2049 if (heap->size_class[iclass].free_list) { 2050 span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask); 2051 span_t** list = 0; 2052 #if RPMALLOC_FIRST_CLASS_HEAPS 2053 list = &heap->full_span[iclass]; 2054 #endif 2055 --heap->full_span_count; 2056 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { 2057 if (list) 2058 _rpmalloc_span_double_link_list_remove(list, class_span); 2059 _rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span); 2060 } 2061 } 2062 } 2063 2064 #if ENABLE_THREAD_CACHE 2065 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 2066 span_cache_t* span_cache; 2067 if (!iclass) 2068 span_cache = &heap->span_cache; 2069 else 2070 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 2071 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 2072 _rpmalloc_span_unmap(span_cache->span[ispan]); 2073 span_cache->count = 0; 2074 } 2075 #endif 2076 rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), "Heaps still active during finalization"); 2077 } 2078 2079 2080 //////////// 2081 /// 2082 /// Allocation entry points 2083 /// 2084 ////// 2085 2086 //! Pop first block from a free list 2087 static void* 2088 free_list_pop(void** list) { 2089 void* block = *list; 2090 *list = *((void**)block); 2091 return block; 2092 } 2093 2094 //! Allocate a small/medium sized memory block from the given heap 2095 static void* 2096 _rpmalloc_allocate_from_heap_fallback(heap_t* heap, heap_size_class_t* heap_size_class, uint32_t class_idx) { 2097 span_t* span = heap_size_class->partial_span; 2098 if (EXPECTED(span != 0)) { 2099 rpmalloc_assert(span->block_count == _memory_size_class[span->size_class].block_count, "Span block count corrupted"); 2100 rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), "Internal failure"); 2101 void* block; 2102 if (span->free_list) { 2103 //Span local free list is not empty, swap to size class free list 2104 block = free_list_pop(&span->free_list); 2105 heap_size_class->free_list = span->free_list; 2106 span->free_list = 0; 2107 } else { 2108 //If the span did not fully initialize free list, link up another page worth of blocks 2109 void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size)); 2110 span->free_list_limit += free_list_partial_init(&heap_size_class->free_list, &block, 2111 (void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start, 2112 span->block_count - span->free_list_limit, span->block_size); 2113 } 2114 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span block count corrupted"); 2115 span->used_count = span->free_list_limit; 2116 2117 //Swap in deferred free list if present 2118 if (atomic_load_ptr(&span->free_list_deferred)) 2119 _rpmalloc_span_extract_free_list_deferred(span); 2120 2121 //If span is still not fully utilized keep it in partial list and early return block 2122 if (!_rpmalloc_span_is_fully_utilized(span)) 2123 return block; 2124 2125 //The span is fully utilized, unlink from partial list and add to fully utilized list 2126 _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, span); 2127 #if RPMALLOC_FIRST_CLASS_HEAPS 2128 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 2129 #endif 2130 ++heap->full_span_count; 
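// At this point the span has been unlinked from the partial list and is accounted as full; the
// block popped or initialized above is still returned to the caller. Note that this fallback only
// runs when heap_size_class->free_list was empty; the common path in _rpmalloc_allocate_small and
// _rpmalloc_allocate_medium (below) is a single free_list_pop, roughly (sketch taken from those callers):
//   if (EXPECTED(heap_size_class->free_list != 0))
//       return free_list_pop(&heap_size_class->free_list);
//   return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx);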
2131 return block; 2132 } 2133 2134 //Find a span in one of the cache levels 2135 span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); 2136 if (EXPECTED(span != 0)) { 2137 //Mark span as owned by this heap and set base data, return first block 2138 return _rpmalloc_span_initialize_new(heap, heap_size_class, span, class_idx); 2139 } 2140 2141 return 0; 2142 } 2143 2144 //! Allocate a small sized memory block from the given heap 2145 static void* 2146 _rpmalloc_allocate_small(heap_t* heap, size_t size) { 2147 rpmalloc_assert(heap, "No thread heap"); 2148 //Small sizes have unique size classes 2149 const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); 2150 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2151 _rpmalloc_stat_inc_alloc(heap, class_idx); 2152 if (EXPECTED(heap_size_class->free_list != 0)) 2153 return free_list_pop(&heap_size_class->free_list); 2154 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2155 } 2156 2157 //! Allocate a medium sized memory block from the given heap 2158 static void* 2159 _rpmalloc_allocate_medium(heap_t* heap, size_t size) { 2160 rpmalloc_assert(heap, "No thread heap"); 2161 //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes) 2162 const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); 2163 const uint32_t class_idx = _memory_size_class[base_idx].class_idx; 2164 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2165 _rpmalloc_stat_inc_alloc(heap, class_idx); 2166 if (EXPECTED(heap_size_class->free_list != 0)) 2167 return free_list_pop(&heap_size_class->free_list); 2168 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2169 } 2170 2171 //! Allocate a large sized memory block from the given heap 2172 static void* 2173 _rpmalloc_allocate_large(heap_t* heap, size_t size) { 2174 rpmalloc_assert(heap, "No thread heap"); 2175 //Calculate number of needed max sized spans (including header) 2176 //Since this function is never called if size > LARGE_SIZE_LIMIT 2177 //the span_count is guaranteed to be <= LARGE_CLASS_COUNT 2178 size += SPAN_HEADER_SIZE; 2179 size_t span_count = size >> _memory_span_size_shift; 2180 if (size & (_memory_span_size - 1)) 2181 ++span_count; 2182 2183 //Find a span in one of the cache levels 2184 span_t* span = _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); 2185 if (!span) 2186 return span; 2187 2188 //Mark span as owned by this heap and set base data 2189 rpmalloc_assert(span->span_count >= span_count, "Internal failure"); 2190 span->size_class = SIZE_CLASS_LARGE; 2191 span->heap = heap; 2192 2193 #if RPMALLOC_FIRST_CLASS_HEAPS 2194 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2195 #endif 2196 ++heap->full_span_count; 2197 2198 return pointer_offset(span, SPAN_HEADER_SIZE); 2199 } 2200 2201 //! 
Allocate a huge block by mapping memory pages directly 2202 static void* 2203 _rpmalloc_allocate_huge(heap_t* heap, size_t size) { 2204 rpmalloc_assert(heap, "No thread heap"); 2205 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2206 size += SPAN_HEADER_SIZE; 2207 size_t num_pages = size >> _memory_page_size_shift; 2208 if (size & (_memory_page_size - 1)) 2209 ++num_pages; 2210 size_t align_offset = 0; 2211 span_t* span = (span_t*)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); 2212 if (!span) 2213 return span; 2214 2215 //Store page count in span_count 2216 span->size_class = SIZE_CLASS_HUGE; 2217 span->span_count = (uint32_t)num_pages; 2218 span->align_offset = (uint32_t)align_offset; 2219 span->heap = heap; 2220 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2221 2222 #if RPMALLOC_FIRST_CLASS_HEAPS 2223 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2224 #endif 2225 ++heap->full_span_count; 2226 2227 return pointer_offset(span, SPAN_HEADER_SIZE); 2228 } 2229 2230 //! Allocate a block of the given size 2231 static void* 2232 _rpmalloc_allocate(heap_t* heap, size_t size) { 2233 _rpmalloc_stat_add64(&_allocation_counter, 1); 2234 if (EXPECTED(size <= SMALL_SIZE_LIMIT)) 2235 return _rpmalloc_allocate_small(heap, size); 2236 else if (size <= _memory_medium_size_limit) 2237 return _rpmalloc_allocate_medium(heap, size); 2238 else if (size <= LARGE_SIZE_LIMIT) 2239 return _rpmalloc_allocate_large(heap, size); 2240 return _rpmalloc_allocate_huge(heap, size); 2241 } 2242 2243 static void* 2244 _rpmalloc_aligned_allocate(heap_t* heap, size_t alignment, size_t size) { 2245 if (alignment <= SMALL_GRANULARITY) 2246 return _rpmalloc_allocate(heap, size); 2247 2248 #if ENABLE_VALIDATE_ARGS 2249 if ((size + alignment) < size) { 2250 errno = EINVAL; 2251 return 0; 2252 } 2253 if (alignment & (alignment - 1)) { 2254 errno = EINVAL; 2255 return 0; 2256 } 2257 #endif 2258 2259 if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) { 2260 // If alignment is less or equal to span header size (which is power of two), 2261 // and size aligned to span header size multiples is less than size + alignment, 2262 // then use natural alignment of blocks to provide alignment 2263 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE; 2264 rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), "Failed alignment calculation"); 2265 if (multiple_size <= (size + alignment)) 2266 return _rpmalloc_allocate(heap, multiple_size); 2267 } 2268 2269 void* ptr = 0; 2270 size_t align_mask = alignment - 1; 2271 if (alignment <= _memory_page_size) { 2272 ptr = _rpmalloc_allocate(heap, size + alignment); 2273 if ((uintptr_t)ptr & align_mask) { 2274 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2275 //Mark as having aligned blocks 2276 span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask); 2277 span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; 2278 } 2279 return ptr; 2280 } 2281 2282 // Fallback to mapping new pages for this request. Since pointers passed 2283 // to rpfree must be able to reach the start of the span by bitmasking of 2284 // the address with the span size, the returned aligned pointer from this 2285 // function must be with a span size of the start of the mapped area. 2286 // In worst case this requires us to loop and map pages until we get a 2287 // suitable memory address. 
It also means we can never align to span size 2288 // or greater, since the span header will push alignment more than one 2289 // span size away from span start (thus causing pointer mask to give us 2290 // an invalid span start on free) 2291 if (alignment & align_mask) { 2292 errno = EINVAL; 2293 return 0; 2294 } 2295 if (alignment >= _memory_span_size) { 2296 errno = EINVAL; 2297 return 0; 2298 } 2299 2300 size_t extra_pages = alignment / _memory_page_size; 2301 2302 // Since each span has a header, we will at least need one extra memory page 2303 size_t num_pages = 1 + (size / _memory_page_size); 2304 if (size & (_memory_page_size - 1)) 2305 ++num_pages; 2306 2307 if (extra_pages > num_pages) 2308 num_pages = 1 + extra_pages; 2309 2310 size_t original_pages = num_pages; 2311 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; 2312 if (limit_pages < (original_pages * 2)) 2313 limit_pages = original_pages * 2; 2314 2315 size_t mapped_size, align_offset; 2316 span_t* span; 2317 2318 retry: 2319 align_offset = 0; 2320 mapped_size = num_pages * _memory_page_size; 2321 2322 span = (span_t*)_rpmalloc_mmap(mapped_size, &align_offset); 2323 if (!span) { 2324 errno = ENOMEM; 2325 return 0; 2326 } 2327 ptr = pointer_offset(span, SPAN_HEADER_SIZE); 2328 2329 if ((uintptr_t)ptr & align_mask) 2330 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2331 2332 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || 2333 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || 2334 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { 2335 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); 2336 ++num_pages; 2337 if (num_pages > limit_pages) { 2338 errno = EINVAL; 2339 return 0; 2340 } 2341 goto retry; 2342 } 2343 2344 //Store page count in span_count 2345 span->size_class = SIZE_CLASS_HUGE; 2346 span->span_count = (uint32_t)num_pages; 2347 span->align_offset = (uint32_t)align_offset; 2348 span->heap = heap; 2349 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2350 2351 #if RPMALLOC_FIRST_CLASS_HEAPS 2352 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2353 #endif 2354 ++heap->full_span_count; 2355 2356 _rpmalloc_stat_add64(&_allocation_counter, 1); 2357 2358 return ptr; 2359 } 2360 2361 2362 //////////// 2363 /// 2364 /// Deallocation entry points 2365 /// 2366 ////// 2367 2368 //! 
Deallocate the given small/medium memory block in the current thread local heap 2369 static void 2370 _rpmalloc_deallocate_direct_small_or_medium(span_t* span, void* block) { 2371 heap_t* heap = span->heap; 2372 rpmalloc_assert(heap->owner_thread == get_thread_id() || !heap->owner_thread || heap->finalize, "Internal failure"); 2373 //Add block to free list 2374 if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { 2375 span->used_count = span->block_count; 2376 #if RPMALLOC_FIRST_CLASS_HEAPS 2377 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 2378 #endif 2379 _rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span); 2380 --heap->full_span_count; 2381 } 2382 *((void**)block) = span->free_list; 2383 --span->used_count; 2384 span->free_list = block; 2385 if (UNEXPECTED(span->used_count == span->list_size)) { 2386 // If there are no used blocks it is guaranteed that no other external thread is accessing the span 2387 if (span->used_count) { 2388 // Make sure we have synchronized the deferred list and list size by using acquire semantics 2389 // and guarantee that no external thread is accessing span concurrently 2390 void* free_list; 2391 do { 2392 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2393 } while (free_list == INVALID_POINTER); 2394 atomic_store_ptr_release(&span->free_list_deferred, free_list); 2395 } 2396 _rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span); 2397 _rpmalloc_span_release_to_cache(heap, span); 2398 } 2399 } 2400 2401 static void 2402 _rpmalloc_deallocate_defer_free_span(heap_t* heap, span_t* span) { 2403 if (span->size_class != SIZE_CLASS_HUGE) 2404 _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); 2405 //This list does not need ABA protection, no mutable side state 2406 do { 2407 span->free_list = (void*)atomic_load_ptr(&heap->span_free_deferred); 2408 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); 2409 } 2410 2411 //! Put the block in the deferred free list of the owning span 2412 static void 2413 _rpmalloc_deallocate_defer_small_or_medium(span_t* span, void* block) { 2414 // The memory ordering here is a bit tricky, to avoid having to ABA protect 2415 // the deferred free list to avoid desynchronization of list and list size 2416 // we need to have acquire semantics on successful CAS of the pointer to 2417 // guarantee the list_size variable validity + release semantics on pointer store 2418 void* free_list; 2419 do { 2420 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2421 } while (free_list == INVALID_POINTER); 2422 *((void**)block) = free_list; 2423 uint32_t free_count = ++span->list_size; 2424 int all_deferred_free = (free_count == span->block_count); 2425 atomic_store_ptr_release(&span->free_list_deferred, block); 2426 if (all_deferred_free) { 2427 // Span was completely freed by this block. Due to the INVALID_POINTER spin lock 2428 // no other thread can reach this state simultaneously on this span. 
2429 // Safe to move to owner heap deferred cache 2430 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2431 } 2432 } 2433 2434 static void 2435 _rpmalloc_deallocate_small_or_medium(span_t* span, void* p) { 2436 _rpmalloc_stat_inc_free(span->heap, span->size_class); 2437 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { 2438 //Realign pointer to block start 2439 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2440 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2441 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); 2442 } 2443 //Check if block belongs to this heap or if deallocation should be deferred 2444 #if RPMALLOC_FIRST_CLASS_HEAPS 2445 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2446 #else 2447 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2448 #endif 2449 if (!defer) 2450 _rpmalloc_deallocate_direct_small_or_medium(span, p); 2451 else 2452 _rpmalloc_deallocate_defer_small_or_medium(span, p); 2453 } 2454 2455 //! Deallocate the given large memory block to the current heap 2456 static void 2457 _rpmalloc_deallocate_large(span_t* span) { 2458 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); 2459 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2460 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2461 //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap 2462 #if RPMALLOC_FIRST_CLASS_HEAPS 2463 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2464 #else 2465 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2466 #endif 2467 if (defer) { 2468 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2469 return; 2470 } 2471 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2472 --span->heap->full_span_count; 2473 #if RPMALLOC_FIRST_CLASS_HEAPS 2474 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2475 #endif 2476 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 2477 //Decrease counter 2478 size_t idx = span->span_count - 1; 2479 atomic_decr32(&span->heap->span_use[idx].current); 2480 #endif 2481 heap_t* heap = span->heap; 2482 rpmalloc_assert(heap, "No thread heap"); 2483 #if ENABLE_THREAD_CACHE 2484 const int set_as_reserved = ((span->span_count > 1) && (heap->span_cache.count == 0) && !heap->finalize && !heap->spans_reserved); 2485 #else 2486 const int set_as_reserved = ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); 2487 #endif 2488 if (set_as_reserved) { 2489 heap->span_reserve = span; 2490 heap->spans_reserved = span->span_count; 2491 if (span->flags & SPAN_FLAG_MASTER) { 2492 heap->span_reserve_master = span; 2493 } else { //SPAN_FLAG_SUBSPAN 2494 span_t* master = (span_t*)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); 2495 heap->span_reserve_master = master; 2496 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 2497 rpmalloc_assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count, "Master span count corrupted"); 2498 } 2499 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); 2500 } else { 2501 //Insert into cache list 2502 
_rpmalloc_heap_cache_insert(heap, span); 2503 } 2504 } 2505 2506 //! Deallocate the given huge span 2507 static void 2508 _rpmalloc_deallocate_huge(span_t* span) { 2509 rpmalloc_assert(span->heap, "No span heap"); 2510 #if RPMALLOC_FIRST_CLASS_HEAPS 2511 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2512 #else 2513 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2514 #endif 2515 if (defer) { 2516 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2517 return; 2518 } 2519 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2520 --span->heap->full_span_count; 2521 #if RPMALLOC_FIRST_CLASS_HEAPS 2522 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2523 #endif 2524 2525 //Oversized allocation, page count is stored in span_count 2526 size_t num_pages = span->span_count; 2527 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size); 2528 _rpmalloc_stat_sub(&_huge_pages_current, num_pages); 2529 } 2530 2531 //! Deallocate the given block 2532 static void 2533 _rpmalloc_deallocate(void* p) { 2534 _rpmalloc_stat_add64(&_deallocation_counter, 1); 2535 //Grab the span (always at start of span, using span alignment) 2536 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2537 if (UNEXPECTED(!span)) 2538 return; 2539 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) 2540 _rpmalloc_deallocate_small_or_medium(span, p); 2541 else if (span->size_class == SIZE_CLASS_LARGE) 2542 _rpmalloc_deallocate_large(span); 2543 else 2544 _rpmalloc_deallocate_huge(span); 2545 } 2546 2547 //////////// 2548 /// 2549 /// Reallocation entry points 2550 /// 2551 ////// 2552 2553 static size_t 2554 _rpmalloc_usable_size(void* p); 2555 2556 //! 
Reallocate the given block to the given size 2557 static void* 2558 _rpmalloc_reallocate(heap_t* heap, void* p, size_t size, size_t oldsize, unsigned int flags) { 2559 if (p) { 2560 //Grab the span using guaranteed span alignment 2561 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2562 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 2563 //Small/medium sized block 2564 rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); 2565 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2566 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2567 uint32_t block_idx = block_offset / span->block_size; 2568 void* block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size); 2569 if (!oldsize) 2570 oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); 2571 if ((size_t)span->block_size >= size) { 2572 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2573 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2574 memmove(block, p, oldsize); 2575 return block; 2576 } 2577 } else if (span->size_class == SIZE_CLASS_LARGE) { 2578 //Large block 2579 size_t total_size = size + SPAN_HEADER_SIZE; 2580 size_t num_spans = total_size >> _memory_span_size_shift; 2581 if (total_size & (_memory_span_mask - 1)) 2582 ++num_spans; 2583 size_t current_spans = span->span_count; 2584 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2585 if (!oldsize) 2586 oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2587 if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { 2588 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2589 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2590 memmove(block, p, oldsize); 2591 return block; 2592 } 2593 } else { 2594 //Oversized block 2595 size_t total_size = size + SPAN_HEADER_SIZE; 2596 size_t num_pages = total_size >> _memory_page_size_shift; 2597 if (total_size & (_memory_page_size - 1)) 2598 ++num_pages; 2599 //Page count is stored in span_count 2600 size_t current_pages = span->span_count; 2601 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2602 if (!oldsize) 2603 oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2604 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { 2605 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2606 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2607 memmove(block, p, oldsize); 2608 return block; 2609 } 2610 } 2611 } else { 2612 oldsize = 0; 2613 } 2614 2615 if (!!(flags & RPMALLOC_GROW_OR_FAIL)) 2616 return 0; 2617 2618 //Size is greater than block size, need to allocate a new block and deallocate the old 2619 //Avoid hysteresis by overallocating if increase is small (below 37%) 2620 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); 2621 size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); 2622 void* block = _rpmalloc_allocate(heap, new_size); 2623 if (p && block) { 2624 if (!(flags & RPMALLOC_NO_PRESERVE)) 2625 memcpy(block, p, oldsize < new_size ? 
oldsize : new_size); 2626 _rpmalloc_deallocate(p); 2627 } 2628 2629 return block; 2630 } 2631 2632 static void* 2633 _rpmalloc_aligned_reallocate(heap_t* heap, void* ptr, size_t alignment, size_t size, size_t oldsize, 2634 unsigned int flags) { 2635 if (alignment <= SMALL_GRANULARITY) 2636 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); 2637 2638 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); 2639 size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); 2640 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { 2641 if (no_alloc || (size >= (usablesize / 2))) 2642 return ptr; 2643 } 2644 // Aligned alloc marks span as having aligned blocks 2645 void* block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); 2646 if (EXPECTED(block != 0)) { 2647 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { 2648 if (!oldsize) 2649 oldsize = usablesize; 2650 memcpy(block, ptr, oldsize < size ? oldsize : size); 2651 } 2652 _rpmalloc_deallocate(ptr); 2653 } 2654 return block; 2655 } 2656 2657 2658 //////////// 2659 /// 2660 /// Initialization, finalization and utility 2661 /// 2662 ////// 2663 2664 //! Get the usable size of the given block 2665 static size_t 2666 _rpmalloc_usable_size(void* p) { 2667 //Grab the span using guaranteed span alignment 2668 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2669 if (span->size_class < SIZE_CLASS_COUNT) { 2670 //Small/medium block 2671 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2672 return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size); 2673 } 2674 if (span->size_class == SIZE_CLASS_LARGE) { 2675 //Large block 2676 size_t current_spans = span->span_count; 2677 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); 2678 } 2679 //Oversized block, page count is stored in span_count 2680 size_t current_pages = span->span_count; 2681 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); 2682 } 2683 2684 //! Adjust and optimize the size class properties for the given class 2685 static void 2686 _rpmalloc_adjust_size_class(size_t iclass) { 2687 size_t block_size = _memory_size_class[iclass].block_size; 2688 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; 2689 2690 _memory_size_class[iclass].block_count = (uint16_t)block_count; 2691 _memory_size_class[iclass].class_idx = (uint16_t)iclass; 2692 2693 //Check if previous size classes can be merged 2694 if (iclass >= SMALL_CLASS_COUNT) { 2695 size_t prevclass = iclass; 2696 while (prevclass > 0) { 2697 --prevclass; 2698 //A class can be merged if number of pages and number of blocks are equal 2699 if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count) 2700 memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass])); 2701 else 2702 break; 2703 } 2704 } 2705 } 2706 2707 //! 
Initialize the allocator and setup global data 2708 extern inline int 2709 rpmalloc_initialize(void) { 2710 if (_rpmalloc_initialized) { 2711 rpmalloc_thread_initialize(); 2712 return 0; 2713 } 2714 return rpmalloc_initialize_config(0); 2715 } 2716 2717 int 2718 rpmalloc_initialize_config(const rpmalloc_config_t* config) { 2719 if (_rpmalloc_initialized) { 2720 rpmalloc_thread_initialize(); 2721 return 0; 2722 } 2723 _rpmalloc_initialized = 1; 2724 2725 if (config) 2726 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); 2727 else 2728 memset(&_memory_config, 0, sizeof(rpmalloc_config_t)); 2729 2730 if (!_memory_config.memory_map || !_memory_config.memory_unmap) { 2731 _memory_config.memory_map = _rpmalloc_mmap_os; 2732 _memory_config.memory_unmap = _rpmalloc_unmap_os; 2733 } 2734 2735 #if PLATFORM_WINDOWS 2736 SYSTEM_INFO system_info; 2737 memset(&system_info, 0, sizeof(system_info)); 2738 GetSystemInfo(&system_info); 2739 _memory_map_granularity = system_info.dwAllocationGranularity; 2740 #else 2741 _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); 2742 #endif 2743 2744 #if RPMALLOC_CONFIGURABLE 2745 _memory_page_size = _memory_config.page_size; 2746 #else 2747 _memory_page_size = 0; 2748 #endif 2749 _memory_huge_pages = 0; 2750 if (!_memory_page_size) { 2751 #if PLATFORM_WINDOWS 2752 _memory_page_size = system_info.dwPageSize; 2753 #else 2754 _memory_page_size = _memory_map_granularity; 2755 if (_memory_config.enable_huge_pages) { 2756 #if defined(__linux__) 2757 size_t huge_page_size = 0; 2758 FILE* meminfo = fopen("/proc/meminfo", "r"); 2759 if (meminfo) { 2760 char line[128]; 2761 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { 2762 line[sizeof(line) - 1] = 0; 2763 if (strstr(line, "Hugepagesize:")) 2764 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; 2765 } 2766 fclose(meminfo); 2767 } 2768 if (huge_page_size) { 2769 _memory_huge_pages = 1; 2770 _memory_page_size = huge_page_size; 2771 _memory_map_granularity = huge_page_size; 2772 } 2773 #elif defined(__FreeBSD__) 2774 int rc; 2775 size_t sz = sizeof(rc); 2776 2777 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) { 2778 _memory_huge_pages = 1; 2779 _memory_page_size = 2 * 1024 * 1024; 2780 _memory_map_granularity = _memory_page_size; 2781 } 2782 #elif defined(__APPLE__) || defined(__NetBSD__) 2783 _memory_huge_pages = 1; 2784 _memory_page_size = 2 * 1024 * 1024; 2785 _memory_map_granularity = _memory_page_size; 2786 #endif 2787 } 2788 #endif 2789 } else { 2790 if (_memory_config.enable_huge_pages) 2791 _memory_huge_pages = 1; 2792 } 2793 2794 #if PLATFORM_WINDOWS 2795 if (_memory_config.enable_huge_pages) { 2796 HANDLE token = 0; 2797 size_t large_page_minimum = GetLargePageMinimum(); 2798 if (large_page_minimum) 2799 OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); 2800 if (token) { 2801 LUID luid; 2802 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { 2803 TOKEN_PRIVILEGES token_privileges; 2804 memset(&token_privileges, 0, sizeof(token_privileges)); 2805 token_privileges.PrivilegeCount = 1; 2806 token_privileges.Privileges[0].Luid = luid; 2807 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; 2808 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { 2809 if (GetLastError() == ERROR_SUCCESS) 2810 _memory_huge_pages = 1; 2811 } 2812 } 2813 CloseHandle(token); 2814 } 2815 if (_memory_huge_pages) { 2816 if (large_page_minimum > _memory_page_size) 2817 _memory_page_size = 
large_page_minimum; 2818 if (large_page_minimum > _memory_map_granularity) 2819 _memory_map_granularity = large_page_minimum; 2820 } 2821 } 2822 #endif 2823 2824 size_t min_span_size = 256; 2825 size_t max_page_size; 2826 #if UINTPTR_MAX > 0xFFFFFFFF 2827 max_page_size = 4096ULL * 1024ULL * 1024ULL; 2828 #else 2829 max_page_size = 4 * 1024 * 1024; 2830 #endif 2831 if (_memory_page_size < min_span_size) 2832 _memory_page_size = min_span_size; 2833 if (_memory_page_size > max_page_size) 2834 _memory_page_size = max_page_size; 2835 _memory_page_size_shift = 0; 2836 size_t page_size_bit = _memory_page_size; 2837 while (page_size_bit != 1) { 2838 ++_memory_page_size_shift; 2839 page_size_bit >>= 1; 2840 } 2841 _memory_page_size = ((size_t)1 << _memory_page_size_shift); 2842 2843 #if RPMALLOC_CONFIGURABLE 2844 if (!_memory_config.span_size) { 2845 _memory_span_size = _memory_default_span_size; 2846 _memory_span_size_shift = _memory_default_span_size_shift; 2847 _memory_span_mask = _memory_default_span_mask; 2848 } else { 2849 size_t span_size = _memory_config.span_size; 2850 if (span_size > (256 * 1024)) 2851 span_size = (256 * 1024); 2852 _memory_span_size = 4096; 2853 _memory_span_size_shift = 12; 2854 while (_memory_span_size < span_size) { 2855 _memory_span_size <<= 1; 2856 ++_memory_span_size_shift; 2857 } 2858 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); 2859 } 2860 #endif 2861 2862 _memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT); 2863 if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) 2864 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2865 if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) 2866 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2867 _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) ? 
DEFAULT_SPAN_MAP_COUNT : _memory_span_map_count; 2868 2869 _memory_config.page_size = _memory_page_size; 2870 _memory_config.span_size = _memory_span_size; 2871 _memory_config.span_map_count = _memory_span_map_count; 2872 _memory_config.enable_huge_pages = _memory_huge_pages; 2873 2874 #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 2875 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) 2876 return -1; 2877 #endif 2878 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2879 fls_key = FlsAlloc(&_rpmalloc_thread_destructor); 2880 #endif 2881 2882 //Setup all small and medium size classes 2883 size_t iclass = 0; 2884 _memory_size_class[iclass].block_size = SMALL_GRANULARITY; 2885 _rpmalloc_adjust_size_class(iclass); 2886 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { 2887 size_t size = iclass * SMALL_GRANULARITY; 2888 _memory_size_class[iclass].block_size = (uint32_t)size; 2889 _rpmalloc_adjust_size_class(iclass); 2890 } 2891 //At least two blocks per span, then fall back to large allocations 2892 _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; 2893 if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) 2894 _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; 2895 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { 2896 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); 2897 if (size > _memory_medium_size_limit) 2898 break; 2899 _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; 2900 _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); 2901 } 2902 2903 _memory_orphan_heaps = 0; 2904 #if RPMALLOC_FIRST_CLASS_HEAPS 2905 _memory_first_class_orphan_heaps = 0; 2906 #endif 2907 #if ENABLE_STATISTICS 2908 atomic_store32(&_memory_active_heaps, 0); 2909 atomic_store32(&_mapped_pages, 0); 2910 _mapped_pages_peak = 0; 2911 atomic_store32(&_master_spans, 0); 2912 atomic_store32(&_mapped_total, 0); 2913 atomic_store32(&_unmapped_total, 0); 2914 atomic_store32(&_mapped_pages_os, 0); 2915 atomic_store32(&_huge_pages_current, 0); 2916 _huge_pages_peak = 0; 2917 #endif 2918 memset(_memory_heaps, 0, sizeof(_memory_heaps)); 2919 atomic_store32_release(&_memory_global_lock, 0); 2920 2921 rpmalloc_linker_reference(); 2922 2923 //Initialize this thread 2924 rpmalloc_thread_initialize(); 2925 return 0; 2926 } 2927 2928 //! 
Finalize the allocator 2929 void 2930 rpmalloc_finalize(void) { 2931 rpmalloc_thread_finalize(1); 2932 //rpmalloc_dump_statistics(stdout); 2933 2934 if (_memory_global_reserve) { 2935 atomic_add32(&_memory_global_reserve_master->remaining_spans, -(int32_t)_memory_global_reserve_count); 2936 _memory_global_reserve_master = 0; 2937 _memory_global_reserve_count = 0; 2938 _memory_global_reserve = 0; 2939 } 2940 atomic_store32_release(&_memory_global_lock, 0); 2941 2942 //Free all thread caches and fully free spans 2943 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { 2944 heap_t* heap = _memory_heaps[list_idx]; 2945 while (heap) { 2946 heap_t* next_heap = heap->next_heap; 2947 heap->finalize = 1; 2948 _rpmalloc_heap_global_finalize(heap); 2949 heap = next_heap; 2950 } 2951 } 2952 2953 #if ENABLE_GLOBAL_CACHE 2954 //Free global caches 2955 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) 2956 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); 2957 #endif 2958 2959 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 2960 pthread_key_delete(_memory_thread_heap); 2961 #endif 2962 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2963 FlsFree(fls_key); 2964 fls_key = 0; 2965 #endif 2966 #if ENABLE_STATISTICS 2967 //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code 2968 rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); 2969 rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, "Memory leak detected"); 2970 #endif 2971 2972 _rpmalloc_initialized = 0; 2973 } 2974 2975 //! Initialize thread, assign heap 2976 extern inline void 2977 rpmalloc_thread_initialize(void) { 2978 if (!get_thread_heap_raw()) { 2979 heap_t* heap = _rpmalloc_heap_allocate(0); 2980 if (heap) { 2981 _rpmalloc_stat_inc(&_memory_active_heaps); 2982 set_thread_heap(heap); 2983 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2984 FlsSetValue(fls_key, heap); 2985 #endif 2986 } 2987 } 2988 } 2989 2990 //! Finalize thread, orphan heap 2991 void 2992 rpmalloc_thread_finalize(int release_caches) { 2993 heap_t* heap = get_thread_heap_raw(); 2994 if (heap) 2995 _rpmalloc_heap_release_raw(heap, release_caches); 2996 set_thread_heap(0); 2997 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2998 FlsSetValue(fls_key, 0); 2999 #endif 3000 } 3001 3002 int 3003 rpmalloc_is_thread_initialized(void) { 3004 return (get_thread_heap_raw() != 0) ? 
    return (get_thread_heap_raw() != 0) ? 1 : 0;
}

const rpmalloc_config_t*
rpmalloc_config(void) {
    return &_memory_config;
}

// Extern interface

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc(size_t size) {
#if ENABLE_VALIDATE_ARGS
    if (size >= MAX_ALLOC_SIZE) {
        errno = EINVAL;
        return 0;
    }
#endif
    heap_t* heap = get_thread_heap();
    return _rpmalloc_allocate(heap, size);
}

extern inline void
rpfree(void* ptr) {
    _rpmalloc_deallocate(ptr);
}

extern inline RPMALLOC_ALLOCATOR void*
rpcalloc(size_t num, size_t size) {
    size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
    int err = SizeTMult(num, size, &total);
    if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
        errno = EINVAL;
        return 0;
    }
#else
    int err = __builtin_umull_overflow(num, size, &total);
    if (err || (total >= MAX_ALLOC_SIZE)) {
        errno = EINVAL;
        return 0;
    }
#endif
#else
    total = num * size;
#endif
    heap_t* heap = get_thread_heap();
    void* block = _rpmalloc_allocate(heap, total);
    if (block)
        memset(block, 0, total);
    return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rprealloc(void* ptr, size_t size) {
#if ENABLE_VALIDATE_ARGS
    if (size >= MAX_ALLOC_SIZE) {
        errno = EINVAL;
        return ptr;
    }
#endif
    heap_t* heap = get_thread_heap();
    return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
}

extern RPMALLOC_ALLOCATOR void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize,
                  unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
    if ((size + alignment < size) || (alignment > _memory_page_size)) {
        errno = EINVAL;
        return 0;
    }
#endif
    heap_t* heap = get_thread_heap();
    return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
}

extern RPMALLOC_ALLOCATOR void*
rpaligned_alloc(size_t alignment, size_t size) {
    heap_t* heap = get_thread_heap();
    return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpaligned_calloc(size_t alignment, size_t num, size_t size) {
    size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
    int err = SizeTMult(num, size, &total);
    if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
        errno = EINVAL;
        return 0;
    }
#else
    int err = __builtin_umull_overflow(num, size, &total);
    if (err || (total >= MAX_ALLOC_SIZE)) {
        errno = EINVAL;
        return 0;
    }
#endif
#else
    total = num * size;
#endif
    void* block = rpaligned_alloc(alignment, total);
    if (block)
        memset(block, 0, total);
    return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rpmemalign(size_t alignment, size_t size) {
    return rpaligned_alloc(alignment, size);
}

extern inline int
rpposix_memalign(void **memptr, size_t alignment, size_t size) {
    if (memptr)
        *memptr = rpaligned_alloc(alignment, size);
    else
        return EINVAL;
    return *memptr ? 0 : ENOMEM;
}

extern inline size_t
rpmalloc_usable_size(void* ptr) {
    return (ptr ? _rpmalloc_usable_size(ptr) : 0);
}

extern inline void
rpmalloc_thread_collect(void) {
}

void
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) {
    memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
    heap_t* heap = get_thread_heap_raw();
    if (!heap)
        return;

    for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
        size_class_t* size_class = _memory_size_class + iclass;
        span_t* span = heap->size_class[iclass].partial_span;
        while (span) {
            size_t free_count = span->list_size;
            size_t block_count = size_class->block_count;
            if (span->free_list_limit < block_count)
                block_count = span->free_list_limit;
            free_count += (block_count - span->used_count);
            stats->sizecache = free_count * size_class->block_size;
            span = span->next;
        }
    }

#if ENABLE_THREAD_CACHE
    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
        span_cache_t* span_cache;
        if (!iclass)
            span_cache = &heap->span_cache;
        else
            span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
        stats->spancache = span_cache->count * (iclass + 1) * _memory_span_size;
    }
#endif

    span_t* deferred = (span_t*)atomic_load_ptr(&heap->span_free_deferred);
    while (deferred) {
        if (deferred->size_class != SIZE_CLASS_HUGE)
            stats->spancache = (size_t)deferred->span_count * _memory_span_size;
        deferred = (span_t*)deferred->free_list;
    }

#if ENABLE_STATISTICS
    stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
    stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);

    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
        stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
        stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
        stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
        stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
        stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
        stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
        stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
        stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
        stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
    }
    for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
        stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
        stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
        stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
        stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
        stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
        stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
        stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
        stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
    }
#endif
}

void
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
    memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
#if ENABLE_STATISTICS
    stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
    stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
    stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
    stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
    stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
    stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
#endif
#if ENABLE_GLOBAL_CACHE
    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
        stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size;
#endif
}

#if ENABLE_STATISTICS

static void
_memory_heap_dump_statistics(heap_t* heap, void* file) {
    fprintf(file, "Heap %d stats:\n", heap->id);
    fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
    for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
        if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
            continue;
        fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
            atomic_load32(&heap->size_class_use[iclass].alloc_current),
            heap->size_class_use[iclass].alloc_peak,
            atomic_load32(&heap->size_class_use[iclass].alloc_total),
            atomic_load32(&heap->size_class_use[iclass].free_total),
            _memory_size_class[iclass].block_size,
            _memory_size_class[iclass].block_count,
            atomic_load32(&heap->size_class_use[iclass].spans_current),
            heap->size_class_use[iclass].spans_peak,
            ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
            atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
    }
    fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n");
    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
        if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
            continue;
        fprintf(file, "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
            atomic_load32(&heap->span_use[iclass].current),
            atomic_load32(&heap->span_use[iclass].high),
            atomic_load32(&heap->span_use[iclass].spans_deferred),
            ((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
#if ENABLE_THREAD_CACHE
            (unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
#else
            0, (size_t)0, (size_t)0,
#endif
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
            atomic_load32(&heap->span_use[iclass].spans_map_calls));
    }
    fprintf(file, "Full spans: %zu\n", heap->full_span_count);
    fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
    fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
}

#endif

void
rpmalloc_dump_statistics(void* file) {
#if ENABLE_STATISTICS
    for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
        heap_t* heap = _memory_heaps[list_idx];
        while (heap) {
            int need_dump = 0;
            for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
                if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
                    rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].free_total), "Heap statistics counter mismatch");
                    rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls), "Heap statistics counter mismatch");
                    continue;
                }
                need_dump = 1;
            }
            for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
                if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
                    continue;
                need_dump = 1;
            }
            if (need_dump)
                _memory_heap_dump_statistics(heap, file);
            heap = heap->next_heap;
        }
    }
    fprintf(file, "Global stats:\n");
    size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
    size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
    fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
    fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));

    fprintf(file, "GlobalCacheMiB\n");
    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
        global_cache_t* cache = _memory_span_cache + iclass;
        size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;

        size_t global_overflow_cache = 0;
        span_t* span = cache->overflow;
        while (span) {
            global_overflow_cache += iclass * _memory_span_size;
            span = span->next;
        }
        if (global_cache || global_overflow_cache || cache->insert_count || cache->extract_count)
            fprintf(file, "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", iclass + 1, global_cache / (size_t)(1024 * 1024), global_overflow_cache / (size_t)(1024 * 1024), cache->insert_count, cache->extract_count);
    }

    size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
    size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
    size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
    size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
    size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
    fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
    fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
        mapped / (size_t)(1024 * 1024),
        mapped_os / (size_t)(1024 * 1024),
        mapped_peak / (size_t)(1024 * 1024),
        mapped_total / (size_t)(1024 * 1024),
        unmapped_total / (size_t)(1024 * 1024));

    fprintf(file, "\n");
#if 0
    int64_t allocated = atomic_load64(&_allocation_counter);
    int64_t deallocated = atomic_load64(&_deallocation_counter);
    fprintf(file, "Allocation count: %lli\n", allocated);
    fprintf(file, "Deallocation count: %lli\n", deallocated);
    fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
    fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
    fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
#endif
#endif
    (void)sizeof(file);
}

#if RPMALLOC_FIRST_CLASS_HEAPS

extern inline rpmalloc_heap_t*
rpmalloc_heap_acquire(void) {
    // Must be a pristine heap from newly mapped memory pages, or else memory blocks
    // could already be allocated from the heap, which would (wrongly) be released when
    // the heap is cleared with rpmalloc_heap_free_all(). Heaps guaranteed to be
    // pristine from the dedicated orphan list can also be used.
    heap_t* heap = _rpmalloc_heap_allocate(1);
    heap->owner_thread = 0;
    _rpmalloc_stat_inc(&_memory_active_heaps);
    return heap;
}

extern inline void
rpmalloc_heap_release(rpmalloc_heap_t* heap) {
    if (heap)
        _rpmalloc_heap_release(heap, 1, 1);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
#if ENABLE_VALIDATE_ARGS
    if (size >= MAX_ALLOC_SIZE) {
        errno = EINVAL;
        return 0;
    }
#endif
    return _rpmalloc_allocate(heap, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
#if ENABLE_VALIDATE_ARGS
    if (size >= MAX_ALLOC_SIZE) {
        errno = EINVAL;
        return 0;
    }
#endif
    return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
    return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
    size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
    int err = SizeTMult(num, size, &total);
    if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
        errno = EINVAL;
        return 0;
    }
#else
    int err = __builtin_umull_overflow(num, size, &total);
    if (err || (total >= MAX_ALLOC_SIZE)) {
        errno = EINVAL;
        return 0;
    }
#endif
#else
    total = num * size;
#endif
    void* block = _rpmalloc_aligned_allocate(heap, alignment, total);
    if (block)
        memset(block, 0, total);
    return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
    if (size >= MAX_ALLOC_SIZE) {
        errno = EINVAL;
        return ptr;
    }
#endif
    return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
    if ((size + alignment < size) || (alignment > _memory_page_size)) {
        errno = EINVAL;
        return 0;
    }
#endif
    return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
}

extern inline void
rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
    (void)sizeof(heap);
    _rpmalloc_deallocate(ptr);
}

extern inline void
rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
    span_t* span;
    span_t* next_span;

    _rpmalloc_heap_cache_adopt_deferred(heap, 0);

    for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
        span = heap->size_class[iclass].partial_span;
        while (span) {
            next_span = span->next;
            _rpmalloc_heap_cache_insert(heap, span);
            span = next_span;
        }
        heap->size_class[iclass].partial_span = 0;
        span = heap->full_span[iclass];
        while (span) {
            next_span = span->next;
            _rpmalloc_heap_cache_insert(heap, span);
            span = next_span;
        }
    }
    memset(heap->size_class, 0, sizeof(heap->size_class));
    memset(heap->full_span, 0, sizeof(heap->full_span));

    span = heap->large_huge_span;
    while (span) {
        next_span = span->next;
        if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
            _rpmalloc_deallocate_huge(span);
        else
            _rpmalloc_heap_cache_insert(heap, span);
        span = next_span;
    }
    heap->large_huge_span = 0;
    heap->full_span_count = 0;

#if ENABLE_THREAD_CACHE
    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
        span_cache_t* span_cache;
        if (!iclass)
            span_cache = &heap->span_cache;
        else
            span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
        if (!span_cache->count)
            continue;
#if ENABLE_GLOBAL_CACHE
        _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
        _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
        _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
#else
        for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
            _rpmalloc_span_unmap(span_cache->span[ispan]);
#endif
        span_cache->count = 0;
    }
#endif

#if ENABLE_STATISTICS
    for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
        atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
        atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
    }
    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
        atomic_store32(&heap->span_use[iclass].current, 0);
    }
#endif
}

extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
    heap_t* prev_heap = get_thread_heap_raw();
    if (prev_heap != heap) {
        set_thread_heap(heap);
        if (prev_heap)
            rpmalloc_heap_release(prev_heap);
    }
}

#endif
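// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the original rpmalloc.c):
// the first class heap API above lets a caller own an explicit heap and batch
// free everything allocated from it. The outline below assumes the library is
// built with RPMALLOC_FIRST_CLASS_HEAPS=1 and that rpmalloc.h is included; it
// is guarded by #if 0 so it is never compiled as part of this file.
#if 0
static void example_first_class_heap(void) {
    rpmalloc_initialize();

    // Acquire a dedicated heap; it is not bound to the calling thread.
    rpmalloc_heap_t* heap = rpmalloc_heap_acquire();

    // Allocate only from this heap.
    void* a = rpmalloc_heap_alloc(heap, 128);
    void* b = rpmalloc_heap_aligned_alloc(heap, 64, 1024);

    // Individual frees go back to the owning heap...
    rpmalloc_heap_free(heap, a);

    // ...or every outstanding block can be released in one call.
    (void)b;
    rpmalloc_heap_free_all(heap);

    // Return the heap to the orphan list and tear the allocator down.
    rpmalloc_heap_release(heap);
    rpmalloc_finalize();
}
#endif
// ---------------------------------------------------------------------------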

#if ENABLE_PRELOAD || ENABLE_OVERRIDE

#include "malloc.c"

#endif

void
rpmalloc_linker_reference(void) {
    (void)sizeof(_rpmalloc_initialized);
}
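
// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the original rpmalloc.c):
// a minimal consumer of the public entry points defined above, written as if
// it were a separate translation unit that includes rpmalloc.h. It is guarded
// by #if 0 so it does not become part of this file; rpmalloc_initialize()
// also initializes the calling thread, so no explicit
// rpmalloc_thread_initialize() call is needed on the main thread.
#if 0
#include <stdio.h>
#include <string.h>
#include "rpmalloc.h"

int main(void) {
    if (rpmalloc_initialize() != 0)
        return 1;

    // Plain and zero-initialized allocations.
    char* msg = (char*)rpmalloc(64);
    strcpy(msg, "hello rpmalloc");
    int* table = (int*)rpcalloc(16, sizeof(int));

    // Usable size is at least the requested size (blocks are rounded up to
    // the size class granularity).
    printf("%s, usable size %zu\n", msg, rpmalloc_usable_size(msg));

    // Aligned allocation, then reallocation keeping the same alignment.
    void* aligned = rpaligned_alloc(64, 256);
    aligned = rpaligned_realloc(aligned, 64, 512, 0, 0);

    rpfree(aligned);
    rpfree(table);
    rpfree(msg);

    // Allocator-wide counters (most fields stay zero unless the library was
    // built with ENABLE_STATISTICS).
    rpmalloc_global_statistics_t stats;
    rpmalloc_global_statistics(&stats);
    printf("cached bytes: %zu\n", stats.cached);

    rpmalloc_finalize();
    return 0;
}
#endif
// ---------------------------------------------------------------------------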