github.com/moontrade/nogc@v0.1.7/alloc/rpmalloc/tinygo/rpmalloc.c

/* rpmalloc.c  -  Memory allocator  -  Public Domain  -  2016-2020 Mattias Jansson
 *
 * This library provides a cross-platform lock free thread caching malloc implementation in C11.
 * The latest source code is always available at
 *
 * https://github.com/mjansson/rpmalloc
 *
 * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
 *
 */

#include "rpmalloc.h"

////////////
///
/// Build time configurable limits
///
//////

#if defined(__clang__)
#pragma clang diagnostic ignored "-Wunused-macros"
#pragma clang diagnostic ignored "-Wunused-function"
#if __has_warning("-Wreserved-identifier")
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-macros"
#pragma GCC diagnostic ignored "-Wunused-function"
#endif

#ifndef HEAP_ARRAY_SIZE
//! Size of heap hashmap
#define HEAP_ARRAY_SIZE 47
#endif
#ifndef ENABLE_THREAD_CACHE
//! Enable per-thread cache
#define ENABLE_THREAD_CACHE 0
#endif
#ifndef ENABLE_GLOBAL_CACHE
//! Enable global cache shared between all threads, requires thread cache
#define ENABLE_GLOBAL_CACHE 0
#endif
#ifndef ENABLE_VALIDATE_ARGS
//! Enable validation of args to public entry points
#define ENABLE_VALIDATE_ARGS 0
#endif
#ifndef ENABLE_STATISTICS
//! Enable statistics collection
#define ENABLE_STATISTICS 0
#endif
#ifndef ENABLE_ASSERTS
//! Enable asserts
#define ENABLE_ASSERTS 0
#endif
#ifndef ENABLE_OVERRIDE
//! Override standard library malloc/free and new/delete entry points
#define ENABLE_OVERRIDE 1
#endif
#ifndef ENABLE_PRELOAD
//! Support preloading
#define ENABLE_PRELOAD 1
#endif
#ifndef DISABLE_UNMAP
//! Disable unmapping memory pages (also enables unlimited cache)
#define DISABLE_UNMAP 0
#endif
#ifndef ENABLE_UNLIMITED_CACHE
//! Enable unlimited global cache (no unmapping until finalization)
#define ENABLE_UNLIMITED_CACHE 0
#endif
#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
//! Enable adaptive thread cache size based on use heuristics
#define ENABLE_ADAPTIVE_THREAD_CACHE 0
#endif
#ifndef DEFAULT_SPAN_MAP_COUNT
//! Default number of spans to map in call to map more virtual memory (default values yield 4MiB here)
#define DEFAULT_SPAN_MAP_COUNT 64
#endif
#ifndef GLOBAL_CACHE_MULTIPLIER
//!
Multiplier for global cache 81 #define GLOBAL_CACHE_MULTIPLIER 8 82 #endif 83 84 #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE 85 #error Must use global cache if unmap is disabled 86 #endif 87 88 #if DISABLE_UNMAP 89 #undef ENABLE_UNLIMITED_CACHE 90 #define ENABLE_UNLIMITED_CACHE 1 91 #endif 92 93 #if !ENABLE_GLOBAL_CACHE 94 #undef ENABLE_UNLIMITED_CACHE 95 #define ENABLE_UNLIMITED_CACHE 0 96 #endif 97 98 #if !ENABLE_THREAD_CACHE 99 #undef ENABLE_ADAPTIVE_THREAD_CACHE 100 #define ENABLE_ADAPTIVE_THREAD_CACHE 0 101 #endif 102 103 #if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) 104 # define PLATFORM_WINDOWS 1 105 # define PLATFORM_POSIX 0 106 #else 107 # define PLATFORM_WINDOWS 0 108 # define PLATFORM_POSIX 1 109 #endif 110 111 /// Platform and arch specifics 112 #if defined(_MSC_VER) && !defined(__clang__) 113 # pragma warning (disable: 5105) 114 # ifndef FORCEINLINE 115 # define FORCEINLINE inline __forceinline 116 # endif 117 # define _Static_assert static_assert 118 #else 119 # ifndef FORCEINLINE 120 # define FORCEINLINE inline __attribute__((__always_inline__)) 121 # endif 122 #endif 123 #if PLATFORM_WINDOWS 124 # ifndef WIN32_LEAN_AND_MEAN 125 # define WIN32_LEAN_AND_MEAN 126 # endif 127 # include <windows.h> 128 # if ENABLE_VALIDATE_ARGS 129 # include <intsafe.h> 130 # endif 131 #else 132 # include <unistd.h> 133 # include <stdio.h> 134 # include <stdlib.h> 135 # include <time.h> 136 # if defined(__APPLE__) 137 # include <TargetConditionals.h> 138 # if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 139 # include <mach/mach_vm.h> 140 # include <mach/vm_statistics.h> 141 # endif 142 # include <pthread.h> 143 # endif 144 # if defined(__HAIKU__) 145 # include <pthread.h> 146 # endif 147 #endif 148 149 #include <stdint.h> 150 #include <string.h> 151 #include <errno.h> 152 153 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 154 #include <fibersapi.h> 155 static DWORD fls_key; 156 #endif 157 158 #if PLATFORM_POSIX 159 # include <sys/mman.h> 160 # include <sched.h> 161 # ifdef __FreeBSD__ 162 # include <sys/sysctl.h> 163 # define MAP_HUGETLB MAP_ALIGNED_SUPER 164 # ifndef PROT_MAX 165 # define PROT_MAX(f) 0 166 # endif 167 # else 168 # define PROT_MAX(f) 0 169 # endif 170 # ifdef __sun 171 extern int madvise(caddr_t, size_t, int); 172 # endif 173 # ifndef MAP_UNINITIALIZED 174 # define MAP_UNINITIALIZED 0 175 # endif 176 #endif 177 #include <errno.h> 178 179 #if ENABLE_ASSERTS 180 # undef NDEBUG 181 # if defined(_MSC_VER) && !defined(_DEBUG) 182 # define _DEBUG 183 # endif 184 # include <assert.h> 185 #define RPMALLOC_TOSTRING_M(x) #x 186 #define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) 187 #define rpmalloc_assert(truth, message) \ 188 do { \ 189 if (!(truth)) { \ 190 if (_memory_config.error_callback) { \ 191 _memory_config.error_callback( \ 192 message " (" RPMALLOC_TOSTRING(truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ 193 } else { \ 194 assert((truth) && message); \ 195 } \ 196 } \ 197 } while (0) 198 #else 199 # define rpmalloc_assert(truth, message) do {} while(0) 200 #endif 201 #if ENABLE_STATISTICS 202 # include <stdio.h> 203 #endif 204 205 ////// 206 /// 207 /// Atomic access abstraction (since MSVC does not do C11 yet) 208 /// 209 ////// 210 211 #if defined(_MSC_VER) && !defined(__clang__) 212 213 typedef volatile long atomic32_t; 214 typedef volatile long long atomic64_t; 215 typedef volatile void* atomicptr_t; 216 217 static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return *src; } 218 static FORCEINLINE void 
atomic_store32(atomic32_t* dst, int32_t val) { *dst = val; } 219 static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return (int32_t)InterlockedIncrement(val); } 220 static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return (int32_t)InterlockedDecrement(val); } 221 static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; } 222 static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; } 223 static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { *dst = val; } 224 static FORCEINLINE int64_t atomic_load64(atomic64_t* src) { return *src; } 225 static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; } 226 static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return (void*)*src; } 227 static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { *dst = val; } 228 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { *dst = val; } 229 static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return (void*)InterlockedExchangePointer((void* volatile*)dst, val); } 230 static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return (InterlockedCompareExchangePointer((void* volatile*)dst, val, ref) == ref) ? 1 : 0; } 231 232 #define EXPECTED(x) (x) 233 #define UNEXPECTED(x) (x) 234 235 #else 236 237 #include <stdatomic.h> 238 239 typedef volatile _Atomic(int32_t) atomic32_t; 240 typedef volatile _Atomic(int64_t) atomic64_t; 241 typedef volatile _Atomic(void*) atomicptr_t; 242 243 static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return atomic_load_explicit(src, memory_order_relaxed); } 244 static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); } 245 static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; } 246 static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; } 247 static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; } 248 static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); } 249 static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_release); } 250 static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return atomic_load_explicit(val, memory_order_relaxed); } 251 static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; } 252 static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return atomic_load_explicit(src, memory_order_relaxed); } 253 static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_relaxed); } 254 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_release); } 255 static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return 
atomic_exchange_explicit(dst, val, memory_order_acquire); } 256 static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); } 257 258 #define EXPECTED(x) __builtin_expect((x), 1) 259 #define UNEXPECTED(x) __builtin_expect((x), 0) 260 261 #endif 262 263 //////////// 264 /// 265 /// Statistics related functions (evaluate to nothing when statistics not enabled) 266 /// 267 ////// 268 269 #if ENABLE_STATISTICS 270 # define _rpmalloc_stat_inc(counter) atomic_incr32(counter) 271 # define _rpmalloc_stat_dec(counter) atomic_decr32(counter) 272 # define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value)) 273 # define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value)) 274 # define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0) 275 # define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value)) 276 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \ 277 int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ 278 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ 279 heap->size_class_use[class_idx].alloc_peak = alloc_current; \ 280 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ 281 } while(0) 282 # define _rpmalloc_stat_inc_free(heap, class_idx) do { \ 283 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ 284 atomic_incr32(&heap->size_class_use[class_idx].free_total); \ 285 } while(0) 286 #else 287 # define _rpmalloc_stat_inc(counter) do {} while(0) 288 # define _rpmalloc_stat_dec(counter) do {} while(0) 289 # define _rpmalloc_stat_add(counter, value) do {} while(0) 290 # define _rpmalloc_stat_add64(counter, value) do {} while(0) 291 # define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0) 292 # define _rpmalloc_stat_sub(counter, value) do {} while(0) 293 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0) 294 # define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0) 295 #endif 296 297 298 /// 299 /// Preconfigured limits and sizes 300 /// 301 302 //! Granularity of a small allocation block (must be power of two) 303 #define SMALL_GRANULARITY 16 304 //! Small granularity shift count 305 #define SMALL_GRANULARITY_SHIFT 4 306 //! Number of small block size classes 307 #define SMALL_CLASS_COUNT 65 308 //! Maximum size of a small block 309 #define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) 310 //! Granularity of a medium allocation block 311 #define MEDIUM_GRANULARITY 512 312 //! Medium granularity shift count 313 #define MEDIUM_GRANULARITY_SHIFT 9 314 //! Number of medium block size classes 315 #define MEDIUM_CLASS_COUNT 61 316 //! Total number of small + medium size classes 317 #define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) 318 //! Number of large block size classes 319 #define LARGE_CLASS_COUNT 63 320 //! Maximum size of a medium block 321 #define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) 322 //! Maximum size of a large block 323 #define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) 324 //! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two) 325 #define SPAN_HEADER_SIZE 128 326 //! 
Number of spans in thread cache
#define MAX_THREAD_SPAN_CACHE 400
//! Number of spans to transfer between thread and global cache
#define THREAD_SPAN_CACHE_TRANSFER 64
//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
#define MAX_THREAD_SPAN_LARGE_CACHE 100
//! Number of spans to transfer between thread and global cache for large spans
#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6

_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two");
_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two");

#if ENABLE_VALIDATE_ARGS
//! Maximum allocation size to avoid integer overflow
#undef MAX_ALLOC_SIZE
#define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size)
#endif

#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))

#define INVALID_POINTER ((void*)((uintptr_t)-1))

#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
#define SIZE_CLASS_HUGE ((uint32_t)-1)
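
// Worked numbers for the limits above, assuming the defaults defined in this file and the
// default 64KiB span size (LARGE_SIZE_LIMIT depends on the configured span size):
//   SMALL_SIZE_LIMIT  = 16 * (65 - 1)      = 1024 bytes
//   MEDIUM_SIZE_LIMIT = 1024 + (512 * 61)  = 32256 bytes
//   LARGE_SIZE_LIMIT  = (63 * 65536) - 128 = 4128640 bytes (slightly less than 4MiB)
// Requests above LARGE_SIZE_LIMIT fall into the huge size class (SIZE_CLASS_HUGE).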

////////////
///
/// Data types
///
//////

//! A memory heap, per thread
typedef struct heap_t heap_t;
//! Span of memory pages
typedef struct span_t span_t;
//! Span list
typedef struct span_list_t span_list_t;
//! Span active data
typedef struct span_active_t span_active_t;
//! Size class definition
typedef struct size_class_t size_class_t;
//! Global cache
typedef struct global_cache_t global_cache_t;

//! Flag indicating span is the first (master) span of a split superspan
#define SPAN_FLAG_MASTER 1U
//! Flag indicating span is a secondary (sub) span of a split superspan
#define SPAN_FLAG_SUBSPAN 2U
//! Flag indicating span has blocks with increased alignment
#define SPAN_FLAG_ALIGNED_BLOCKS 4U
//! Flag indicating an unmapped master span
#define SPAN_FLAG_UNMAPPED_MASTER 8U

#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
struct span_use_t {
	//! Current number of spans used (actually used, not in cache)
	atomic32_t current;
	//! High water mark of spans used
	atomic32_t high;
#if ENABLE_STATISTICS
	//! Number of spans in deferred list
	atomic32_t spans_deferred;
	//! Number of spans transitioned to global cache
	atomic32_t spans_to_global;
	//! Number of spans transitioned from global cache
	atomic32_t spans_from_global;
	//! Number of spans transitioned to thread cache
	atomic32_t spans_to_cache;
	//! Number of spans transitioned from thread cache
	atomic32_t spans_from_cache;
	//! Number of spans transitioned to reserved state
	atomic32_t spans_to_reserved;
	//! Number of spans transitioned from reserved state
	atomic32_t spans_from_reserved;
	//! Number of raw memory map calls
	atomic32_t spans_map_calls;
#endif
};
typedef struct span_use_t span_use_t;
#endif

#if ENABLE_STATISTICS
struct size_class_use_t {
	//! Current number of allocations
	atomic32_t alloc_current;
	//! Peak number of allocations
	int32_t alloc_peak;
	//! Total number of allocations
	atomic32_t alloc_total;
	//! Total number of frees
	atomic32_t free_total;
	//! Number of spans in use
	atomic32_t spans_current;
	//! Peak number of spans in use
	int32_t spans_peak;
	//! Number of spans transitioned to cache
	atomic32_t spans_to_cache;
	//! Number of spans transitioned from cache
	atomic32_t spans_from_cache;
	//! Number of spans transitioned from reserved state
	atomic32_t spans_from_reserved;
	//! Number of spans mapped
	atomic32_t spans_map_calls;
	int32_t unused;
};
typedef struct size_class_use_t size_class_use_t;
#endif

// A span can either represent a single span of memory pages with size declared by span_map_count configuration variable,
// or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single
// span or a super span. A super span can further be divided into multiple spans (or even other super spans), where the first
// (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans
// are still alive and mapped in virtual memory, and once all subspans and the master have been unmapped, the entire
// superspan region is released and unmapped (on Windows for example, the entire superspan range has to be released
// in the same call to release the virtual memory range, but individual subranges can be decommitted individually
// to reduce physical memory use).
struct span_t {
	//! Free list
	void* free_list;
	//! Total block count of size class
	uint32_t block_count;
	//! Size class
	uint32_t size_class;
	//! Index of last block initialized in free list
	uint32_t free_list_limit;
	//! Number of used blocks remaining when in partial state
	uint32_t used_count;
	//! Deferred free list
	atomicptr_t free_list_deferred;
	//! Size of deferred free list, or list of spans when part of a cache list
	uint32_t list_size;
	//! Size of a block
	uint32_t block_size;
	//! Flags and counters
	uint32_t flags;
	//! Number of spans
	uint32_t span_count;
	//! Total span counter for master spans
	uint32_t total_spans;
	//! Offset from master span for subspans
	uint32_t offset_from_master;
	//! Remaining span counter, for master spans
	atomic32_t remaining_spans;
	//! Alignment offset
	uint32_t align_offset;
	//! Owning heap
	heap_t* heap;
	//! Next span
	span_t* next;
	//! Previous span
	span_t* prev;
};
_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");

struct span_cache_t {
	size_t count;
	span_t* span[MAX_THREAD_SPAN_CACHE];
};
typedef struct span_cache_t span_cache_t;

struct span_large_cache_t {
	size_t count;
	span_t* span[MAX_THREAD_SPAN_LARGE_CACHE];
};
typedef struct span_large_cache_t span_large_cache_t;

struct heap_size_class_t {
	//! Free list of active span
	void* free_list;
	//! Double linked list of partially used spans with free blocks.
	// Previous span pointer in head points to tail span of list.
	span_t* partial_span;
	//! Early level cache of fully free spans
	span_t* cache;
};
typedef struct heap_size_class_t heap_size_class_t;

// Control structure for a heap, either a thread heap or a first class heap if enabled
struct heap_t {
	//! Owning thread ID
	uintptr_t owner_thread;
	//! Free lists for each size class
	heap_size_class_t size_class[SIZE_CLASS_COUNT];
#if ENABLE_THREAD_CACHE
	//!
Arrays of fully freed spans, single span 512 span_cache_t span_cache; 513 #endif 514 //! List of deferred free spans (single linked list) 515 atomicptr_t span_free_deferred; 516 //! Number of full spans 517 size_t full_span_count; 518 //! Mapped but unused spans 519 span_t* span_reserve; 520 //! Master span for mapped but unused spans 521 span_t* span_reserve_master; 522 //! Number of mapped but unused spans 523 uint32_t spans_reserved; 524 //! Child count 525 atomic32_t child_count; 526 //! Next heap in id list 527 heap_t* next_heap; 528 //! Next heap in orphan list 529 heap_t* next_orphan; 530 //! Heap ID 531 int32_t id; 532 //! Finalization state flag 533 int finalize; 534 //! Master heap owning the memory pages 535 heap_t* master_heap; 536 #if ENABLE_THREAD_CACHE 537 //! Arrays of fully freed spans, large spans with > 1 span count 538 span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; 539 #endif 540 #if RPMALLOC_FIRST_CLASS_HEAPS 541 //! Double linked list of fully utilized spans with free blocks for each size class. 542 // Previous span pointer in head points to tail span of list. 543 span_t* full_span[SIZE_CLASS_COUNT]; 544 //! Double linked list of large and huge spans allocated by this heap 545 span_t* large_huge_span; 546 #endif 547 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 548 //! Current and high water mark of spans used per span count 549 span_use_t span_use[LARGE_CLASS_COUNT]; 550 #endif 551 #if ENABLE_STATISTICS 552 //! Allocation stats per size class 553 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; 554 //! Number of bytes transitioned thread -> global 555 atomic64_t thread_to_global; 556 //! Number of bytes transitioned global -> thread 557 atomic64_t global_to_thread; 558 #endif 559 }; 560 561 // Size class for defining a block size bucket 562 struct size_class_t { 563 //! Size of blocks in this class 564 uint32_t block_size; 565 //! Number of blocks in each chunk 566 uint16_t block_count; 567 //! Class index this class is merged with 568 uint16_t class_idx; 569 }; 570 _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); 571 572 struct global_cache_t { 573 //! Cache lock 574 atomic32_t lock; 575 //! Cache count 576 uint32_t count; 577 #if ENABLE_STATISTICS 578 //! Insert count 579 size_t insert_count; 580 //! Extract count 581 size_t extract_count; 582 #endif 583 //! Cached spans 584 span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; 585 //! Unlimited cache overflow 586 span_t* overflow; 587 }; 588 589 //////////// 590 /// 591 /// Global data 592 /// 593 ////// 594 595 //! Default span size (64KiB) 596 #define _memory_default_span_size (64 * 1024) 597 #define _memory_default_span_size_shift 16 598 #define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) 599 600 //! Initialized flag 601 static int _rpmalloc_initialized; 602 //! Main thread ID 603 static uintptr_t _rpmalloc_main_thread_id; 604 //! Configuration 605 static rpmalloc_config_t _memory_config; 606 //! Memory page size 607 static size_t _memory_page_size; 608 //! Shift to divide by page size 609 static size_t _memory_page_size_shift; 610 //! Granularity at which memory pages are mapped by OS 611 static size_t _memory_map_granularity; 612 #if RPMALLOC_CONFIGURABLE 613 //! Size of a span of memory pages 614 static size_t _memory_span_size; 615 //! Shift to divide by span size 616 static size_t _memory_span_size_shift; 617 //! Mask to get to start of a memory span 618 static uintptr_t _memory_span_mask; 619 #else 620 //! 
Hardwired span size 621 #define _memory_span_size _memory_default_span_size 622 #define _memory_span_size_shift _memory_default_span_size_shift 623 #define _memory_span_mask _memory_default_span_mask 624 #endif 625 //! Number of spans to map in each map call 626 static size_t _memory_span_map_count; 627 //! Number of spans to keep reserved in each heap 628 static size_t _memory_heap_reserve_count; 629 //! Global size classes 630 static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; 631 //! Run-time size limit of medium blocks 632 static size_t _memory_medium_size_limit; 633 //! Heap ID counter 634 static atomic32_t _memory_heap_id; 635 //! Huge page support 636 static int _memory_huge_pages; 637 #if ENABLE_GLOBAL_CACHE 638 //! Global span cache 639 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; 640 #endif 641 //! Global reserved spans 642 static span_t* _memory_global_reserve; 643 //! Global reserved count 644 static size_t _memory_global_reserve_count; 645 //! Global reserved master 646 static span_t* _memory_global_reserve_master; 647 //! All heaps 648 static heap_t* _memory_heaps[HEAP_ARRAY_SIZE]; 649 //! Used to restrict access to mapping memory for huge pages 650 static atomic32_t _memory_global_lock; 651 //! Orphaned heaps 652 static heap_t* _memory_orphan_heaps; 653 #if RPMALLOC_FIRST_CLASS_HEAPS 654 //! Orphaned heaps (first class heaps) 655 static heap_t* _memory_first_class_orphan_heaps; 656 #endif 657 #if ENABLE_STATISTICS 658 //! Allocations counter 659 static atomic64_t _allocation_counter; 660 //! Deallocations counter 661 static atomic64_t _deallocation_counter; 662 //! Active heap count 663 static atomic32_t _memory_active_heaps; 664 //! Number of currently mapped memory pages 665 static atomic32_t _mapped_pages; 666 //! Peak number of concurrently mapped memory pages 667 static int32_t _mapped_pages_peak; 668 //! Number of mapped master spans 669 static atomic32_t _master_spans; 670 //! Number of unmapped dangling master spans 671 static atomic32_t _unmapped_master_spans; 672 //! Running counter of total number of mapped memory pages since start 673 static atomic32_t _mapped_total; 674 //! Running counter of total number of unmapped memory pages since start 675 static atomic32_t _unmapped_total; 676 //! Number of currently mapped memory pages in OS calls 677 static atomic32_t _mapped_pages_os; 678 //! Number of currently allocated pages in huge allocations 679 static atomic32_t _huge_pages_current; 680 //! Peak number of currently allocated pages in huge allocations 681 static int32_t _huge_pages_peak; 682 #endif 683 684 //////////// 685 /// 686 /// Thread local heap and ID 687 /// 688 ////// 689 690 //! Current thread heap 691 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 692 static pthread_key_t _memory_thread_heap; 693 #else 694 # ifdef _MSC_VER 695 # define _Thread_local __declspec(thread) 696 # define TLS_MODEL 697 # else 698 # ifndef __HAIKU__ 699 # define TLS_MODEL __attribute__((tls_model("initial-exec"))) 700 # else 701 # define TLS_MODEL 702 # endif 703 # if !defined(__clang__) && defined(__GNUC__) 704 # define _Thread_local __thread 705 # endif 706 # endif 707 static _Thread_local heap_t* _memory_thread_heap TLS_MODEL; 708 #endif 709 710 static inline heap_t* 711 get_thread_heap_raw(void) { 712 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 713 return pthread_getspecific(_memory_thread_heap); 714 #else 715 return _memory_thread_heap; 716 #endif 717 } 718 719 //! 
Get the current thread heap 720 static inline heap_t* 721 get_thread_heap(void) { 722 heap_t* heap = get_thread_heap_raw(); 723 #if ENABLE_PRELOAD 724 if (EXPECTED(heap != 0)) 725 return heap; 726 rpmalloc_initialize(); 727 return get_thread_heap_raw(); 728 #else 729 return heap; 730 #endif 731 } 732 733 //! Fast thread ID 734 static inline uintptr_t 735 get_thread_id(void) { 736 #if defined(_WIN32) 737 return (uintptr_t)((void*)NtCurrentTeb()); 738 #elif defined(__GNUC__) || defined(__clang__) 739 uintptr_t tid; 740 # if defined(__i386__) 741 __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); 742 # elif defined(__x86_64__) 743 # if defined(__MACH__) 744 __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); 745 # else 746 __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); 747 # endif 748 # elif defined(__arm__) 749 __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); 750 # elif defined(__aarch64__) 751 # if defined(__MACH__) 752 // tpidr_el0 likely unused, always return 0 on iOS 753 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tid)); 754 # else 755 __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tid)); 756 # endif 757 # else 758 tid = (uintptr_t)((void*)get_thread_heap_raw()); 759 # endif 760 return tid; 761 #else 762 return (uintptr_t)((void*)get_thread_heap_raw()); 763 #endif 764 } 765 766 //! Set the current thread heap 767 static void 768 set_thread_heap(heap_t* heap) { 769 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 770 pthread_setspecific(_memory_thread_heap, heap); 771 #else 772 _memory_thread_heap = heap; 773 #endif 774 if (heap) 775 heap->owner_thread = get_thread_id(); 776 } 777 778 //! Set main thread ID 779 extern void 780 rpmalloc_set_main_thread(void); 781 782 void 783 rpmalloc_set_main_thread(void) { 784 _rpmalloc_main_thread_id = get_thread_id(); 785 } 786 787 static void 788 _rpmalloc_spin(void) { 789 #if defined(_MSC_VER) 790 _mm_pause(); 791 #elif defined(__x86_64__) || defined(__i386__) 792 __asm__ volatile("pause" ::: "memory"); 793 #elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) 794 __asm__ volatile("yield" ::: "memory"); 795 #elif defined(__powerpc__) || defined(__powerpc64__) 796 // No idea if ever been compiled in such archs but ... as precaution 797 __asm__ volatile("or 27,27,27"); 798 #elif defined(__sparc__) 799 __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); 800 #else 801 struct timespec ts = {0}; 802 nanosleep(&ts, 0); 803 #endif 804 } 805 806 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 807 static void NTAPI 808 _rpmalloc_thread_destructor(void* value) { 809 #if ENABLE_OVERRIDE 810 // If this is called on main thread it means rpmalloc_finalize 811 // has not been called and shutdown is forced (through _exit) or unclean 812 if (get_thread_id() == _rpmalloc_main_thread_id) 813 return; 814 #endif 815 if (value) 816 rpmalloc_thread_finalize(1); 817 } 818 #endif 819 820 821 //////////// 822 /// 823 /// Low level memory map/unmap 824 /// 825 ////// 826 827 //! 
Map more virtual memory 828 // size is number of bytes to map 829 // offset receives the offset in bytes from start of mapped region 830 // returns address to start of mapped region to use 831 static void* 832 _rpmalloc_mmap(size_t size, size_t* offset) { 833 rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); 834 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 835 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak); 836 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); 837 return _memory_config.memory_map(size, offset); 838 } 839 840 //! Unmap virtual memory 841 // address is the memory address to unmap, as returned from _memory_map 842 // size is the number of bytes to unmap, which might be less than full region for a partial unmap 843 // offset is the offset in bytes to the actual mapped region, as set by _memory_map 844 // release is set to 0 for partial unmap, or size of entire range for a full unmap 845 static void 846 _rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) { 847 rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); 848 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 849 if (release) { 850 rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); 851 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); 852 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); 853 } 854 _memory_config.memory_unmap(address, size, offset, release); 855 } 856 857 //! Default implementation to map new pages to virtual memory 858 static void* 859 _rpmalloc_mmap_os(size_t size, size_t* offset) { 860 //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity 861 size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0; 862 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 863 #if PLATFORM_WINDOWS 864 //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed" 865 void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); 866 if (!ptr) { 867 if (_memory_config.map_fail_callback) { 868 if (_memory_config.map_fail_callback(size + padding)) 869 return _rpmalloc_mmap_os(size, offset); 870 } else { 871 rpmalloc_assert(ptr, "Failed to map virtual memory block"); 872 } 873 return 0; 874 } 875 #else 876 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; 877 # if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 878 int fd = (int)VM_MAKE_TAG(240U); 879 if (_memory_huge_pages) 880 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; 881 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); 882 # elif defined(MAP_HUGETLB) 883 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0); 884 # elif defined(MAP_ALIGNED) 885 const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); 886 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); 887 # elif defined(MAP_ALIGN) 888 caddr_t base = (_memory_huge_pages ? 
(caddr_t)(4 << 20) : 0); 889 void* ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); 890 # else 891 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 892 # endif 893 if ((ptr == MAP_FAILED) || !ptr) { 894 if (_memory_config.map_fail_callback) { 895 if (_memory_config.map_fail_callback(size + padding)) 896 return _rpmalloc_mmap_os(size, offset); 897 } else if (errno != ENOMEM) { 898 rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block"); 899 } 900 return 0; 901 } 902 #endif 903 _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift)); 904 if (padding) { 905 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); 906 rpmalloc_assert(final_padding <= _memory_span_size, "Internal failure in padding"); 907 rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); 908 rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); 909 ptr = pointer_offset(ptr, final_padding); 910 *offset = final_padding >> 3; 911 } 912 rpmalloc_assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask), "Internal failure in padding"); 913 return ptr; 914 } 915 916 //! Default implementation to unmap pages from virtual memory 917 static void 918 _rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) { 919 rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); 920 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 921 rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); 922 if (release && offset) { 923 offset <<= 3; 924 address = pointer_offset(address, -(int32_t)offset); 925 if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) { 926 //Padding is always one span size 927 release += _memory_span_size; 928 } 929 } 930 #if !DISABLE_UNMAP 931 #if PLATFORM_WINDOWS 932 if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) { 933 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 934 } 935 #else 936 if (release) { 937 if (munmap(address, release)) { 938 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 939 } 940 } else { 941 #if defined(MADV_FREE_REUSABLE) 942 int ret; 943 while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN)) 944 errno = 0; 945 if ((ret == -1) && (errno != 0)) { 946 #elif defined(MADV_DONTNEED) 947 if (madvise(address, size, MADV_DONTNEED)) { 948 #elif defined(MADV_PAGEOUT) 949 if (madvise(address, size, MADV_PAGEOUT)) { 950 #elif defined(MADV_FREE) 951 if (madvise(address, size, MADV_FREE)) { 952 #else 953 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { 954 #endif 955 rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); 956 } 957 } 958 #endif 959 #endif 960 if (release) 961 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); 962 } 963 964 static void 965 _rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count); 966 967 //! 
Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller) 968 static span_t* 969 _rpmalloc_global_get_reserved_spans(size_t span_count) { 970 span_t* span = _memory_global_reserve; 971 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, span, span_count); 972 _memory_global_reserve_count -= span_count; 973 if (_memory_global_reserve_count) 974 _memory_global_reserve = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 975 else 976 _memory_global_reserve = 0; 977 return span; 978 } 979 980 //! Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe) 981 static void 982 _rpmalloc_global_set_reserved_spans(span_t* master, span_t* reserve, size_t reserve_span_count) { 983 _memory_global_reserve_master = master; 984 _memory_global_reserve_count = reserve_span_count; 985 _memory_global_reserve = reserve; 986 } 987 988 989 //////////// 990 /// 991 /// Span linked list management 992 /// 993 ////// 994 995 //! Add a span to double linked list at the head 996 static void 997 _rpmalloc_span_double_link_list_add(span_t** head, span_t* span) { 998 if (*head) 999 (*head)->prev = span; 1000 span->next = *head; 1001 *head = span; 1002 } 1003 1004 //! Pop head span from double linked list 1005 static void 1006 _rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) { 1007 rpmalloc_assert(*head == span, "Linked list corrupted"); 1008 span = *head; 1009 *head = span->next; 1010 } 1011 1012 //! Remove a span from double linked list 1013 static void 1014 _rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) { 1015 rpmalloc_assert(*head, "Linked list corrupted"); 1016 if (*head == span) { 1017 *head = span->next; 1018 } else { 1019 span_t* next_span = span->next; 1020 span_t* prev_span = span->prev; 1021 prev_span->next = next_span; 1022 if (EXPECTED(next_span != 0)) 1023 next_span->prev = prev_span; 1024 } 1025 } 1026 1027 1028 //////////// 1029 /// 1030 /// Span control 1031 /// 1032 ////// 1033 1034 static void 1035 _rpmalloc_heap_cache_insert(heap_t* heap, span_t* span); 1036 1037 static void 1038 _rpmalloc_heap_finalize(heap_t* heap); 1039 1040 static void 1041 _rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count); 1042 1043 //! Declare the span to be a subspan and store distance from master span and span count 1044 static void 1045 _rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) { 1046 rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), "Span master pointer and/or flag mismatch"); 1047 if (subspan != master) { 1048 subspan->flags = SPAN_FLAG_SUBSPAN; 1049 subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift); 1050 subspan->align_offset = 0; 1051 } 1052 subspan->span_count = (uint32_t)span_count; 1053 } 1054 1055 //! 
Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
static span_t*
_rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) {
	//Update the heap span reserve
	span_t* span = heap->span_reserve;
	heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size);
	heap->spans_reserved -= (uint32_t)span_count;

	_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
	if (span_count <= LARGE_CLASS_COUNT)
		_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);

	return span;
}

//! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size
static size_t
_rpmalloc_span_align_count(size_t span_count) {
	size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
	if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
		request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
	return request_count;
}

//! Setup a newly mapped span
static void
_rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) {
	span->total_spans = (uint32_t)total_span_count;
	span->span_count = (uint32_t)span_count;
	span->align_offset = (uint32_t)align_offset;
	span->flags = SPAN_FLAG_MASTER;
	atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
}

static void
_rpmalloc_span_unmap(span_t* span);

//! Map an aligned set of spans, taking configured mapping granularity and the page size into account
static span_t*
_rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) {
	//If we already have some, but not enough, reserved spans, release those to heap cache and map a new
	//full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
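	// Illustration, assuming the defaults in this file (64KiB spans, DEFAULT_SPAN_MAP_COUNT = 64;
	// the runtime configuration may differ): _rpmalloc_span_align_count rounds a small request up
	// to 64 spans, so the _rpmalloc_mmap call below reserves 64 * 64KiB = 4MiB at a time, and the
	// spans not needed right away become heap and/or global reserve instead of being unmapped.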
	size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
	size_t align_offset = 0;
	span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset);
	if (!span)
		return 0;
	_rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
	_rpmalloc_stat_inc(&_master_spans);
	if (span_count <= LARGE_CLASS_COUNT)
		_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
	if (aligned_span_count > span_count) {
		span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size);
		size_t reserved_count = aligned_span_count - span_count;
		if (heap->spans_reserved) {
			_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
			_rpmalloc_heap_cache_insert(heap, heap->span_reserve);
		}
		if (reserved_count > _memory_heap_reserve_count) {
			// If huge pages or eager span map count, the global reserve spin lock is held by caller, _rpmalloc_span_map
			rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, "Global spin lock not held as expected");
			size_t remain_count = reserved_count - _memory_heap_reserve_count;
			reserved_count = _memory_heap_reserve_count;
			span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size);
			if (_memory_global_reserve) {
				_rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count);
				_rpmalloc_span_unmap(_memory_global_reserve);
			}
			_rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
		}
		_rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
	}
	return span;
}

//! Map in memory pages for the given number of spans (or use previously reserved pages)
static span_t*
_rpmalloc_span_map(heap_t* heap, size_t span_count) {
	if (span_count <= heap->spans_reserved)
		return _rpmalloc_span_map_from_reserve(heap, span_count);
	span_t* span = 0;
	int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count);
	if (use_global_reserve) {
		// If huge pages, make sure only one thread maps more memory to avoid bloat
		while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
			_rpmalloc_spin();
		if (_memory_global_reserve_count >= span_count) {
			size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
			if (_memory_global_reserve_count < reserve_count)
				reserve_count = _memory_global_reserve_count;
			span = _rpmalloc_global_get_reserved_spans(reserve_count);
			if (span) {
				if (reserve_count > span_count) {
					span_t* reserved_span = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift);
					_rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, reserved_span, reserve_count - span_count);
				}
				// Already marked as subspan in _rpmalloc_global_get_reserved_spans
				span->span_count = (uint32_t)span_count;
			}
		}
	}
	if (!span)
		span = _rpmalloc_span_map_aligned_count(heap, span_count);
	if (use_global_reserve)
		atomic_store32_release(&_memory_global_lock, 0);
	return span;
}

//!
Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings) 1164 static void 1165 _rpmalloc_span_unmap(span_t* span) { 1166 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1167 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1168 1169 int is_master = !!(span->flags & SPAN_FLAG_MASTER); 1170 span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size))); 1171 rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1172 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 1173 1174 size_t span_count = span->span_count; 1175 if (!is_master) { 1176 //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master) 1177 rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); 1178 if (_memory_span_size >= _memory_page_size) 1179 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); 1180 } else { 1181 //Special double flag to denote an unmapped master 1182 //It must be kept in memory since span header must be used 1183 span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; 1184 _rpmalloc_stat_add(&_unmapped_master_spans, 1); 1185 } 1186 1187 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { 1188 //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span 1189 rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1190 size_t unmap_count = master->span_count; 1191 if (_memory_span_size < _memory_page_size) 1192 unmap_count = master->total_spans; 1193 _rpmalloc_stat_sub(&_master_spans, 1); 1194 _rpmalloc_stat_sub(&_unmapped_master_spans, 1); 1195 _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size); 1196 } 1197 } 1198 1199 //! Move the span (used for small or medium allocations) to the heap thread cache 1200 static void 1201 _rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) { 1202 rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); 1203 rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class"); 1204 rpmalloc_assert(span->span_count == 1, "Invalid span count"); 1205 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1206 atomic_decr32(&heap->span_use[0].current); 1207 #endif 1208 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1209 if (!heap->finalize) { 1210 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); 1211 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); 1212 if (heap->size_class[span->size_class].cache) 1213 _rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache); 1214 heap->size_class[span->size_class].cache = span; 1215 } else { 1216 _rpmalloc_span_unmap(span); 1217 } 1218 } 1219 1220 //! Initialize a (partial) free list up to next system memory page, while reserving the first block 1221 //! 
as allocated, returning number of blocks in list
static uint32_t
free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start, uint32_t block_count, uint32_t block_size) {
	rpmalloc_assert(block_count, "Internal failure");
	*first_block = block_start;
	if (block_count > 1) {
		void* free_block = pointer_offset(block_start, block_size);
		void* block_end = pointer_offset(block_start, (size_t)block_size * block_count);
		//If block size is less than half a memory page, bound init to next memory page boundary
		if (block_size < (_memory_page_size >> 1)) {
			void* page_end = pointer_offset(page_start, _memory_page_size);
			if (page_end < block_end)
				block_end = page_end;
		}
		*list = free_block;
		block_count = 2;
		void* next_block = pointer_offset(free_block, block_size);
		while (next_block < block_end) {
			*((void**)free_block) = next_block;
			free_block = next_block;
			++block_count;
			next_block = pointer_offset(next_block, block_size);
		}
		*((void**)free_block) = 0;
	} else {
		*list = 0;
	}
	return block_count;
}
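
// A rough worked example of the partial init above (illustrative numbers, assuming 4KiB
// system pages, the 128-byte span header and a 32-byte size class): the first call links
// only the blocks that fit in the first page, (4096 - 128) / 32 = 124 blocks, hands the
// first one out as the allocation, and leaves the rest of the span untouched; later calls
// continue with the next page worth of blocks as the earlier ones are consumed, keeping
// the memory actually touched close to what has been allocated.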
//! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list
static void*
_rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) {
	rpmalloc_assert(span->span_count == 1, "Internal failure");
	size_class_t* size_class = _memory_size_class + class_idx;
	span->size_class = class_idx;
	span->heap = heap;
	span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
	span->block_size = size_class->block_size;
	span->block_count = size_class->block_count;
	span->free_list = 0;
	span->list_size = 0;
	atomic_store_ptr_release(&span->free_list_deferred, 0);

	//Setup free list. Only initialize one system page worth of free blocks in list
	void* block;
	span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block,
		span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
	//Link span as partial if blocks remain to be initialized as free list, or full if fully initialized
	if (span->free_list_limit < span->block_count) {
		_rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
		span->used_count = span->free_list_limit;
	} else {
#if RPMALLOC_FIRST_CLASS_HEAPS
		_rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
#endif
		++heap->full_span_count;
		span->used_count = span->block_count;
	}
	return block;
}

static void
_rpmalloc_span_extract_free_list_deferred(span_t* span) {
	// We need acquire semantics on the CAS operation since we are interested in the list size
	// Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency
	do {
		span->free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
	} while (span->free_list == INVALID_POINTER);
	span->used_count -= span->list_size;
	span->list_size = 0;
	atomic_store_ptr_release(&span->free_list_deferred, 0);
}

static int
_rpmalloc_span_is_fully_utilized(span_t* span) {
	rpmalloc_assert(span->free_list_limit <= span->block_count, "Span free list corrupted");
	return !span->free_list && (span->free_list_limit >= span->block_count);
}

static int
_rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) {
	void* free_list = heap->size_class[iclass].free_list;
	span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask);
	if (span == class_span) {
		// Adopt the heap class free list back into the span free list
		void* block = span->free_list;
		void* last_block = 0;
		while (block) {
			last_block = block;
			block = *((void**)block);
		}
		uint32_t free_count = 0;
		block = free_list;
		while (block) {
			++free_count;
			block = *((void**)block);
		}
		if (last_block) {
			*((void**)last_block) = free_list;
		} else {
			span->free_list = free_list;
		}
		heap->size_class[iclass].free_list = 0;
		span->used_count -= free_count;
	}
	//If this assert triggers you have memory leaks
	rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
	if (span->list_size == span->used_count) {
		_rpmalloc_stat_dec(&heap->span_use[0].current);
		_rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
		// This function is only used for spans in double linked lists
		if (list_head)
			_rpmalloc_span_double_link_list_remove(list_head, span);
		_rpmalloc_span_unmap(span);
		return 1;
	}
	return 0;
}


////////////
///
/// Global cache
///
//////

#if ENABLE_GLOBAL_CACHE

//!
Finalize a global cache 1351 static void 1352 _rpmalloc_global_cache_finalize(global_cache_t* cache) { 1353 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1354 _rpmalloc_spin(); 1355 1356 for (size_t ispan = 0; ispan < cache->count; ++ispan) 1357 _rpmalloc_span_unmap(cache->span[ispan]); 1358 cache->count = 0; 1359 1360 while (cache->overflow) { 1361 span_t* span = cache->overflow; 1362 cache->overflow = span->next; 1363 _rpmalloc_span_unmap(span); 1364 } 1365 1366 atomic_store32_release(&cache->lock, 0); 1367 } 1368 1369 static void 1370 _rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) { 1371 const size_t cache_limit = (span_count == 1) ? 1372 GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE : 1373 GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1374 1375 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1376 1377 size_t insert_count = count; 1378 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1379 _rpmalloc_spin(); 1380 1381 #if ENABLE_STATISTICS 1382 cache->insert_count += count; 1383 #endif 1384 if ((cache->count + insert_count) > cache_limit) 1385 insert_count = cache_limit - cache->count; 1386 1387 memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count); 1388 cache->count += (uint32_t)insert_count; 1389 1390 #if ENABLE_UNLIMITED_CACHE 1391 while (insert_count < count) { 1392 #else 1393 // Enable unlimited cache if huge pages, or we will leak since it is unlikely that an entire huge page 1394 // will be unmapped, and we're unable to partially decommit a huge page 1395 while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { 1396 #endif 1397 span_t* current_span = span[insert_count++]; 1398 current_span->next = cache->overflow; 1399 cache->overflow = current_span; 1400 } 1401 atomic_store32_release(&cache->lock, 0); 1402 1403 span_t* keep = 0; 1404 for (size_t ispan = insert_count; ispan < count; ++ispan) { 1405 span_t* current_span = span[ispan]; 1406 // Keep master spans that has remaining subspans to avoid dangling them 1407 if ((current_span->flags & SPAN_FLAG_MASTER) && 1408 (atomic_load32(¤t_span->remaining_spans) > (int32_t)current_span->span_count)) { 1409 current_span->next = keep; 1410 keep = current_span; 1411 } else { 1412 _rpmalloc_span_unmap(current_span); 1413 } 1414 } 1415 1416 if (keep) { 1417 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1418 _rpmalloc_spin(); 1419 1420 size_t islot = 0; 1421 while (keep) { 1422 for (; islot < cache->count; ++islot) { 1423 span_t* current_span = cache->span[islot]; 1424 if (!(current_span->flags & SPAN_FLAG_MASTER) || ((current_span->flags & SPAN_FLAG_MASTER) && 1425 (atomic_load32(¤t_span->remaining_spans) <= (int32_t)current_span->span_count))) { 1426 _rpmalloc_span_unmap(current_span); 1427 cache->span[islot] = keep; 1428 break; 1429 } 1430 } 1431 if (islot == cache->count) 1432 break; 1433 keep = keep->next; 1434 } 1435 1436 if (keep) { 1437 span_t* tail = keep; 1438 while (tail->next) 1439 tail = tail->next; 1440 tail->next = cache->overflow; 1441 cache->overflow = keep; 1442 } 1443 1444 atomic_store32_release(&cache->lock, 0); 1445 } 1446 } 1447 1448 static size_t 1449 _rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) { 1450 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1451 1452 size_t extract_count = 0; 1453 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1454 _rpmalloc_spin(); 1455 1456 #if ENABLE_STATISTICS 1457 cache->extract_count += count; 1458 #endif 
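	// Take as many spans as possible from the top of the bulk span array first; any remainder
	// is pulled from the overflow list (populated when the fixed-size array was full on insert).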
1459 size_t want = count - extract_count; 1460 if (want > cache->count) 1461 want = cache->count; 1462 1463 memcpy(span + extract_count, cache->span + (cache->count - want), sizeof(span_t*) * want); 1464 cache->count -= (uint32_t)want; 1465 extract_count += want; 1466 1467 while ((extract_count < count) && cache->overflow) { 1468 span_t* current_span = cache->overflow; 1469 span[extract_count++] = current_span; 1470 cache->overflow = current_span->next; 1471 } 1472 1473 #if ENABLE_ASSERTS 1474 for (size_t ispan = 0; ispan < extract_count; ++ispan) { 1475 assert(span[ispan]->span_count == span_count); 1476 } 1477 #endif 1478 1479 atomic_store32_release(&cache->lock, 0); 1480 1481 return extract_count; 1482 } 1483 1484 #endif 1485 1486 //////////// 1487 /// 1488 /// Heap control 1489 /// 1490 ////// 1491 1492 static void _rpmalloc_deallocate_huge(span_t*); 1493 1494 //! Store the given spans as reserve in the given heap 1495 static void 1496 _rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) { 1497 heap->span_reserve_master = master; 1498 heap->span_reserve = reserve; 1499 heap->spans_reserved = (uint32_t)reserve_span_count; 1500 } 1501 1502 //! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use 1503 static void 1504 _rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) { 1505 span_t* span = (span_t*)((void*)atomic_exchange_ptr_acquire(&heap->span_free_deferred, 0)); 1506 while (span) { 1507 span_t* next_span = (span_t*)span->free_list; 1508 rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); 1509 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 1510 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1511 --heap->full_span_count; 1512 _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); 1513 #if RPMALLOC_FIRST_CLASS_HEAPS 1514 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 1515 #endif 1516 _rpmalloc_stat_dec(&heap->span_use[0].current); 1517 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1518 if (single_span && !*single_span) 1519 *single_span = span; 1520 else 1521 _rpmalloc_heap_cache_insert(heap, span); 1522 } else { 1523 if (span->size_class == SIZE_CLASS_HUGE) { 1524 _rpmalloc_deallocate_huge(span); 1525 } else { 1526 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Span size class invalid"); 1527 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1528 --heap->full_span_count; 1529 #if RPMALLOC_FIRST_CLASS_HEAPS 1530 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); 1531 #endif 1532 uint32_t idx = span->span_count - 1; 1533 _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); 1534 _rpmalloc_stat_dec(&heap->span_use[idx].current); 1535 if (!idx && single_span && !*single_span) 1536 *single_span = span; 1537 else 1538 _rpmalloc_heap_cache_insert(heap, span); 1539 } 1540 } 1541 span = next_span; 1542 } 1543 } 1544 1545 static void 1546 _rpmalloc_heap_unmap(heap_t* heap) { 1547 if (!heap->master_heap) { 1548 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { 1549 span_t* span = (span_t*)((uintptr_t)heap & _memory_span_mask); 1550 _rpmalloc_span_unmap(span); 1551 } 1552 } else { 1553 if (atomic_decr32(&heap->master_heap->child_count) == 0) { 1554 _rpmalloc_heap_unmap(heap->master_heap); 1555 } 1556 } 1557 } 1558 1559 static void 1560 _rpmalloc_heap_global_finalize(heap_t* heap) { 1561 if 
(heap->finalize++ > 1) { 1562 --heap->finalize; 1563 return; 1564 } 1565 1566 _rpmalloc_heap_finalize(heap); 1567 1568 #if ENABLE_THREAD_CACHE 1569 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1570 span_cache_t* span_cache; 1571 if (!iclass) 1572 span_cache = &heap->span_cache; 1573 else 1574 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1575 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1576 _rpmalloc_span_unmap(span_cache->span[ispan]); 1577 span_cache->count = 0; 1578 } 1579 #endif 1580 1581 if (heap->full_span_count) { 1582 --heap->finalize; 1583 return; 1584 } 1585 1586 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 1587 if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) { 1588 --heap->finalize; 1589 return; 1590 } 1591 } 1592 //Heap is now completely free, unmap and remove from heap list 1593 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1594 heap_t* list_heap = _memory_heaps[list_idx]; 1595 if (list_heap == heap) { 1596 _memory_heaps[list_idx] = heap->next_heap; 1597 } else { 1598 while (list_heap->next_heap != heap) 1599 list_heap = list_heap->next_heap; 1600 list_heap->next_heap = heap->next_heap; 1601 } 1602 1603 _rpmalloc_heap_unmap(heap); 1604 } 1605 1606 //! Insert a single span into thread heap cache, releasing to global cache if overflow 1607 static void 1608 _rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) { 1609 if (UNEXPECTED(heap->finalize != 0)) { 1610 _rpmalloc_span_unmap(span); 1611 _rpmalloc_heap_global_finalize(heap); 1612 return; 1613 } 1614 #if ENABLE_THREAD_CACHE 1615 size_t span_count = span->span_count; 1616 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); 1617 if (span_count == 1) { 1618 span_cache_t* span_cache = &heap->span_cache; 1619 span_cache->span[span_cache->count++] = span; 1620 if (span_cache->count == MAX_THREAD_SPAN_CACHE) { 1621 const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; 1622 #if ENABLE_GLOBAL_CACHE 1623 _rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); 1624 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER); 1625 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER); 1626 #else 1627 for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) 1628 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1629 #endif 1630 span_cache->count = remain_count; 1631 } 1632 } else { 1633 size_t cache_idx = span_count - 2; 1634 span_large_cache_t* span_cache = heap->span_large_cache + cache_idx; 1635 span_cache->span[span_cache->count++] = span; 1636 const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1637 if (span_cache->count == cache_limit) { 1638 const size_t transfer_limit = 2 + (cache_limit >> 2); 1639 const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? 
THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit); 1640 const size_t remain_count = cache_limit - transfer_count; 1641 #if ENABLE_GLOBAL_CACHE 1642 _rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size); 1643 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count); 1644 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count); 1645 #else 1646 for (size_t ispan = 0; ispan < transfer_count; ++ispan) 1647 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1648 #endif 1649 span_cache->count = remain_count; 1650 } 1651 } 1652 #else 1653 (void)sizeof(heap); 1654 _rpmalloc_span_unmap(span); 1655 #endif 1656 } 1657 1658 //! Extract the given number of spans from the different cache levels 1659 static span_t* 1660 _rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) { 1661 span_t* span = 0; 1662 #if ENABLE_THREAD_CACHE 1663 span_cache_t* span_cache; 1664 if (span_count == 1) 1665 span_cache = &heap->span_cache; 1666 else 1667 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1668 if (span_cache->count) { 1669 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); 1670 return span_cache->span[--span_cache->count]; 1671 } 1672 #endif 1673 return span; 1674 } 1675 1676 static span_t* 1677 _rpmalloc_heap_thread_cache_deferred_extract(heap_t* heap, size_t span_count) { 1678 span_t* span = 0; 1679 if (span_count == 1) { 1680 _rpmalloc_heap_cache_adopt_deferred(heap, &span); 1681 } else { 1682 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1683 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1684 } 1685 return span; 1686 } 1687 1688 static span_t* 1689 _rpmalloc_heap_reserved_extract(heap_t* heap, size_t span_count) { 1690 if (heap->spans_reserved >= span_count) 1691 return _rpmalloc_span_map(heap, span_count); 1692 return 0; 1693 } 1694 1695 //! 
Extract a span from the global cache 1696 static span_t* 1697 _rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) { 1698 #if ENABLE_GLOBAL_CACHE 1699 #if ENABLE_THREAD_CACHE 1700 span_cache_t* span_cache; 1701 size_t wanted_count; 1702 if (span_count == 1) { 1703 span_cache = &heap->span_cache; 1704 wanted_count = THREAD_SPAN_CACHE_TRANSFER; 1705 } else { 1706 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1707 wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; 1708 } 1709 span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count); 1710 if (span_cache->count) { 1711 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size); 1712 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count); 1713 return span_cache->span[--span_cache->count]; 1714 } 1715 #else 1716 span_t* span = 0; 1717 size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); 1718 if (count) { 1719 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size); 1720 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count); 1721 return span; 1722 } 1723 #endif 1724 #endif 1725 (void)sizeof(heap); 1726 (void)sizeof(span_count); 1727 return 0; 1728 } 1729 1730 static void 1731 _rpmalloc_inc_span_statistics(heap_t* heap, size_t span_count, uint32_t class_idx) { 1732 (void)sizeof(heap); 1733 (void)sizeof(span_count); 1734 (void)sizeof(class_idx); 1735 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1736 uint32_t idx = (uint32_t)span_count - 1; 1737 uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current); 1738 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) 1739 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); 1740 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak); 1741 #endif 1742 } 1743 1744 //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory 1745 static span_t* 1746 _rpmalloc_heap_extract_new_span(heap_t* heap, heap_size_class_t* heap_size_class, size_t span_count, uint32_t class_idx) { 1747 span_t* span; 1748 #if ENABLE_THREAD_CACHE 1749 if (heap_size_class && heap_size_class->cache) { 1750 span = heap_size_class->cache; 1751 heap_size_class->cache = (heap->span_cache.count ? heap->span_cache.span[--heap->span_cache.count] : 0); 1752 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1753 return span; 1754 } 1755 #endif 1756 (void)sizeof(class_idx); 1757 // Allow 50% overhead to increase cache hits 1758 size_t base_span_count = span_count; 1759 size_t limit_span_count = (span_count > 2) ? 
(span_count + (span_count >> 1)) : span_count; 1760 if (limit_span_count > LARGE_CLASS_COUNT) 1761 limit_span_count = LARGE_CLASS_COUNT; 1762 do { 1763 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1764 if (EXPECTED(span != 0)) { 1765 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1766 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1767 return span; 1768 } 1769 span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); 1770 if (EXPECTED(span != 0)) { 1771 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1772 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1773 return span; 1774 } 1775 span = _rpmalloc_heap_reserved_extract(heap, span_count); 1776 if (EXPECTED(span != 0)) { 1777 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); 1778 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1779 return span; 1780 } 1781 span = _rpmalloc_heap_global_cache_extract(heap, span_count); 1782 if (EXPECTED(span != 0)) { 1783 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1784 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1785 return span; 1786 } 1787 ++span_count; 1788 } while (span_count <= limit_span_count); 1789 //Final fallback, map in more virtual memory 1790 span = _rpmalloc_span_map(heap, base_span_count); 1791 _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); 1792 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); 1793 return span; 1794 } 1795 1796 static void 1797 _rpmalloc_heap_initialize(heap_t* heap) { 1798 memset(heap, 0, sizeof(heap_t)); 1799 //Get a new heap ID 1800 heap->id = 1 + atomic_incr32(&_memory_heap_id); 1801 1802 //Link in heap in heap ID map 1803 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1804 heap->next_heap = _memory_heaps[list_idx]; 1805 _memory_heaps[list_idx] = heap; 1806 } 1807 1808 static void 1809 _rpmalloc_heap_orphan(heap_t* heap, int first_class) { 1810 heap->owner_thread = (uintptr_t)-1; 1811 #if RPMALLOC_FIRST_CLASS_HEAPS 1812 heap_t** heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); 1813 #else 1814 (void)sizeof(first_class); 1815 heap_t** heap_list = &_memory_orphan_heaps; 1816 #endif 1817 heap->next_orphan = *heap_list; 1818 *heap_list = heap; 1819 } 1820 1821 //! Allocate a new heap from newly mapped memory pages 1822 static heap_t* 1823 _rpmalloc_heap_allocate_new(void) { 1824 // Map in pages for a 16 heaps. If page size is greater than required size for this, map a page and 1825 // use first part for heaps and remaining part for spans for allocations. 
Adds a lot of complexity, 1826 // but saves a lot of memory on systems where page size > 64 spans (4MiB) 1827 size_t heap_size = sizeof(heap_t); 1828 size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); 1829 size_t request_heap_count = 16; 1830 size_t heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1831 size_t block_size = _memory_span_size * heap_span_count; 1832 size_t span_count = heap_span_count; 1833 span_t* span = 0; 1834 // If there are global reserved spans, use these first 1835 if (_memory_global_reserve_count >= heap_span_count) { 1836 span = _rpmalloc_global_get_reserved_spans(heap_span_count); 1837 } 1838 if (!span) { 1839 if (_memory_page_size > block_size) { 1840 span_count = _memory_page_size / _memory_span_size; 1841 block_size = _memory_page_size; 1842 // If using huge pages, make sure to grab enough heaps to avoid reallocating a huge page just to serve new heaps 1843 size_t possible_heap_count = (block_size - sizeof(span_t)) / aligned_heap_size; 1844 if (possible_heap_count >= (request_heap_count * 16)) 1845 request_heap_count *= 16; 1846 else if (possible_heap_count < request_heap_count) 1847 request_heap_count = possible_heap_count; 1848 heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1849 } 1850 1851 size_t align_offset = 0; 1852 span = (span_t*)_rpmalloc_mmap(block_size, &align_offset); 1853 if (!span) 1854 return 0; 1855 1856 // Master span will contain the heaps 1857 _rpmalloc_stat_inc(&_master_spans); 1858 _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); 1859 } 1860 1861 size_t remain_size = _memory_span_size - sizeof(span_t); 1862 heap_t* heap = (heap_t*)pointer_offset(span, sizeof(span_t)); 1863 _rpmalloc_heap_initialize(heap); 1864 1865 // Put extra heaps as orphans 1866 size_t num_heaps = remain_size / aligned_heap_size; 1867 if (num_heaps < request_heap_count) 1868 num_heaps = request_heap_count; 1869 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); 1870 heap_t* extra_heap = (heap_t*)pointer_offset(heap, aligned_heap_size); 1871 while (num_heaps > 1) { 1872 _rpmalloc_heap_initialize(extra_heap); 1873 extra_heap->master_heap = heap; 1874 _rpmalloc_heap_orphan(extra_heap, 1); 1875 extra_heap = (heap_t*)pointer_offset(extra_heap, aligned_heap_size); 1876 --num_heaps; 1877 } 1878 1879 if (span_count > heap_span_count) { 1880 // Cap reserved spans 1881 size_t remain_count = span_count - heap_span_count; 1882 size_t reserve_count = (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count : remain_count); 1883 span_t* remain_span = (span_t*)pointer_offset(span, heap_span_count * _memory_span_size); 1884 _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); 1885 1886 if (remain_count > reserve_count) { 1887 // Set to global reserved spans 1888 remain_span = (span_t*)pointer_offset(remain_span, reserve_count * _memory_span_size); 1889 reserve_count = remain_count - reserve_count; 1890 _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); 1891 } 1892 } 1893 1894 return heap; 1895 } 1896 1897 static heap_t* 1898 _rpmalloc_heap_extract_orphan(heap_t** heap_list) { 1899 heap_t* heap = *heap_list; 1900 *heap_list = (heap ? heap->next_orphan : 0); 1901 return heap; 1902 } 1903 1904 //! 
Allocate a new heap, potentially reusing a previously orphaned heap 1905 static heap_t* 1906 _rpmalloc_heap_allocate(int first_class) { 1907 heap_t* heap = 0; 1908 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1909 _rpmalloc_spin(); 1910 if (first_class == 0) 1911 heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); 1912 #if RPMALLOC_FIRST_CLASS_HEAPS 1913 if (!heap) 1914 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); 1915 #endif 1916 if (!heap) 1917 heap = _rpmalloc_heap_allocate_new(); 1918 atomic_store32_release(&_memory_global_lock, 0); 1919 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1920 return heap; 1921 } 1922 1923 static void 1924 _rpmalloc_heap_release(void* heapptr, int first_class, int release_cache) { 1925 heap_t* heap = (heap_t*)heapptr; 1926 if (!heap) 1927 return; 1928 //Release thread cache spans back to global cache 1929 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1930 if (release_cache || heap->finalize) { 1931 #if ENABLE_THREAD_CACHE 1932 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1933 span_cache_t* span_cache; 1934 if (!iclass) 1935 span_cache = &heap->span_cache; 1936 else 1937 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1938 if (!span_cache->count) 1939 continue; 1940 #if ENABLE_GLOBAL_CACHE 1941 if (heap->finalize) { 1942 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1943 _rpmalloc_span_unmap(span_cache->span[ispan]); 1944 } else { 1945 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); 1946 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); 1947 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); 1948 } 1949 #else 1950 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1951 _rpmalloc_span_unmap(span_cache->span[ispan]); 1952 #endif 1953 span_cache->count = 0; 1954 } 1955 #endif 1956 } 1957 1958 if (get_thread_heap_raw() == heap) 1959 set_thread_heap(0); 1960 1961 #if ENABLE_STATISTICS 1962 atomic_decr32(&_memory_active_heaps); 1963 rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, "Still active heaps during finalization"); 1964 #endif 1965 1966 // If we are forcibly terminating with _exit the state of the 1967 // lock atomic is unknown and it's best to just go ahead and exit 1968 if (get_thread_id() != _rpmalloc_main_thread_id) { 1969 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1970 _rpmalloc_spin(); 1971 } 1972 _rpmalloc_heap_orphan(heap, first_class); 1973 atomic_store32_release(&_memory_global_lock, 0); 1974 } 1975 1976 static void 1977 _rpmalloc_heap_release_raw(void* heapptr, int release_cache) { 1978 _rpmalloc_heap_release(heapptr, 0, release_cache); 1979 } 1980 1981 static void 1982 _rpmalloc_heap_release_raw_fc(void* heapptr) { 1983 _rpmalloc_heap_release_raw(heapptr, 1); 1984 } 1985 1986 static void 1987 _rpmalloc_heap_finalize(heap_t* heap) { 1988 if (heap->spans_reserved) { 1989 span_t* span = _rpmalloc_span_map(heap, heap->spans_reserved); 1990 _rpmalloc_span_unmap(span); 1991 heap->spans_reserved = 0; 1992 } 1993 1994 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1995 1996 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 1997 if (heap->size_class[iclass].cache) 1998 _rpmalloc_span_unmap(heap->size_class[iclass].cache); 1999 heap->size_class[iclass].cache = 0; 2000 span_t* span = heap->size_class[iclass].partial_span; 2001 while (span) { 2002 span_t* next = span->next; 2003 
_rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span); 2004 span = next; 2005 } 2006 // If class still has a free list it must be a full span 2007 if (heap->size_class[iclass].free_list) { 2008 span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask); 2009 span_t** list = 0; 2010 #if RPMALLOC_FIRST_CLASS_HEAPS 2011 list = &heap->full_span[iclass]; 2012 #endif 2013 --heap->full_span_count; 2014 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { 2015 if (list) 2016 _rpmalloc_span_double_link_list_remove(list, class_span); 2017 _rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span); 2018 } 2019 } 2020 } 2021 2022 #if ENABLE_THREAD_CACHE 2023 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 2024 span_cache_t* span_cache; 2025 if (!iclass) 2026 span_cache = &heap->span_cache; 2027 else 2028 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 2029 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 2030 _rpmalloc_span_unmap(span_cache->span[ispan]); 2031 span_cache->count = 0; 2032 } 2033 #endif 2034 rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), "Heaps still active during finalization"); 2035 } 2036 2037 2038 //////////// 2039 /// 2040 /// Allocation entry points 2041 /// 2042 ////// 2043 2044 //! Pop first block from a free list 2045 static void* 2046 free_list_pop(void** list) { 2047 void* block = *list; 2048 *list = *((void**)block); 2049 return block; 2050 } 2051 2052 //! Allocate a small/medium sized memory block from the given heap 2053 static void* 2054 _rpmalloc_allocate_from_heap_fallback(heap_t* heap, heap_size_class_t* heap_size_class, uint32_t class_idx) { 2055 span_t* span = heap_size_class->partial_span; 2056 if (EXPECTED(span != 0)) { 2057 rpmalloc_assert(span->block_count == _memory_size_class[span->size_class].block_count, "Span block count corrupted"); 2058 rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), "Internal failure"); 2059 void* block; 2060 if (span->free_list) { 2061 //Span local free list is not empty, swap to size class free list 2062 block = free_list_pop(&span->free_list); 2063 heap_size_class->free_list = span->free_list; 2064 span->free_list = 0; 2065 } else { 2066 //If the span did not fully initialize free list, link up another page worth of blocks 2067 void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size)); 2068 span->free_list_limit += free_list_partial_init(&heap_size_class->free_list, &block, 2069 (void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start, 2070 span->block_count - span->free_list_limit, span->block_size); 2071 } 2072 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span block count corrupted"); 2073 span->used_count = span->free_list_limit; 2074 2075 //Swap in deferred free list if present 2076 if (atomic_load_ptr(&span->free_list_deferred)) 2077 _rpmalloc_span_extract_free_list_deferred(span); 2078 2079 //If span is still not fully utilized keep it in partial list and early return block 2080 if (!_rpmalloc_span_is_fully_utilized(span)) 2081 return block; 2082 2083 //The span is fully utilized, unlink from partial list and add to fully utilized list 2084 _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, span); 2085 #if RPMALLOC_FIRST_CLASS_HEAPS 2086 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 2087 #endif 2088 ++heap->full_span_count; 
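// The block popped above remains valid; the span is simply tracked as full until one of its blocks is freed back to it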
2089 return block; 2090 } 2091 2092 //Find a span in one of the cache levels 2093 span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); 2094 if (EXPECTED(span != 0)) { 2095 //Mark span as owned by this heap and set base data, return first block 2096 return _rpmalloc_span_initialize_new(heap, heap_size_class, span, class_idx); 2097 } 2098 2099 return 0; 2100 } 2101 2102 //! Allocate a small sized memory block from the given heap 2103 static void* 2104 _rpmalloc_allocate_small(heap_t* heap, size_t size) { 2105 rpmalloc_assert(heap, "No thread heap"); 2106 //Small sizes have unique size classes 2107 const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); 2108 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2109 _rpmalloc_stat_inc_alloc(heap, class_idx); 2110 if (EXPECTED(heap_size_class->free_list != 0)) 2111 return free_list_pop(&heap_size_class->free_list); 2112 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2113 } 2114 2115 //! Allocate a medium sized memory block from the given heap 2116 static void* 2117 _rpmalloc_allocate_medium(heap_t* heap, size_t size) { 2118 rpmalloc_assert(heap, "No thread heap"); 2119 //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes) 2120 const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); 2121 const uint32_t class_idx = _memory_size_class[base_idx].class_idx; 2122 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2123 _rpmalloc_stat_inc_alloc(heap, class_idx); 2124 if (EXPECTED(heap_size_class->free_list != 0)) 2125 return free_list_pop(&heap_size_class->free_list); 2126 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2127 } 2128 2129 //! Allocate a large sized memory block from the given heap 2130 static void* 2131 _rpmalloc_allocate_large(heap_t* heap, size_t size) { 2132 rpmalloc_assert(heap, "No thread heap"); 2133 //Calculate number of needed max sized spans (including header) 2134 //Since this function is never called if size > LARGE_SIZE_LIMIT 2135 //the span_count is guaranteed to be <= LARGE_CLASS_COUNT 2136 size += SPAN_HEADER_SIZE; 2137 size_t span_count = size >> _memory_span_size_shift; 2138 if (size & (_memory_span_size - 1)) 2139 ++span_count; 2140 2141 //Find a span in one of the cache levels 2142 span_t* span = _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); 2143 if (!span) 2144 return span; 2145 2146 //Mark span as owned by this heap and set base data 2147 rpmalloc_assert(span->span_count >= span_count, "Internal failure"); 2148 span->size_class = SIZE_CLASS_LARGE; 2149 span->heap = heap; 2150 2151 #if RPMALLOC_FIRST_CLASS_HEAPS 2152 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2153 #endif 2154 ++heap->full_span_count; 2155 2156 return pointer_offset(span, SPAN_HEADER_SIZE); 2157 } 2158 2159 //! 
Allocate a huge block by mapping memory pages directly 2160 static void* 2161 _rpmalloc_allocate_huge(heap_t* heap, size_t size) { 2162 rpmalloc_assert(heap, "No thread heap"); 2163 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2164 size += SPAN_HEADER_SIZE; 2165 size_t num_pages = size >> _memory_page_size_shift; 2166 if (size & (_memory_page_size - 1)) 2167 ++num_pages; 2168 size_t align_offset = 0; 2169 span_t* span = (span_t*)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); 2170 if (!span) 2171 return span; 2172 2173 //Store page count in span_count 2174 span->size_class = SIZE_CLASS_HUGE; 2175 span->span_count = (uint32_t)num_pages; 2176 span->align_offset = (uint32_t)align_offset; 2177 span->heap = heap; 2178 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2179 2180 #if RPMALLOC_FIRST_CLASS_HEAPS 2181 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2182 #endif 2183 ++heap->full_span_count; 2184 2185 return pointer_offset(span, SPAN_HEADER_SIZE); 2186 } 2187 2188 //! Allocate a block of the given size 2189 static void* 2190 _rpmalloc_allocate(heap_t* heap, size_t size) { 2191 // fprintf(stderr, "rpmalloc_allocate\n"); 2192 _rpmalloc_stat_add64(&_allocation_counter, 1); 2193 if (EXPECTED(size <= SMALL_SIZE_LIMIT)) 2194 return _rpmalloc_allocate_small(heap, size); 2195 else if (size <= _memory_medium_size_limit) 2196 return _rpmalloc_allocate_medium(heap, size); 2197 else if (size <= LARGE_SIZE_LIMIT) 2198 return _rpmalloc_allocate_large(heap, size); 2199 return _rpmalloc_allocate_huge(heap, size); 2200 } 2201 2202 static void* 2203 _rpmalloc_aligned_allocate(heap_t* heap, size_t alignment, size_t size) { 2204 if (alignment <= SMALL_GRANULARITY) 2205 return _rpmalloc_allocate(heap, size); 2206 2207 #if ENABLE_VALIDATE_ARGS 2208 if ((size + alignment) < size) { 2209 errno = EINVAL; 2210 return 0; 2211 } 2212 if (alignment & (alignment - 1)) { 2213 errno = EINVAL; 2214 return 0; 2215 } 2216 #endif 2217 2218 if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) { 2219 // If alignment is less or equal to span header size (which is power of two), 2220 // and size aligned to span header size multiples is less than size + alignment, 2221 // then use natural alignment of blocks to provide alignment 2222 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE; 2223 rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), "Failed alignment calculation"); 2224 if (multiple_size <= (size + alignment)) 2225 return _rpmalloc_allocate(heap, multiple_size); 2226 } 2227 2228 void* ptr = 0; 2229 size_t align_mask = alignment - 1; 2230 if (alignment <= _memory_page_size) { 2231 ptr = _rpmalloc_allocate(heap, size + alignment); 2232 if ((uintptr_t)ptr & align_mask) { 2233 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2234 //Mark as having aligned blocks 2235 span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask); 2236 span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; 2237 } 2238 return ptr; 2239 } 2240 2241 // Fallback to mapping new pages for this request. Since pointers passed 2242 // to rpfree must be able to reach the start of the span by bitmasking of 2243 // the address with the span size, the returned aligned pointer from this 2244 // function must be with a span size of the start of the mapped area. 2245 // In worst case this requires us to loop and map pages until we get a 2246 // suitable memory address. 
It also means we can never align to span size 2247 // or greater, since the span header will push alignment more than one 2248 // span size away from span start (thus causing pointer mask to give us 2249 // an invalid span start on free) 2250 if (alignment & align_mask) { 2251 errno = EINVAL; 2252 return 0; 2253 } 2254 if (alignment >= _memory_span_size) { 2255 errno = EINVAL; 2256 return 0; 2257 } 2258 2259 size_t extra_pages = alignment / _memory_page_size; 2260 2261 // Since each span has a header, we will at least need one extra memory page 2262 size_t num_pages = 1 + (size / _memory_page_size); 2263 if (size & (_memory_page_size - 1)) 2264 ++num_pages; 2265 2266 if (extra_pages > num_pages) 2267 num_pages = 1 + extra_pages; 2268 2269 size_t original_pages = num_pages; 2270 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; 2271 if (limit_pages < (original_pages * 2)) 2272 limit_pages = original_pages * 2; 2273 2274 size_t mapped_size, align_offset; 2275 span_t* span; 2276 2277 retry: 2278 align_offset = 0; 2279 mapped_size = num_pages * _memory_page_size; 2280 2281 span = (span_t*)_rpmalloc_mmap(mapped_size, &align_offset); 2282 if (!span) { 2283 errno = ENOMEM; 2284 return 0; 2285 } 2286 ptr = pointer_offset(span, SPAN_HEADER_SIZE); 2287 2288 if ((uintptr_t)ptr & align_mask) 2289 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2290 2291 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || 2292 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || 2293 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { 2294 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); 2295 ++num_pages; 2296 if (num_pages > limit_pages) { 2297 errno = EINVAL; 2298 return 0; 2299 } 2300 goto retry; 2301 } 2302 2303 //Store page count in span_count 2304 span->size_class = SIZE_CLASS_HUGE; 2305 span->span_count = (uint32_t)num_pages; 2306 span->align_offset = (uint32_t)align_offset; 2307 span->heap = heap; 2308 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2309 2310 #if RPMALLOC_FIRST_CLASS_HEAPS 2311 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2312 #endif 2313 ++heap->full_span_count; 2314 2315 _rpmalloc_stat_add64(&_allocation_counter, 1); 2316 2317 return ptr; 2318 } 2319 2320 2321 //////////// 2322 /// 2323 /// Deallocation entry points 2324 /// 2325 ////// 2326 2327 //! 
Deallocate the given small/medium memory block in the current thread local heap 2328 static void 2329 _rpmalloc_deallocate_direct_small_or_medium(span_t* span, void* block) { 2330 heap_t* heap = span->heap; 2331 rpmalloc_assert(heap->owner_thread == get_thread_id() || !heap->owner_thread || heap->finalize, "Internal failure"); 2332 //Add block to free list 2333 if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { 2334 span->used_count = span->block_count; 2335 #if RPMALLOC_FIRST_CLASS_HEAPS 2336 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 2337 #endif 2338 _rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span); 2339 --heap->full_span_count; 2340 } 2341 *((void**)block) = span->free_list; 2342 --span->used_count; 2343 span->free_list = block; 2344 if (UNEXPECTED(span->used_count == span->list_size)) { 2345 _rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span); 2346 _rpmalloc_span_release_to_cache(heap, span); 2347 } 2348 } 2349 2350 static void 2351 _rpmalloc_deallocate_defer_free_span(heap_t* heap, span_t* span) { 2352 if (span->size_class != SIZE_CLASS_HUGE) 2353 _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); 2354 //This list does not need ABA protection, no mutable side state 2355 do { 2356 span->free_list = (void*)atomic_load_ptr(&heap->span_free_deferred); 2357 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); 2358 } 2359 2360 //! Put the block in the deferred free list of the owning span 2361 static void 2362 _rpmalloc_deallocate_defer_small_or_medium(span_t* span, void* block) { 2363 // The memory ordering here is a bit tricky, to avoid having to ABA protect 2364 // the deferred free list to avoid desynchronization of list and list size 2365 // we need to have acquire semantics on successful CAS of the pointer to 2366 // guarantee the list_size variable validity + release semantics on pointer store 2367 void* free_list; 2368 do { 2369 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2370 } while (free_list == INVALID_POINTER); 2371 *((void**)block) = free_list; 2372 uint32_t free_count = ++span->list_size; 2373 atomic_store_ptr_release(&span->free_list_deferred, block); 2374 if (free_count == span->block_count) { 2375 // Span was completely freed by this block. Due to the INVALID_POINTER spin lock 2376 // no other thread can reach this state simultaneously on this span. 
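// (list_size is only modified while free_list_deferred holds INVALID_POINTER, so this check cannot race with other deferred frees)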
2377 // Safe to move to owner heap deferred cache 2378 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2379 } 2380 } 2381 2382 static void 2383 _rpmalloc_deallocate_small_or_medium(span_t* span, void* p) { 2384 _rpmalloc_stat_inc_free(span->heap, span->size_class); 2385 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { 2386 //Realign pointer to block start 2387 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2388 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2389 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); 2390 } 2391 //Check if block belongs to this heap or if deallocation should be deferred 2392 #if RPMALLOC_FIRST_CLASS_HEAPS 2393 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2394 #else 2395 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2396 #endif 2397 if (!defer) 2398 _rpmalloc_deallocate_direct_small_or_medium(span, p); 2399 else 2400 _rpmalloc_deallocate_defer_small_or_medium(span, p); 2401 } 2402 2403 //! Deallocate the given large memory block to the current heap 2404 static void 2405 _rpmalloc_deallocate_large(span_t* span) { 2406 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); 2407 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2408 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2409 //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap 2410 #if RPMALLOC_FIRST_CLASS_HEAPS 2411 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2412 #else 2413 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2414 #endif 2415 if (defer) { 2416 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2417 return; 2418 } 2419 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2420 --span->heap->full_span_count; 2421 #if RPMALLOC_FIRST_CLASS_HEAPS 2422 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2423 #endif 2424 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 2425 //Decrease counter 2426 size_t idx = span->span_count - 1; 2427 atomic_decr32(&span->heap->span_use[idx].current); 2428 #endif 2429 heap_t* heap = span->heap; 2430 rpmalloc_assert(heap, "No thread heap"); 2431 #if ENABLE_THREAD_CACHE 2432 const int set_as_reserved = ((span->span_count > 1) && (heap->span_cache.count == 0) && !heap->finalize && !heap->spans_reserved); 2433 #else 2434 const int set_as_reserved = ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); 2435 #endif 2436 if (set_as_reserved) { 2437 heap->span_reserve = span; 2438 heap->spans_reserved = span->span_count; 2439 if (span->flags & SPAN_FLAG_MASTER) { 2440 heap->span_reserve_master = span; 2441 } else { //SPAN_FLAG_SUBSPAN 2442 span_t* master = (span_t*)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); 2443 heap->span_reserve_master = master; 2444 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 2445 rpmalloc_assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count, "Master span count corrupted"); 2446 } 2447 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); 2448 } else { 2449 //Insert into cache list 2450 
_rpmalloc_heap_cache_insert(heap, span); 2451 } 2452 } 2453 2454 //! Deallocate the given huge span 2455 static void 2456 _rpmalloc_deallocate_huge(span_t* span) { 2457 rpmalloc_assert(span->heap, "No span heap"); 2458 #if RPMALLOC_FIRST_CLASS_HEAPS 2459 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2460 #else 2461 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2462 #endif 2463 if (defer) { 2464 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2465 return; 2466 } 2467 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2468 --span->heap->full_span_count; 2469 #if RPMALLOC_FIRST_CLASS_HEAPS 2470 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2471 #endif 2472 2473 //Oversized allocation, page count is stored in span_count 2474 size_t num_pages = span->span_count; 2475 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size); 2476 _rpmalloc_stat_sub(&_huge_pages_current, num_pages); 2477 } 2478 2479 //! Deallocate the given block 2480 static void 2481 _rpmalloc_deallocate(void* p) { 2482 _rpmalloc_stat_add64(&_deallocation_counter, 1); 2483 //Grab the span (always at start of span, using span alignment) 2484 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2485 if (UNEXPECTED(!span)) 2486 return; 2487 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) 2488 _rpmalloc_deallocate_small_or_medium(span, p); 2489 else if (span->size_class == SIZE_CLASS_LARGE) 2490 _rpmalloc_deallocate_large(span); 2491 else 2492 _rpmalloc_deallocate_huge(span); 2493 } 2494 2495 //////////// 2496 /// 2497 /// Reallocation entry points 2498 /// 2499 ////// 2500 2501 static size_t 2502 _rpmalloc_usable_size(void* p); 2503 2504 //! 
Reallocate the given block to the given size 2505 static void* 2506 _rpmalloc_reallocate(heap_t* heap, void* p, size_t size, size_t oldsize, unsigned int flags) { 2507 if (p) { 2508 //Grab the span using guaranteed span alignment 2509 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2510 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 2511 //Small/medium sized block 2512 rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); 2513 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2514 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2515 uint32_t block_idx = block_offset / span->block_size; 2516 void* block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size); 2517 if (!oldsize) 2518 oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); 2519 if ((size_t)span->block_size >= size) { 2520 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2521 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2522 memmove(block, p, oldsize); 2523 return block; 2524 } 2525 } else if (span->size_class == SIZE_CLASS_LARGE) { 2526 //Large block 2527 size_t total_size = size + SPAN_HEADER_SIZE; 2528 size_t num_spans = total_size >> _memory_span_size_shift; 2529 if (total_size & (_memory_span_mask - 1)) 2530 ++num_spans; 2531 size_t current_spans = span->span_count; 2532 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2533 if (!oldsize) 2534 oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2535 if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { 2536 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2537 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2538 memmove(block, p, oldsize); 2539 return block; 2540 } 2541 } else { 2542 //Oversized block 2543 size_t total_size = size + SPAN_HEADER_SIZE; 2544 size_t num_pages = total_size >> _memory_page_size_shift; 2545 if (total_size & (_memory_page_size - 1)) 2546 ++num_pages; 2547 //Page count is stored in span_count 2548 size_t current_pages = span->span_count; 2549 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2550 if (!oldsize) 2551 oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2552 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { 2553 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2554 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2555 memmove(block, p, oldsize); 2556 return block; 2557 } 2558 } 2559 } else { 2560 oldsize = 0; 2561 } 2562 2563 if (!!(flags & RPMALLOC_GROW_OR_FAIL)) 2564 return 0; 2565 2566 //Size is greater than block size, need to allocate a new block and deallocate the old 2567 //Avoid hysteresis by overallocating if increase is small (below 37%) 2568 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); 2569 size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); 2570 void* block = _rpmalloc_allocate(heap, new_size); 2571 if (p && block) { 2572 if (!(flags & RPMALLOC_NO_PRESERVE)) 2573 memcpy(block, p, oldsize < new_size ? 
oldsize : new_size); 2574 _rpmalloc_deallocate(p); 2575 } 2576 2577 return block; 2578 } 2579 2580 static void* 2581 _rpmalloc_aligned_reallocate(heap_t* heap, void* ptr, size_t alignment, size_t size, size_t oldsize, 2582 unsigned int flags) { 2583 if (alignment <= SMALL_GRANULARITY) 2584 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); 2585 2586 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); 2587 size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); 2588 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { 2589 if (no_alloc || (size >= (usablesize / 2))) 2590 return ptr; 2591 } 2592 // Aligned alloc marks span as having aligned blocks 2593 void* block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); 2594 if (EXPECTED(block != 0)) { 2595 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { 2596 if (!oldsize) 2597 oldsize = usablesize; 2598 memcpy(block, ptr, oldsize < size ? oldsize : size); 2599 } 2600 _rpmalloc_deallocate(ptr); 2601 } 2602 return block; 2603 } 2604 2605 2606 //////////// 2607 /// 2608 /// Initialization, finalization and utility 2609 /// 2610 ////// 2611 2612 //! Get the usable size of the given block 2613 static size_t 2614 _rpmalloc_usable_size(void* p) { 2615 //Grab the span using guaranteed span alignment 2616 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2617 if (span->size_class < SIZE_CLASS_COUNT) { 2618 //Small/medium block 2619 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2620 return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size); 2621 } 2622 if (span->size_class == SIZE_CLASS_LARGE) { 2623 //Large block 2624 size_t current_spans = span->span_count; 2625 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); 2626 } 2627 //Oversized block, page count is stored in span_count 2628 size_t current_pages = span->span_count; 2629 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); 2630 } 2631 2632 //! Adjust and optimize the size class properties for the given class 2633 static void 2634 _rpmalloc_adjust_size_class(size_t iclass) { 2635 size_t block_size = _memory_size_class[iclass].block_size; 2636 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; 2637 2638 _memory_size_class[iclass].block_count = (uint16_t)block_count; 2639 _memory_size_class[iclass].class_idx = (uint16_t)iclass; 2640 2641 //Check if previous size classes can be merged 2642 if (iclass >= SMALL_CLASS_COUNT) { 2643 size_t prevclass = iclass; 2644 while (prevclass > 0) { 2645 --prevclass; 2646 //A class can be merged if number of pages and number of blocks are equal 2647 if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count) 2648 memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass])); 2649 else 2650 break; 2651 } 2652 } 2653 } 2654 2655 //! 
Initialize the allocator and setup global data 2656 extern inline int 2657 rpmalloc_initialize(void) { 2658 if (_rpmalloc_initialized) { 2659 rpmalloc_thread_initialize(); 2660 return 0; 2661 } 2662 return rpmalloc_initialize_config(0); 2663 } 2664 2665 int 2666 rpmalloc_initialize_config(const rpmalloc_config_t* config) { 2667 if (_rpmalloc_initialized) { 2668 rpmalloc_thread_initialize(); 2669 return 0; 2670 } 2671 _rpmalloc_initialized = 1; 2672 2673 if (config) 2674 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); 2675 else 2676 memset(&_memory_config, 0, sizeof(rpmalloc_config_t)); 2677 2678 if (!_memory_config.memory_map || !_memory_config.memory_unmap) { 2679 _memory_config.memory_map = _rpmalloc_mmap_os; 2680 _memory_config.memory_unmap = _rpmalloc_unmap_os; 2681 } 2682 2683 #if PLATFORM_WINDOWS 2684 SYSTEM_INFO system_info; 2685 memset(&system_info, 0, sizeof(system_info)); 2686 GetSystemInfo(&system_info); 2687 _memory_map_granularity = system_info.dwAllocationGranularity; 2688 #else 2689 _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); 2690 #endif 2691 2692 #if RPMALLOC_CONFIGURABLE 2693 _memory_page_size = _memory_config.page_size; 2694 #else 2695 _memory_page_size = 0; 2696 #endif 2697 _memory_huge_pages = 0; 2698 if (!_memory_page_size) { 2699 #if PLATFORM_WINDOWS 2700 _memory_page_size = system_info.dwPageSize; 2701 #else 2702 _memory_page_size = _memory_map_granularity; 2703 if (_memory_config.enable_huge_pages) { 2704 #if defined(__linux__) 2705 size_t huge_page_size = 0; 2706 FILE* meminfo = fopen("/proc/meminfo", "r"); 2707 if (meminfo) { 2708 char line[128]; 2709 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { 2710 line[sizeof(line) - 1] = 0; 2711 if (strstr(line, "Hugepagesize:")) 2712 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; 2713 } 2714 fclose(meminfo); 2715 } 2716 if (huge_page_size) { 2717 _memory_huge_pages = 1; 2718 _memory_page_size = huge_page_size; 2719 _memory_map_granularity = huge_page_size; 2720 } 2721 #elif defined(__FreeBSD__) 2722 int rc; 2723 size_t sz = sizeof(rc); 2724 2725 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) { 2726 _memory_huge_pages = 1; 2727 _memory_page_size = 2 * 1024 * 1024; 2728 _memory_map_granularity = _memory_page_size; 2729 } 2730 #elif defined(__APPLE__) || defined(__NetBSD__) 2731 _memory_huge_pages = 1; 2732 _memory_page_size = 2 * 1024 * 1024; 2733 _memory_map_granularity = _memory_page_size; 2734 #endif 2735 } 2736 #endif 2737 } else { 2738 if (_memory_config.enable_huge_pages) 2739 _memory_huge_pages = 1; 2740 } 2741 2742 #if PLATFORM_WINDOWS 2743 if (_memory_config.enable_huge_pages) { 2744 HANDLE token = 0; 2745 size_t large_page_minimum = GetLargePageMinimum(); 2746 if (large_page_minimum) 2747 OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); 2748 if (token) { 2749 LUID luid; 2750 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { 2751 TOKEN_PRIVILEGES token_privileges; 2752 memset(&token_privileges, 0, sizeof(token_privileges)); 2753 token_privileges.PrivilegeCount = 1; 2754 token_privileges.Privileges[0].Luid = luid; 2755 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; 2756 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { 2757 if (GetLastError() == ERROR_SUCCESS) 2758 _memory_huge_pages = 1; 2759 } 2760 } 2761 CloseHandle(token); 2762 } 2763 if (_memory_huge_pages) { 2764 if (large_page_minimum > _memory_page_size) 2765 _memory_page_size = 
large_page_minimum; 2766 if (large_page_minimum > _memory_map_granularity) 2767 _memory_map_granularity = large_page_minimum; 2768 } 2769 } 2770 #endif 2771 2772 size_t min_span_size = 256; 2773 size_t max_page_size; 2774 #if UINTPTR_MAX > 0xFFFFFFFF 2775 max_page_size = 4096ULL * 1024ULL * 1024ULL; 2776 #else 2777 max_page_size = 4 * 1024 * 1024; 2778 #endif 2779 if (_memory_page_size < min_span_size) 2780 _memory_page_size = min_span_size; 2781 if (_memory_page_size > max_page_size) 2782 _memory_page_size = max_page_size; 2783 _memory_page_size_shift = 0; 2784 size_t page_size_bit = _memory_page_size; 2785 while (page_size_bit != 1) { 2786 ++_memory_page_size_shift; 2787 page_size_bit >>= 1; 2788 } 2789 _memory_page_size = ((size_t)1 << _memory_page_size_shift); 2790 2791 #if RPMALLOC_CONFIGURABLE 2792 if (!_memory_config.span_size) { 2793 _memory_span_size = _memory_default_span_size; 2794 _memory_span_size_shift = _memory_default_span_size_shift; 2795 _memory_span_mask = _memory_default_span_mask; 2796 } else { 2797 size_t span_size = _memory_config.span_size; 2798 if (span_size > (256 * 1024)) 2799 span_size = (256 * 1024); 2800 _memory_span_size = 4096; 2801 _memory_span_size_shift = 12; 2802 while (_memory_span_size < span_size) { 2803 _memory_span_size <<= 1; 2804 ++_memory_span_size_shift; 2805 } 2806 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); 2807 } 2808 #endif 2809 2810 _memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT); 2811 if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) 2812 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2813 if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) 2814 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2815 _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) ? 
DEFAULT_SPAN_MAP_COUNT : _memory_span_map_count; 2816 2817 _memory_config.page_size = _memory_page_size; 2818 _memory_config.span_size = _memory_span_size; 2819 _memory_config.span_map_count = _memory_span_map_count; 2820 _memory_config.enable_huge_pages = _memory_huge_pages; 2821 2822 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 2823 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) 2824 return -1; 2825 #endif 2826 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2827 fls_key = FlsAlloc(&_rpmalloc_thread_destructor); 2828 #endif 2829 2830 //Setup all small and medium size classes 2831 size_t iclass = 0; 2832 _memory_size_class[iclass].block_size = SMALL_GRANULARITY; 2833 _rpmalloc_adjust_size_class(iclass); 2834 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { 2835 size_t size = iclass * SMALL_GRANULARITY; 2836 _memory_size_class[iclass].block_size = (uint32_t)size; 2837 _rpmalloc_adjust_size_class(iclass); 2838 } 2839 //At least two blocks per span, then fall back to large allocations 2840 _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; 2841 if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) 2842 _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; 2843 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { 2844 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); 2845 if (size > _memory_medium_size_limit) 2846 break; 2847 _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; 2848 _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); 2849 } 2850 2851 _memory_orphan_heaps = 0; 2852 #if RPMALLOC_FIRST_CLASS_HEAPS 2853 _memory_first_class_orphan_heaps = 0; 2854 #endif 2855 #if ENABLE_STATISTICS 2856 atomic_store32(&_memory_active_heaps, 0); 2857 atomic_store32(&_mapped_pages, 0); 2858 _mapped_pages_peak = 0; 2859 atomic_store32(&_master_spans, 0); 2860 atomic_store32(&_mapped_total, 0); 2861 atomic_store32(&_unmapped_total, 0); 2862 atomic_store32(&_mapped_pages_os, 0); 2863 atomic_store32(&_huge_pages_current, 0); 2864 _huge_pages_peak = 0; 2865 #endif 2866 memset(_memory_heaps, 0, sizeof(_memory_heaps)); 2867 atomic_store32_release(&_memory_global_lock, 0); 2868 2869 //Initialize this thread 2870 rpmalloc_thread_initialize(); 2871 return 0; 2872 } 2873 2874 //! 
Finalize the allocator 2875 void 2876 rpmalloc_finalize(void) { 2877 rpmalloc_thread_finalize(1); 2878 //rpmalloc_dump_statistics(stdout); 2879 2880 if (_memory_global_reserve) { 2881 atomic_add32(&_memory_global_reserve_master->remaining_spans, -(int32_t)_memory_global_reserve_count); 2882 _memory_global_reserve_master = 0; 2883 _memory_global_reserve_count = 0; 2884 _memory_global_reserve = 0; 2885 } 2886 atomic_store32_release(&_memory_global_lock, 0); 2887 2888 //Free all thread caches and fully free spans 2889 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { 2890 heap_t* heap = _memory_heaps[list_idx]; 2891 while (heap) { 2892 heap_t* next_heap = heap->next_heap; 2893 heap->finalize = 1; 2894 _rpmalloc_heap_global_finalize(heap); 2895 heap = next_heap; 2896 } 2897 } 2898 2899 #if ENABLE_GLOBAL_CACHE 2900 //Free global caches 2901 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) 2902 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); 2903 #endif 2904 2905 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 2906 pthread_key_delete(_memory_thread_heap); 2907 #endif 2908 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2909 FlsFree(fls_key); 2910 fls_key = 0; 2911 #endif 2912 #if ENABLE_STATISTICS 2913 //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code 2914 rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); 2915 rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, "Memory leak detected"); 2916 #endif 2917 2918 _rpmalloc_initialized = 0; 2919 } 2920 2921 //! Initialize thread, assign heap 2922 extern inline void 2923 rpmalloc_thread_initialize(void) { 2924 if (!get_thread_heap_raw()) { 2925 heap_t* heap = _rpmalloc_heap_allocate(0); 2926 if (heap) { 2927 _rpmalloc_stat_inc(&_memory_active_heaps); 2928 set_thread_heap(heap); 2929 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2930 FlsSetValue(fls_key, heap); 2931 #endif 2932 } 2933 } 2934 } 2935 2936 //! Finalize thread, orphan heap 2937 void 2938 rpmalloc_thread_finalize(int release_caches) { 2939 heap_t* heap = get_thread_heap_raw(); 2940 if (heap) 2941 _rpmalloc_heap_release_raw(heap, release_caches); 2942 set_thread_heap(0); 2943 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2944 FlsSetValue(fls_key, 0); 2945 #endif 2946 } 2947 2948 int 2949 rpmalloc_is_thread_initialized(void) { 2950 return (get_thread_heap_raw() != 0) ? 
int
rpmalloc_is_thread_initialized(void) {
	return (get_thread_heap_raw() != 0) ? 1 : 0;
}

const rpmalloc_config_t*
rpmalloc_config(void) {
	return &_memory_config;
}

// Extern interface

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc(size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	heap_t* heap = get_thread_heap();
	return _rpmalloc_allocate(heap, size);
}

extern inline void
rpfree(void* ptr) {
	_rpmalloc_deallocate(ptr);
}

extern inline RPMALLOC_ALLOCATOR void*
rpcalloc(size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	heap_t* heap = get_thread_heap();
	void* block = _rpmalloc_allocate(heap, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rprealloc(void* ptr, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return ptr;
	}
#endif
	heap_t* heap = get_thread_heap();
	return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
}

extern RPMALLOC_ALLOCATOR void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize,
	unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
		errno = EINVAL;
		return 0;
	}
#endif
	heap_t* heap = get_thread_heap();
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
}

extern RPMALLOC_ALLOCATOR void*
rpaligned_alloc(size_t alignment, size_t size) {
	heap_t* heap = get_thread_heap();
	return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpaligned_calloc(size_t alignment, size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	void* block = rpaligned_alloc(alignment, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rpmemalign(size_t alignment, size_t size) {
	return rpaligned_alloc(alignment, size);
}

extern inline int
rpposix_memalign(void **memptr, size_t alignment, size_t size) {
	if (memptr)
		*memptr = rpaligned_alloc(alignment, size);
	else
		return EINVAL;
	return *memptr ? 0 : ENOMEM;
}

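/* Usage sketch (illustrative only, not part of the upstream source): the extern
   interface above mirrors the familiar libc entry points. With ENABLE_VALIDATE_ARGS
   enabled, the calloc-style calls reject multiplication overflow with errno = EINVAL,
   and the aligned variants all funnel into rpaligned_alloc():

       void* buf = rpcalloc(1024, sizeof(uint64_t));      // zeroed 8 KiB
       buf = rprealloc(buf, 16384);                       // grow in place or move

       void* aligned = rpaligned_alloc(64, 4096);         // 64-byte aligned block
       void* posix_style = 0;
       int rc = rpposix_memalign(&posix_style, 64, 4096); // 0, EINVAL or ENOMEM

       rpfree(buf);
       rpfree(aligned);
       if (rc == 0)
           rpfree(posix_style);
*/
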
extern inline size_t
rpmalloc_usable_size(void* ptr) {
	return (ptr ? _rpmalloc_usable_size(ptr) : 0);
}

extern inline void
rpmalloc_thread_collect(void) {
}

void
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) {
	memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
	heap_t* heap = get_thread_heap_raw();
	if (!heap)
		return;

	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		size_class_t* size_class = _memory_size_class + iclass;
		span_t* span = heap->size_class[iclass].partial_span;
		while (span) {
			size_t free_count = span->list_size;
			size_t block_count = size_class->block_count;
			if (span->free_list_limit < block_count)
				block_count = span->free_list_limit;
			free_count += (block_count - span->used_count);
			stats->sizecache = free_count * size_class->block_size;
			span = span->next;
		}
	}

#if ENABLE_THREAD_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		span_cache_t* span_cache;
		if (!iclass)
			span_cache = &heap->span_cache;
		else
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
		stats->spancache = span_cache->count * (iclass + 1) * _memory_span_size;
	}
#endif

	span_t* deferred = (span_t*)atomic_load_ptr(&heap->span_free_deferred);
	while (deferred) {
		if (deferred->size_class != SIZE_CLASS_HUGE)
			stats->spancache = (size_t)deferred->span_count * _memory_span_size;
		deferred = (span_t*)deferred->free_list;
	}

#if ENABLE_STATISTICS
	stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
	stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);

	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
		stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
		stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
		stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
		stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
		stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
		stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
		stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
		stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
	}
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
		stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
		stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
		stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
		stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
		stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
		stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
		stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
	}
#endif
}

void
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
	memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
#if ENABLE_STATISTICS
	stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
	stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
	stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
	stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
	stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
	stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
#endif
#if ENABLE_GLOBAL_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
		stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size;
#endif
}

#if ENABLE_STATISTICS

static void
_memory_heap_dump_statistics(heap_t* heap, void* file) {
	fprintf(file, "Heap %d stats:\n", heap->id);
	fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
			continue;
		fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
			atomic_load32(&heap->size_class_use[iclass].alloc_current),
			heap->size_class_use[iclass].alloc_peak,
			atomic_load32(&heap->size_class_use[iclass].alloc_total),
			atomic_load32(&heap->size_class_use[iclass].free_total),
			_memory_size_class[iclass].block_size,
			_memory_size_class[iclass].block_count,
			atomic_load32(&heap->size_class_use[iclass].spans_current),
			heap->size_class_use[iclass].spans_peak,
			((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
			atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
	}
	fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n");
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
			continue;
		fprintf(file, "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
			atomic_load32(&heap->span_use[iclass].current),
			atomic_load32(&heap->span_use[iclass].high),
			atomic_load32(&heap->span_use[iclass].spans_deferred),
			((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
#if ENABLE_THREAD_CACHE
			(unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
#else
			0, (size_t)0, (size_t)0,
#endif
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
			atomic_load32(&heap->span_use[iclass].spans_map_calls));
	}
	fprintf(file, "Full spans: %zu\n", heap->full_span_count);
	fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
	fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
}

#endif

void
rpmalloc_dump_statistics(void* file) {
#if ENABLE_STATISTICS
	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
		heap_t* heap = _memory_heaps[list_idx];
		while (heap) {
			int need_dump = 0;
			for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
					rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].free_total), "Heap statistics counter mismatch");
					rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls), "Heap statistics counter mismatch");
					continue;
				}
				need_dump = 1;
			}
			for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
					continue;
				need_dump = 1;
			}
			if (need_dump)
				_memory_heap_dump_statistics(heap, file);
			heap = heap->next_heap;
		}
	}
	fprintf(file, "Global stats:\n");
	size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
	size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
	fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
	fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));

	fprintf(file, "GlobalCacheMiB\n");
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		global_cache_t* cache = _memory_span_cache + iclass;
		size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;

		size_t global_overflow_cache = 0;
		span_t* span = cache->overflow;
		while (span) {
			global_overflow_cache += iclass * _memory_span_size;
			span = span->next;
		}
		if (global_cache || global_overflow_cache || cache->insert_count || cache->extract_count)
			fprintf(file, "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", iclass + 1, global_cache / (size_t)(1024 * 1024), global_overflow_cache / (size_t)(1024 * 1024), cache->insert_count, cache->extract_count);
	}

	size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
	size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
	size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
	size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
	size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
	fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
	fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
		mapped / (size_t)(1024 * 1024),
		mapped_os / (size_t)(1024 * 1024),
		mapped_peak / (size_t)(1024 * 1024),
		mapped_total / (size_t)(1024 * 1024),
		unmapped_total / (size_t)(1024 * 1024));

	fprintf(file, "\n");
#if 0
	int64_t allocated = atomic_load64(&_allocation_counter);
	int64_t deallocated = atomic_load64(&_deallocation_counter);
	fprintf(file, "Allocation count: %lli\n", allocated);
	fprintf(file, "Deallocation count: %lli\n", deallocated);
	fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
	fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
	fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
#endif
#endif
	(void)sizeof(file);
}

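/* Usage sketch (illustrative only, not part of the upstream source): the statistics
   queries can be called at any time, but most counters stay zero unless the library
   is compiled with ENABLE_STATISTICS (and the cache fields additionally depend on the
   thread/global cache options). rpmalloc_dump_statistics() expects a FILE* passed as
   void*:

       rpmalloc_global_statistics_t global;
       rpmalloc_global_statistics(&global);
       printf("mapped: %zu bytes, cached: %zu bytes\n", global.mapped, global.cached);

       rpmalloc_thread_statistics_t per_thread;
       rpmalloc_thread_statistics(&per_thread);
       printf("span cache: %zu bytes\n", per_thread.spancache);

       rpmalloc_dump_statistics(stdout);        // prints nothing unless ENABLE_STATISTICS
*/
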
#if RPMALLOC_FIRST_CLASS_HEAPS

extern inline rpmalloc_heap_t*
rpmalloc_heap_acquire(void) {
	// Must be a pristine heap from newly mapped memory pages, or else memory blocks
	// could already be allocated from the heap which would (wrongly) be released when
	// heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be
	// pristine from the dedicated orphan list can be used.
	heap_t* heap = _rpmalloc_heap_allocate(1);
	heap->owner_thread = 0;
	_rpmalloc_stat_inc(&_memory_active_heaps);
	return heap;
}

extern inline void
rpmalloc_heap_release(rpmalloc_heap_t* heap) {
	if (heap)
		_rpmalloc_heap_release(heap, 1, 1);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_allocate(heap, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
	return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	void* block = _rpmalloc_aligned_allocate(heap, alignment, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return ptr;
	}
#endif
	return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
}

extern inline void
rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
	(void)sizeof(heap);
	_rpmalloc_deallocate(ptr);
}

extern inline void
rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
	span_t* span;
	span_t* next_span;

	_rpmalloc_heap_cache_adopt_deferred(heap, 0);

	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		span = heap->size_class[iclass].partial_span;
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
		heap->size_class[iclass].partial_span = 0;
		span = heap->full_span[iclass];
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
	}
	memset(heap->size_class, 0, sizeof(heap->size_class));
	memset(heap->full_span, 0, sizeof(heap->full_span));

	span = heap->large_huge_span;
	while (span) {
		next_span = span->next;
		if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
			_rpmalloc_deallocate_huge(span);
		else
			_rpmalloc_heap_cache_insert(heap, span);
		span = next_span;
	}
	heap->large_huge_span = 0;
	heap->full_span_count = 0;

#if ENABLE_THREAD_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		span_cache_t* span_cache;
		if (!iclass)
			span_cache = &heap->span_cache;
		else
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
		if (!span_cache->count)
			continue;
#if ENABLE_GLOBAL_CACHE
		_rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
		_rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
		_rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
#else
		for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
			_rpmalloc_span_unmap(span_cache->span[ispan]);
#endif
		span_cache->count = 0;
	}
#endif

#if ENABLE_STATISTICS
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
		atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
	}
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->span_use[iclass].current, 0);
	}
#endif
}

extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
	heap_t* prev_heap = get_thread_heap_raw();
	if (prev_heap != heap) {
		set_thread_heap(heap);
		if (prev_heap)
			rpmalloc_heap_release(prev_heap);
	}
}

#endif

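/* Usage sketch (illustrative only, not part of the upstream source): the first-class
   heap API above is compiled only when RPMALLOC_FIRST_CLASS_HEAPS is defined. A heap
   acquired this way is pristine, so it can also be torn down wholesale:

       rpmalloc_heap_t* heap = rpmalloc_heap_acquire();

       void* a = rpmalloc_heap_alloc(heap, 128);
       void* b = rpmalloc_heap_aligned_alloc(heap, 64, 4096);

       rpmalloc_heap_free(heap, a);              // free a single block
       (void)b;
       rpmalloc_heap_free_all(heap);             // or release every remaining block at once
       rpmalloc_heap_release(heap);              // heap must not be used after this
*/
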
#if ENABLE_PRELOAD || ENABLE_OVERRIDE

#include "malloc.c"

#endif