github.com/moontrade/mdbx-go@v0.4.0/mdbx_stat.c (about) 1 /* mdbx_stat.c - memory-mapped database status tool */ 2 3 /* 4 * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru> 5 * and other libmdbx authors: please see AUTHORS file. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted only as authorized by the OpenLDAP 10 * Public License. 11 * 12 * A copy of this license is available in the file LICENSE in the 13 * top-level directory of the distribution or, alternatively, at 14 * <http://www.OpenLDAP.org/license.html>. */ 15 16 #ifdef _MSC_VER 17 #if _MSC_VER > 1800 18 #pragma warning(disable : 4464) /* relative include path contains '..' */ 19 #endif 20 #pragma warning(disable : 4996) /* The POSIX name is deprecated... */ 21 #endif /* _MSC_VER (warnings) */ 22 23 #define xMDBX_TOOLS /* Avoid using internal eASSERT() */ 24 /* 25 * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru> 26 * and other libmdbx authors: please see AUTHORS file. 27 * All rights reserved. 28 * 29 * Redistribution and use in source and binary forms, with or without 30 * modification, are permitted only as authorized by the OpenLDAP 31 * Public License. 32 * 33 * A copy of this license is available in the file LICENSE in the 34 * top-level directory of the distribution or, alternatively, at 35 * <http://www.OpenLDAP.org/license.html>. 
*/ 36 37 #define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5 38 #ifdef MDBX_CONFIG_H 39 #include MDBX_CONFIG_H 40 #endif 41 42 #define LIBMDBX_INTERNALS 43 #ifdef xMDBX_TOOLS 44 #define MDBX_DEPRECATED 45 #endif /* xMDBX_TOOLS */ 46 47 #ifdef xMDBX_ALLOY 48 /* Amalgamated build */ 49 #define MDBX_INTERNAL_FUNC static 50 #define MDBX_INTERNAL_VAR static 51 #else 52 /* Non-amalgamated build */ 53 #define MDBX_INTERNAL_FUNC 54 #define MDBX_INTERNAL_VAR extern 55 #endif /* xMDBX_ALLOY */ 56 57 /*----------------------------------------------------------------------------*/ 58 59 /** Disables using GNU/Linux libc extensions. 60 * \ingroup build_option 61 * \note This option couldn't be moved to the options.h since dependant 62 * control macros/defined should be prepared before include the options.h */ 63 #ifndef MDBX_DISABLE_GNU_SOURCE 64 #define MDBX_DISABLE_GNU_SOURCE 0 65 #endif 66 #if MDBX_DISABLE_GNU_SOURCE 67 #undef _GNU_SOURCE 68 #elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE) 69 #define _GNU_SOURCE 70 #endif /* MDBX_DISABLE_GNU_SOURCE */ 71 72 /* Should be defined before any includes */ 73 #if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) && \ 74 !defined(ANDROID) 75 #define _FILE_OFFSET_BITS 64 76 #endif 77 78 #ifdef __APPLE__ 79 #define _DARWIN_C_SOURCE 80 #endif 81 82 #ifdef _MSC_VER 83 #if _MSC_FULL_VER < 190024234 84 /* Actually libmdbx was not tested with compilers older than 19.00.24234 (Visual 85 * Studio 2015 Update 3). But you could remove this #error and try to continue 86 * at your own risk. In such case please don't rise up an issues related ONLY to 87 * old compilers. 88 * 89 * NOTE: 90 * Unfortunately, there are several different builds of "Visual Studio" that 91 * are called "Visual Studio 2015 Update 3". 
92 * 93 * The 190024234 is used here because it is minimal version of Visual Studio 94 * that was used for build and testing libmdbx in recent years. Soon this 95 * value will be increased to 19.0.24241.7, since build and testing using 96 * "Visual Studio 2015" will be performed only at https://ci.appveyor.com. 97 * 98 * Please ask Microsoft (but not us) for information about version differences 99 * and how to and where you can obtain the latest "Visual Studio 2015" build 100 * with all fixes. 101 */ 102 #error \ 103 "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required." 104 #endif 105 #ifndef _CRT_SECURE_NO_WARNINGS 106 #define _CRT_SECURE_NO_WARNINGS 107 #endif /* _CRT_SECURE_NO_WARNINGS */ 108 #if _MSC_VER > 1800 109 #pragma warning(disable : 4464) /* relative include path contains '..' */ 110 #endif 111 #if _MSC_VER > 1913 112 #pragma warning(disable : 5045) /* Compiler will insert Spectre mitigation... \ 113 */ 114 #endif 115 #if _MSC_VER > 1914 116 #pragma warning( \ 117 disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ 118 producing 'defined' has undefined behavior */ 119 #endif 120 #pragma warning(disable : 4710) /* 'xyz': function not inlined */ 121 #pragma warning(disable : 4711) /* function 'xyz' selected for automatic \ 122 inline expansion */ 123 #pragma warning( \ 124 disable : 4201) /* nonstandard extension used : nameless struct / union */ 125 #pragma warning(disable : 4702) /* unreachable code */ 126 #pragma warning(disable : 4706) /* assignment within conditional expression */ 127 #pragma warning(disable : 4127) /* conditional expression is constant */ 128 #pragma warning(disable : 4324) /* 'xyz': structure was padded due to \ 129 alignment specifier */ 130 #pragma warning(disable : 4310) /* cast truncates constant value */ 131 #pragma warning( \ 132 disable : 4820) /* bytes padding added after data member for alignment */ 133 #pragma warning(disable : 4548) /* expression before 
comma has no effect; \ 134 expected expression with side - effect */ 135 #pragma warning(disable : 4366) /* the result of the unary '&' operator may be \ 136 unaligned */ 137 #pragma warning(disable : 4200) /* nonstandard extension used: zero-sized \ 138 array in struct/union */ 139 #pragma warning(disable : 4204) /* nonstandard extension used: non-constant \ 140 aggregate initializer */ 141 #pragma warning( \ 142 disable : 4505) /* unreferenced local function has been removed */ 143 #endif /* _MSC_VER (warnings) */ 144 145 #if defined(__GNUC__) && __GNUC__ < 9 146 #pragma GCC diagnostic ignored "-Wattributes" 147 #endif /* GCC < 9 */ 148 149 #if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \ 150 !defined(__USE_MINGW_ANSI_STDIO) 151 #define __USE_MINGW_ANSI_STDIO 1 152 #endif /* __USE_MINGW_ANSI_STDIO */ 153 154 #include "mdbx.h" 155 /* 156 * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru> 157 * and other libmdbx authors: please see AUTHORS file. 158 * All rights reserved. 159 * 160 * Redistribution and use in source and binary forms, with or without 161 * modification, are permitted only as authorized by the OpenLDAP 162 * Public License. 163 * 164 * A copy of this license is available in the file LICENSE in the 165 * top-level directory of the distribution or, alternatively, at 166 * <http://www.OpenLDAP.org/license.html>. 167 */ 168 169 170 /*----------------------------------------------------------------------------*/ 171 /* Microsoft compiler generates a lot of warning for self includes... */ 172 173 #ifdef _MSC_VER 174 #pragma warning(push, 1) 175 #pragma warning(disable : 4548) /* expression before comma has no effect; \ 176 expected expression with side - effect */ 177 #pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ 178 * semantics are not enabled. 
Specify /EHsc */ 179 #pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ 180 * mode specified; termination on exception is \ 181 * not guaranteed. Specify /EHsc */ 182 #endif /* _MSC_VER (warnings) */ 183 184 #if defined(_WIN32) || defined(_WIN64) 185 #if !defined(_CRT_SECURE_NO_WARNINGS) 186 #define _CRT_SECURE_NO_WARNINGS 187 #endif /* _CRT_SECURE_NO_WARNINGS */ 188 #if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && \ 189 !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT 190 #define _NO_CRT_STDIO_INLINE 191 #endif 192 #elif !defined(_POSIX_C_SOURCE) 193 #define _POSIX_C_SOURCE 200809L 194 #endif /* Windows */ 195 196 /*----------------------------------------------------------------------------*/ 197 /* basic C99 includes */ 198 #include <inttypes.h> 199 #include <stddef.h> 200 #include <stdint.h> 201 #include <stdlib.h> 202 203 #include <assert.h> 204 #include <fcntl.h> 205 #include <limits.h> 206 #include <stdio.h> 207 #include <string.h> 208 #include <time.h> 209 210 #if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF 211 #error \ 212 "Sanity checking failed: Two's complement, reasonably sized integer types" 213 #endif 214 215 #ifndef SSIZE_MAX 216 #define SSIZE_MAX INTPTR_MAX 217 #endif 218 219 #if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul 220 #define MDBX_WORDBITS 64 221 #else 222 #define MDBX_WORDBITS 32 223 #endif /* MDBX_WORDBITS */ 224 225 /*----------------------------------------------------------------------------*/ 226 /* feature testing */ 227 228 #ifndef __has_warning 229 #define __has_warning(x) (0) 230 #endif 231 232 #ifndef __has_include 233 #define __has_include(x) (0) 234 #endif 235 236 #ifndef __has_feature 237 #define __has_feature(x) (0) 238 #endif 239 240 #ifndef __has_extension 241 #define __has_extension(x) (0) 242 #endif 243 244 #if __has_feature(thread_sanitizer) 245 #define __SANITIZE_THREAD__ 1 246 #endif 247 248 #if __has_feature(address_sanitizer) 249 
#define __SANITIZE_ADDRESS__ 1 250 #endif 251 252 #ifndef __GNUC_PREREQ 253 #if defined(__GNUC__) && defined(__GNUC_MINOR__) 254 #define __GNUC_PREREQ(maj, min) \ 255 ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) 256 #else 257 #define __GNUC_PREREQ(maj, min) (0) 258 #endif 259 #endif /* __GNUC_PREREQ */ 260 261 #ifndef __CLANG_PREREQ 262 #ifdef __clang__ 263 #define __CLANG_PREREQ(maj, min) \ 264 ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min)) 265 #else 266 #define __CLANG_PREREQ(maj, min) (0) 267 #endif 268 #endif /* __CLANG_PREREQ */ 269 270 #ifndef __GLIBC_PREREQ 271 #if defined(__GLIBC__) && defined(__GLIBC_MINOR__) 272 #define __GLIBC_PREREQ(maj, min) \ 273 ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min)) 274 #else 275 #define __GLIBC_PREREQ(maj, min) (0) 276 #endif 277 #endif /* __GLIBC_PREREQ */ 278 279 /*----------------------------------------------------------------------------*/ 280 /* C11' alignas() */ 281 282 #if __has_include(<stdalign.h>) 283 #include <stdalign.h> 284 #endif 285 #if defined(alignas) || defined(__cplusplus) 286 #define MDBX_ALIGNAS(N) alignas(N) 287 #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L 288 #define MDBX_ALIGNAS(N) _Alignas(N) 289 #elif defined(_MSC_VER) 290 #define MDBX_ALIGNAS(N) __declspec(align(N)) 291 #elif __has_attribute(__aligned__) || defined(__GNUC__) 292 #define MDBX_ALIGNAS(N) __attribute__((__aligned__(N))) 293 #else 294 #error "FIXME: Required alignas() or equivalent." 
295 #endif /* MDBX_ALIGNAS */ 296 297 /*----------------------------------------------------------------------------*/ 298 /* Systems macros and includes */ 299 300 #ifndef __extern_C 301 #ifdef __cplusplus 302 #define __extern_C extern "C" 303 #else 304 #define __extern_C 305 #endif 306 #endif /* __extern_C */ 307 308 #if !defined(nullptr) && !defined(__cplusplus) || \ 309 (__cplusplus < 201103L && !defined(_MSC_VER)) 310 #define nullptr NULL 311 #endif 312 313 #if defined(__APPLE__) || defined(_DARWIN_C_SOURCE) 314 #include <AvailabilityMacros.h> 315 #include <TargetConditionals.h> 316 #ifndef MAC_OS_X_VERSION_MIN_REQUIRED 317 #define MAC_OS_X_VERSION_MIN_REQUIRED 1070 /* Mac OS X 10.7, 2011 */ 318 #endif 319 #endif /* Apple OSX & iOS */ 320 321 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ 322 defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) || \ 323 defined(__APPLE__) || defined(__MACH__) 324 #include <sys/cdefs.h> 325 #include <sys/mount.h> 326 #include <sys/sysctl.h> 327 #include <sys/types.h> 328 #if defined(__FreeBSD__) || defined(__DragonFly__) 329 #include <vm/vm_param.h> 330 #elif defined(__OpenBSD__) || defined(__NetBSD__) 331 #include <uvm/uvm_param.h> 332 #else 333 #define SYSCTL_LEGACY_NONCONST_MIB 334 #endif 335 #ifndef __MACH__ 336 #include <sys/vmmeter.h> 337 #endif 338 #else 339 #include <malloc.h> 340 #if !(defined(__sun) || defined(__SVR4) || defined(__svr4__) || \ 341 defined(_WIN32) || defined(_WIN64)) 342 #include <mntent.h> 343 #endif /* !Solaris */ 344 #endif /* !xBSD */ 345 346 #if defined(__FreeBSD__) || __has_include(<malloc_np.h>) 347 #include <malloc_np.h> 348 #endif 349 350 #if defined(__APPLE__) || defined(__MACH__) || __has_include(<malloc/malloc.h>) 351 #include <malloc/malloc.h> 352 #endif /* MacOS */ 353 354 #if defined(__MACH__) 355 #include <mach/host_info.h> 356 #include <mach/mach_host.h> 357 #include <mach/mach_port.h> 358 #include <uuid/uuid.h> 359 #endif 360 361 #if 
defined(__linux__) || defined(__gnu_linux__) 362 #include <sched.h> 363 #include <sys/sendfile.h> 364 #include <sys/statfs.h> 365 #endif /* Linux */ 366 367 #ifndef _XOPEN_SOURCE 368 #define _XOPEN_SOURCE 0 369 #endif 370 371 #ifndef _XOPEN_SOURCE_EXTENDED 372 #define _XOPEN_SOURCE_EXTENDED 0 373 #else 374 #include <utmpx.h> 375 #endif /* _XOPEN_SOURCE_EXTENDED */ 376 377 #if defined(__sun) || defined(__SVR4) || defined(__svr4__) 378 #include <kstat.h> 379 #include <sys/mnttab.h> 380 /* On Solaris, it's easier to add a missing prototype rather than find a 381 * combination of #defines that break nothing. */ 382 __extern_C key_t ftok(const char *, int); 383 #endif /* SunOS/Solaris */ 384 385 #if defined(_WIN32) || defined(_WIN64) /*-------------------------------------*/ 386 387 #ifndef _WIN32_WINNT 388 #define _WIN32_WINNT 0x0601 /* Windows 7 */ 389 #elif _WIN32_WINNT < 0x0500 390 #error At least 'Windows 2000' API is required for libmdbx. 391 #endif /* _WIN32_WINNT */ 392 #if (defined(__MINGW32__) || defined(__MINGW64__)) && \ 393 !defined(__USE_MINGW_ANSI_STDIO) 394 #define __USE_MINGW_ANSI_STDIO 1 395 #endif /* MinGW */ 396 #ifndef WIN32_LEAN_AND_MEAN 397 #define WIN32_LEAN_AND_MEAN 398 #endif /* WIN32_LEAN_AND_MEAN */ 399 #include <excpt.h> 400 #include <tlhelp32.h> 401 #include <windows.h> 402 #include <winnt.h> 403 #include <winternl.h> 404 405 #else /*----------------------------------------------------------------------*/ 406 407 #include <unistd.h> 408 #if !defined(_POSIX_MAPPED_FILES) || _POSIX_MAPPED_FILES < 1 409 #error "libmdbx requires the _POSIX_MAPPED_FILES feature" 410 #endif /* _POSIX_MAPPED_FILES */ 411 412 #include <pthread.h> 413 #include <semaphore.h> 414 #include <signal.h> 415 #include <sys/file.h> 416 #include <sys/ipc.h> 417 #include <sys/mman.h> 418 #include <sys/param.h> 419 #include <sys/stat.h> 420 #include <sys/statvfs.h> 421 #include <sys/uio.h> 422 423 #endif /*---------------------------------------------------------------------*/ 

/* Android: extra headers and a sanity check that the file-offset width
 * matches the machine word width chosen by MDBX_WORDBITS. */
#if defined(__ANDROID_API__) || defined(ANDROID)
#include <android/log.h>
#if __ANDROID_API__ >= 21
#include <sys/sendfile.h>
#endif
#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS != MDBX_WORDBITS
#error "_FILE_OFFSET_BITS != MDBX_WORDBITS" (_FILE_OFFSET_BITS != MDBX_WORDBITS)
#elif defined(__FILE_OFFSET_BITS) && __FILE_OFFSET_BITS != MDBX_WORDBITS
#error "__FILE_OFFSET_BITS != MDBX_WORDBITS" (__FILE_OFFSET_BITS != MDBX_WORDBITS)
#endif
#endif /* Android */

#if defined(HAVE_SYS_STAT_H) || __has_include(<sys/stat.h>)
#include <sys/stat.h>
#endif
#if defined(HAVE_SYS_TYPES_H) || __has_include(<sys/types.h>)
#include <sys/types.h>
#endif
#if defined(HAVE_SYS_FILE_H) || __has_include(<sys/file.h>)
#include <sys/file.h>
#endif

/*----------------------------------------------------------------------------*/
/* Byteorder */

/* Every term below is a defined() test, so logical `||` is used throughout
 * (a stray bitwise `|` and a duplicated __INTEL__ test were removed; the
 * value of the expression is unchanged). */
#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
    defined(i486) || defined(__i486) || defined(__i486__) || \
    defined(i586) || defined(__i586) || defined(__i586__) || defined(i686) || \
    defined(__i686) || defined(__i686__) || defined(_M_IX86) || \
    defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \
    defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \
    defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
    defined(_M_AMD64) || defined(__IA32__)
#ifndef __ia32__
/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && \
    (defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || \
     defined(_M_X64) || defined(_M_AMD64))
/* LY: define trusty __amd64__ for all AMD64/x86-64 arch */
#define __amd64__ 1
#endif /* __amd64__ */
#endif /* all x86 */

/* If the compiler does not predefine the GCC-style byte-order macros, try the
 * platform's endian header first, then fall back to per-architecture macros. */
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \
    !defined(__ORDER_BIG_ENDIAN__)

#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || \
    defined(__ANDROID_API__) || defined(HAVE_ENDIAN_H) || __has_include(<endian.h>)
#include <endian.h>
#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || \
    defined(HAVE_MACHINE_ENDIAN_H) || __has_include(<machine/endian.h>)
#include <machine/endian.h>
#elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>)
#include <sys/isa_defs.h>
#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) || \
    (__has_include(<sys/types.h>) && __has_include(<sys/endian.h>))
#include <sys/endian.h>
#include <sys/types.h>
#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || \
    defined(__NetBSD__) || defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>)
#include <sys/param.h>
#endif /* OS */

#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER
#elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
#define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN
#define __BYTE_ORDER__ _BYTE_ORDER
#else
#define __ORDER_LITTLE_ENDIAN__ 1234
#define __ORDER_BIG_ENDIAN__ 4321

#if defined(__LITTLE_ENDIAN__) || \
    (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \
    defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \
    defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \
    defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \
    defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \
    defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \
    defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \
    defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \
    defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \
    defined(__WINDOWS__)
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__

#elif defined(__BIG_ENDIAN__) || \
    (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \
    defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \
    defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \
    defined(__m68k__) || defined(M68000) || defined(__hppa__) || \
    defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \
    defined(__sparc) || defined(__370__) || defined(__THW_370__) || \
    defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__)
#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__

#else
#error __BYTE_ORDER__ should be defined.
#endif /* Arch */

#endif
#endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */

/*----------------------------------------------------------------------------*/
/* Availability of CMOV or equivalent */

#ifndef MDBX_HAVE_CMOV
#if defined(__e2k__)
#define MDBX_HAVE_CMOV 1
#elif defined(__thumb2__) || defined(__thumb2)
#define MDBX_HAVE_CMOV 1
#elif defined(__thumb__) || defined(__thumb) || defined(__TARGET_ARCH_THUMB)
#define MDBX_HAVE_CMOV 0
#elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || \
    defined(__aarch64) || defined(__arm__) || defined(__arm) || \
    defined(__CC_ARM)
#define MDBX_HAVE_CMOV 1
#elif (defined(__riscv__) || defined(__riscv64)) && \
    (defined(__riscv_b) || defined(__riscv_bitmanip))
#define MDBX_HAVE_CMOV 1
#elif defined(i686) || defined(__i686) || defined(__i686__) || \
    (defined(_M_IX86) && _M_IX86 > 600) || defined(__x86_64) || \
    defined(__x86_64__) || defined(__amd64__) || defined(__amd64) || \
    defined(_M_X64) || defined(_M_AMD64)
#define MDBX_HAVE_CMOV 1
#else
#define MDBX_HAVE_CMOV 0
#endif
#endif /* MDBX_HAVE_CMOV */

/*----------------------------------------------------------------------------*/
/* Compiler's includes for builtins/intrinsics */

#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#include <intrin.h>
#elif __GNUC_PREREQ(4, 4) || defined(__clang__)
#if defined(__e2k__)
#include <e2kintrin.h>
#include <x86intrin.h>
#endif /* __e2k__ */
#if defined(__ia32__)
#include <cpuid.h>
#include <x86intrin.h>
#endif /* __ia32__ */
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
#include <mbarrier.h>
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \
    (defined(HP_IA64) || defined(__ia64))
#include <machine/sys/inline.h>
#elif defined(__IBMC__) && defined(__powerpc)
#include <atomic.h>
#elif defined(_AIX)
#include <builtins.h>
#include <sys/atomic_op.h>
#elif (defined(__osf__) && defined(__DECC)) || defined(__alpha)
#include <c_asm.h>
#include <machine/builtins.h>
#elif defined(__MWERKS__)
/* CodeWarrior - troubles ? */
#pragma gcc_extensions
#elif defined(__SNC__)
/* Sony PS3 - troubles ? */
#elif defined(__hppa__) || defined(__hppa)
#include <machine/inline.h>
#else
#error Unsupported C compiler, please use GNU C 4.4 or newer
#endif /* Compiler */

/* Statement that does nothing (MSVC provides __noop itself). */
#if !defined(__noop) && !defined(_MSC_VER)
#define __noop \
  do { \
  } while (0)
#endif /* __noop */

#if defined(__fallthrough) && \
    (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__))
#undef __fallthrough
#endif /* __fallthrough workaround for MinGW */

/* NOTE(review): Clang documents `__clang__` as expanding to 1, so the
 * `__clang__ > 4` term below looks like it can never be true and
 * `__clang_major__` may have been intended. Left as-is because changing it
 * alters which attribute spelling C++ builds get - confirm before touching. */
#ifndef __fallthrough
#if defined(__cplusplus) && (__has_cpp_attribute(fallthrough) && \
                             (!defined(__clang__) || __clang__ > 4)) || \
    __cplusplus >= 201703L
#define __fallthrough [[fallthrough]]
#elif __GNUC_PREREQ(8, 0) && defined(__cplusplus) && __cplusplus >= 201103L
#define __fallthrough [[fallthrough]]
#elif __GNUC_PREREQ(7, 0) && \
    (!defined(__LCC__) || (__LCC__ == 124 && __LCC_MINOR__ >= 12) || \
     (__LCC__ == 125 && __LCC_MINOR__ >= 5) || (__LCC__ >= 126))
#define __fallthrough __attribute__((__fallthrough__))
#elif defined(__clang__) && defined(__cplusplus) && __cplusplus >= 201103L && \
    __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
#define __fallthrough [[clang::fallthrough]]
#else
#define __fallthrough
#endif
#endif /* __fallthrough */

#ifndef __unreachable
#if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable)
#define __unreachable() __builtin_unreachable()
#elif defined(_MSC_VER)
#define __unreachable() __assume(0)
#else
#define __unreachable() \
  do { \
  } while (1)
#endif
#endif /* __unreachable */

#ifndef __prefetch
#if defined(__GNUC__) || defined(__clang__) || __has_builtin(__builtin_prefetch)
#define __prefetch(ptr) __builtin_prefetch(ptr)
#else
#define __prefetch(ptr) \
  do { \
    (void)(ptr); \
  } while (0)
#endif
#endif /* __prefetch */

#ifndef offsetof
#define offsetof(type, member) __builtin_offsetof(type, member)
#endif /* offsetof */

/* Pointer to the enclosing `type` object given a pointer to its `member`. */
#ifndef container_of
#define container_of(ptr, type, member) \
  ((type *)((char *)(ptr) - offsetof(type, member)))
#endif /* container_of */

/*----------------------------------------------------------------------------*/

#ifndef __always_inline
#if defined(__GNUC__) || __has_attribute(__always_inline__)
#define __always_inline __inline __attribute__((__always_inline__))
#elif defined(_MSC_VER)
#define __always_inline __forceinline
#else
#define __always_inline
#endif
#endif /* __always_inline */

#ifndef __noinline
#if defined(__GNUC__) || __has_attribute(__noinline__)
#define __noinline __attribute__((__noinline__))
#elif defined(_MSC_VER)
#define __noinline __declspec(noinline)
#else
#define __noinline
#endif
#endif /* __noinline */

#ifndef __must_check_result
#if defined(__GNUC__) || __has_attribute(__warn_unused_result__)
#define __must_check_result __attribute__((__warn_unused_result__))
#else
#define __must_check_result
#endif
#endif /* __must_check_result */

#ifndef __nothrow
#if defined(__cplusplus)
#if __cplusplus < 201703L
#define __nothrow throw()
#else
#define __nothrow noexcept(true)
#endif /* __cplusplus */
#elif defined(__GNUC__) || __has_attribute(__nothrow__)
#define __nothrow __attribute__((__nothrow__))
#elif defined(_MSC_VER) && defined(__cplusplus)
#define __nothrow __declspec(nothrow)
#else
#define __nothrow
#endif
#endif /* __nothrow */

#ifndef __hidden
#if defined(__GNUC__) || __has_attribute(__visibility__)
#define __hidden __attribute__((__visibility__("hidden")))
#else
#define __hidden
#endif
#endif /* __hidden */

/* Per-function optimization level; expands to nothing in non-optimized builds
 * or when the compiler has no `optimize` attribute. */
#ifndef __optimize
#if defined(__OPTIMIZE__)
#if (defined(__GNUC__) && !defined(__clang__)) || __has_attribute(__optimize__)
#define __optimize(ops) __attribute__((__optimize__(ops)))
#else
#define __optimize(ops)
#endif
#else
#define __optimize(ops)
#endif
#endif /* __optimize */

#ifndef __hot
#if defined(__OPTIMIZE__)
#if defined(__e2k__)
#define __hot __attribute__((__hot__)) __optimize(3)
/* Attribute name fixed from the typo `__hot_` (which no compiler provides, so
 * the test was vacuous); now parallel to the `cold` test in __cold below. */
#elif defined(__clang__) && !__has_attribute(__hot__) && \
    __has_attribute(__section__) && \
    (defined(__linux__) || defined(__gnu_linux__))
/* just put frequently used functions in separate section */
#define __hot __attribute__((__section__("text.hot"))) __optimize("O3")
#elif defined(__LCC__)
#define __hot __attribute__((__hot__, __optimize__("Ofast,O4")))
#elif defined(__GNUC__) || __has_attribute(__hot__)
#define __hot __attribute__((__hot__)) __optimize("O3")
#else
#define __hot __optimize("O3")
#endif
#else
#define __hot
#endif
#endif /* __hot */

#ifndef __cold
#if defined(__OPTIMIZE__)
#if defined(__e2k__)
#define __cold __attribute__((__cold__)) __optimize(1)
#elif defined(__clang__) && !__has_attribute(cold) && \
    __has_attribute(__section__) && \
    (defined(__linux__) || defined(__gnu_linux__))
/* just put infrequently used functions in separate section */
#define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os")
#elif defined(__LCC__)
/* This branch must define __cold: it previously redefined __hot, leaving
 * __cold undefined when building with LCC. */
#define __cold __attribute__((__cold__, __optimize__("Osize")))
#elif defined(__GNUC__) || __has_attribute(cold)
#define __cold __attribute__((__cold__)) __optimize("Os")
#else
#define __cold __optimize("Os")
#endif
#else
#define __cold
#endif
#endif /* __cold */

#ifndef __flatten
#if defined(__OPTIMIZE__) && (defined(__GNUC__) || __has_attribute(__flatten__))
#define __flatten __attribute__((__flatten__))
#else
#define __flatten
#endif
#endif /* __flatten */

/* Branch-prediction hints; both normalise the condition to 0/1 via `!!`. */
#ifndef likely
#if (defined(__GNUC__) || __has_builtin(__builtin_expect)) && \
    !defined(__COVERITY__)
#define likely(cond) __builtin_expect(!!(cond), 1)
#else
#define likely(x) (!!(x))
#endif
#endif /* likely */

#ifndef unlikely
#if (defined(__GNUC__) || __has_builtin(__builtin_expect)) && \
    !defined(__COVERITY__)
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#else
#define unlikely(x) (!!(x))
#endif
#endif /* unlikely */

#ifndef __anonymous_struct_extension__
#if defined(__GNUC__)
#define __anonymous_struct_extension__ __extension__
#else
#define __anonymous_struct_extension__
#endif
#endif /* __anonymous_struct_extension__ */

#ifndef expect_with_probability
#if defined(__builtin_expect_with_probability) || \
    __has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0)
#define expect_with_probability(expr, value, prob) \
  __builtin_expect_with_probability(expr, value, prob)
#else
#define expect_with_probability(expr, value, prob) (expr)
#endif
#endif /* expect_with_probability */

#ifndef MDBX_WEAK_IMPORT_ATTRIBUTE
#ifdef WEAK_IMPORT_ATTRIBUTE
#define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE
#elif __has_attribute(__weak__) && __has_attribute(__weak_import__)
#define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__, __weak_import__))
#elif __has_attribute(__weak__) || \
    (defined(__GNUC__) && __GNUC__ >= 4 && defined(__ELF__))
#define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__))
#else
#define MDBX_WEAK_IMPORT_ATTRIBUTE
#endif
#endif /* MDBX_WEAK_IMPORT_ATTRIBUTE */

/*----------------------------------------------------------------------------*/

/* Valgrind client requests: real ones when MDBX_USE_VALGRIND is set,
 * otherwise empty/zero stubs so call sites compile unchanged. */
#if defined(MDBX_USE_VALGRIND)
#include <valgrind/memcheck.h>
#ifndef VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
/* LY: available since Valgrind 3.10 */
#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#endif
#elif !defined(RUNNING_ON_VALGRIND)
#define VALGRIND_CREATE_MEMPOOL(h, r, z)
#define VALGRIND_DESTROY_MEMPOOL(h)
#define VALGRIND_MEMPOOL_TRIM(h, a, s)
#define VALGRIND_MEMPOOL_ALLOC(h, a, s)
#define VALGRIND_MEMPOOL_FREE(h, a)
#define VALGRIND_MEMPOOL_CHANGE(h, a, b, s)
#define VALGRIND_MAKE_MEM_NOACCESS(a, s)
#define VALGRIND_MAKE_MEM_DEFINED(a, s)
#define VALGRIND_MAKE_MEM_UNDEFINED(a, s)
#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a, s) (0)
#define VALGRIND_CHECK_MEM_IS_DEFINED(a, s) (0)
#define RUNNING_ON_VALGRIND (0)
#endif /* MDBX_USE_VALGRIND */

/* AddressSanitizer poisoning macros, stubbed to evaluate-and-discard their
 * arguments when ASan is not enabled. */
#ifdef __SANITIZE_ADDRESS__
#include <sanitizer/asan_interface.h>
#elif !defined(ASAN_POISON_MEMORY_REGION)
#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#endif /* __SANITIZE_ADDRESS__ */

/*----------------------------------------------------------------------------*/

/* Element count of a true array (not a pointer). The argument is
 * parenthesised so that expression arguments expand correctly. */
#ifndef ARRAY_LENGTH
#ifdef __cplusplus
template <typename T, size_t N> char (&__ArraySizeHelper(T (&array)[N]))[N];
#define ARRAY_LENGTH(array) (sizeof(::__ArraySizeHelper(array)))
#else
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
#endif
#endif /* ARRAY_LENGTH */

/* One-past-the-end pointer of a true array. */
#ifndef ARRAY_END
#define ARRAY_END(array) (&(array)[ARRAY_LENGTH(array)])
#endif /* ARRAY_END */

#define CONCAT(a, b) a##b
#define XCONCAT(a, b) CONCAT(a, b)

/* Packs four byte-sized values into one 32-bit value, `a` most significant. */
#define MDBX_TETRAD(a, b, c, d) \
  ((uint32_t)(a) << 24 | (uint32_t)(b) << 16 | (uint32_t)(c) << 8 | (d))

#define MDBX_STRING_TETRAD(str) MDBX_TETRAD(str[0], str[1], str[2], str[3])

#define FIXME "FIXME: " __FILE__ ", " MDBX_STRINGIFY(__LINE__) 884 885 #ifndef STATIC_ASSERT_MSG 886 #if defined(static_assert) 887 #define STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg) 888 #elif defined(_STATIC_ASSERT) 889 #define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr) 890 #elif defined(_MSC_VER) 891 #include <crtdbg.h> 892 #define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr) 893 #elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ 894 __has_feature(c_static_assert) 895 #define STATIC_ASSERT_MSG(expr, msg) _Static_assert(expr, msg) 896 #else 897 #define STATIC_ASSERT_MSG(expr, msg) \ 898 switch (0) { \ 899 case 0: \ 900 case (expr):; \ 901 } 902 #endif 903 #endif /* STATIC_ASSERT */ 904 905 #ifndef STATIC_ASSERT 906 #define STATIC_ASSERT(expr) STATIC_ASSERT_MSG(expr, #expr) 907 #endif 908 909 #ifndef __Wpedantic_format_voidptr 910 MDBX_MAYBE_UNUSED MDBX_PURE_FUNCTION static __inline const void * 911 __Wpedantic_format_voidptr(const void *ptr) { 912 return ptr; 913 } 914 #define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG) 915 #endif /* __Wpedantic_format_voidptr */ 916 917 #if defined(__GNUC__) && !__GNUC_PREREQ(4, 2) 918 /* Actually libmdbx was not tested with compilers older than GCC 4.2. 919 * But you could ignore this warning at your own risk. 920 * In such case please don't rise up an issues related ONLY to old compilers. 921 */ 922 #warning "libmdbx required GCC >= 4.2" 923 #endif 924 925 #if defined(__clang__) && !__CLANG_PREREQ(3, 8) 926 /* Actually libmdbx was not tested with CLANG older than 3.8. 927 * But you could ignore this warning at your own risk. 928 * In such case please don't rise up an issues related ONLY to old compilers. 929 */ 930 #warning "libmdbx required CLANG >= 3.8" 931 #endif 932 933 #if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) 934 /* Actually libmdbx was not tested with something older than glibc 2.12. 935 * But you could ignore this warning at your own risk. 
936 * In such case please don't rise up an issues related ONLY to old systems. 937 */ 938 #warning "libmdbx was only tested with GLIBC >= 2.12." 939 #endif 940 941 #ifdef __SANITIZE_THREAD__ 942 #warning \ 943 "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues." 944 #endif /* __SANITIZE_THREAD__ */ 945 946 #if __has_warning("-Wnested-anon-types") 947 #if defined(__clang__) 948 #pragma clang diagnostic ignored "-Wnested-anon-types" 949 #elif defined(__GNUC__) 950 #pragma GCC diagnostic ignored "-Wnested-anon-types" 951 #else 952 #pragma warning disable "nested-anon-types" 953 #endif 954 #endif /* -Wnested-anon-types */ 955 956 #if __has_warning("-Wconstant-logical-operand") 957 #if defined(__clang__) 958 #pragma clang diagnostic ignored "-Wconstant-logical-operand" 959 #elif defined(__GNUC__) 960 #pragma GCC diagnostic ignored "-Wconstant-logical-operand" 961 #else 962 #pragma warning disable "constant-logical-operand" 963 #endif 964 #endif /* -Wconstant-logical-operand */ 965 966 #if defined(__LCC__) && (__LCC__ <= 121) 967 /* bug #2798 */ 968 #pragma diag_suppress alignment_reduction_ignored 969 #elif defined(__ICC) 970 #pragma warning(disable : 3453 1366) 971 #elif __has_warning("-Walignment-reduction-ignored") 972 #if defined(__clang__) 973 #pragma clang diagnostic ignored "-Walignment-reduction-ignored" 974 #elif defined(__GNUC__) 975 #pragma GCC diagnostic ignored "-Walignment-reduction-ignored" 976 #else 977 #pragma warning disable "alignment-reduction-ignored" 978 #endif 979 #endif /* -Walignment-reduction-ignored */ 980 981 #ifndef MDBX_EXCLUDE_FOR_GPROF 982 #ifdef ENABLE_GPROF 983 #define MDBX_EXCLUDE_FOR_GPROF \ 984 __attribute__((__no_instrument_function__, \ 985 __no_profile_instrument_function__)) 986 #else 987 #define MDBX_EXCLUDE_FOR_GPROF 988 #endif /* ENABLE_GPROF */ 989 #endif /* MDBX_EXCLUDE_FOR_GPROF */ 990 991 #ifdef __cplusplus 992 extern "C" { 993 #endif 994 995 /* 
https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ 996 997 /* 998 * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru> 999 * and other libmdbx authors: please see AUTHORS file. 1000 * All rights reserved. 1001 * 1002 * Redistribution and use in source and binary forms, with or without 1003 * modification, are permitted only as authorized by the OpenLDAP 1004 * Public License. 1005 * 1006 * A copy of this license is available in the file LICENSE in the 1007 * top-level directory of the distribution or, alternatively, at 1008 * <http://www.OpenLDAP.org/license.html>. 1009 */ 1010 1011 1012 /*----------------------------------------------------------------------------*/ 1013 /* C11 Atomics */ 1014 1015 #if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>) 1016 #include <cstdatomic> 1017 #define MDBX_HAVE_C11ATOMICS 1018 #elif !defined(__cplusplus) && \ 1019 (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) && \ 1020 !defined(__STDC_NO_ATOMICS__) && \ 1021 (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \ 1022 !(defined(__GNUC__) || defined(__clang__))) 1023 #include <stdatomic.h> 1024 #define MDBX_HAVE_C11ATOMICS 1025 #elif defined(__GNUC__) || defined(__clang__) 1026 #elif defined(_MSC_VER) 1027 #pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */ 1028 #pragma warning(disable : 4133) /* 'function': incompatible types - from \ 1029 'size_t' to 'LONGLONG' */ 1030 #pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \ 1031 'std::size_t', possible loss of data */ 1032 #pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \ 1033 'long', possible loss of data */ 1034 #pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange) 1035 #pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64) 1036 #elif defined(__APPLE__) 1037 #include <libkern/OSAtomic.h> 1038 #else 1039 #error FIXME atomic-ops 1040 #endif 1041 1042 
/*----------------------------------------------------------------------------*/ 1043 /* Memory/Compiler barriers, cache coherence */ 1044 1045 #if __has_include(<sys/cachectl.h>) 1046 #include <sys/cachectl.h> 1047 #elif defined(__mips) || defined(__mips__) || defined(__mips64) || \ 1048 defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ 1049 defined(__MWERKS__) || defined(__sgi) 1050 /* MIPS should have explicit cache control */ 1051 #include <sys/cachectl.h> 1052 #endif 1053 1054 MDBX_MAYBE_UNUSED static __inline void osal_compiler_barrier(void) { 1055 #if defined(__clang__) || defined(__GNUC__) 1056 __asm__ __volatile__("" ::: "memory"); 1057 #elif defined(_MSC_VER) 1058 _ReadWriteBarrier(); 1059 #elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */ 1060 __memory_barrier(); 1061 #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun) 1062 __compiler_barrier(); 1063 #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \ 1064 (defined(HP_IA64) || defined(__ia64)) 1065 _Asm_sched_fence(/* LY: no-arg meaning 'all expect ALU', e.g. 0x3D3D */); 1066 #elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || \ 1067 defined(__ppc64__) || defined(__powerpc64__) 1068 __fence(); 1069 #else 1070 #error "Could not guess the kind of compiler, please report to us." 
1071 #endif 1072 } 1073 1074 MDBX_MAYBE_UNUSED static __inline void osal_memory_barrier(void) { 1075 #ifdef MDBX_HAVE_C11ATOMICS 1076 atomic_thread_fence(memory_order_seq_cst); 1077 #elif defined(__ATOMIC_SEQ_CST) 1078 #ifdef __clang__ 1079 __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); 1080 #else 1081 __atomic_thread_fence(__ATOMIC_SEQ_CST); 1082 #endif 1083 #elif defined(__clang__) || defined(__GNUC__) 1084 __sync_synchronize(); 1085 #elif defined(_WIN32) || defined(_WIN64) 1086 MemoryBarrier(); 1087 #elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */ 1088 #if defined(__ia32__) 1089 _mm_mfence(); 1090 #else 1091 __mf(); 1092 #endif 1093 #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun) 1094 __machine_rw_barrier(); 1095 #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \ 1096 (defined(HP_IA64) || defined(__ia64)) 1097 _Asm_mf(); 1098 #elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || \ 1099 defined(__ppc64__) || defined(__powerpc64__) 1100 __lwsync(); 1101 #else 1102 #error "Could not guess the kind of compiler, please report to us." 1103 #endif 1104 } 1105 1106 /*----------------------------------------------------------------------------*/ 1107 /* system-depended definitions */ 1108 1109 #if defined(_WIN32) || defined(_WIN64) 1110 #define HAVE_SYS_STAT_H 1111 #define HAVE_SYS_TYPES_H 1112 typedef HANDLE osal_thread_t; 1113 typedef unsigned osal_thread_key_t; 1114 #define MAP_FAILED NULL 1115 #define HIGH_DWORD(v) ((DWORD)((sizeof(v) > 4) ? 
((uint64_t)(v) >> 32) : 0)) 1116 #define THREAD_CALL WINAPI 1117 #define THREAD_RESULT DWORD 1118 typedef struct { 1119 HANDLE mutex; 1120 HANDLE event[2]; 1121 } osal_condpair_t; 1122 typedef CRITICAL_SECTION osal_fastmutex_t; 1123 1124 #if !defined(_MSC_VER) && !defined(__try) 1125 #define __try 1126 #define __except(COND) if (false) 1127 #endif /* stub for MSVC's __try/__except */ 1128 1129 #if MDBX_WITHOUT_MSVC_CRT 1130 1131 #ifndef osal_malloc 1132 static inline void *osal_malloc(size_t bytes) { 1133 return HeapAlloc(GetProcessHeap(), 0, bytes); 1134 } 1135 #endif /* osal_malloc */ 1136 1137 #ifndef osal_calloc 1138 static inline void *osal_calloc(size_t nelem, size_t size) { 1139 return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, nelem * size); 1140 } 1141 #endif /* osal_calloc */ 1142 1143 #ifndef osal_realloc 1144 static inline void *osal_realloc(void *ptr, size_t bytes) { 1145 return ptr ? HeapReAlloc(GetProcessHeap(), 0, ptr, bytes) 1146 : HeapAlloc(GetProcessHeap(), 0, bytes); 1147 } 1148 #endif /* osal_realloc */ 1149 1150 #ifndef osal_free 1151 static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } 1152 #endif /* osal_free */ 1153 1154 #else /* MDBX_WITHOUT_MSVC_CRT */ 1155 1156 #define osal_malloc malloc 1157 #define osal_calloc calloc 1158 #define osal_realloc realloc 1159 #define osal_free free 1160 #define osal_strdup _strdup 1161 1162 #endif /* MDBX_WITHOUT_MSVC_CRT */ 1163 1164 #ifndef snprintf 1165 #define snprintf _snprintf /* ntdll */ 1166 #endif 1167 1168 #ifndef vsnprintf 1169 #define vsnprintf _vsnprintf /* ntdll */ 1170 #endif 1171 1172 MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src, 1173 size_t src_n); 1174 1175 #else /*----------------------------------------------------------------------*/ 1176 1177 typedef pthread_t osal_thread_t; 1178 typedef pthread_key_t osal_thread_key_t; 1179 #define INVALID_HANDLE_VALUE (-1) 1180 #define THREAD_CALL 1181 #define THREAD_RESULT void * 1182 
typedef struct { 1183 pthread_mutex_t mutex; 1184 pthread_cond_t cond[2]; 1185 } osal_condpair_t; 1186 typedef pthread_mutex_t osal_fastmutex_t; 1187 #define osal_malloc malloc 1188 #define osal_calloc calloc 1189 #define osal_realloc realloc 1190 #define osal_free free 1191 #define osal_strdup strdup 1192 #endif /* Platform */ 1193 1194 #if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size) 1195 /* malloc_usable_size() already provided */ 1196 #elif defined(__APPLE__) 1197 #define malloc_usable_size(ptr) malloc_size(ptr) 1198 #elif defined(_MSC_VER) && !MDBX_WITHOUT_MSVC_CRT 1199 #define malloc_usable_size(ptr) _msize(ptr) 1200 #endif /* malloc_usable_size */ 1201 1202 /*----------------------------------------------------------------------------*/ 1203 /* OS abstraction layer stuff */ 1204 1205 /* Get the size of a memory page for the system. 1206 * This is the basic size that the platform's memory manager uses, and is 1207 * fundamental to the use of memory-mapped files. */ 1208 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t 1209 osal_syspagesize(void) { 1210 #if defined(_WIN32) || defined(_WIN64) 1211 SYSTEM_INFO si; 1212 GetSystemInfo(&si); 1213 return si.dwPageSize; 1214 #else 1215 return sysconf(_SC_PAGE_SIZE); 1216 #endif 1217 } 1218 1219 #if defined(_WIN32) || defined(_WIN64) 1220 typedef wchar_t pathchar_t; 1221 #else 1222 typedef char pathchar_t; 1223 #endif 1224 1225 typedef struct osal_mmap_param { 1226 union { 1227 void *address; 1228 uint8_t *dxb; 1229 struct MDBX_lockinfo *lck; 1230 }; 1231 mdbx_filehandle_t fd; 1232 size_t limit; /* mapping length, but NOT a size of file nor DB */ 1233 size_t current; /* mapped region size, i.e. 
the size of file and DB */ 1234 uint64_t filesize /* in-process cache of a file size */; 1235 #if defined(_WIN32) || defined(_WIN64) 1236 HANDLE section; /* memory-mapped section handle */ 1237 #endif 1238 } osal_mmap_t; 1239 1240 typedef union bin128 { 1241 __anonymous_struct_extension__ struct { uint64_t x, y; }; 1242 __anonymous_struct_extension__ struct { uint32_t a, b, c, d; }; 1243 } bin128_t; 1244 1245 #if defined(_WIN32) || defined(_WIN64) 1246 typedef union osal_srwlock { 1247 __anonymous_struct_extension__ struct { 1248 long volatile readerCount; 1249 long volatile writerCount; 1250 }; 1251 RTL_SRWLOCK native; 1252 } osal_srwlock_t; 1253 #endif /* Windows */ 1254 1255 #ifndef __cplusplus 1256 1257 /*----------------------------------------------------------------------------*/ 1258 /* libc compatibility stuff */ 1259 1260 #if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) && \ 1261 (defined(_GNU_SOURCE) || defined(_BSD_SOURCE)) 1262 #define osal_asprintf asprintf 1263 #define osal_vasprintf vasprintf 1264 #else 1265 MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC 1266 MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...); 1267 MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, va_list ap); 1268 #endif 1269 1270 #if !defined(MADV_DODUMP) && defined(MADV_CORE) 1271 #define MADV_DODUMP MADV_CORE 1272 #endif /* MADV_CORE -> MADV_DODUMP */ 1273 1274 #if !defined(MADV_DONTDUMP) && defined(MADV_NOCORE) 1275 #define MADV_DONTDUMP MADV_NOCORE 1276 #endif /* MADV_NOCORE -> MADV_DONTDUMP */ 1277 1278 MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny); 1279 MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny); 1280 1281 /* max bytes to write in one call */ 1282 #if defined(_WIN32) || defined(_WIN64) 1283 #define MAX_WRITE UINT32_C(0x01000000) 1284 #else 1285 #define MAX_WRITE UINT32_C(0x3fff0000) 1286 #endif 1287 1288 #if defined(__linux__) || defined(__gnu_linux__) 1289 MDBX_INTERNAL_VAR uint32_t 
linux_kernel_version; 1290 MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */; 1291 #endif /* Linux */ 1292 1293 #ifndef osal_strdup 1294 LIBMDBX_API char *osal_strdup(const char *str); 1295 #endif 1296 1297 MDBX_MAYBE_UNUSED static __inline int osal_get_errno(void) { 1298 #if defined(_WIN32) || defined(_WIN64) 1299 DWORD rc = GetLastError(); 1300 #else 1301 int rc = errno; 1302 #endif 1303 return rc; 1304 } 1305 1306 #ifndef osal_memalign_alloc 1307 MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes, 1308 void **result); 1309 #endif 1310 #ifndef osal_memalign_free 1311 MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr); 1312 #endif 1313 1314 MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair); 1315 MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair); 1316 MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair); 1317 MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair, 1318 bool part); 1319 MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, bool part); 1320 MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair); 1321 1322 MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex); 1323 MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex); 1324 MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex); 1325 MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex); 1326 1327 MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, 1328 int iovcnt, uint64_t offset, 1329 size_t expected_written); 1330 MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count, 1331 uint64_t offset); 1332 MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf, 1333 size_t count, uint64_t offset); 1334 MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf, 1335 size_t count); 1336 1337 
MDBX_INTERNAL_FUNC int 1338 osal_thread_create(osal_thread_t *thread, 1339 THREAD_RESULT(THREAD_CALL *start_routine)(void *), 1340 void *arg); 1341 MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread); 1342 1343 enum osal_syncmode_bits { 1344 MDBX_SYNC_NONE = 0, 1345 MDBX_SYNC_DATA = 1, 1346 MDBX_SYNC_SIZE = 2, 1347 MDBX_SYNC_IODQ = 4 1348 }; 1349 1350 MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd, 1351 const enum osal_syncmode_bits mode_bits); 1352 MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length); 1353 MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos); 1354 MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length); 1355 1356 enum osal_openfile_purpose { 1357 MDBX_OPEN_DXB_READ = 0, 1358 MDBX_OPEN_DXB_LAZY = 1, 1359 MDBX_OPEN_DXB_DSYNC = 2, 1360 MDBX_OPEN_LCK = 3, 1361 MDBX_OPEN_COPY = 4, 1362 MDBX_OPEN_DELETE = 5 1363 }; 1364 1365 MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose, 1366 const MDBX_env *env, 1367 const pathchar_t *pathname, 1368 mdbx_filehandle_t *fd, 1369 mdbx_mode_t unix_mode_bits); 1370 MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd); 1371 MDBX_INTERNAL_FUNC int osal_removefile(const pathchar_t *pathname); 1372 MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname); 1373 MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd); 1374 MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait); 1375 1376 #define MMAP_OPTION_TRUNCATE 1 1377 #define MMAP_OPTION_SEMAPHORE 2 1378 MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, 1379 const size_t must, const size_t limit, 1380 const unsigned options); 1381 MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map); 1382 #define MDBX_MRESIZE_MAY_MOVE 0x00000100 1383 #define MDBX_MRESIZE_MAY_UNMAP 0x00000200 1384 MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map, 1385 size_t size, size_t limit); 1386 #if defined(_WIN32) || 
defined(_WIN64) 1387 typedef struct { 1388 unsigned limit, count; 1389 HANDLE handles[31]; 1390 } mdbx_handle_array_t; 1391 MDBX_INTERNAL_FUNC int 1392 osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array); 1393 MDBX_INTERNAL_FUNC int 1394 osal_resume_threads_after_remap(mdbx_handle_array_t *array); 1395 #endif /* Windows */ 1396 MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset, 1397 size_t length, 1398 enum osal_syncmode_bits mode_bits); 1399 MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, 1400 const pathchar_t *pathname, 1401 int err); 1402 1403 MDBX_MAYBE_UNUSED static __inline uint32_t osal_getpid(void) { 1404 STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t)); 1405 #if defined(_WIN32) || defined(_WIN64) 1406 return GetCurrentProcessId(); 1407 #else 1408 STATIC_ASSERT(sizeof(pid_t) <= sizeof(uint32_t)); 1409 return getpid(); 1410 #endif 1411 } 1412 1413 MDBX_MAYBE_UNUSED static __inline uintptr_t osal_thread_self(void) { 1414 mdbx_tid_t thunk; 1415 STATIC_ASSERT(sizeof(uintptr_t) >= sizeof(thunk)); 1416 #if defined(_WIN32) || defined(_WIN64) 1417 thunk = GetCurrentThreadId(); 1418 #else 1419 thunk = pthread_self(); 1420 #endif 1421 return (uintptr_t)thunk; 1422 } 1423 1424 #if !defined(_WIN32) && !defined(_WIN64) 1425 #if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC) 1426 MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void); 1427 #else 1428 static __inline int osal_check_tid4bionic(void) { return 0; } 1429 #endif /* __ANDROID_API__ || ANDROID) || BIONIC */ 1430 1431 MDBX_MAYBE_UNUSED static __inline int 1432 osal_pthread_mutex_lock(pthread_mutex_t *mutex) { 1433 int err = osal_check_tid4bionic(); 1434 return unlikely(err) ? 
err : pthread_mutex_lock(mutex); 1435 } 1436 #endif /* !Windows */ 1437 1438 MDBX_INTERNAL_FUNC uint64_t osal_monotime(void); 1439 MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16); 1440 MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime); 1441 1442 MDBX_INTERNAL_FUNC bin128_t osal_bootid(void); 1443 /*----------------------------------------------------------------------------*/ 1444 /* lck stuff */ 1445 1446 /// \brief Initialization of synchronization primitives linked with MDBX_env 1447 /// instance both in LCK-file and within the current process. 1448 /// \param 1449 /// global_uniqueness_flag = true - denotes that there are no other processes 1450 /// working with DB and LCK-file. Thus the function MUST initialize 1451 /// shared synchronization objects in memory-mapped LCK-file. 1452 /// global_uniqueness_flag = false - denotes that at least one process is 1453 /// already working with DB and LCK-file, including the case when DB 1454 /// has already been opened in the current process. Thus the function 1455 /// MUST NOT initialize shared synchronization objects in memory-mapped 1456 /// LCK-file that are already in use. 1457 /// \return Error code or zero on success. 1458 MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, 1459 MDBX_env *inprocess_neighbor, 1460 int global_uniqueness_flag); 1461 1462 /// \brief Disconnects from shared interprocess objects and destructs 1463 /// synchronization objects linked with MDBX_env instance 1464 /// within the current process. 1465 /// \param 1466 /// inprocess_neighbor = NULL - if the current process does not have other 1467 /// instances of MDBX_env linked with the DB being closed. 1468 /// Thus the function MUST check for other processes working with DB or 1469 /// LCK-file, and keep or destroy shared synchronization objects in 1470 /// memory-mapped LCK-file depending on the result. 
1471 /// inprocess_neighbor = not-NULL - pointer to another instance of MDBX_env 1472 /// (anyone of there is several) working with DB or LCK-file within the 1473 /// current process. Thus the function MUST NOT try to acquire exclusive 1474 /// lock and/or try to destruct shared synchronization objects linked with 1475 /// DB or LCK-file. Moreover, the implementation MUST ensure correct work 1476 /// of other instances of MDBX_env within the current process, e.g. 1477 /// restore POSIX-fcntl locks after the closing of file descriptors. 1478 /// \return Error code (MDBX_PANIC) or zero on success. 1479 MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, 1480 MDBX_env *inprocess_neighbor); 1481 1482 /// \brief Connects to shared interprocess locking objects and tries to acquire 1483 /// the maximum lock level (shared if exclusive is not available) 1484 /// Depending on implementation or/and platform (Windows) this function may 1485 /// acquire the non-OS super-level lock (e.g. for shared synchronization 1486 /// objects initialization), which will be downgraded to OS-exclusive or 1487 /// shared via explicit calling of osal_lck_downgrade(). 1488 /// \return 1489 /// MDBX_RESULT_TRUE (-1) - if an exclusive lock was acquired and thus 1490 /// the current process is the first and only after the last use of DB. 1491 /// MDBX_RESULT_FALSE (0) - if a shared lock was acquired and thus 1492 /// DB has already been opened and now is used by other processes. 1493 /// Otherwise (not 0 and not -1) - error code. 1494 MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env); 1495 1496 /// \brief Downgrades the level of initially acquired lock to 1497 /// operational level specified by argument. The reson for such downgrade: 1498 /// - unblocking of other processes that are waiting for access, i.e. 1499 /// if (env->me_flags & MDBX_EXCLUSIVE) != 0, then other processes 1500 /// should be made aware that access is unavailable rather than 1501 /// wait for it. 
1502 /// - freeing locks that interfere file operation (especially for Windows) 1503 /// (env->me_flags & MDBX_EXCLUSIVE) == 0 - downgrade to shared lock. 1504 /// (env->me_flags & MDBX_EXCLUSIVE) != 0 - downgrade to exclusive 1505 /// operational lock. 1506 /// \return Error code or zero on success 1507 MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env); 1508 1509 /// \brief Locks LCK-file or/and table of readers for (de)registering. 1510 /// \return Error code or zero on success 1511 MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env); 1512 1513 /// \brief Unlocks LCK-file or/and table of readers after (de)registering. 1514 MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env); 1515 1516 /// \brief Acquires lock for DB change (on writing transaction start) 1517 /// Reading transactions will not be blocked. 1518 /// Declared as LIBMDBX_API because it is used in mdbx_chk. 1519 /// \return Error code or zero on success 1520 LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); 1521 1522 /// \brief Releases lock once DB changes is made (after writing transaction 1523 /// has finished). 1524 /// Declared as LIBMDBX_API because it is used in mdbx_chk. 1525 LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); 1526 1527 /// \brief Sets alive-flag of reader presence (indicative lock) for PID of 1528 /// the current process. The function does no more than needed for 1529 /// the correct working of osal_rpid_check() in other processes. 1530 /// \return Error code or zero on success 1531 MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env); 1532 1533 /// \brief Resets alive-flag of reader presence (indicative lock) 1534 /// for PID of the current process. The function does no more than needed 1535 /// for the correct working of osal_rpid_check() in other processes. 
1536 /// \return Error code or zero on success 1537 MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env); 1538 1539 /// \brief Checks for reading process status with the given pid with help of 1540 /// alive-flag of presence (indicative lock) or using another way. 1541 /// \return 1542 /// MDBX_RESULT_TRUE (-1) - if the reader process with the given PID is alive 1543 /// and working with DB (indicative lock is present). 1544 /// MDBX_RESULT_FALSE (0) - if the reader process with the given PID is absent 1545 /// or not working with DB (indicative lock is not present). 1546 /// Otherwise (not 0 and not -1) - error code. 1547 MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid); 1548 1549 #if defined(_WIN32) || defined(_WIN64) 1550 1551 #define OSAL_MB2WIDE(FROM, TO) \ 1552 do { \ 1553 const char *const from_tmp = (FROM); \ 1554 const size_t from_mblen = strlen(from_tmp); \ 1555 const size_t to_wlen = osal_mb2w(nullptr, 0, from_tmp, from_mblen); \ 1556 if (to_wlen < 1 || to_wlen > /* MAX_PATH */ INT16_MAX) \ 1557 return ERROR_INVALID_NAME; \ 1558 wchar_t *const to_tmp = _alloca((to_wlen + 1) * sizeof(wchar_t)); \ 1559 if (to_wlen + 1 != \ 1560 osal_mb2w(to_tmp, to_wlen + 1, from_tmp, from_mblen + 1)) \ 1561 return ERROR_INVALID_NAME; \ 1562 (TO) = to_tmp; \ 1563 } while (0) 1564 1565 typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *); 1566 MDBX_INTERNAL_VAR osal_srwlock_t_function osal_srwlock_Init, 1567 osal_srwlock_AcquireShared, osal_srwlock_ReleaseShared, 1568 osal_srwlock_AcquireExclusive, osal_srwlock_ReleaseExclusive; 1569 1570 #if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */ 1571 typedef enum _FILE_INFO_BY_HANDLE_CLASS { 1572 FileBasicInfo, 1573 FileStandardInfo, 1574 FileNameInfo, 1575 FileRenameInfo, 1576 FileDispositionInfo, 1577 FileAllocationInfo, 1578 FileEndOfFileInfo, 1579 FileStreamInfo, 1580 FileCompressionInfo, 1581 FileAttributeTagInfo, 1582 FileIdBothDirectoryInfo, 1583 FileIdBothDirectoryRestartInfo, 1584 
FileIoPriorityHintInfo, 1585 FileRemoteProtocolInfo, 1586 MaximumFileInfoByHandleClass 1587 } FILE_INFO_BY_HANDLE_CLASS, 1588 *PFILE_INFO_BY_HANDLE_CLASS; 1589 1590 typedef struct _FILE_END_OF_FILE_INFO { 1591 LARGE_INTEGER EndOfFile; 1592 } FILE_END_OF_FILE_INFO, *PFILE_END_OF_FILE_INFO; 1593 1594 #define REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK 0x00000001 1595 #define REMOTE_PROTOCOL_INFO_FLAG_OFFLINE 0x00000002 1596 1597 typedef struct _FILE_REMOTE_PROTOCOL_INFO { 1598 USHORT StructureVersion; 1599 USHORT StructureSize; 1600 DWORD Protocol; 1601 USHORT ProtocolMajorVersion; 1602 USHORT ProtocolMinorVersion; 1603 USHORT ProtocolRevision; 1604 USHORT Reserved; 1605 DWORD Flags; 1606 struct { 1607 DWORD Reserved[8]; 1608 } GenericReserved; 1609 struct { 1610 DWORD Reserved[16]; 1611 } ProtocolSpecificReserved; 1612 } FILE_REMOTE_PROTOCOL_INFO, *PFILE_REMOTE_PROTOCOL_INFO; 1613 1614 #endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */ 1615 1616 typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)( 1617 _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, 1618 _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); 1619 MDBX_INTERNAL_VAR MDBX_GetFileInformationByHandleEx 1620 mdbx_GetFileInformationByHandleEx; 1621 1622 typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( 1623 _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, 1624 _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber, 1625 _Out_opt_ LPDWORD lpMaximumComponentLength, 1626 _Out_opt_ LPDWORD lpFileSystemFlags, 1627 _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); 1628 MDBX_INTERNAL_VAR MDBX_GetVolumeInformationByHandleW 1629 mdbx_GetVolumeInformationByHandleW; 1630 1631 typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, 1632 _Out_ LPWSTR lpszFilePath, 1633 _In_ DWORD cchFilePath, 1634 _In_ DWORD dwFlags); 1635 MDBX_INTERNAL_VAR MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW; 1636 1637 typedef 
BOOL(WINAPI *MDBX_SetFileInformationByHandle)( 1638 _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, 1639 _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); 1640 MDBX_INTERNAL_VAR MDBX_SetFileInformationByHandle 1641 mdbx_SetFileInformationByHandle; 1642 1643 typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( 1644 IN HANDLE FileHandle, IN OUT HANDLE Event, 1645 IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext, 1646 OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode, 1647 IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, 1648 OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); 1649 MDBX_INTERNAL_VAR MDBX_NtFsControlFile mdbx_NtFsControlFile; 1650 1651 typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void); 1652 MDBX_INTERNAL_VAR MDBX_GetTickCount64 mdbx_GetTickCount64; 1653 1654 #if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8 1655 typedef struct _WIN32_MEMORY_RANGE_ENTRY { 1656 PVOID VirtualAddress; 1657 SIZE_T NumberOfBytes; 1658 } WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY; 1659 #endif /* Windows 8.x */ 1660 1661 typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)( 1662 HANDLE hProcess, ULONG_PTR NumberOfEntries, 1663 PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); 1664 MDBX_INTERNAL_VAR MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; 1665 1666 typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT; 1667 1668 typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, 1669 IN PLARGE_INTEGER NewSectionSize); 1670 MDBX_INTERNAL_VAR MDBX_NtExtendSection mdbx_NtExtendSection; 1671 1672 static __inline bool mdbx_RunningUnderWine(void) { 1673 return !mdbx_NtExtendSection; 1674 } 1675 1676 typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, 1677 LPCSTR lpValue, DWORD dwFlags, 1678 LPDWORD pdwType, PVOID pvData, 1679 LPDWORD pcbData); 1680 MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA; 1681 1682 
NTSYSAPI ULONG RtlRandomEx(PULONG Seed); 1683 1684 #endif /* Windows */ 1685 1686 #endif /* !__cplusplus */ 1687 1688 /*----------------------------------------------------------------------------*/ 1689 1690 #if defined(_MSC_VER) && _MSC_VER >= 1900 1691 /* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros 1692 * for internal format-args checker. */ 1693 #undef PRIuPTR 1694 #undef PRIiPTR 1695 #undef PRIdPTR 1696 #undef PRIxPTR 1697 #define PRIuPTR "Iu" 1698 #define PRIiPTR "Ii" 1699 #define PRIdPTR "Id" 1700 #define PRIxPTR "Ix" 1701 #define PRIuSIZE "zu" 1702 #define PRIiSIZE "zi" 1703 #define PRIdSIZE "zd" 1704 #define PRIxSIZE "zx" 1705 #endif /* fix PRI*PTR for _MSC_VER */ 1706 1707 #ifndef PRIuSIZE 1708 #define PRIuSIZE PRIuPTR 1709 #define PRIiSIZE PRIiPTR 1710 #define PRIdSIZE PRIdPTR 1711 #define PRIxSIZE PRIxPTR 1712 #endif /* PRI*SIZE macros for MSVC */ 1713 1714 #ifdef _MSC_VER 1715 #pragma warning(pop) 1716 #endif 1717 1718 #define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY) 1719 #if defined(xMDBX_TOOLS) 1720 extern LIBMDBX_API const char *const mdbx_sourcery_anchor; 1721 #endif 1722 1723 /******************************************************************************* 1724 ******************************************************************************* 1725 ******************************************************************************* 1726 * 1727 * 1728 * #### ##### ##### # #### # # #### 1729 * # # # # # # # # ## # # 1730 * # # # # # # # # # # # #### 1731 * # # ##### # # # # # # # # 1732 * # # # # # # # # ## # # 1733 * #### # # # #### # # #### 1734 * 1735 * 1736 */ 1737 1738 /** \defgroup build_option Build options 1739 * The libmdbx build options. 
1740 @{ */ 1741 1742 /** Using fcntl(F_FULLFSYNC) with 5-10 times slowdown */ 1743 #define MDBX_OSX_WANNA_DURABILITY 0 1744 /** Using fsync() with a chance of data loss on power failure */ 1745 #define MDBX_OSX_WANNA_SPEED 1 1746 1747 #ifndef MDBX_OSX_SPEED_INSTEADOF_DURABILITY 1748 /** Chooses \ref MDBX_OSX_WANNA_DURABILITY or \ref MDBX_OSX_WANNA_SPEED 1749 * for OSX & iOS; defaults to MDBX_OSX_WANNA_DURABILITY. */ 1750 #define MDBX_OSX_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY 1751 #endif /* MDBX_OSX_SPEED_INSTEADOF_DURABILITY */ 1752 1753 /** Controls checking the PID to detect reuse of a DB environment after fork() */ 1754 #ifndef MDBX_ENV_CHECKPID 1755 #if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64) 1756 /* The PID check can be omitted: 1757 * - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork() 1758 * mapped pages will not be available to the child process. 1759 * - on Windows, where fork() is not available. */ 1760 #define MDBX_ENV_CHECKPID 0 1761 #else 1762 #define MDBX_ENV_CHECKPID 1 1763 #endif 1764 #define MDBX_ENV_CHECKPID_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_ENV_CHECKPID) 1765 #else 1766 #define MDBX_ENV_CHECKPID_CONFIG MDBX_STRINGIFY(MDBX_ENV_CHECKPID) 1767 #endif /* MDBX_ENV_CHECKPID */ 1768 1769 /** Controls checking the transaction owner thread to detect misuse of transactions from 1770 * other threads. Enabled (1) by default. */ 1771 #ifndef MDBX_TXN_CHECKOWNER 1772 #define MDBX_TXN_CHECKOWNER 1 1773 #define MDBX_TXN_CHECKOWNER_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER) 1774 #else 1775 #define MDBX_TXN_CHECKOWNER_CONFIG MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER) 1776 #endif /* MDBX_TXN_CHECKOWNER */ 1777 1778 /** Does the system have a battery-backed Real-Time Clock, or just a fake one.
*/ 1779 #ifndef MDBX_TRUST_RTC 1780 #if defined(__linux__) || defined(__gnu_linux__) || defined(__NetBSD__) || \ 1781 defined(__OpenBSD__) 1782 #define MDBX_TRUST_RTC 0 /* a lot of embedded systems have a fake RTC */ 1783 #else 1784 #define MDBX_TRUST_RTC 1 1785 #endif 1786 #define MDBX_TRUST_RTC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TRUST_RTC) 1787 #else 1788 #define MDBX_TRUST_RTC_CONFIG MDBX_STRINGIFY(MDBX_TRUST_RTC) 1789 #endif /* MDBX_TRUST_RTC */ 1790 1791 /** Controls online database auto-compactification during write-transactions. Enabled (1) by default. */ 1792 #ifndef MDBX_ENABLE_REFUND 1793 #define MDBX_ENABLE_REFUND 1 1794 #elif !(MDBX_ENABLE_REFUND == 0 || MDBX_ENABLE_REFUND == 1) 1795 #error MDBX_ENABLE_REFUND must be defined as 0 or 1 1796 #endif /* MDBX_ENABLE_REFUND */ 1797 1798 /** Controls gathering statistics for page operations. Enabled (1) by default. */ 1799 #ifndef MDBX_ENABLE_PGOP_STAT 1800 #define MDBX_ENABLE_PGOP_STAT 1 1801 #elif !(MDBX_ENABLE_PGOP_STAT == 0 || MDBX_ENABLE_PGOP_STAT == 1) 1802 #error MDBX_ENABLE_PGOP_STAT must be defined as 0 or 1 1803 #endif /* MDBX_ENABLE_PGOP_STAT */ 1804 1805 /** Enables chunking of the long list of retired pages during the commit of huge transactions 1806 * to avoid using sequences of pages. Enabled by default when MDBX_WORDBITS >= 64. */ 1807 #ifndef MDBX_ENABLE_BIGFOOT 1808 #if MDBX_WORDBITS >= 64 || defined(DOXYGEN) 1809 #define MDBX_ENABLE_BIGFOOT 1 1810 #else 1811 #define MDBX_ENABLE_BIGFOOT 0 1812 #endif 1813 #elif !(MDBX_ENABLE_BIGFOOT == 0 || MDBX_ENABLE_BIGFOOT == 1) 1814 #error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1 1815 #endif /* MDBX_ENABLE_BIGFOOT */ 1816 1817 /** Controls use of POSIX madvise() hints and friends. Enabled (1) by default. */ 1818 #ifndef MDBX_ENABLE_MADVISE 1819 #define MDBX_ENABLE_MADVISE 1 1820 #elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1) 1821 #error MDBX_ENABLE_MADVISE must be defined as 0 or 1 1822 #endif /* MDBX_ENABLE_MADVISE */ 1823 1824 /** Disables some checks to reduce overhead, which lowers the probability of 1825 * detecting database corruption to a level closer to LMDB.
*/ 1826 #ifndef MDBX_DISABLE_VALIDATION 1827 #define MDBX_DISABLE_VALIDATION 0 1828 #elif !(MDBX_DISABLE_VALIDATION == 0 || MDBX_DISABLE_VALIDATION == 1) 1829 #error MDBX_DISABLE_VALIDATION must be defined as 0 or 1 1830 #endif /* MDBX_DISABLE_VALIDATION */ 1831 /* Boolean option (0 or 1, default 1). NOTE(review): presumably controls preallocating scratch space for radix-sorting page-number lists (PNL) - confirm at the point of use. */ 1832 #ifndef MDBX_PNL_PREALLOC_FOR_RADIXSORT 1833 #define MDBX_PNL_PREALLOC_FOR_RADIXSORT 1 1834 #elif !(MDBX_PNL_PREALLOC_FOR_RADIXSORT == 0 || \ 1835 MDBX_PNL_PREALLOC_FOR_RADIXSORT == 1) 1836 #error MDBX_PNL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1 1837 #endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ 1838 /* Boolean option (0 or 1, default 1). NOTE(review): presumably the same as above but for dirty-page lists (DPL) - confirm at the point of use. */ 1839 #ifndef MDBX_DPL_PREALLOC_FOR_RADIXSORT 1840 #define MDBX_DPL_PREALLOC_FOR_RADIXSORT 1 1841 #elif !(MDBX_DPL_PREALLOC_FOR_RADIXSORT == 0 || \ 1842 MDBX_DPL_PREALLOC_FOR_RADIXSORT == 1) 1843 #error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1 1844 #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */ 1845 1846 /** Basically, this build-option is a TODO. Guess it should be replaced 1847 * with MDBX_ENABLE_WRITEMAP_SPILLING with the following variants (the original note announced three, but only two are listed): 1848 * 0/OFF = Don't track dirty pages at all and don't spill them. 1849 * This should be the default on Linux and maybe other systems 1850 * (not sure: Darwin/OSX, FreeBSD, Windows 10) where the kernel provides 1851 * proper LRU tracking and async writing on demand. 1852 * 1/ON = Lite tracking of dirty pages but with LRU labels and explicit 1853 * spilling with msync(MS_ASYNC). */ 1854 #ifndef MDBX_FAKE_SPILL_WRITEMAP 1855 #if defined(__linux__) || defined(__gnu_linux__) 1856 #define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */ 1857 #else 1858 #define MDBX_FAKE_SPILL_WRITEMAP 0 1859 #endif 1860 #elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1) 1861 #error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1 1862 #endif /* MDBX_FAKE_SPILL_WRITEMAP */ 1863 1864 /** Controls sort order of internal page number lists. 1865 * This is mostly an experimental/advanced option, not intended for regular MDBX users.
1866 * \warning The database format depends on this option, so libmdbx builds with 1867 * different values of it are incompatible. */ 1868 #ifndef MDBX_PNL_ASCENDING 1869 #define MDBX_PNL_ASCENDING 0 1870 #elif !(MDBX_PNL_ASCENDING == 0 || MDBX_PNL_ASCENDING == 1) 1871 #error MDBX_PNL_ASCENDING must be defined as 0 or 1 1872 #endif /* MDBX_PNL_ASCENDING */ 1873 1874 /** Avoid dependence on the MSVC CRT and use ntdll.dll instead. Enabled (1) by default. */ 1875 #ifndef MDBX_WITHOUT_MSVC_CRT 1876 #define MDBX_WITHOUT_MSVC_CRT 1 1877 #elif !(MDBX_WITHOUT_MSVC_CRT == 0 || MDBX_WITHOUT_MSVC_CRT == 1) 1878 #error MDBX_WITHOUT_MSVC_CRT must be defined as 0 or 1 1879 #endif /* MDBX_WITHOUT_MSVC_CRT */ 1880 1881 /** Size in bytes of the buffer used while copying an environment/database file. Defaults to 1048576; must be within 65536..1073741824 and a multiple of 65536. */ 1882 #ifndef MDBX_ENVCOPY_WRITEBUF 1883 #define MDBX_ENVCOPY_WRITEBUF 1048576u 1884 #elif MDBX_ENVCOPY_WRITEBUF < 65536u || MDBX_ENVCOPY_WRITEBUF > 1073741824u || \ 1885 MDBX_ENVCOPY_WRITEBUF % 65536u 1886 #error MDBX_ENVCOPY_WRITEBUF must be defined in range 65536..1073741824 and be multiple of 65536 1887 #endif /* MDBX_ENVCOPY_WRITEBUF */ 1888 1889 /** Forces assertion checking. Disabled (0) by default. */ 1890 #ifndef MDBX_FORCE_ASSERTIONS 1891 #define MDBX_FORCE_ASSERTIONS 0 1892 #elif !(MDBX_FORCE_ASSERTIONS == 0 || MDBX_FORCE_ASSERTIONS == 1) 1893 #error MDBX_FORCE_ASSERTIONS must be defined as 0 or 1 1894 #endif /* MDBX_FORCE_ASSERTIONS */ 1895 1896 /** Presumed malloc size overhead for each allocation 1897 * to adjust allocations to be more aligned.
*/ 1898 #ifndef MDBX_ASSUME_MALLOC_OVERHEAD 1899 #ifdef __SIZEOF_POINTER__ 1900 #define MDBX_ASSUME_MALLOC_OVERHEAD (__SIZEOF_POINTER__ * 2u) 1901 #else 1902 #define MDBX_ASSUME_MALLOC_OVERHEAD (sizeof(void *) * 2u) /* NOTE(review): a sizeof expression is not a preprocessor constant, so with this fallback the macro cannot be tested in an #if directive */ 1903 #endif 1904 #elif MDBX_ASSUME_MALLOC_OVERHEAD < 0 || MDBX_ASSUME_MALLOC_OVERHEAD > 64 || \ 1905 MDBX_ASSUME_MALLOC_OVERHEAD % 4 1906 #error MDBX_ASSUME_MALLOC_OVERHEAD must be defined in range 0..64 and be multiple of 4 1907 #endif /* MDBX_ASSUME_MALLOC_OVERHEAD */ 1908 1909 /** If defined then enables integration with Valgrind, 1910 * a memory analyzing tool. The empty #ifndef block below is only a documentation placeholder. */ 1911 #ifndef MDBX_USE_VALGRIND 1912 #endif /* MDBX_USE_VALGRIND */ 1913 1914 /** If defined then enables use of C11 atomics, 1915 * otherwise their availability is detected automatically. The empty #ifndef block below is only a documentation placeholder. */ 1916 #ifndef MDBX_HAVE_C11ATOMICS 1917 #endif /* MDBX_HAVE_C11ATOMICS */ 1918 1919 //------------------------------------------------------------------------------ 1920 1921 /** Win32 File Locking API for \ref MDBX_LOCKING */ 1922 #define MDBX_LOCKING_WIN32FILES -1 1923 1924 /** SystemV IPC semaphores for \ref MDBX_LOCKING */ 1925 #define MDBX_LOCKING_SYSV 5 1926 1927 /** POSIX-1 Shared anonymous semaphores for \ref MDBX_LOCKING */ 1928 #define MDBX_LOCKING_POSIX1988 1988 1929 1930 /** POSIX-2001 Shared Mutexes for \ref MDBX_LOCKING */ 1931 #define MDBX_LOCKING_POSIX2001 2001 1932 1933 /** POSIX-2008 Robust Mutexes for \ref MDBX_LOCKING */ 1934 #define MDBX_LOCKING_POSIX2008 2008 1935 1936 /** BeOS Benaphores, aka Futexes for \ref MDBX_LOCKING */ 1937 #define MDBX_LOCKING_BENAPHORE 1995 1938 1939 /** Advanced: Chooses the locking implementation (autodetection by default).
*/ 1940 #if defined(_WIN32) || defined(_WIN64) 1941 #define MDBX_LOCKING MDBX_LOCKING_WIN32FILES /* Windows always uses Win32 file locking. NOTE(review): no MDBX_LOCKING_CONFIG is defined on this branch - confirm nothing needs it on Windows */ 1942 #else 1943 #ifndef MDBX_LOCKING 1944 #if defined(_POSIX_THREAD_PROCESS_SHARED) && \ 1945 _POSIX_THREAD_PROCESS_SHARED >= 200112L && !defined(__FreeBSD__) 1946 1947 /* Some platforms define the EOWNERDEAD error code even though they 1948 * don't support Robust Mutexes. If in doubt, compile with -DMDBX_LOCKING=2001. */ 1949 #if defined(EOWNERDEAD) && _POSIX_THREAD_PROCESS_SHARED >= 200809L && \ 1950 ((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) && \ 1951 _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) || \ 1952 (defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) && \ 1953 _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) || \ 1954 defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) && \ 1955 (!defined(__GLIBC__) || \ 1956 __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */) 1957 #define MDBX_LOCKING MDBX_LOCKING_POSIX2008 1958 #else 1959 #define MDBX_LOCKING MDBX_LOCKING_POSIX2001 1960 #endif 1961 #elif defined(__sun) || defined(__SVR4) || defined(__svr4__) 1962 #define MDBX_LOCKING MDBX_LOCKING_POSIX1988 1963 #else 1964 #define MDBX_LOCKING MDBX_LOCKING_SYSV 1965 #endif 1966 #define MDBX_LOCKING_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_LOCKING) 1967 #else 1968 #define MDBX_LOCKING_CONFIG MDBX_STRINGIFY(MDBX_LOCKING) 1969 #endif /* MDBX_LOCKING */ 1970 #endif /* !Windows */ 1971 1972 /** Advanced: Using POSIX OFD-locks (autodetection by default).
*/ 1973 #ifndef MDBX_USE_OFDLOCKS 1974 #if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \ 1975 !defined(MDBX_SAFE4QEMU) && \ 1976 !defined(__sun) /* OFD-locks are broken on Solaris */ 1977 #define MDBX_USE_OFDLOCKS 1 1978 #else 1979 #define MDBX_USE_OFDLOCKS 0 1980 #endif 1981 #define MDBX_USE_OFDLOCKS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_OFDLOCKS) 1982 #else 1983 #define MDBX_USE_OFDLOCKS_CONFIG MDBX_STRINGIFY(MDBX_USE_OFDLOCKS) 1984 #endif /* MDBX_USE_OFDLOCKS */ 1985 1986 /** Advanced: Using sendfile() syscall (autodetection by default). Auto-enabled on non-Android Linux and on Android API level 21 or newer. */ 1987 #ifndef MDBX_USE_SENDFILE 1988 #if ((defined(__linux__) || defined(__gnu_linux__)) && \ 1989 !defined(__ANDROID_API__)) || \ 1990 (defined(__ANDROID_API__) && __ANDROID_API__ >= 21) 1991 #define MDBX_USE_SENDFILE 1 1992 #else 1993 #define MDBX_USE_SENDFILE 0 1994 #endif 1995 #endif /* MDBX_USE_SENDFILE */ 1996 1997 /** Advanced: Using copy_file_range() syscall (autodetection by default). Auto-enabled with glibc 2.27 or newer when _GNU_SOURCE is defined. */ 1998 #ifndef MDBX_USE_COPYFILERANGE 1999 #if __GLIBC_PREREQ(2, 27) && defined(_GNU_SOURCE) 2000 #define MDBX_USE_COPYFILERANGE 1 2001 #else 2002 #define MDBX_USE_COPYFILERANGE 0 2003 #endif 2004 #endif /* MDBX_USE_COPYFILERANGE */ 2005 2006 /** Advanced: Using sync_file_range() syscall (autodetection by default). Auto-enabled on non-Android Linux when SYNC_FILE_RANGE_WRITE is defined, and on Android API level 26 or newer.
*/ 2007 #ifndef MDBX_USE_SYNCFILERANGE 2008 #if ((defined(__linux__) || defined(__gnu_linux__)) && \ 2009 defined(SYNC_FILE_RANGE_WRITE) && !defined(__ANDROID_API__)) || \ 2010 (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) 2011 #define MDBX_USE_SYNCFILERANGE 1 2012 #else 2013 #define MDBX_USE_SYNCFILERANGE 0 2014 #endif 2015 #endif /* MDBX_USE_SYNCFILERANGE */ 2016 2017 //------------------------------------------------------------------------------ 2018 /* Defaults to 0 on ia32, e2k and hppa targets (and for Doxygen), otherwise 1. The non-C11 osal_memory_fence() consults it to decide whether a write needs osal_memory_barrier(). */ 2019 #ifndef MDBX_CPU_WRITEBACK_INCOHERENT 2020 #if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || \ 2021 defined(__hppa__) || defined(DOXYGEN) 2022 #define MDBX_CPU_WRITEBACK_INCOHERENT 0 2023 #else 2024 #define MDBX_CPU_WRITEBACK_INCOHERENT 1 2025 #endif 2026 #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ 2027 /* Defaults to 1 only on OpenBSD. NOTE(review): presumably marks systems where writing the file is not coherent with its memory mapping - confirm at the point of use. */ 2028 #ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE 2029 #ifdef __OpenBSD__ 2030 #define MDBX_MMAP_INCOHERENT_FILE_WRITE 1 2031 #else 2032 #define MDBX_MMAP_INCOHERENT_FILE_WRITE 0 2033 #endif 2034 #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ 2035 /* Defaults to 1 on the MIPS-family targets tested below, otherwise 0. */ 2036 #ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE 2037 #if defined(__mips) || defined(__mips__) || defined(__mips64) || \ 2038 defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ 2039 defined(__MWERKS__) || defined(__sgi) 2040 /* MIPS has cache coherency issues. */ 2041 #define MDBX_MMAP_INCOHERENT_CPU_CACHE 1 2042 #else 2043 /* LY: assume no relevant mmap/dcache issues.
*/ 2044 #define MDBX_MMAP_INCOHERENT_CPU_CACHE 0 2045 #endif 2046 #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ 2047 /* Defaults to 1 when MDBX_WORDBITS >= 64 (or for Doxygen), otherwise 0. NOTE(review): presumably means plain 64-bit loads and stores are atomic on the target - confirm. */ 2048 #ifndef MDBX_64BIT_ATOMIC 2049 #if MDBX_WORDBITS >= 64 || defined(DOXYGEN) 2050 #define MDBX_64BIT_ATOMIC 1 2051 #else 2052 #define MDBX_64BIT_ATOMIC 0 2053 #endif 2054 #define MDBX_64BIT_ATOMIC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_ATOMIC) 2055 #else 2056 #define MDBX_64BIT_ATOMIC_CONFIG MDBX_STRINGIFY(MDBX_64BIT_ATOMIC) 2057 #endif /* MDBX_64BIT_ATOMIC */ 2058 /* Autodetection: 1 when an available LLONG_LOCK_FREE macro (standard, GCC or Clang flavour) is greater than 1 and 0 when it is not; without such a macro, 1 on MSVC and Apple targets, otherwise the value of MDBX_64BIT_ATOMIC. */ 2059 #ifndef MDBX_64BIT_CAS 2060 #if defined(ATOMIC_LLONG_LOCK_FREE) 2061 #if ATOMIC_LLONG_LOCK_FREE > 1 2062 #define MDBX_64BIT_CAS 1 2063 #else 2064 #define MDBX_64BIT_CAS 0 2065 #endif 2066 #elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE) 2067 #if __GCC_ATOMIC_LLONG_LOCK_FREE > 1 2068 #define MDBX_64BIT_CAS 1 2069 #else 2070 #define MDBX_64BIT_CAS 0 2071 #endif 2072 #elif defined(__CLANG_ATOMIC_LLONG_LOCK_FREE) 2073 #if __CLANG_ATOMIC_LLONG_LOCK_FREE > 1 2074 #define MDBX_64BIT_CAS 1 2075 #else 2076 #define MDBX_64BIT_CAS 0 2077 #endif 2078 #elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN) 2079 #define MDBX_64BIT_CAS 1 2080 #else 2081 #define MDBX_64BIT_CAS MDBX_64BIT_ATOMIC 2082 #endif 2083 #define MDBX_64BIT_CAS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_CAS) 2084 #else 2085 #define MDBX_64BIT_CAS_CONFIG MDBX_STRINGIFY(MDBX_64BIT_CAS) 2086 #endif /* MDBX_64BIT_CAS */ 2087 /* Widest word size in bytes for which unaligned access is assumed to be fine (0 = none); a user-supplied value of 1 is normalized to 32, meaning any unaligned access. */ 2088 #ifndef MDBX_UNALIGNED_OK 2089 #if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) || \ 2090 defined(ENABLE_UBSAN) 2091 #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */ 2092 #elif defined(__ARM_FEATURE_UNALIGNED) 2093 #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */ 2094 #elif defined(__e2k__) || defined(__elbrus__) 2095 #if __iset__ > 4 2096 #define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */ 2097 #else 2098 #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */ 2099 #endif 2100 #elif defined(__ia32__) 2101 #define MDBX_UNALIGNED_OK 8 /* ok
unaligned for 64-bit words */ 2102 #elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0) 2103 /* expecting the optimizer to handle this well; this also 2104 * hushes false-positives from UBSAN (undefined behaviour sanitizer) */ 2105 #define MDBX_UNALIGNED_OK 0 2106 #else 2107 #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */ 2108 #endif 2109 #elif MDBX_UNALIGNED_OK == 1 2110 #undef MDBX_UNALIGNED_OK 2111 #define MDBX_UNALIGNED_OK 32 /* any unaligned access allowed */ 2112 #endif /* MDBX_UNALIGNED_OK */ 2113 /* Assumed CPU cache line size in bytes: SYSTEM_CACHE_ALIGNMENT_SIZE when defined, 128 for IA-64, otherwise 64. */ 2114 #ifndef MDBX_CACHELINE_SIZE 2115 #if defined(SYSTEM_CACHE_ALIGNMENT_SIZE) 2116 #define MDBX_CACHELINE_SIZE SYSTEM_CACHE_ALIGNMENT_SIZE 2117 #elif defined(__ia64__) || defined(__ia64) || defined(_M_IA64) 2118 #define MDBX_CACHELINE_SIZE 128 2119 #else 2120 #define MDBX_CACHELINE_SIZE 64 2121 #endif 2122 #endif /* MDBX_CACHELINE_SIZE */ 2123 2124 /** @} end of build options */ 2125 /******************************************************************************* 2126 ******************************************************************************* 2127 ******************************************************************************/ 2128 2129 #ifndef DOXYGEN 2130 2131 /* If MDBX_DEBUG is undefined, derive it from NDEBUG: 0 when NDEBUG is defined, otherwise 1 */ 2132 #ifndef MDBX_DEBUG 2133 #ifdef NDEBUG 2134 #define MDBX_DEBUG 0 2135 #else 2136 #define MDBX_DEBUG 1 2137 #endif 2138 #endif /* MDBX_DEBUG */ 2139 2140 #else 2141 2142 /* !!! These are actually fake definitions for Doxygen !!! */ 2143 2144 /** Controls enabling of debugging features. 2145 * 2146 * - `MDBX_DEBUG = 0` (by default) Disables any debugging features at all, 2147 * including logging and assertion controls. 2148 * Logging level and corresponding debug flags changing 2149 * by \ref mdbx_setup_debug() will not have effect. 2150 * - `MDBX_DEBUG > 0` Enables code for the debugging features (logging, 2151 * assertions checking and internal audit).
2152 * Simultaneously sets the default logging level 2153 * to the `MDBX_DEBUG` value. 2154 * Also enables \ref MDBX_DBG_AUDIT if `MDBX_DEBUG >= 2`. 2155 * 2156 * \ingroup build_option */ 2157 #define MDBX_DEBUG 0...7 2158 2159 /** Disables using of GNU libc extensions. */ 2160 #define MDBX_DISABLE_GNU_SOURCE 0 or 1 2161 2162 #endif /* DOXYGEN */ 2163 2164 /* Undefine the NDEBUG if debugging is enforced by MDBX_DEBUG */ 2165 #if MDBX_DEBUG 2166 #undef NDEBUG 2167 #endif 2168 2169 /*----------------------------------------------------------------------------*/ 2170 /* Atomics */ 2171 2172 enum MDBX_memory_order { 2173 mo_Relaxed, 2174 mo_AcquireRelease 2175 /* , mo_SequentialConsistency */ 2176 }; 2177 2178 typedef union { 2179 volatile uint32_t weak; 2180 #ifdef MDBX_HAVE_C11ATOMICS 2181 volatile _Atomic uint32_t c11a; 2182 #endif /* MDBX_HAVE_C11ATOMICS */ 2183 } MDBX_atomic_uint32_t; 2184 2185 typedef union { 2186 volatile uint64_t weak; 2187 #if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC) 2188 volatile _Atomic uint64_t c11a; 2189 #endif 2190 #if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC 2191 __anonymous_struct_extension__ struct { 2192 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 2193 MDBX_atomic_uint32_t low, high; 2194 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 2195 MDBX_atomic_uint32_t high, low; 2196 #else 2197 #error "FIXME: Unsupported byte order" 2198 #endif /* __BYTE_ORDER__ */ 2199 }; 2200 #endif 2201 } MDBX_atomic_uint64_t; 2202 2203 #ifdef MDBX_HAVE_C11ATOMICS 2204 2205 /* Crutches for C11 atomic compiler's bugs */ 2206 #if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127 2207 #define MDBX_c11a_ro(type, ptr) (&(ptr)->weak) 2208 #define MDBX_c11a_rw(type, ptr) (&(ptr)->weak) 2209 #elif defined(__clang__) && __clang__ < 8 2210 #define MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a) 2211 #define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) 2212 #else 2213 #define 
MDBX_c11a_ro(type, ptr) (&(ptr)->c11a) 2214 #define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a) 2215 #endif /* Crutches for C11 atomic compiler's bugs */ 2216 2217 #define mo_c11_store(fence) \ 2218 (((fence) == mo_Relaxed) ? memory_order_relaxed \ 2219 : ((fence) == mo_AcquireRelease) ? memory_order_release \ 2220 : memory_order_seq_cst) 2221 #define mo_c11_load(fence) \ 2222 (((fence) == mo_Relaxed) ? memory_order_relaxed \ 2223 : ((fence) == mo_AcquireRelease) ? memory_order_acquire \ 2224 : memory_order_seq_cst) 2225 2226 #endif /* MDBX_HAVE_C11ATOMICS */ 2227 2228 #ifndef __cplusplus 2229 2230 #ifdef MDBX_HAVE_C11ATOMICS 2231 #define osal_memory_fence(order, write) \ 2232 atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order)) 2233 #else /* MDBX_HAVE_C11ATOMICS */ 2234 #define osal_memory_fence(order, write) \ 2235 do { \ 2236 osal_compiler_barrier(); \ 2237 if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed \ 2238 : mo_AcquireRelease)) \ 2239 osal_memory_barrier(); \ 2240 } while (0) 2241 #endif /* MDBX_HAVE_C11ATOMICS */ 2242 2243 #if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__) 2244 #define atomic_store32(p, value, order) \ 2245 ({ \ 2246 const uint32_t value_to_store = (value); \ 2247 atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store, \ 2248 mo_c11_store(order)); \ 2249 value_to_store; \ 2250 }) 2251 #define atomic_load32(p, order) \ 2252 atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)) 2253 #define atomic_store64(p, value, order) \ 2254 ({ \ 2255 const uint64_t value_to_store = (value); \ 2256 atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store, \ 2257 mo_c11_store(order)); \ 2258 value_to_store; \ 2259 }) 2260 #define atomic_load64(p, order) \ 2261 atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order)) 2262 #endif /* LCC && MDBX_HAVE_C11ATOMICS */ 2263 2264 #ifndef atomic_store32 2265 MDBX_MAYBE_UNUSED static __always_inline uint32_t 2266 
atomic_store32(MDBX_atomic_uint32_t *p, const uint32_t value, 2267 enum MDBX_memory_order order) { 2268 STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4); 2269 #ifdef MDBX_HAVE_C11ATOMICS 2270 assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p))); 2271 atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order)); 2272 #else /* MDBX_HAVE_C11ATOMICS */ 2273 if (order != mo_Relaxed) 2274 osal_compiler_barrier(); 2275 p->weak = value; 2276 osal_memory_fence(order, true); 2277 #endif /* MDBX_HAVE_C11ATOMICS */ 2278 return value; 2279 } 2280 #endif /* atomic_store32 */ 2281 2282 #ifndef atomic_load32 2283 MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32( 2284 const volatile MDBX_atomic_uint32_t *p, enum MDBX_memory_order order) { 2285 STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4); 2286 #ifdef MDBX_HAVE_C11ATOMICS 2287 assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p))); 2288 return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order)); 2289 #else /* MDBX_HAVE_C11ATOMICS */ 2290 osal_memory_fence(order, false); 2291 const uint32_t value = p->weak; 2292 if (order != mo_Relaxed) 2293 osal_compiler_barrier(); 2294 return value; 2295 #endif /* MDBX_HAVE_C11ATOMICS */ 2296 } 2297 #endif /* atomic_load32 */ 2298 2299 #endif /* !__cplusplus */ 2300 2301 /*----------------------------------------------------------------------------*/ 2302 /* Basic constants and types */ 2303 2304 /* A stamp that identifies a file as an MDBX file. 2305 * There's nothing special about this value other than that it is easily 2306 * recognizable, and it will reflect any byte order mismatches. */ 2307 #define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11) 2308 2309 /* FROZEN: The version number for a database's datafile format. */ 2310 #define MDBX_DATA_VERSION 3 2311 /* The version number for a database's lockfile format. */ 2312 #define MDBX_LOCK_VERSION 4 2313 2314 /* handle for the DB used to track free pages. 
*/ 2315 #define FREE_DBI 0 2316 /* handle for the default DB. */ 2317 #define MAIN_DBI 1 2318 /* Number of DBs in metapage (free and main) - also hardcoded elsewhere */ 2319 #define CORE_DBS 2 2320 2321 /* Number of meta pages - also hardcoded elsewhere */ 2322 #define NUM_METAS 3 2323 2324 /* A page number in the database. 2325 * 2326 * MDBX uses 32 bit for page numbers. This limits database 2327 * size up to 2^44 bytes, in case of 4K pages. Note that MAX_PAGENO below is 0x7FFFffff, i.e. at most 2^31 pages. */ 2328 typedef uint32_t pgno_t; 2329 typedef MDBX_atomic_uint32_t atomic_pgno_t; 2330 #define PRIaPGNO PRIu32 2331 #define MAX_PAGENO UINT32_C(0x7FFFffff) 2332 #define MIN_PAGENO NUM_METAS /* equals the number of meta pages */ 2333 /* NOTE(review): presumably 64-bit values at or above this threshold are treated as invalid; MAX_TXNID is defined as one below it. */ 2334 #define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000) 2335 2336 /* A transaction ID. NOTE(review): txnid_t is unsigned but PRIaTXN is the signed PRIi64 format, so INVALID_TXNID (UINT64_MAX) would presumably print as -1 - confirm this is intended. */ 2337 typedef uint64_t txnid_t; 2338 typedef MDBX_atomic_uint64_t atomic_txnid_t; 2339 #define PRIaTXN PRIi64 2340 #define MIN_TXNID UINT64_C(1) 2341 #define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1) 2342 #define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1) 2343 #define INVALID_TXNID UINT64_MAX 2344 /* LY: for testing non-atomic 64-bit txnid on 32-bit arches. 2345 * #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */ 2346 #ifndef xMDBX_TXNID_STEP 2347 #if MDBX_64BIT_CAS 2348 #define xMDBX_TXNID_STEP 1u 2349 #else 2350 #define xMDBX_TXNID_STEP 2u 2351 #endif 2352 #endif /* xMDBX_TXNID_STEP */ 2353 2354 /* Used for offsets within a single page. 2355 * Since memory pages are typically 4 or 8KB in size, 12-13 bits, 2356 * this is plenty. */ 2357 typedef uint16_t indx_t; 2358 2359 #define MEGABYTE ((size_t)1 << 20) 2360 2361 /*----------------------------------------------------------------------------*/ 2362 /* Core structures for database and shared memory (i.e. format definition). The structures that follow are declared under 4-byte packing. */ 2363 #pragma pack(push, 4) 2364 2365 /* Information about a single database in the environment.
*/ 2366 typedef struct MDBX_db { 2367 uint16_t md_flags; /* see mdbx_dbi_open */ 2368 uint16_t md_depth; /* depth of this tree */ 2369 uint32_t md_xsize; /* key-size for MDBX_DUPFIXED (LEAF2 pages); the FREE_DBI entry of a meta page stores the page size here, see mm_psize */ 2370 pgno_t md_root; /* the root page of this tree */ 2371 pgno_t md_branch_pages; /* number of internal pages */ 2372 pgno_t md_leaf_pages; /* number of leaf pages */ 2373 pgno_t md_overflow_pages; /* number of overflow pages */ 2374 uint64_t md_seq; /* table sequence counter */ 2375 uint64_t md_entries; /* number of data items */ 2376 uint64_t md_mod_txnid; /* txnid of last committed modification */ 2377 } MDBX_db; 2378 2379 /* database size-related parameters */ 2380 typedef struct MDBX_geo { 2381 uint16_t grow_pv; /* datafile growth step as a 16-bit packed (exponential 2382 quantized) value */ 2383 uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed 2384 (exponential quantized) value */ 2385 pgno_t lower; /* minimal size of datafile in pages */ 2386 pgno_t upper; /* maximal size of datafile in pages */ 2387 pgno_t now; /* current size of datafile in pages */ 2388 pgno_t next; /* first unused page in the datafile, 2389 but actually the file may be shorter. */ 2390 } MDBX_geo; 2391 2392 /* Meta page content. 2393 * A meta page is the start point for accessing a database snapshot. 2394 * There are NUM_METAS (3) meta pages. NOTE(review): the earlier wording here, that pages 0-1 are meta pages and transaction N writes meta page N % 2, describes a two-page scheme and does not match NUM_METAS - confirm the actual rotation rule. */ 2395 typedef struct MDBX_meta { 2396 /* Stamp identifying this as an MDBX file. 2397 * It must be set to MDBX_MAGIC with MDBX_DATA_VERSION.
*/ 2398 uint32_t mm_magic_and_version[2]; 2399 2400 /* txnid that committed this page, the first of a two-phase-update pair */ 2401 union { 2402 MDBX_atomic_uint32_t mm_txnid_a[2]; 2403 uint64_t unsafe_txnid; 2404 }; 2405 2406 uint16_t mm_extra_flags; /* extra DB flags, zero (nothing) for now */ 2407 uint8_t mm_validator_id; /* ID of checksum and page validation method, 2408 * zero (nothing) for now */ 2409 uint8_t mm_extra_pagehdr; /* extra bytes in the page header, 2410 * zero (nothing) for now */ 2411 2412 MDBX_geo mm_geo; /* database size-related parameters */ 2413 2414 MDBX_db mm_dbs[CORE_DBS]; /* first is free space, 2nd is main db */ 2415 /* The size of pages used in this DB */ 2416 #define mm_psize mm_dbs[FREE_DBI].md_xsize 2417 MDBX_canary mm_canary; 2418 /* Data signature of the meta page: a value above MDBX_DATASIGN_WEAK counts as steady for SIGN_IS_STEADY() and META_IS_STEADY(). */ 2419 #define MDBX_DATASIGN_NONE 0u 2420 #define MDBX_DATASIGN_WEAK 1u 2421 #define SIGN_IS_STEADY(sign) ((sign) > MDBX_DATASIGN_WEAK) 2422 #define META_IS_STEADY(meta) \ 2423 SIGN_IS_STEADY(unaligned_peek_u64_volatile(4, (meta)->mm_sign)) 2424 union { 2425 uint32_t mm_sign[2]; 2426 uint64_t unsafe_sign; 2427 }; 2428 2429 /* txnid that committed this page, the second of a two-phase-update pair */ 2430 MDBX_atomic_uint32_t mm_txnid_b[2]; 2431 2432 /* Number of non-meta pages which were put in GC after COW. May be 0 in case 2433 * DB was previously handled by libmdbx without corresponding feature. 2434 * This value, coupled with mr_snapshot_pages_retired, allows fast estimation 2435 * of "how much reader is restraining GC recycling". */ 2436 uint32_t mm_pages_retired[2]; 2437 2438 /* The analogue of /proc/sys/kernel/random/boot_id or similar, used to determine 2439 * whether the system was rebooted after the last use of the database files. 2440 * If there was no reboot, then there is no need to roll back to the last 2441 * steady sync point. Zeros mean that no relevant information is available 2442 * from the system.
*/ 2443 bin128_t mm_bootid; 2444 2445 } MDBX_meta; 2446 /* Switch to byte packing for the page header that follows; the pack(push, 4) earlier in this section is undone by the pack(pop) after PAGEHDRSZ. */ 2447 #pragma pack(1) 2448 2449 /* Common header for all page types. The page type depends on mp_flags. 2450 * 2451 * P_BRANCH and P_LEAF pages have unsorted 'MDBX_node's at the end, with 2452 * sorted mp_ptrs[] entries referring to them. Exception: P_LEAF2 pages 2453 * omit mp_ptrs and pack sorted MDBX_DUPFIXED values after the page header. 2454 * 2455 * P_OVERFLOW records occupy one or more contiguous pages where only the 2456 * first has a page header. They hold the real data of F_BIGDATA nodes. 2457 * 2458 * P_SUBP sub-pages are small leaf "pages" with duplicate data. 2459 * A node with flag F_DUPDATA but not F_SUBDATA contains a sub-page. 2460 * (Duplicate data can also go in sub-databases, which use normal pages.) 2461 * 2462 * P_META pages contain MDBX_meta, the start point of an MDBX snapshot. 2463 * 2464 * Each non-metapage up to MDBX_meta.mm_last_pg (NOTE(review): MDBX_meta has no such member; presumably mm_geo.next is meant - confirm) is reachable exactly once 2465 * in the snapshot: Either used by a database or listed in a GC record.
*/ 2466 typedef struct MDBX_page { 2467 union { /* Page-state predicates: each compares the page's mp_txnid with the transaction's mt_txnid or mt_front. */ 2468 #define IS_FROZEN(txn, p) ((p)->mp_txnid < (txn)->mt_txnid) 2469 #define IS_SPILLED(txn, p) ((p)->mp_txnid == (txn)->mt_txnid) 2470 #define IS_SHADOWED(txn, p) ((p)->mp_txnid > (txn)->mt_txnid) 2471 #define IS_VALID(txn, p) ((p)->mp_txnid <= (txn)->mt_front) 2472 #define IS_MODIFIABLE(txn, p) ((p)->mp_txnid == (txn)->mt_front) 2473 uint64_t 2474 mp_txnid; /* txnid which created this page, maybe zero in legacy DB */ 2475 struct MDBX_page *mp_next; /* for in-memory list of freed pages */ 2476 }; 2477 uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */ 2478 #define P_BRANCH 0x01u /* branch page */ 2479 #define P_LEAF 0x02u /* leaf page */ 2480 #define P_OVERFLOW 0x04u /* overflow page */ 2481 #define P_META 0x08u /* meta page */ 2482 #define P_LEGACY_DIRTY 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */ 2483 #define P_BAD P_LEGACY_DIRTY /* explicit flag for invalid/bad page */ 2484 #define P_LEAF2 0x20u /* for MDBX_DUPFIXED records */ 2485 #define P_SUBP 0x40u /* for MDBX_DUPSORT sub-pages */ 2486 #define P_SPILLED 0x2000u /* spilled in parent txn */ 2487 #define P_LOOSE 0x4000u /* page was dirtied then freed, can be reused */ 2488 #define P_FROZEN 0x8000u /* used for retire page with known status */ 2489 /* P_ILL_BITS: every 16-bit flag bit other than BRANCH, LEAF, LEAF2, OVERFLOW and SPILLED. */ #define P_ILL_BITS \ 2490 ((uint16_t) ~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED)) 2491 uint16_t mp_flags; 2492 union { 2493 uint32_t mp_pages; /* number of overflow pages */ 2494 __anonymous_struct_extension__ struct { 2495 indx_t mp_lower; /* lower bound of free space */ 2496 indx_t mp_upper; /* upper bound of free space */ 2497 }; 2498 }; 2499 pgno_t mp_pgno; /* page number */ 2500 /* The flexible array member is declared only for C99-or-newer compilers and for MSVC in C mode. NOTE(review): PAGEHDRSZ uses offsetof(MDBX_page, mp_ptrs), so it presumably cannot be used where this member is absent - confirm. */ 2501 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ 2502 (!defined(__cplusplus) && defined(_MSC_VER)) 2503 indx_t mp_ptrs[] /* dynamic size */; 2504 #endif /* C99 */ 2505 } MDBX_page; 2506 /* The low 8 bits of mp_flags. */ 2507 #define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags) 2508 2509 /* For sub-pages, drop the P_SUBP and legacy P_DIRTY bits
for compatibility; other pages yield PAGETYPE_WHOLE unchanged */ 2510 #define PAGETYPE_COMPAT(p) \ 2511 (unlikely(PAGETYPE_WHOLE(p) & P_SUBP) \ 2512 ? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY) \ 2513 : PAGETYPE_WHOLE(p)) 2514 2515 /* Size of the page header, excluding dynamic data at the end */ 2516 #define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs)) 2517 2518 #pragma pack(pop) 2519 2520 #if MDBX_ENABLE_PGOP_STAT 2521 /* Overall statistics of page operations across all (running, completed and aborted) 2522 * transactions */ 2523 typedef struct { 2524 MDBX_atomic_uint64_t newly; /* Quantity of new pages added */ 2525 MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */ 2526 MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones 2527 for nested transactions */ 2528 MDBX_atomic_uint64_t split; /* Page splits */ 2529 MDBX_atomic_uint64_t merge; /* Page merges */ 2530 MDBX_atomic_uint64_t spill; /* Quantity of spilled dirty pages */ 2531 MDBX_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */ 2532 MDBX_atomic_uint64_t 2533 wops; /* Number of explicit write operations (not pages) to disk */ 2534 MDBX_atomic_uint64_t 2535 gcrtime; /* Time spent reading/searching GC (aka FreeDB). The 2536 unit/scale is platform-dependent, see osal_monotime().
*/ 2537 } MDBX_pgop_stat_t; 2538 #endif /* MDBX_ENABLE_PGOP_STAT */ 2539 /* Per-backend lock definitions: MDBX_CLOCK_SIGN is a distinct 32-bit tag for each locking backend and osal_ipclock_t is that backend's lock object type. NOTE(review): MDBX_LOCKING_BENAPHORE has no branch here and ends in the FIXME error - confirm it is intentionally unsupported. */ 2540 #if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES 2541 #define MDBX_CLOCK_SIGN UINT32_C(0xF10C) 2542 typedef void osal_ipclock_t; 2543 #elif MDBX_LOCKING == MDBX_LOCKING_SYSV 2544 2545 #define MDBX_CLOCK_SIGN UINT32_C(0xF18D) 2546 typedef mdbx_pid_t osal_ipclock_t; 2547 #ifndef EOWNERDEAD 2548 #define EOWNERDEAD MDBX_RESULT_TRUE 2549 #endif 2550 2551 #elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ 2552 MDBX_LOCKING == MDBX_LOCKING_POSIX2008 2553 #define MDBX_CLOCK_SIGN UINT32_C(0x8017) 2554 typedef pthread_mutex_t osal_ipclock_t; 2555 #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 2556 #define MDBX_CLOCK_SIGN UINT32_C(0xFC29) 2557 typedef sem_t osal_ipclock_t; 2558 #else 2559 #error "FIXME" 2560 #endif /* MDBX_LOCKING */ 2561 /* Declared only for backends numbered above MDBX_LOCKING_SYSV (5), i.e. the POSIX ones; WIN32FILES (-1) and SYSV are excluded. */ 2562 #if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus) 2563 MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc); 2564 MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc); 2565 #endif /* MDBX_LOCKING */ 2566 2567 /* Reader Lock Table 2568 * 2569 * Readers don't acquire any locks for their data access. Instead, they 2570 * simply record their transaction ID in the reader table. The reader 2571 * mutex is needed just to find an empty slot in the reader table. The 2572 * slot's address is saved in thread-specific data so that subsequent 2573 * read transactions started by the same thread need no further locking to 2574 * proceed. 2575 * 2576 * If MDBX_NOTLS is set, the slot address is not saved in thread-specific data. 2577 * No reader table is used if the database is on a read-only filesystem. 2578 * 2579 * Since the database uses multi-version concurrency control, readers don't 2580 * actually need any locking. This table is used to keep track of which 2581 * readers are using data from which old transactions, so that we'll know 2582 * when a particular old transaction is no longer in use.
Old transactions 2583 * that have discarded any data pages can then have those pages reclaimed 2584 * for use by a later write transaction. 2585 * 2586 * The lock table is constructed such that reader slots are aligned with the 2587 * processor's cache line size. Any slot is only ever used by one thread. 2588 * This alignment guarantees that there will be no contention or cache 2589 * thrashing as threads update their own slot info, and also eliminates 2590 * any need for locking when accessing a slot. 2591 * 2592 * A writer thread will scan every slot in the table to determine the oldest 2593 * outstanding reader transaction. Any freed pages older than this will be 2594 * reclaimed by the writer. The writer doesn't use any locks when scanning 2595 * this table. This means that there's no guarantee that the writer will 2596 * see the most up-to-date reader info, but that's not required for correct 2597 * operation - all we need is to know the upper bound on the oldest reader, 2598 * we don't care at all about the newest reader. So the only consequence of 2599 * reading stale information here is that old pages might hang around a 2600 * while longer before being reclaimed. That's actually good anyway, because 2601 * the longer we delay reclaiming old pages, the more likely it is that a 2602 * string of contiguous pages can be found after coalescing old pages from 2603 * many old transactions together. */ 2604 2605 /* The actual reader record, with cacheline padding. */ 2606 typedef struct MDBX_reader { 2607 /* Current Transaction ID when this transaction began, or (txnid_t)-1. 2608 * Multiple readers that start at the same time will probably have the 2609 * same ID here. Again, it's not important to exclude them from 2610 * anything; all we need to know is which version of the DB they 2611 * started from so we can avoid overwriting any data used in that 2612 * particular version. 
*/ 2613 MDBX_atomic_uint64_t /* txnid_t */ mr_txnid; 2614 2615 /* The information we store in a single slot of the reader table. 2616 * In addition to a transaction ID, we also record the process and 2617 * thread ID that owns a slot, so that we can detect stale information, 2618 * e.g. threads or processes that went away without cleaning up. 2619 * 2620 * NOTE: We currently don't check for stale records. 2621 * We simply re-init the table when we know that we're the only process 2622 * opening the lock file. */ 2623 2624 /* The thread ID of the thread owning this txn. */ 2625 MDBX_atomic_uint64_t mr_tid; 2626 2627 /* The process ID of the process owning this reader txn. */ 2628 MDBX_atomic_uint32_t mr_pid; 2629 2630 /* The number of pages used in the reader's MVCC snapshot, 2631 * i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */ 2632 atomic_pgno_t mr_snapshot_pages_used; 2633 /* Number of retired pages at the time this reader starts transaction. So, 2634 * at any time the difference mm_pages_retired - mr_snapshot_pages_retired 2635 * will give the number of pages which this reader restraining from reuse. */ 2636 MDBX_atomic_uint64_t mr_snapshot_pages_retired; 2637 } MDBX_reader; 2638 2639 /* The header for the reader table (a memory-mapped lock file). */ 2640 typedef struct MDBX_lockinfo { 2641 /* Stamp identifying this as an MDBX file. 2642 * It must be set to MDBX_MAGIC with with MDBX_LOCK_VERSION. */ 2643 uint64_t mti_magic_and_version; 2644 2645 /* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */ 2646 uint32_t mti_os_and_format; 2647 2648 /* Flags which environment was opened. */ 2649 MDBX_atomic_uint32_t mti_envmode; 2650 2651 /* Threshold of un-synced-with-disk pages for auto-sync feature, 2652 * zero means no-threshold, i.e. auto-sync is disabled. */ 2653 atomic_pgno_t mti_autosync_threshold; 2654 2655 /* Low 32-bit of txnid with which meta-pages was synced, 2656 * i.e. for sync-polling in the MDBX_NOMETASYNC mode. 
*/ 2657 MDBX_atomic_uint32_t mti_meta_sync_txnid; 2658 2659 /* Period for timed auto-sync feature, i.e. at the every steady checkpoint 2660 * the mti_unsynced_timeout sets to the current_time + mti_autosync_period. 2661 * The time value is represented in a suitable system-dependent form, for 2662 * example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC). 2663 * Zero means timed auto-sync is disabled. */ 2664 MDBX_atomic_uint64_t mti_autosync_period; 2665 2666 /* Marker to distinguish uniqueness of DB/CLK. */ 2667 MDBX_atomic_uint64_t mti_bait_uniqueness; 2668 2669 MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ 2670 2671 #if MDBX_ENABLE_PGOP_STAT 2672 /* Statistics of costly ops of all (running, completed and aborted) 2673 * transactions */ 2674 MDBX_pgop_stat_t mti_pgop_stat; 2675 #endif /* MDBX_ENABLE_PGOP_STAT*/ 2676 2677 MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ 2678 2679 /* Write transaction lock. */ 2680 #if MDBX_LOCKING > 0 2681 osal_ipclock_t mti_wlock; 2682 #endif /* MDBX_LOCKING > 0 */ 2683 2684 atomic_txnid_t mti_oldest_reader; 2685 2686 /* Timestamp of the last steady sync. Value is represented in a suitable 2687 * system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or 2688 * clock_gettime(CLOCK_MONOTONIC). */ 2689 MDBX_atomic_uint64_t mti_sync_timestamp; 2690 2691 /* Number un-synced-with-disk pages for auto-sync feature. */ 2692 atomic_pgno_t mti_unsynced_pages; 2693 2694 /* Number of page which was discarded last time by madvise(MADV_FREE). */ 2695 atomic_pgno_t mti_discarded_tail; 2696 2697 /* Timestamp of the last readers check. */ 2698 MDBX_atomic_uint64_t mti_reader_check_timestamp; 2699 2700 /* Shared anchor for tracking readahead edge and enabled/disabled status. */ 2701 pgno_t mti_readahead_anchor; 2702 2703 MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ 2704 2705 /* Readeaders registration lock. 
*/ 2706 #if MDBX_LOCKING > 0 2707 osal_ipclock_t mti_rlock; 2708 #endif /* MDBX_LOCKING > 0 */ 2709 2710 /* The number of slots that have been used in the reader table. 2711 * This always records the maximum count, it is not decremented 2712 * when readers release their slots. */ 2713 MDBX_atomic_uint32_t mti_numreaders; 2714 MDBX_atomic_uint32_t mti_readers_refresh_flag; 2715 2716 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ 2717 (!defined(__cplusplus) && defined(_MSC_VER)) 2718 MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/ 2719 MDBX_reader mti_readers[] /* dynamic size */; 2720 #endif /* C99 */ 2721 } MDBX_lockinfo; 2722 2723 /* Lockfile format signature: version, features and field layout */ 2724 #define MDBX_LOCK_FORMAT \ 2725 (MDBX_CLOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 + \ 2726 (unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 + \ 2727 (unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 + \ 2728 (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 + \ 2729 (unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29) 2730 2731 #define MDBX_DATA_MAGIC \ 2732 ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION) 2733 2734 #define MDBX_DATA_MAGIC_LEGACY_COMPAT \ 2735 ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2) 2736 2737 #define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255) 2738 2739 #define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION) 2740 2741 /* The maximum size of a database page. 2742 * 2743 * It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper. 2744 * 2745 * MDBX will use database pages < OS pages if needed. 2746 * That causes more I/O in write transactions: The OS must 2747 * know (read) the whole page before writing a partial page. 2748 * 2749 * Note that we don't currently support Huge pages. On Linux, 2750 * regular data files cannot use Huge pages, and in general 2751 * Huge pages aren't actually pageable. 
We rely on the OS 2752 * demand-pager to read our data and page it out when memory 2753 * pressure from other processes is high. So until OSs have 2754 * actual paging support for Huge pages, they're not viable. */ 2755 #define MAX_PAGESIZE MDBX_MAX_PAGESIZE 2756 #define MIN_PAGESIZE MDBX_MIN_PAGESIZE 2757 2758 #define MIN_MAPSIZE (MIN_PAGESIZE * MIN_PAGENO) 2759 #if defined(_WIN32) || defined(_WIN64) 2760 #define MAX_MAPSIZE32 UINT32_C(0x38000000) 2761 #else 2762 #define MAX_MAPSIZE32 UINT32_C(0x7f000000) 2763 #endif 2764 #define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MAX_PAGESIZE) 2765 2766 #if MDBX_WORDBITS >= 64 2767 #define MAX_MAPSIZE MAX_MAPSIZE64 2768 #define MDBX_PGL_LIMIT ((size_t)MAX_PAGENO) 2769 #else 2770 #define MAX_MAPSIZE MAX_MAPSIZE32 2771 #define MDBX_PGL_LIMIT (MAX_MAPSIZE32 / MIN_PAGESIZE) 2772 #endif /* MDBX_WORDBITS */ 2773 2774 #define MDBX_READERS_LIMIT 32767 2775 #define MDBX_RADIXSORT_THRESHOLD 333 2776 2777 /*----------------------------------------------------------------------------*/ 2778 2779 /* An PNL is an Page Number List, a sorted array of IDs. 2780 * The first element of the array is a counter for how many actual page-numbers 2781 * are in the list. By default PNLs are sorted in descending order, this allow 2782 * cut off a page with lowest pgno (at the tail) just truncating the list. The 2783 * sort order of PNLs is controlled by the MDBX_PNL_ASCENDING build option. */ 2784 typedef pgno_t *MDBX_PNL; 2785 2786 #if MDBX_PNL_ASCENDING 2787 #define MDBX_PNL_ORDERED(first, last) ((first) < (last)) 2788 #define MDBX_PNL_DISORDERED(first, last) ((first) >= (last)) 2789 #else 2790 #define MDBX_PNL_ORDERED(first, last) ((first) > (last)) 2791 #define MDBX_PNL_DISORDERED(first, last) ((first) <= (last)) 2792 #endif 2793 2794 /* List of txnid, only for MDBX_txn.tw.lifo_reclaimed */ 2795 typedef txnid_t *MDBX_TXL; 2796 2797 /* An Dirty-Page list item is an pgno/pointer pair. 
*/ 2798 typedef struct MDBX_dp { 2799 MDBX_page *ptr; 2800 pgno_t pgno; 2801 union { 2802 unsigned extra; 2803 __anonymous_struct_extension__ struct { 2804 unsigned multi : 1; 2805 unsigned lru : 31; 2806 }; 2807 }; 2808 } MDBX_dp; 2809 2810 /* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */ 2811 typedef struct MDBX_dpl { 2812 unsigned sorted; 2813 unsigned length; 2814 unsigned pages_including_loose; /* number of pages, but not an entries. */ 2815 unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */ 2816 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ 2817 (!defined(__cplusplus) && defined(_MSC_VER)) 2818 MDBX_dp items[] /* dynamic size with holes at zero and after the last */; 2819 #endif 2820 } MDBX_dpl; 2821 2822 /* PNL sizes */ 2823 #define MDBX_PNL_GRANULATE 1024 2824 #define MDBX_PNL_INITIAL \ 2825 (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) 2826 2827 #define MDBX_TXL_GRANULATE 32 2828 #define MDBX_TXL_INITIAL \ 2829 (MDBX_TXL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t)) 2830 #define MDBX_TXL_MAX \ 2831 ((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t)) 2832 2833 #define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1]) 2834 #define MDBX_PNL_SIZE(pl) ((pl)[0]) 2835 #define MDBX_PNL_FIRST(pl) ((pl)[1]) 2836 #define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)]) 2837 #define MDBX_PNL_BEGIN(pl) (&(pl)[1]) 2838 #define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1]) 2839 2840 #if MDBX_PNL_ASCENDING 2841 #define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl) 2842 #define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl) 2843 #else 2844 #define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl) 2845 #define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl) 2846 #endif 2847 2848 #define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t)) 2849 #define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0) 2850 2851 /*----------------------------------------------------------------------------*/ 2852 /* Internal structures 
*/ 2853 2854 /* Auxiliary DB info. 2855 * The information here is mostly static/read-only. There is 2856 * only a single copy of this record in the environment. */ 2857 typedef struct MDBX_dbx { 2858 MDBX_val md_name; /* name of the database */ 2859 MDBX_cmp_func *md_cmp; /* function for comparing keys */ 2860 MDBX_cmp_func *md_dcmp; /* function for comparing data items */ 2861 size_t md_klen_min, md_klen_max; /* min/max key length for the database */ 2862 size_t md_vlen_min, 2863 md_vlen_max; /* min/max value/data length for the database */ 2864 } MDBX_dbx; 2865 2866 typedef struct troika { 2867 uint8_t fsm, recent, prefer_steady, tail_and_flags; 2868 #define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7) 2869 #define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64) 2870 #define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128) 2871 #define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3) 2872 txnid_t txnid[NUM_METAS]; 2873 } meta_troika_t; 2874 2875 /* A database transaction. 2876 * Every operation requires a transaction handle. 
*/ 2877 struct MDBX_txn { 2878 #define MDBX_MT_SIGNATURE UINT32_C(0x93D53A31) 2879 uint32_t mt_signature; 2880 2881 /* Transaction Flags */ 2882 /* mdbx_txn_begin() flags */ 2883 #define MDBX_TXN_RO_BEGIN_FLAGS (MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE) 2884 #define MDBX_TXN_RW_BEGIN_FLAGS \ 2885 (MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY) 2886 /* Additional flag for sync_locked() */ 2887 #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000) 2888 2889 #define TXN_FLAGS \ 2890 (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \ 2891 MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID) 2892 2893 #if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \ 2894 ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \ 2895 MDBX_SHRINK_ALLOWED) 2896 #error "Oops, some txn flags overlapped or wrong" 2897 #endif 2898 uint32_t mt_flags; 2899 2900 MDBX_txn *mt_parent; /* parent of a nested txn */ 2901 /* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */ 2902 MDBX_txn *mt_child; 2903 MDBX_geo mt_geo; 2904 /* next unallocated page */ 2905 #define mt_next_pgno mt_geo.next 2906 /* corresponding to the current size of datafile */ 2907 #define mt_end_pgno mt_geo.now 2908 2909 /* The ID of this transaction. IDs are integers incrementing from 2910 * INITIAL_TXNID. Only committed write transactions increment the ID. If a 2911 * transaction aborts, the ID may be re-used by the next writer. */ 2912 txnid_t mt_txnid; 2913 txnid_t mt_front; 2914 2915 MDBX_env *mt_env; /* the DB environment */ 2916 /* Array of records for each DB known in the environment. 
*/ 2917 MDBX_dbx *mt_dbxs; 2918 /* Array of MDBX_db records for each known DB */ 2919 MDBX_db *mt_dbs; 2920 /* Array of sequence numbers for each DB handle */ 2921 MDBX_atomic_uint32_t *mt_dbiseqs; 2922 2923 /* Transaction DBI Flags */ 2924 #define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ 2925 #define DBI_STALE MDBX_DBI_STALE /* Named-DB record is older than txnID */ 2926 #define DBI_FRESH MDBX_DBI_FRESH /* Named-DB handle opened in this txn */ 2927 #define DBI_CREAT MDBX_DBI_CREAT /* Named-DB handle created in this txn */ 2928 #define DBI_VALID 0x10 /* DB handle is valid, see also DB_VALID */ 2929 #define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ 2930 #define DBI_AUDITED 0x40 /* Internal flag for accounting during audit */ 2931 /* Array of flags for each DB */ 2932 uint8_t *mt_dbistate; 2933 /* Number of DB records in use, or 0 when the txn is finished. 2934 * This number only ever increments until the txn finishes; we 2935 * don't decrement it when individual DB handles are closed. */ 2936 MDBX_dbi mt_numdbs; 2937 size_t mt_owner; /* thread ID that owns this transaction */ 2938 MDBX_canary mt_canary; 2939 void *mt_userctx; /* User-settable context */ 2940 MDBX_cursor **mt_cursors; 2941 2942 union { 2943 struct { 2944 /* For read txns: This thread/txn's reader table slot, or NULL. */ 2945 MDBX_reader *reader; 2946 } to; 2947 struct { 2948 meta_troika_t troika; 2949 /* In write txns, array of cursors for each DB */ 2950 pgno_t *reclaimed_pglist; /* Reclaimed GC pages */ 2951 txnid_t last_reclaimed; /* ID of last used record */ 2952 #if MDBX_ENABLE_REFUND 2953 pgno_t loose_refund_wl /* FIXME: describe */; 2954 #endif /* MDBX_ENABLE_REFUND */ 2955 /* dirtylist room: Dirty array size - dirty pages visible to this txn. 2956 * Includes ancestor txns' dirty pages not hidden by other txns' 2957 * dirty/spilled pages. Thus commit(nested txn) has room to merge 2958 * dirtylist into mt_parent after freeing hidden mt_parent pages. 
*/ 2959 unsigned dirtyroom; 2960 /* a sequence to spilling dirty page with LRU policy */ 2961 unsigned dirtylru; 2962 /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ 2963 MDBX_dpl *dirtylist; 2964 /* The list of reclaimed txns from GC */ 2965 MDBX_TXL lifo_reclaimed; 2966 /* The list of pages that became unused during this transaction. */ 2967 MDBX_PNL retired_pages; 2968 /* The list of loose pages that became unused and may be reused 2969 * in this transaction, linked through `mp_next`. */ 2970 MDBX_page *loose_pages; 2971 /* Number of loose pages (tw.loose_pages) */ 2972 unsigned loose_count; 2973 unsigned spill_least_removed; 2974 /* The sorted list of dirty pages we temporarily wrote to disk 2975 * because the dirty list was full. page numbers in here are 2976 * shifted left by 1, deleted slots have the LSB set. */ 2977 MDBX_PNL spill_pages; 2978 } tw; 2979 }; 2980 }; 2981 2982 #if MDBX_WORDBITS >= 64 2983 #define CURSOR_STACK 32 2984 #else 2985 #define CURSOR_STACK 24 2986 #endif 2987 2988 struct MDBX_xcursor; 2989 2990 /* Cursors are used for all DB operations. 2991 * A cursor holds a path of (page pointer, key index) from the DB 2992 * root to a position in the DB, plus other state. MDBX_DUPSORT 2993 * cursors include an xcursor to the current data item. Write txns 2994 * track their cursors and keep them up to date when data moves. 2995 * Exception: An xcursor's pointer to a P_SUBP page can be stale. 2996 * (A node with F_DUPDATA but no F_SUBDATA contains a subpage). 
*/ 2997 struct MDBX_cursor { 2998 #define MDBX_MC_LIVE UINT32_C(0xFE05D5B1) 2999 #define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047) 3000 #define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7) 3001 uint32_t mc_signature; 3002 /* The database handle this cursor operates on */ 3003 MDBX_dbi mc_dbi; 3004 /* Next cursor on this DB in this txn */ 3005 MDBX_cursor *mc_next; 3006 /* Backup of the original cursor if this cursor is a shadow */ 3007 MDBX_cursor *mc_backup; 3008 /* Context used for databases with MDBX_DUPSORT, otherwise NULL */ 3009 struct MDBX_xcursor *mc_xcursor; 3010 /* The transaction that owns this cursor */ 3011 MDBX_txn *mc_txn; 3012 /* The database record for this cursor */ 3013 MDBX_db *mc_db; 3014 /* The database auxiliary record for this cursor */ 3015 MDBX_dbx *mc_dbx; 3016 /* The mt_dbistate for this database */ 3017 uint8_t *mc_dbistate; 3018 uint8_t mc_snum; /* number of pushed pages */ 3019 uint8_t mc_top; /* index of top page, normally mc_snum-1 */ 3020 3021 /* Cursor state flags. */ 3022 #define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */ 3023 #define C_EOF 0x02 /* No more data */ 3024 #define C_SUB 0x04 /* Cursor is a sub-cursor */ 3025 #define C_DEL 0x08 /* last op was a cursor_del */ 3026 #define C_UNTRACK 0x10 /* Un-track cursor when closing */ 3027 #define C_RECLAIMING 0x20 /* GC lookup is prohibited */ 3028 #define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */ 3029 uint8_t mc_flags; /* see mdbx_cursor */ 3030 3031 /* Cursor checking flags. 
*/ 3032 #define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */ 3033 #define CC_LEAF 0x02 /* same as P_LEAF for CHECK_LEAF_TYPE() */ 3034 #define CC_OVERFLOW 0x04 /* same as P_OVERFLOW for CHECK_LEAF_TYPE() */ 3035 #define CC_UPDATING 0x08 /* update/rebalance pending */ 3036 #define CC_SKIPORD 0x10 /* don't check keys ordering */ 3037 #define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */ 3038 #define CC_RETIRING 0x40 /* refs to child pages may be invalid */ 3039 #define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */ 3040 uint8_t mc_checking; /* page checking level */ 3041 3042 MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */ 3043 indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */ 3044 }; 3045 3046 #define CHECK_LEAF_TYPE(mc, mp) \ 3047 (((PAGETYPE_WHOLE(mp) ^ (mc)->mc_checking) & \ 3048 (CC_BRANCH | CC_LEAF | CC_OVERFLOW | CC_LEAF2)) == 0) 3049 3050 /* Context for sorted-dup records. 3051 * We could have gone to a fully recursive design, with arbitrarily 3052 * deep nesting of sub-databases. But for now we only handle these 3053 * levels - main DB, optional sub-DB, sorted-duplicate DB. */ 3054 typedef struct MDBX_xcursor { 3055 /* A sub-cursor for traversing the Dup DB */ 3056 MDBX_cursor mx_cursor; 3057 /* The database record for this Dup DB */ 3058 MDBX_db mx_db; 3059 /* The auxiliary DB record for this Dup DB */ 3060 MDBX_dbx mx_dbx; 3061 } MDBX_xcursor; 3062 3063 typedef struct MDBX_cursor_couple { 3064 MDBX_cursor outer; 3065 void *mc_userctx; /* User-settable context */ 3066 MDBX_xcursor inner; 3067 } MDBX_cursor_couple; 3068 3069 /* The database environment. */ 3070 struct MDBX_env { 3071 /* ----------------------------------------------------- mostly static part */ 3072 #define MDBX_ME_SIGNATURE UINT32_C(0x9A899641) 3073 MDBX_atomic_uint32_t me_signature; 3074 /* Failed to update the meta page. Probably an I/O error. 
*/ 3075 #define MDBX_FATAL_ERROR UINT32_C(0x80000000) 3076 /* Some fields are initialized. */ 3077 #define MDBX_ENV_ACTIVE UINT32_C(0x20000000) 3078 /* me_txkey is set */ 3079 #define MDBX_ENV_TXKEY UINT32_C(0x10000000) 3080 /* Legacy MDBX_MAPASYNC (prior v0.9) */ 3081 #define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000) 3082 /* Legacy MDBX_COALESCE (prior v0.12) */ 3083 #define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000) 3084 #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) 3085 uint32_t me_flags; 3086 osal_mmap_t me_dxb_mmap; /* The main data file */ 3087 #define me_map me_dxb_mmap.dxb 3088 #define me_lazy_fd me_dxb_mmap.fd 3089 mdbx_filehandle_t me_dsync_fd; 3090 osal_mmap_t me_lck_mmap; /* The lock file */ 3091 #define me_lfd me_lck_mmap.fd 3092 struct MDBX_lockinfo *me_lck; 3093 3094 unsigned me_psize; /* DB page size, initialized from me_os_psize */ 3095 unsigned me_leaf_nodemax; /* max size of a leaf-node */ 3096 uint8_t me_psize2log; /* log2 of DB page size */ 3097 int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */ 3098 uint16_t me_merge_threshold, 3099 me_merge_threshold_gc; /* pages emptier than this are candidates for 3100 merging */ 3101 unsigned me_os_psize; /* OS page size, from osal_syspagesize() */ 3102 unsigned me_maxreaders; /* size of the reader table */ 3103 MDBX_dbi me_maxdbs; /* size of the DB table */ 3104 uint32_t me_pid; /* process ID of this env */ 3105 osal_thread_key_t me_txkey; /* thread-key for readers */ 3106 pathchar_t *me_pathname; /* path to the DB files */ 3107 void *me_pbuf; /* scratch area for DUPSORT put() */ 3108 MDBX_txn *me_txn0; /* preallocated write transaction */ 3109 3110 MDBX_dbx *me_dbxs; /* array of static DB info */ 3111 uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */ 3112 MDBX_atomic_uint32_t *me_dbiseqs; /* array of dbi sequence numbers */ 3113 unsigned 3114 me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */ 3115 
uint32_t me_live_reader; /* have liveness lock in reader table */ 3116 void *me_userctx; /* User-settable context */ 3117 MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */ 3118 3119 struct { 3120 unsigned dp_reserve_limit; 3121 unsigned rp_augment_limit; 3122 unsigned dp_limit; 3123 unsigned dp_initial; 3124 uint8_t dp_loose_limit; 3125 uint8_t spill_max_denominator; 3126 uint8_t spill_min_denominator; 3127 uint8_t spill_parent4child_denominator; 3128 unsigned merge_threshold_16dot16_percent; 3129 union { 3130 unsigned all; 3131 /* tracks options with non-auto values but tuned by user */ 3132 struct { 3133 unsigned dp_limit : 1; 3134 } non_auto; 3135 } flags; 3136 } me_options; 3137 3138 /* struct me_dbgeo used for accepting db-geo params from user for the new 3139 * database creation, i.e. when mdbx_env_set_geometry() was called before 3140 * mdbx_env_open(). */ 3141 struct { 3142 size_t lower; /* minimal size of datafile */ 3143 size_t upper; /* maximal size of datafile */ 3144 size_t now; /* current size of datafile */ 3145 size_t grow; /* step to grow datafile */ 3146 size_t shrink; /* threshold to shrink datafile */ 3147 } me_dbgeo; 3148 3149 #if MDBX_LOCKING == MDBX_LOCKING_SYSV 3150 union { 3151 key_t key; 3152 int semid; 3153 } me_sysv_ipc; 3154 #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */ 3155 3156 MDBX_env *me_lcklist_next; 3157 3158 /* --------------------------------------------------- mostly volatile part */ 3159 3160 MDBX_txn *me_txn; /* current write transaction */ 3161 osal_fastmutex_t me_dbi_lock; 3162 MDBX_dbi me_numdbs; /* number of DBs opened */ 3163 3164 MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ 3165 unsigned me_dp_reserve_len; 3166 /* PNL of pages that became unused in a write txn */ 3167 MDBX_PNL me_retired_pages; 3168 3169 #if defined(_WIN32) || defined(_WIN64) 3170 osal_srwlock_t me_remap_guard; 3171 /* Workaround for LockFileEx and WriteFile multithread bug */ 3172 CRITICAL_SECTION 
me_windowsbug_lock; 3173 #else 3174 osal_fastmutex_t me_remap_guard; 3175 #endif 3176 3177 /* -------------------------------------------------------------- debugging */ 3178 3179 #if MDBX_DEBUG 3180 MDBX_assert_func *me_assert_func; /* Callback for assertion failures */ 3181 #endif 3182 #ifdef MDBX_USE_VALGRIND 3183 int me_valgrind_handle; 3184 #endif 3185 #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) 3186 pgno_t me_poison_edge; 3187 #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ 3188 3189 #ifndef xMDBX_DEBUG_SPILLING 3190 #define xMDBX_DEBUG_SPILLING 0 3191 #endif 3192 #if xMDBX_DEBUG_SPILLING == 2 3193 unsigned debug_dirtied_est, debug_dirtied_act; 3194 #endif /* xMDBX_DEBUG_SPILLING */ 3195 3196 /* ------------------------------------------------- stub for lck-less mode */ 3197 MDBX_atomic_uint64_t 3198 x_lckless_stub[(sizeof(MDBX_lockinfo) + MDBX_CACHELINE_SIZE - 1) / 3199 sizeof(MDBX_atomic_uint64_t)]; 3200 }; 3201 3202 #ifndef __cplusplus 3203 /*----------------------------------------------------------------------------*/ 3204 /* Debug and Logging stuff */ 3205 3206 #define MDBX_RUNTIME_FLAGS_INIT \ 3207 ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT 3208 3209 extern uint8_t runtime_flags; 3210 extern uint8_t loglevel; 3211 extern MDBX_debug_func *debug_logger; 3212 3213 MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny) { 3214 #if MDBX_DEBUG 3215 if (MDBX_DBG_JITTER & runtime_flags) 3216 osal_jitter(tiny); 3217 #else 3218 (void)tiny; 3219 #endif 3220 } 3221 3222 MDBX_INTERNAL_FUNC void MDBX_PRINTF_ARGS(4, 5) 3223 debug_log(int level, const char *function, int line, const char *fmt, ...) 
3224 MDBX_PRINTF_ARGS(4, 5); 3225 MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line, 3226 const char *fmt, va_list args); 3227 3228 #if MDBX_DEBUG 3229 #define LOG_ENABLED(msg) unlikely(msg <= loglevel) 3230 #define AUDIT_ENABLED() unlikely((runtime_flags & MDBX_DBG_AUDIT)) 3231 #else /* MDBX_DEBUG */ 3232 #define LOG_ENABLED(msg) (msg < MDBX_LOG_VERBOSE && msg <= loglevel) 3233 #define AUDIT_ENABLED() (0) 3234 #endif /* MDBX_DEBUG */ 3235 3236 #if MDBX_FORCE_ASSERTIONS 3237 #define ASSERT_ENABLED() (1) 3238 #elif MDBX_DEBUG 3239 #define ASSERT_ENABLED() likely((runtime_flags & MDBX_DBG_ASSERT)) 3240 #else 3241 #define ASSERT_ENABLED() (0) 3242 #endif /* assertions */ 3243 3244 #define DEBUG_EXTRA(fmt, ...) \ 3245 do { \ 3246 if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ 3247 debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__); \ 3248 } while (0) 3249 3250 #define DEBUG_EXTRA_PRINT(fmt, ...) \ 3251 do { \ 3252 if (LOG_ENABLED(MDBX_LOG_EXTRA)) \ 3253 debug_log(MDBX_LOG_EXTRA, NULL, 0, fmt, __VA_ARGS__); \ 3254 } while (0) 3255 3256 #define TRACE(fmt, ...) \ 3257 do { \ 3258 if (LOG_ENABLED(MDBX_LOG_TRACE)) \ 3259 debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ 3260 } while (0) 3261 3262 #define DEBUG(fmt, ...) \ 3263 do { \ 3264 if (LOG_ENABLED(MDBX_LOG_DEBUG)) \ 3265 debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ 3266 } while (0) 3267 3268 #define VERBOSE(fmt, ...) \ 3269 do { \ 3270 if (LOG_ENABLED(MDBX_LOG_VERBOSE)) \ 3271 debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ 3272 } while (0) 3273 3274 #define NOTICE(fmt, ...) \ 3275 do { \ 3276 if (LOG_ENABLED(MDBX_LOG_NOTICE)) \ 3277 debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ 3278 } while (0) 3279 3280 #define WARNING(fmt, ...) 
\ 3281 do { \ 3282 if (LOG_ENABLED(MDBX_LOG_WARN)) \ 3283 debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ 3284 } while (0) 3285 3286 #undef ERROR /* wingdi.h \ 3287 Yeah, morons from M$ put such definition to the public header. */ 3288 3289 #define ERROR(fmt, ...) \ 3290 do { \ 3291 if (LOG_ENABLED(MDBX_LOG_ERROR)) \ 3292 debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__); \ 3293 } while (0) 3294 3295 #define FATAL(fmt, ...) \ 3296 debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__); 3297 3298 #define ENSURE_MSG(env, expr, msg) \ 3299 do { \ 3300 if (unlikely(!(expr))) \ 3301 mdbx_assert_fail(env, msg, __func__, __LINE__); \ 3302 } while (0) 3303 3304 #define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr) 3305 3306 /* assert(3) variant in environment context */ 3307 #define eASSERT(env, expr) \ 3308 do { \ 3309 if (ASSERT_ENABLED()) \ 3310 ENSURE(env, expr); \ 3311 } while (0) 3312 3313 /* assert(3) variant in cursor context */ 3314 #define cASSERT(mc, expr) eASSERT((mc)->mc_txn->mt_env, expr) 3315 3316 /* assert(3) variant in transaction context */ 3317 #define tASSERT(txn, expr) eASSERT((txn)->mt_env, expr) 3318 3319 #ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */ 3320 #undef assert 3321 #define assert(expr) eASSERT(NULL, expr) 3322 #endif 3323 3324 /*----------------------------------------------------------------------------*/ 3325 /* Cache coherence and mmap invalidation */ 3326 3327 #if MDBX_CPU_WRITEBACK_INCOHERENT 3328 #define osal_flush_incoherent_cpu_writeback() osal_memory_barrier() 3329 #else 3330 #define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier() 3331 #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ 3332 3333 MDBX_MAYBE_UNUSED static __inline void 3334 osal_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) { 3335 #if MDBX_MMAP_INCOHERENT_FILE_WRITE 3336 char *const begin = (char *)(-pagesize & (intptr_t)addr); 3337 char *const end = 3338 (char 
*)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1)); 3339 int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0; 3340 eASSERT(nullptr, err == 0); 3341 (void)err; 3342 #else 3343 (void)pagesize; 3344 #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ 3345 3346 #if MDBX_MMAP_INCOHERENT_CPU_CACHE 3347 #ifdef DCACHE 3348 /* MIPS has cache coherency issues. 3349 * Note: for any nbytes >= on-chip cache size, entire is flushed. */ 3350 cacheflush(addr, nbytes, DCACHE); 3351 #else 3352 #error "Oops, cacheflush() not available" 3353 #endif /* DCACHE */ 3354 #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ 3355 3356 #if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE 3357 (void)addr; 3358 (void)nbytes; 3359 #endif 3360 } 3361 3362 /*----------------------------------------------------------------------------*/ 3363 /* Internal prototypes */ 3364 3365 MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked, 3366 int *dead); 3367 MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin, 3368 MDBX_reader *end); 3369 MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key); 3370 3371 MDBX_INTERNAL_FUNC void global_ctor(void); 3372 MDBX_INTERNAL_FUNC void global_dtor(void); 3373 MDBX_INTERNAL_FUNC void thread_dtor(void *ptr); 3374 3375 #endif /* !__cplusplus */ 3376 3377 #define MDBX_IS_ERROR(rc) \ 3378 ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE) 3379 3380 /* Internal error codes, not exposed outside libmdbx */ 3381 #define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10) 3382 3383 /* Debugging output value of a cursor DBI: Negative in a sub-cursor. */ 3384 #define DDBI(mc) \ 3385 (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi) 3386 3387 /* Key size which fits in a DKBUF (debug key buffer). 
*/ 3388 #define DKBUF_MAX 511 3389 #define DKBUF char _kbuf[DKBUF_MAX * 4 + 2] 3390 #define DKEY(x) mdbx_dump_val(x, _kbuf, DKBUF_MAX * 2 + 1) 3391 #define DVAL(x) mdbx_dump_val(x, _kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1) 3392 3393 #if MDBX_DEBUG 3394 #define DKBUF_DEBUG DKBUF 3395 #define DKEY_DEBUG(x) DKEY(x) 3396 #define DVAL_DEBUG(x) DVAL(x) 3397 #else 3398 #define DKBUF_DEBUG ((void)(0)) 3399 #define DKEY_DEBUG(x) ("-") 3400 #define DVAL_DEBUG(x) ("-") 3401 #endif 3402 3403 /* An invalid page number. 3404 * Mainly used to denote an empty tree. */ 3405 #define P_INVALID (~(pgno_t)0) 3406 3407 /* Test if the flags f are set in a flag word w. */ 3408 #define F_ISSET(w, f) (((w) & (f)) == (f)) 3409 3410 /* Round n up to an even number. */ 3411 #define EVEN(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */ 3412 3413 /* Default size of memory map. 3414 * This is certainly too small for any actual applications. Apps should 3415 * always set the size explicitly using mdbx_env_set_geometry(). */ 3416 #define DEFAULT_MAPSIZE MEGABYTE 3417 3418 /* Number of slots in the reader table. 3419 * This value was chosen somewhat arbitrarily. The 61 is a prime number, 3420 * and such readers plus a couple mutexes fit into single 4KB page. 3421 * Applications should set the table size using mdbx_env_set_maxreaders(). */ 3422 #define DEFAULT_READERS 61 3423 3424 /* Test if a page is a leaf page */ 3425 #define IS_LEAF(p) (((p)->mp_flags & P_LEAF) != 0) 3426 /* Test if a page is a LEAF2 page */ 3427 #define IS_LEAF2(p) unlikely(((p)->mp_flags & P_LEAF2) != 0) 3428 /* Test if a page is a branch page */ 3429 #define IS_BRANCH(p) (((p)->mp_flags & P_BRANCH) != 0) 3430 /* Test if a page is an overflow page */ 3431 #define IS_OVERFLOW(p) unlikely(((p)->mp_flags & P_OVERFLOW) != 0) 3432 /* Test if a page is a sub page */ 3433 #define IS_SUBP(p) (((p)->mp_flags & P_SUBP) != 0) 3434 3435 /* Header for a single key/data pair within a page. 
3436 * Used in pages of type P_BRANCH and P_LEAF without P_LEAF2. 3437 * We guarantee 2-byte alignment for 'MDBX_node's. 3438 * 3439 * Leaf node flags describe node contents. F_BIGDATA says the node's 3440 * data part is the page number of an overflow page with actual data. 3441 * F_DUPDATA and F_SUBDATA can be combined giving duplicate data in 3442 * a sub-page/sub-database, and named databases (just F_SUBDATA). */ 3443 typedef struct MDBX_node { 3444 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 3445 union { 3446 uint32_t mn_dsize; 3447 uint32_t mn_pgno32; 3448 }; 3449 uint8_t mn_flags; /* see mdbx_node flags */ 3450 uint8_t mn_extra; 3451 uint16_t mn_ksize; /* key size */ 3452 #else 3453 uint16_t mn_ksize; /* key size */ 3454 uint8_t mn_extra; 3455 uint8_t mn_flags; /* see mdbx_node flags */ 3456 union { 3457 uint32_t mn_pgno32; 3458 uint32_t mn_dsize; 3459 }; 3460 #endif /* __BYTE_ORDER__ */ 3461 3462 /* mdbx_node Flags */ 3463 #define F_BIGDATA 0x01 /* data put on overflow page */ 3464 #define F_SUBDATA 0x02 /* data is a sub-database */ 3465 #define F_DUPDATA 0x04 /* data has duplicates */ 3466 3467 /* valid flags for mdbx_node_add() */ 3468 #define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND) 3469 3470 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ 3471 (!defined(__cplusplus) && defined(_MSC_VER)) 3472 uint8_t mn_data[] /* key and data are appended here */; 3473 #endif /* C99 */ 3474 } MDBX_node; 3475 3476 #define DB_PERSISTENT_FLAGS \ 3477 (MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED | \ 3478 MDBX_INTEGERDUP | MDBX_REVERSEDUP) 3479 3480 /* mdbx_dbi_open() flags */ 3481 #define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE) 3482 3483 #define DB_VALID 0x8000 /* DB handle is valid, for me_dbflags */ 3484 #define DB_INTERNAL_FLAGS DB_VALID 3485 3486 #if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS 3487 #error "Oops, some flags overlapped or wrong" 3488 #endif 3489 #if 
DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS 3490 #error "Oops, some flags overlapped or wrong" 3491 #endif 3492 3493 /* max number of pages to commit in one writev() call */ 3494 #define MDBX_COMMIT_PAGES 64 3495 #if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */ 3496 #undef MDBX_COMMIT_PAGES 3497 #define MDBX_COMMIT_PAGES IOV_MAX 3498 #endif 3499 3500 /* 3501 * / 3502 * | -1, a < b 3503 * CMP2INT(a,b) = < 0, a == b 3504 * | 1, a > b 3505 * \ 3506 */ 3507 #ifndef __e2k__ 3508 /* LY: fast enough on most systems */ 3509 #define CMP2INT(a, b) (((b) > (a)) ? -1 : (a) > (b)) 3510 #else 3511 /* LY: more parallelable on VLIW Elbrus */ 3512 #define CMP2INT(a, b) (((a) > (b)) - ((b) > (a))) 3513 #endif 3514 3515 /* Do not spill pages to disk if txn is getting full, may fail instead */ 3516 #define MDBX_NOSPILL 0x8000 3517 3518 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t 3519 int64pgno(int64_t i64) { 3520 if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1)) 3521 return (pgno_t)i64; 3522 return (i64 < (int64_t)MIN_PAGENO) ? 
MIN_PAGENO : MAX_PAGENO; 3523 } 3524 3525 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t 3526 pgno_add(size_t base, size_t augend) { 3527 assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO); 3528 return int64pgno(base + augend); 3529 } 3530 3531 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t 3532 pgno_sub(size_t base, size_t subtrahend) { 3533 assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && 3534 subtrahend < MAX_PAGENO); 3535 return int64pgno(base - subtrahend); 3536 } 3537 3538 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline bool 3539 is_powerof2(size_t x) { 3540 return (x & (x - 1)) == 0; 3541 } 3542 3543 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t 3544 floor_powerof2(size_t value, size_t granularity) { 3545 assert(is_powerof2(granularity)); 3546 return value & ~(granularity - 1); 3547 } 3548 3549 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t 3550 ceil_powerof2(size_t value, size_t granularity) { 3551 return floor_powerof2(value + granularity - 1, granularity); 3552 } 3553 3554 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned 3555 log2n_powerof2(size_t value) { 3556 assert(value > 0 && value < INT32_MAX && is_powerof2(value)); 3557 assert((value & -(int32_t)value) == value); 3558 #if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl) 3559 return __builtin_ctzl(value); 3560 #elif defined(_MSC_VER) 3561 unsigned long index; 3562 _BitScanForward(&index, (unsigned long)value); 3563 return index; 3564 #else 3565 static const uint8_t debruijn_ctz32[32] = { 3566 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 3567 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; 3568 return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27]; 3569 #endif 3570 } 3571 3572 /* Only a subset of the mdbx_env flags can be changed 3573 * at runtime. 
Changing other flags requires closing the 3574 * environment and re-opening it with the new flags. */ 3575 #define ENV_CHANGEABLE_FLAGS \ 3576 (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC | \ 3577 MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | \ 3578 MDBX_VALIDATION) 3579 #define ENV_CHANGELESS_FLAGS \ 3580 (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ 3581 MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) 3582 #define ENV_USABLE_FLAGS (ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS) 3583 3584 #if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS 3585 MDBX_MAYBE_UNUSED static void static_checks(void) { 3586 STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI, 3587 "Oops, MDBX_MAX_DBI or CORE_DBS?"); 3588 STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) == 3589 ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) & 3590 (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)), 3591 "Oops, some flags overlapped or wrong"); 3592 STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0, 3593 "Oops, some flags overlapped or wrong"); 3594 } 3595 #endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */ 3596 3597 #ifdef __cplusplus 3598 } 3599 #endif 3600 3601 #define MDBX_ASAN_POISON_MEMORY_REGION(addr, size) \ 3602 do { \ 3603 TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ 3604 (size_t)(size), __LINE__); \ 3605 ASAN_POISON_MEMORY_REGION(addr, size); \ 3606 } while (0) 3607 3608 #define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ 3609 do { \ 3610 TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), \ 3611 (size_t)(size), __LINE__); \ 3612 ASAN_UNPOISON_MEMORY_REGION(addr, size); \ 3613 } while (0) 3614 3615 #if defined(_WIN32) || defined(_WIN64) 3616 /* 3617 * POSIX getopt for Windows 3618 * 3619 * AT&T Public License 3620 * 3621 * Code given out at the 1985 UNIFORUM conference in Dallas. 
3622 */ 3623 3624 /*----------------------------------------------------------------------------*/ 3625 /* Microsoft compiler generates a lot of warning for self includes... */ 3626 3627 #ifdef _MSC_VER 3628 #pragma warning(push, 1) 3629 #pragma warning(disable : 4548) /* expression before comma has no effect; \ 3630 expected expression with side - effect */ 3631 #pragma warning(disable : 4530) /* C++ exception handler used, but unwind \ 3632 * semantics are not enabled. Specify /EHsc */ 3633 #pragma warning(disable : 4577) /* 'noexcept' used with no exception handling \ 3634 * mode specified; termination on exception is \ 3635 * not guaranteed. Specify /EHsc */ 3636 #if !defined(_CRT_SECURE_NO_WARNINGS) 3637 #define _CRT_SECURE_NO_WARNINGS 3638 #endif 3639 #endif /* _MSC_VER (warnings) */ 3640 3641 #include <stdio.h> 3642 #include <string.h> 3643 3644 #ifdef _MSC_VER 3645 #pragma warning(pop) 3646 #endif 3647 /*----------------------------------------------------------------------------*/ 3648 3649 #ifndef NULL 3650 #define NULL 0 3651 #endif 3652 3653 #ifndef EOF 3654 #define EOF (-1) 3655 #endif 3656 3657 int optind = 1; 3658 int optopt; 3659 char *optarg; 3660 3661 int getopt(int argc, char *const argv[], const char *opts) { 3662 static int sp = 1; 3663 int c; 3664 const char *cp; 3665 3666 if (sp == 1) { 3667 if (optind >= argc || argv[optind][0] != '-' || argv[optind][1] == '\0') 3668 return EOF; 3669 else if (strcmp(argv[optind], "--") == 0) { 3670 optind++; 3671 return EOF; 3672 } 3673 } 3674 optopt = c = argv[optind][sp]; 3675 if (c == ':' || (cp = strchr(opts, c)) == NULL) { 3676 fprintf(stderr, "%s: %s -- %c\n", argv[0], "illegal option", c); 3677 if (argv[optind][++sp] == '\0') { 3678 optind++; 3679 sp = 1; 3680 } 3681 return '?'; 3682 } 3683 if (*++cp == ':') { 3684 if (argv[optind][sp + 1] != '\0') 3685 optarg = &argv[optind++][sp + 1]; 3686 else if (++optind >= argc) { 3687 fprintf(stderr, "%s: %s -- %c\n", argv[0], "option requires an argument", 
3688 c); 3689 sp = 1; 3690 return '?'; 3691 } else 3692 optarg = argv[optind++]; 3693 sp = 1; 3694 } else { 3695 if (argv[optind][++sp] == '\0') { 3696 sp = 1; 3697 optind++; 3698 } 3699 optarg = NULL; 3700 } 3701 return c; 3702 } 3703 3704 static volatile BOOL user_break; 3705 static BOOL WINAPI ConsoleBreakHandlerRoutine(DWORD dwCtrlType) { 3706 (void)dwCtrlType; 3707 user_break = true; 3708 return true; 3709 } 3710 3711 #else /* WINDOWS */ 3712 3713 static volatile sig_atomic_t user_break; 3714 static void signal_handler(int sig) { 3715 (void)sig; 3716 user_break = 1; 3717 } 3718 3719 #endif /* !WINDOWS */ 3720 3721 static void print_stat(MDBX_stat *ms) { 3722 printf(" Pagesize: %u\n", ms->ms_psize); 3723 printf(" Tree depth: %u\n", ms->ms_depth); 3724 printf(" Branch pages: %" PRIu64 "\n", ms->ms_branch_pages); 3725 printf(" Leaf pages: %" PRIu64 "\n", ms->ms_leaf_pages); 3726 printf(" Overflow pages: %" PRIu64 "\n", ms->ms_overflow_pages); 3727 printf(" Entries: %" PRIu64 "\n", ms->ms_entries); 3728 } 3729 3730 static void usage(const char *prog) { 3731 fprintf(stderr, 3732 "usage: %s [-V] [-q] [-e] [-f[f[f]]] [-r[r]] [-a|-s name] dbpath\n" 3733 " -V\t\tprint version and exit\n" 3734 " -q\t\tbe quiet\n" 3735 " -p\t\tshow statistics of page operations for current session\n" 3736 " -e\t\tshow whole DB info\n" 3737 " -f\t\tshow GC info\n" 3738 " -r\t\tshow readers\n" 3739 " -a\t\tprint stat of main DB and all subDBs\n" 3740 " -s name\tprint stat of only the specified named subDB\n" 3741 " \t\tby default print stat of only the main DB\n", 3742 prog); 3743 exit(EXIT_FAILURE); 3744 } 3745 3746 static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, 3747 mdbx_tid_t thread, uint64_t txnid, uint64_t lag, 3748 size_t bytes_used, size_t bytes_retained) { 3749 (void)ctx; 3750 if (num == 1) 3751 printf("Reader Table\n" 3752 " #\tslot\t%6s %*s %20s %10s %13s %13s\n", 3753 "pid", (int)sizeof(size_t) * 2, "thread", "txnid", "lag", "used", 3754 "retained"); 
3755 3756 printf(" %3d)\t[%d]\t%6" PRIdSIZE " %*" PRIxPTR, num, slot, (size_t)pid, 3757 (int)sizeof(size_t) * 2, (uintptr_t)thread); 3758 if (txnid) 3759 printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag, 3760 bytes_used / 1048576.0, bytes_retained / 1048576.0); 3761 else 3762 printf(" %20s %10s %13s %13s\n", "-", "0", "0", "0"); 3763 3764 return user_break ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; 3765 } 3766 3767 static const char *prog; 3768 static bool quiet = false; 3769 static void error(const char *func, int rc) { 3770 if (!quiet) 3771 fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, 3772 mdbx_strerror(rc)); 3773 } 3774 3775 int mdbx_stat(int argc, char *argv[]) { 3776 int opt, rc; 3777 MDBX_env *env; 3778 MDBX_txn *txn; 3779 MDBX_dbi dbi; 3780 MDBX_envinfo mei; 3781 prog = argv[0]; 3782 char *envname; 3783 char *subname = nullptr; 3784 bool alldbs = false, envinfo = false, pgop = false; 3785 int freinfo = 0, rdrinfo = 0; 3786 3787 if (argc < 2) 3788 usage(prog); 3789 3790 while ((opt = getopt(argc, argv, 3791 "V" 3792 "q" 3793 "p" 3794 "a" 3795 "e" 3796 "f" 3797 "n" 3798 "r" 3799 "s:")) != EOF) { 3800 switch (opt) { 3801 case 'V': 3802 printf("mdbx_stat version %d.%d.%d.%d\n" 3803 " - source: %s %s, commit %s, tree %s\n" 3804 " - anchor: %s\n" 3805 " - build: %s for %s by %s\n" 3806 " - flags: %s\n" 3807 " - options: %s\n", 3808 mdbx_version.major, mdbx_version.minor, mdbx_version.release, 3809 mdbx_version.revision, mdbx_version.git.describe, 3810 mdbx_version.git.datetime, mdbx_version.git.commit, 3811 mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime, 3812 mdbx_build.target, mdbx_build.compiler, mdbx_build.flags, 3813 mdbx_build.options); 3814 return EXIT_SUCCESS; 3815 case 'q': 3816 quiet = true; 3817 break; 3818 case 'p': 3819 pgop = true; 3820 break; 3821 case 'a': 3822 if (subname) 3823 usage(prog); 3824 alldbs = true; 3825 break; 3826 case 'e': 3827 envinfo = true; 3828 break; 3829 case 'f': 3830 freinfo += 1; 
3831 break; 3832 case 'n': 3833 break; 3834 case 'r': 3835 rdrinfo += 1; 3836 break; 3837 case 's': 3838 if (alldbs) 3839 usage(prog); 3840 subname = optarg; 3841 break; 3842 default: 3843 usage(prog); 3844 } 3845 } 3846 3847 if (optind != argc - 1) 3848 usage(prog); 3849 3850 #if defined(_WIN32) || defined(_WIN64) 3851 SetConsoleCtrlHandler(ConsoleBreakHandlerRoutine, true); 3852 #else 3853 #ifdef SIGPIPE 3854 signal(SIGPIPE, signal_handler); 3855 #endif 3856 #ifdef SIGHUP 3857 signal(SIGHUP, signal_handler); 3858 #endif 3859 signal(SIGINT, signal_handler); 3860 signal(SIGTERM, signal_handler); 3861 #endif /* !WINDOWS */ 3862 3863 envname = argv[optind]; 3864 envname = argv[optind]; 3865 if (!quiet) { 3866 printf("mdbx_stat %s (%s, T-%s)\nRunning for %s...\n", 3867 mdbx_version.git.describe, mdbx_version.git.datetime, 3868 mdbx_version.git.tree, envname); 3869 fflush(nullptr); 3870 } 3871 3872 rc = mdbx_env_create(&env); 3873 if (unlikely(rc != MDBX_SUCCESS)) { 3874 error("mdbx_env_create", rc); 3875 return EXIT_FAILURE; 3876 } 3877 3878 if (alldbs || subname) { 3879 rc = mdbx_env_set_maxdbs(env, 2); 3880 if (unlikely(rc != MDBX_SUCCESS)) { 3881 error("mdbx_env_set_maxdbs", rc); 3882 goto env_close; 3883 } 3884 } 3885 3886 rc = mdbx_env_open(env, envname, MDBX_RDONLY, 0); 3887 if (unlikely(rc != MDBX_SUCCESS)) { 3888 error("mdbx_env_open", rc); 3889 goto env_close; 3890 } 3891 3892 rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); 3893 if (unlikely(rc != MDBX_SUCCESS)) { 3894 error("mdbx_txn_begin", rc); 3895 goto txn_abort; 3896 } 3897 3898 if (envinfo || freinfo || pgop) { 3899 rc = mdbx_env_info_ex(env, txn, &mei, sizeof(mei)); 3900 if (unlikely(rc != MDBX_SUCCESS)) { 3901 error("mdbx_env_info_ex", rc); 3902 goto txn_abort; 3903 } 3904 } else { 3905 /* LY: zap warnings from gcc */ 3906 memset(&mei, 0, sizeof(mei)); 3907 } 3908 3909 if (pgop) { 3910 printf("Page Operations (for current session):\n"); 3911 printf(" New: %8" PRIu64 "\t// quantity of a new 
pages added\n", 3912 mei.mi_pgop_stat.newly); 3913 printf(" CoW: %8" PRIu64 3914 "\t// quantity of pages copied for altering\n", 3915 mei.mi_pgop_stat.cow); 3916 printf(" Clone: %8" PRIu64 "\t// quantity of parent's dirty pages " 3917 "clones for nested transactions\n", 3918 mei.mi_pgop_stat.clone); 3919 printf(" Split: %8" PRIu64 3920 "\t// page splits during insertions or updates\n", 3921 mei.mi_pgop_stat.split); 3922 printf(" Merge: %8" PRIu64 3923 "\t// page merges during deletions or updates\n", 3924 mei.mi_pgop_stat.merge); 3925 printf(" Spill: %8" PRIu64 "\t// quantity of spilled/ousted `dirty` " 3926 "pages during large transactions\n", 3927 mei.mi_pgop_stat.spill); 3928 printf(" Unspill: %8" PRIu64 "\t// quantity of unspilled/redone `dirty` " 3929 "pages during large transactions\n", 3930 mei.mi_pgop_stat.unspill); 3931 printf(" WOP: %8" PRIu64 3932 "\t// number of explicit write operations (not a pages) to a disk\n", 3933 mei.mi_pgop_stat.wops); 3934 } 3935 3936 if (envinfo) { 3937 printf("Environment Info\n"); 3938 printf(" Pagesize: %u\n", mei.mi_dxb_pagesize); 3939 if (mei.mi_geo.lower != mei.mi_geo.upper) { 3940 printf(" Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64 3941 "/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 " pages (+%" PRIu64 3942 "/-%" PRIu64 ")\n", 3943 mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow, 3944 mei.mi_geo.shrink, mei.mi_geo.lower / mei.mi_dxb_pagesize, 3945 mei.mi_geo.upper / mei.mi_dxb_pagesize, 3946 mei.mi_geo.grow / mei.mi_dxb_pagesize, 3947 mei.mi_geo.shrink / mei.mi_dxb_pagesize); 3948 printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n", 3949 mei.mi_mapsize, mei.mi_mapsize / mei.mi_dxb_pagesize); 3950 printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", 3951 mei.mi_geo.current, mei.mi_geo.current / mei.mi_dxb_pagesize); 3952 #if defined(_WIN32) || defined(_WIN64) 3953 if (mei.mi_geo.shrink && mei.mi_geo.current != mei.mi_geo.upper) 3954 printf(" WARNING: Due Windows system 
limitations a " 3955 "file couldn't\n be truncated while database " 3956 "is opened. So, the size of\n database file " 3957 "may by large than the database itself,\n " 3958 "until it will be closed or reopened in read-write mode.\n"); 3959 #endif 3960 } else { 3961 printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n", 3962 mei.mi_geo.current, mei.mi_geo.current / mei.mi_dxb_pagesize); 3963 } 3964 printf(" Last transaction ID: %" PRIu64 "\n", mei.mi_recent_txnid); 3965 printf(" Latter reader transaction ID: %" PRIu64 " (%" PRIi64 ")\n", 3966 mei.mi_latter_reader_txnid, 3967 mei.mi_latter_reader_txnid - mei.mi_recent_txnid); 3968 printf(" Max readers: %u\n", mei.mi_maxreaders); 3969 printf(" Number of reader slots uses: %u\n", mei.mi_numreaders); 3970 } 3971 3972 if (rdrinfo) { 3973 rc = mdbx_reader_list(env, reader_list_func, nullptr); 3974 if (MDBX_IS_ERROR(rc)) { 3975 error("mdbx_reader_list", rc); 3976 goto txn_abort; 3977 } 3978 if (rc == MDBX_RESULT_TRUE) 3979 printf("Reader Table is empty\n"); 3980 else if (rc == MDBX_SUCCESS && rdrinfo > 1) { 3981 int dead; 3982 rc = mdbx_reader_check(env, &dead); 3983 if (MDBX_IS_ERROR(rc)) { 3984 error("mdbx_reader_check", rc); 3985 goto txn_abort; 3986 } 3987 if (rc == MDBX_RESULT_TRUE) { 3988 printf(" %d stale readers cleared.\n", dead); 3989 rc = mdbx_reader_list(env, reader_list_func, nullptr); 3990 if (rc == MDBX_RESULT_TRUE) 3991 printf(" Now Reader Table is empty\n"); 3992 } else 3993 printf(" No stale readers.\n"); 3994 } 3995 if (!(subname || alldbs || freinfo)) 3996 goto txn_abort; 3997 } 3998 3999 if (freinfo) { 4000 printf("Garbage Collection\n"); 4001 dbi = 0; 4002 MDBX_cursor *cursor; 4003 rc = mdbx_cursor_open(txn, dbi, &cursor); 4004 if (unlikely(rc != MDBX_SUCCESS)) { 4005 error("mdbx_cursor_open", rc); 4006 goto txn_abort; 4007 } 4008 4009 MDBX_stat mst; 4010 rc = mdbx_dbi_stat(txn, dbi, &mst, sizeof(mst)); 4011 if (unlikely(rc != MDBX_SUCCESS)) { 4012 error("mdbx_dbi_stat", rc); 4013 goto 
txn_abort; 4014 } 4015 print_stat(&mst); 4016 4017 pgno_t pages = 0, *iptr; 4018 pgno_t reclaimable = 0; 4019 MDBX_val key, data; 4020 while (MDBX_SUCCESS == 4021 (rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT))) { 4022 if (user_break) { 4023 rc = MDBX_EINTR; 4024 break; 4025 } 4026 iptr = data.iov_base; 4027 const pgno_t number = *iptr++; 4028 4029 pages += number; 4030 if (envinfo && mei.mi_latter_reader_txnid > *(txnid_t *)key.iov_base) 4031 reclaimable += number; 4032 4033 if (freinfo > 1) { 4034 char *bad = ""; 4035 pgno_t prev = 4036 MDBX_PNL_ASCENDING ? NUM_METAS - 1 : (pgno_t)mei.mi_last_pgno + 1; 4037 pgno_t span = 1; 4038 for (unsigned i = 0; i < number; ++i) { 4039 pgno_t pg = iptr[i]; 4040 if (MDBX_PNL_DISORDERED(prev, pg)) 4041 bad = " [bad sequence]"; 4042 prev = pg; 4043 while (i + span < number && 4044 iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pg, span) 4045 : pgno_sub(pg, span))) 4046 ++span; 4047 } 4048 printf(" Transaction %" PRIaTXN ", %" PRIaPGNO 4049 " pages, maxspan %" PRIaPGNO "%s\n", 4050 *(txnid_t *)key.iov_base, number, span, bad); 4051 if (freinfo > 2) { 4052 for (unsigned i = 0; i < number; i += span) { 4053 const pgno_t pg = iptr[i]; 4054 for (span = 1; 4055 i + span < number && 4056 iptr[i + span] == (MDBX_PNL_ASCENDING ? 
pgno_add(pg, span) 4057 : pgno_sub(pg, span)); 4058 ++span) 4059 ; 4060 if (span > 1) 4061 printf(" %9" PRIaPGNO "[%" PRIaPGNO "]\n", pg, span); 4062 else 4063 printf(" %9" PRIaPGNO "\n", pg); 4064 } 4065 } 4066 } 4067 } 4068 mdbx_cursor_close(cursor); 4069 cursor = nullptr; 4070 4071 switch (rc) { 4072 case MDBX_SUCCESS: 4073 case MDBX_NOTFOUND: 4074 break; 4075 case MDBX_EINTR: 4076 if (!quiet) 4077 fprintf(stderr, "Interrupted by signal/user\n"); 4078 goto txn_abort; 4079 default: 4080 error("mdbx_cursor_get", rc); 4081 goto txn_abort; 4082 } 4083 4084 if (envinfo) { 4085 uint64_t value = mei.mi_mapsize / mei.mi_dxb_pagesize; 4086 double percent = value / 100.0; 4087 printf("Page Usage\n"); 4088 printf(" Total: %" PRIu64 " 100%%\n", value); 4089 4090 value = mei.mi_geo.current / mei.mi_dxb_pagesize; 4091 printf(" Backed: %" PRIu64 " %.1f%%\n", value, value / percent); 4092 4093 value = mei.mi_last_pgno + 1; 4094 printf(" Allocated: %" PRIu64 " %.1f%%\n", value, value / percent); 4095 4096 value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1); 4097 printf(" Remained: %" PRIu64 " %.1f%%\n", value, value / percent); 4098 4099 value = mei.mi_last_pgno + 1 - pages; 4100 printf(" Used: %" PRIu64 " %.1f%%\n", value, value / percent); 4101 4102 value = pages; 4103 printf(" GC: %" PRIu64 " %.1f%%\n", value, value / percent); 4104 4105 value = pages - reclaimable; 4106 printf(" Retained: %" PRIu64 " %.1f%%\n", value, value / percent); 4107 4108 value = reclaimable; 4109 printf(" Reclaimable: %" PRIu64 " %.1f%%\n", value, value / percent); 4110 4111 value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1) + 4112 reclaimable; 4113 printf(" Available: %" PRIu64 " %.1f%%\n", value, value / percent); 4114 } else 4115 printf(" GC: %" PRIaPGNO " pages\n", pages); 4116 } 4117 4118 rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &dbi); 4119 if (unlikely(rc != MDBX_SUCCESS)) { 4120 error("mdbx_dbi_open", rc); 4121 goto txn_abort; 4122 } 4123 4124 
MDBX_stat mst; 4125 rc = mdbx_dbi_stat(txn, dbi, &mst, sizeof(mst)); 4126 if (unlikely(rc != MDBX_SUCCESS)) { 4127 error("mdbx_dbi_stat", rc); 4128 goto txn_abort; 4129 } 4130 printf("Status of %s\n", subname ? subname : "Main DB"); 4131 print_stat(&mst); 4132 4133 if (alldbs) { 4134 MDBX_cursor *cursor; 4135 rc = mdbx_cursor_open(txn, dbi, &cursor); 4136 if (unlikely(rc != MDBX_SUCCESS)) { 4137 error("mdbx_cursor_open", rc); 4138 goto txn_abort; 4139 } 4140 4141 MDBX_val key; 4142 while (MDBX_SUCCESS == 4143 (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { 4144 MDBX_dbi subdbi; 4145 if (memchr(key.iov_base, '\0', key.iov_len)) 4146 continue; 4147 subname = osal_malloc(key.iov_len + 1); 4148 memcpy(subname, key.iov_base, key.iov_len); 4149 subname[key.iov_len] = '\0'; 4150 rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &subdbi); 4151 if (rc == MDBX_SUCCESS) 4152 printf("Status of %s\n", subname); 4153 osal_free(subname); 4154 if (unlikely(rc != MDBX_SUCCESS)) { 4155 if (rc == MDBX_INCOMPATIBLE) 4156 continue; 4157 error("mdbx_dbi_open", rc); 4158 goto txn_abort; 4159 } 4160 4161 rc = mdbx_dbi_stat(txn, subdbi, &mst, sizeof(mst)); 4162 if (unlikely(rc != MDBX_SUCCESS)) { 4163 error("mdbx_dbi_stat", rc); 4164 goto txn_abort; 4165 } 4166 print_stat(&mst); 4167 4168 rc = mdbx_dbi_close(env, subdbi); 4169 if (unlikely(rc != MDBX_SUCCESS)) { 4170 error("mdbx_dbi_close", rc); 4171 goto txn_abort; 4172 } 4173 } 4174 mdbx_cursor_close(cursor); 4175 cursor = nullptr; 4176 } 4177 4178 switch (rc) { 4179 case MDBX_SUCCESS: 4180 case MDBX_NOTFOUND: 4181 break; 4182 case MDBX_EINTR: 4183 if (!quiet) 4184 fprintf(stderr, "Interrupted by signal/user\n"); 4185 break; 4186 default: 4187 if (unlikely(rc != MDBX_SUCCESS)) 4188 error("mdbx_cursor_get", rc); 4189 } 4190 4191 mdbx_dbi_close(env, dbi); 4192 txn_abort: 4193 mdbx_txn_abort(txn); 4194 env_close: 4195 mdbx_env_close(env); 4196 4197 return rc ? EXIT_FAILURE : EXIT_SUCCESS; 4198 }