github.com/moontrade/mdbx-go@v0.4.0/mdbx_stat.c (about)

     1  /* mdbx_stat.c - memory-mapped database status tool */
     2  
     3  /*
     4   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
     5   * and other libmdbx authors: please see AUTHORS file.
     6   * All rights reserved.
     7   *
     8   * Redistribution and use in source and binary forms, with or without
     9   * modification, are permitted only as authorized by the OpenLDAP
    10   * Public License.
    11   *
    12   * A copy of this license is available in the file LICENSE in the
    13   * top-level directory of the distribution or, alternatively, at
    14   * <http://www.OpenLDAP.org/license.html>. */
    15  
    16  #ifdef _MSC_VER
    17  #if _MSC_VER > 1800
    18  #pragma warning(disable : 4464) /* relative include path contains '..' */
    19  #endif
    20  #pragma warning(disable : 4996) /* The POSIX name is deprecated... */
    21  #endif                          /* _MSC_VER (warnings) */
    22  
    23  #define xMDBX_TOOLS /* Avoid using internal eASSERT() */
    24  /*
    25   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
    26   * and other libmdbx authors: please see AUTHORS file.
    27   * All rights reserved.
    28   *
    29   * Redistribution and use in source and binary forms, with or without
    30   * modification, are permitted only as authorized by the OpenLDAP
    31   * Public License.
    32   *
    33   * A copy of this license is available in the file LICENSE in the
    34   * top-level directory of the distribution or, alternatively, at
    35   * <http://www.OpenLDAP.org/license.html>. */
    36  
    37  #define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
    38  #ifdef MDBX_CONFIG_H
    39  #include MDBX_CONFIG_H
    40  #endif
    41  
    42  #define LIBMDBX_INTERNALS
    43  #ifdef xMDBX_TOOLS
    44  #define MDBX_DEPRECATED
    45  #endif /* xMDBX_TOOLS */
    46  
    47  #ifdef xMDBX_ALLOY
    48  /* Amalgamated build */
    49  #define MDBX_INTERNAL_FUNC static
    50  #define MDBX_INTERNAL_VAR static
    51  #else
    52  /* Non-amalgamated build */
    53  #define MDBX_INTERNAL_FUNC
    54  #define MDBX_INTERNAL_VAR extern
    55  #endif /* xMDBX_ALLOY */
    56  
    57  /*----------------------------------------------------------------------------*/
    58  
    59  /** Disables using GNU/Linux libc extensions.
    60   * \ingroup build_option
    61   * \note This option couldn't be moved to the options.h since dependant
    62   * control macros/defined should be prepared before include the options.h */
    63  #ifndef MDBX_DISABLE_GNU_SOURCE
    64  #define MDBX_DISABLE_GNU_SOURCE 0
    65  #endif
    66  #if MDBX_DISABLE_GNU_SOURCE
    67  #undef _GNU_SOURCE
    68  #elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE)
    69  #define _GNU_SOURCE
    70  #endif /* MDBX_DISABLE_GNU_SOURCE */
    71  
    72  /* Should be defined before any includes */
    73  #if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) &&                \
    74      !defined(ANDROID)
    75  #define _FILE_OFFSET_BITS 64
    76  #endif
    77  
    78  #ifdef __APPLE__
    79  #define _DARWIN_C_SOURCE
    80  #endif
    81  
    82  #ifdef _MSC_VER
    83  #if _MSC_FULL_VER < 190024234
/* Actually libmdbx was not tested with compilers older than 19.00.24234 (Visual
 * Studio 2015 Update 3). But you could remove this #error and try to continue
 * at your own risk. In that case please don't raise issues that are related
 * ONLY to old compilers.
    88   *
    89   * NOTE:
    90   *   Unfortunately, there are several different builds of "Visual Studio" that
    91   *   are called "Visual Studio 2015 Update 3".
    92   *
    93   *   The 190024234 is used here because it is minimal version of Visual Studio
    94   *   that was used for build and testing libmdbx in recent years. Soon this
    95   *   value will be increased to 19.0.24241.7, since build and testing using
    96   *   "Visual Studio 2015" will be performed only at https://ci.appveyor.com.
    97   *
    98   *   Please ask Microsoft (but not us) for information about version differences
    99   *   and how to and where you can obtain the latest "Visual Studio 2015" build
   100   *   with all fixes.
   101   */
   102  #error                                                                         \
   103      "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required."
   104  #endif
   105  #ifndef _CRT_SECURE_NO_WARNINGS
   106  #define _CRT_SECURE_NO_WARNINGS
   107  #endif /* _CRT_SECURE_NO_WARNINGS */
   108  #if _MSC_VER > 1800
   109  #pragma warning(disable : 4464) /* relative include path contains '..' */
   110  #endif
   111  #if _MSC_VER > 1913
   112  #pragma warning(disable : 5045) /* Compiler will insert Spectre mitigation...  \
   113                                   */
   114  #endif
   115  #if _MSC_VER > 1914
   116  #pragma warning(                                                               \
   117      disable : 5105) /* winbase.h(9531): warning C5105: macro expansion         \
   118                         producing 'defined' has undefined behavior */
   119  #endif
   120  #pragma warning(disable : 4710) /* 'xyz': function not inlined */
   121  #pragma warning(disable : 4711) /* function 'xyz' selected for automatic       \
   122                                     inline expansion */
   123  #pragma warning(                                                               \
   124      disable : 4201) /* nonstandard extension used : nameless struct / union */
   125  #pragma warning(disable : 4702) /* unreachable code */
   126  #pragma warning(disable : 4706) /* assignment within conditional expression */
   127  #pragma warning(disable : 4127) /* conditional expression is constant */
   128  #pragma warning(disable : 4324) /* 'xyz': structure was padded due to          \
   129                                     alignment specifier */
   130  #pragma warning(disable : 4310) /* cast truncates constant value */
   131  #pragma warning(                                                               \
   132      disable : 4820) /* bytes padding added after data member for alignment */
   133  #pragma warning(disable : 4548) /* expression before comma has no effect;      \
   134                                     expected expression with side - effect */
   135  #pragma warning(disable : 4366) /* the result of the unary '&' operator may be \
   136                                     unaligned */
   137  #pragma warning(disable : 4200) /* nonstandard extension used: zero-sized      \
   138                                     array in struct/union */
   139  #pragma warning(disable : 4204) /* nonstandard extension used: non-constant    \
   140                                     aggregate initializer */
   141  #pragma warning(                                                               \
   142      disable : 4505) /* unreferenced local function has been removed */
   143  #endif              /* _MSC_VER (warnings) */
   144  
   145  #if defined(__GNUC__) && __GNUC__ < 9
   146  #pragma GCC diagnostic ignored "-Wattributes"
   147  #endif /* GCC < 9 */
   148  
   149  #if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) &&    \
   150      !defined(__USE_MINGW_ANSI_STDIO)
   151  #define __USE_MINGW_ANSI_STDIO 1
   152  #endif /* __USE_MINGW_ANSI_STDIO */
   153  
   154  #include "mdbx.h"
   155  /*
   156   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
   157   * and other libmdbx authors: please see AUTHORS file.
   158   * All rights reserved.
   159   *
   160   * Redistribution and use in source and binary forms, with or without
   161   * modification, are permitted only as authorized by the OpenLDAP
   162   * Public License.
   163   *
   164   * A copy of this license is available in the file LICENSE in the
   165   * top-level directory of the distribution or, alternatively, at
   166   * <http://www.OpenLDAP.org/license.html>.
   167   */
   168  
   169  
   170  /*----------------------------------------------------------------------------*/
   171  /* Microsoft compiler generates a lot of warning for self includes... */
   172  
   173  #ifdef _MSC_VER
   174  #pragma warning(push, 1)
   175  #pragma warning(disable : 4548) /* expression before comma has no effect;      \
   176                                     expected expression with side - effect */
   177  #pragma warning(disable : 4530) /* C++ exception handler used, but unwind      \
   178                                   * semantics are not enabled. Specify /EHsc */
   179  #pragma warning(disable : 4577) /* 'noexcept' used with no exception handling  \
   180                                   * mode specified; termination on exception is \
   181                                   * not guaranteed. Specify /EHsc */
   182  #endif                          /* _MSC_VER (warnings) */
   183  
   184  #if defined(_WIN32) || defined(_WIN64)
   185  #if !defined(_CRT_SECURE_NO_WARNINGS)
   186  #define _CRT_SECURE_NO_WARNINGS
   187  #endif /* _CRT_SECURE_NO_WARNINGS */
   188  #if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY &&             \
   189      !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT
   190  #define _NO_CRT_STDIO_INLINE
   191  #endif
   192  #elif !defined(_POSIX_C_SOURCE)
   193  #define _POSIX_C_SOURCE 200809L
   194  #endif /* Windows */
   195  
   196  /*----------------------------------------------------------------------------*/
   197  /* basic C99 includes */
   198  #include <inttypes.h>
   199  #include <stddef.h>
   200  #include <stdint.h>
   201  #include <stdlib.h>
   202  
   203  #include <assert.h>
   204  #include <fcntl.h>
   205  #include <limits.h>
   206  #include <stdio.h>
   207  #include <string.h>
   208  #include <time.h>
   209  
   210  #if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
   211  #error                                                                         \
   212      "Sanity checking failed: Two's complement, reasonably sized integer types"
   213  #endif
   214  
   215  #ifndef SSIZE_MAX
   216  #define SSIZE_MAX INTPTR_MAX
   217  #endif
   218  
   219  #if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
   220  #define MDBX_WORDBITS 64
   221  #else
   222  #define MDBX_WORDBITS 32
   223  #endif /* MDBX_WORDBITS */
   224  
   225  /*----------------------------------------------------------------------------*/
   226  /* feature testing */
   227  
   228  #ifndef __has_warning
   229  #define __has_warning(x) (0)
   230  #endif
   231  
   232  #ifndef __has_include
   233  #define __has_include(x) (0)
   234  #endif
   235  
   236  #ifndef __has_feature
   237  #define __has_feature(x) (0)
   238  #endif
   239  
   240  #ifndef __has_extension
   241  #define __has_extension(x) (0)
   242  #endif
   243  
   244  #if __has_feature(thread_sanitizer)
   245  #define __SANITIZE_THREAD__ 1
   246  #endif
   247  
   248  #if __has_feature(address_sanitizer)
   249  #define __SANITIZE_ADDRESS__ 1
   250  #endif
   251  
   252  #ifndef __GNUC_PREREQ
   253  #if defined(__GNUC__) && defined(__GNUC_MINOR__)
   254  #define __GNUC_PREREQ(maj, min)                                                \
   255    ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
   256  #else
   257  #define __GNUC_PREREQ(maj, min) (0)
   258  #endif
   259  #endif /* __GNUC_PREREQ */
   260  
   261  #ifndef __CLANG_PREREQ
   262  #ifdef __clang__
   263  #define __CLANG_PREREQ(maj, min)                                               \
   264    ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min))
   265  #else
   266  #define __CLANG_PREREQ(maj, min) (0)
   267  #endif
   268  #endif /* __CLANG_PREREQ */
   269  
   270  #ifndef __GLIBC_PREREQ
   271  #if defined(__GLIBC__) && defined(__GLIBC_MINOR__)
   272  #define __GLIBC_PREREQ(maj, min)                                               \
   273    ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
   274  #else
   275  #define __GLIBC_PREREQ(maj, min) (0)
   276  #endif
   277  #endif /* __GLIBC_PREREQ */
   278  
   279  /*----------------------------------------------------------------------------*/
   280  /* C11' alignas() */
   281  
   282  #if __has_include(<stdalign.h>)
   283  #include <stdalign.h>
   284  #endif
   285  #if defined(alignas) || defined(__cplusplus)
   286  #define MDBX_ALIGNAS(N) alignas(N)
   287  #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
   288  #define MDBX_ALIGNAS(N) _Alignas(N)
   289  #elif defined(_MSC_VER)
   290  #define MDBX_ALIGNAS(N) __declspec(align(N))
   291  #elif __has_attribute(__aligned__) || defined(__GNUC__)
   292  #define MDBX_ALIGNAS(N) __attribute__((__aligned__(N)))
   293  #else
   294  #error "FIXME: Required alignas() or equivalent."
   295  #endif /* MDBX_ALIGNAS */
   296  
   297  /*----------------------------------------------------------------------------*/
   298  /* Systems macros and includes */
   299  
   300  #ifndef __extern_C
   301  #ifdef __cplusplus
   302  #define __extern_C extern "C"
   303  #else
   304  #define __extern_C
   305  #endif
   306  #endif /* __extern_C */
   307  
   308  #if !defined(nullptr) && !defined(__cplusplus) ||                              \
   309      (__cplusplus < 201103L && !defined(_MSC_VER))
   310  #define nullptr NULL
   311  #endif
   312  
   313  #if defined(__APPLE__) || defined(_DARWIN_C_SOURCE)
   314  #include <AvailabilityMacros.h>
   315  #include <TargetConditionals.h>
   316  #ifndef MAC_OS_X_VERSION_MIN_REQUIRED
   317  #define MAC_OS_X_VERSION_MIN_REQUIRED 1070 /* Mac OS X 10.7, 2011 */
   318  #endif
   319  #endif /* Apple OSX & iOS */
   320  
   321  #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) ||     \
   322      defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) ||         \
   323      defined(__APPLE__) || defined(__MACH__)
   324  #include <sys/cdefs.h>
   325  #include <sys/mount.h>
   326  #include <sys/sysctl.h>
   327  #include <sys/types.h>
   328  #if defined(__FreeBSD__) || defined(__DragonFly__)
   329  #include <vm/vm_param.h>
   330  #elif defined(__OpenBSD__) || defined(__NetBSD__)
   331  #include <uvm/uvm_param.h>
   332  #else
   333  #define SYSCTL_LEGACY_NONCONST_MIB
   334  #endif
   335  #ifndef __MACH__
   336  #include <sys/vmmeter.h>
   337  #endif
   338  #else
   339  #include <malloc.h>
   340  #if !(defined(__sun) || defined(__SVR4) || defined(__svr4__) ||                \
   341        defined(_WIN32) || defined(_WIN64))
   342  #include <mntent.h>
   343  #endif /* !Solaris */
   344  #endif /* !xBSD */
   345  
   346  #if defined(__FreeBSD__) || __has_include(<malloc_np.h>)
   347  #include <malloc_np.h>
   348  #endif
   349  
   350  #if defined(__APPLE__) || defined(__MACH__) || __has_include(<malloc/malloc.h>)
   351  #include <malloc/malloc.h>
   352  #endif /* MacOS */
   353  
   354  #if defined(__MACH__)
   355  #include <mach/host_info.h>
   356  #include <mach/mach_host.h>
   357  #include <mach/mach_port.h>
   358  #include <uuid/uuid.h>
   359  #endif
   360  
   361  #if defined(__linux__) || defined(__gnu_linux__)
   362  #include <sched.h>
   363  #include <sys/sendfile.h>
   364  #include <sys/statfs.h>
   365  #endif /* Linux */
   366  
   367  #ifndef _XOPEN_SOURCE
   368  #define _XOPEN_SOURCE 0
   369  #endif
   370  
   371  #ifndef _XOPEN_SOURCE_EXTENDED
   372  #define _XOPEN_SOURCE_EXTENDED 0
   373  #else
   374  #include <utmpx.h>
   375  #endif /* _XOPEN_SOURCE_EXTENDED */
   376  
   377  #if defined(__sun) || defined(__SVR4) || defined(__svr4__)
   378  #include <kstat.h>
   379  #include <sys/mnttab.h>
   380  /* On Solaris, it's easier to add a missing prototype rather than find a
   381   * combination of #defines that break nothing. */
   382  __extern_C key_t ftok(const char *, int);
   383  #endif /* SunOS/Solaris */
   384  
   385  #if defined(_WIN32) || defined(_WIN64) /*-------------------------------------*/
   386  
   387  #ifndef _WIN32_WINNT
   388  #define _WIN32_WINNT 0x0601 /* Windows 7 */
   389  #elif _WIN32_WINNT < 0x0500
   390  #error At least 'Windows 2000' API is required for libmdbx.
   391  #endif /* _WIN32_WINNT */
   392  #if (defined(__MINGW32__) || defined(__MINGW64__)) &&                          \
   393      !defined(__USE_MINGW_ANSI_STDIO)
   394  #define __USE_MINGW_ANSI_STDIO 1
   395  #endif /* MinGW */
   396  #ifndef WIN32_LEAN_AND_MEAN
   397  #define WIN32_LEAN_AND_MEAN
   398  #endif /* WIN32_LEAN_AND_MEAN */
   399  #include <excpt.h>
   400  #include <tlhelp32.h>
   401  #include <windows.h>
   402  #include <winnt.h>
   403  #include <winternl.h>
   404  
   405  #else /*----------------------------------------------------------------------*/
   406  
   407  #include <unistd.h>
   408  #if !defined(_POSIX_MAPPED_FILES) || _POSIX_MAPPED_FILES < 1
   409  #error "libmdbx requires the _POSIX_MAPPED_FILES feature"
   410  #endif /* _POSIX_MAPPED_FILES */
   411  
   412  #include <pthread.h>
   413  #include <semaphore.h>
   414  #include <signal.h>
   415  #include <sys/file.h>
   416  #include <sys/ipc.h>
   417  #include <sys/mman.h>
   418  #include <sys/param.h>
   419  #include <sys/stat.h>
   420  #include <sys/statvfs.h>
   421  #include <sys/uio.h>
   422  
   423  #endif /*---------------------------------------------------------------------*/
   424  
   425  #if defined(__ANDROID_API__) || defined(ANDROID)
   426  #include <android/log.h>
   427  #if __ANDROID_API__ >= 21
   428  #include <sys/sendfile.h>
   429  #endif
   430  #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS != MDBX_WORDBITS
   431  #error "_FILE_OFFSET_BITS != MDBX_WORDBITS" (_FILE_OFFSET_BITS != MDBX_WORDBITS)
   432  #elif defined(__FILE_OFFSET_BITS) && __FILE_OFFSET_BITS != MDBX_WORDBITS
   433  #error "__FILE_OFFSET_BITS != MDBX_WORDBITS" (__FILE_OFFSET_BITS != MDBX_WORDBITS)
   434  #endif
   435  #endif /* Android */
   436  
   437  #if defined(HAVE_SYS_STAT_H) || __has_include(<sys/stat.h>)
   438  #include <sys/stat.h>
   439  #endif
   440  #if defined(HAVE_SYS_TYPES_H) || __has_include(<sys/types.h>)
   441  #include <sys/types.h>
   442  #endif
   443  #if defined(HAVE_SYS_FILE_H) || __has_include(<sys/file.h>)
   444  #include <sys/file.h>
   445  #endif
   446  
   447  /*----------------------------------------------------------------------------*/
   448  /* Byteorder */
   449  
   450  #if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
   451      defined(i486) || defined(__i486) || defined(__i486__) ||                   \
   452      defined(i586) | defined(__i586) || defined(__i586__) || defined(i686) ||   \
   453      defined(__i686) || defined(__i686__) || defined(_M_IX86) ||                \
   454      defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) ||            \
   455      defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) ||          \
   456      defined(__amd64__) || defined(__amd64) || defined(_M_X64) ||               \
   457      defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
   458  #ifndef __ia32__
   459  /* LY: define neutral __ia32__ for x86 and x86-64 */
   460  #define __ia32__ 1
   461  #endif /* __ia32__ */
   462  #if !defined(__amd64__) &&                                                     \
   463      (defined(__x86_64) || defined(__x86_64__) || defined(__amd64) ||           \
   464       defined(_M_X64) || defined(_M_AMD64))
   465  /* LY: define trusty __amd64__ for all AMD64/x86-64 arch */
   466  #define __amd64__ 1
   467  #endif /* __amd64__ */
   468  #endif /* all x86 */
   469  
   470  #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) ||           \
   471      !defined(__ORDER_BIG_ENDIAN__)
   472  
   473  #if defined(__GLIBC__) || defined(__GNU_LIBRARY__) ||                          \
   474      defined(__ANDROID_API__) || defined(HAVE_ENDIAN_H) || __has_include(<endian.h>)
   475  #include <endian.h>
   476  #elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) ||       \
   477      defined(HAVE_MACHINE_ENDIAN_H) || __has_include(<machine/endian.h>)
   478  #include <machine/endian.h>
   479  #elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>)
   480  #include <sys/isa_defs.h>
   481  #elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) ||             \
   482      (__has_include(<sys/types.h>) && __has_include(<sys/endian.h>))
   483  #include <sys/endian.h>
   484  #include <sys/types.h>
   485  #elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) ||   \
   486      defined(__NetBSD__) || defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>)
   487  #include <sys/param.h>
   488  #endif /* OS */
   489  
   490  #if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
   491  #define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
   492  #define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
   493  #define __BYTE_ORDER__ __BYTE_ORDER
   494  #elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
   495  #define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN
   496  #define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN
   497  #define __BYTE_ORDER__ _BYTE_ORDER
   498  #else
   499  #define __ORDER_LITTLE_ENDIAN__ 1234
   500  #define __ORDER_BIG_ENDIAN__ 4321
   501  
   502  #if defined(__LITTLE_ENDIAN__) ||                                              \
   503      (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) ||                      \
   504      defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) ||    \
   505      defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) ||            \
   506      defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) ||                \
   507      defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) ||   \
   508      defined(__BFIN__) || defined(__ia64__) || defined(_IA64) ||                \
   509      defined(__IA64__) || defined(__ia64) || defined(_M_IA64) ||                \
   510      defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) ||        \
   511      defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) ||              \
   512      defined(__WINDOWS__)
   513  #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
   514  
   515  #elif defined(__BIG_ENDIAN__) ||                                               \
   516      (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) ||                      \
   517      defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) ||    \
   518      defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) ||            \
   519      defined(__m68k__) || defined(M68000) || defined(__hppa__) ||               \
   520      defined(__hppa) || defined(__HPPA__) || defined(__sparc__) ||              \
   521      defined(__sparc) || defined(__370__) || defined(__THW_370__) ||            \
   522      defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__)
   523  #define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__
   524  
   525  #else
   526  #error __BYTE_ORDER__ should be defined.
   527  #endif /* Arch */
   528  
   529  #endif
   530  #endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */
   531  
   532  /*----------------------------------------------------------------------------*/
   533  /* Availability of CMOV or equivalent */
   534  
   535  #ifndef MDBX_HAVE_CMOV
   536  #if defined(__e2k__)
   537  #define MDBX_HAVE_CMOV 1
   538  #elif defined(__thumb2__) || defined(__thumb2)
   539  #define MDBX_HAVE_CMOV 1
   540  #elif defined(__thumb__) || defined(__thumb) || defined(__TARGET_ARCH_THUMB)
   541  #define MDBX_HAVE_CMOV 0
   542  #elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) ||          \
   543      defined(__aarch64) || defined(__arm__) || defined(__arm) ||                \
   544      defined(__CC_ARM)
   545  #define MDBX_HAVE_CMOV 1
   546  #elif (defined(__riscv__) || defined(__riscv64)) &&                            \
   547      (defined(__riscv_b) || defined(__riscv_bitmanip))
   548  #define MDBX_HAVE_CMOV 1
   549  #elif defined(i686) || defined(__i686) || defined(__i686__) ||                 \
   550      (defined(_M_IX86) && _M_IX86 > 600) || defined(__x86_64) ||                \
   551      defined(__x86_64__) || defined(__amd64__) || defined(__amd64) ||           \
   552      defined(_M_X64) || defined(_M_AMD64)
   553  #define MDBX_HAVE_CMOV 1
   554  #else
   555  #define MDBX_HAVE_CMOV 0
   556  #endif
   557  #endif /* MDBX_HAVE_CMOV */
   558  
   559  /*----------------------------------------------------------------------------*/
   560  /* Compiler's includes for builtins/intrinsics */
   561  
   562  #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
   563  #include <intrin.h>
   564  #elif __GNUC_PREREQ(4, 4) || defined(__clang__)
   565  #if defined(__e2k__)
   566  #include <e2kintrin.h>
   567  #include <x86intrin.h>
   568  #endif /* __e2k__ */
   569  #if defined(__ia32__)
   570  #include <cpuid.h>
   571  #include <x86intrin.h>
   572  #endif /* __ia32__ */
   573  #ifdef __ARM_NEON
   574  #include <arm_neon.h>
   575  #endif
   576  #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
   577  #include <mbarrier.h>
   578  #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) &&       \
   579      (defined(HP_IA64) || defined(__ia64))
   580  #include <machine/sys/inline.h>
   581  #elif defined(__IBMC__) && defined(__powerpc)
   582  #include <atomic.h>
   583  #elif defined(_AIX)
   584  #include <builtins.h>
   585  #include <sys/atomic_op.h>
   586  #elif (defined(__osf__) && defined(__DECC)) || defined(__alpha)
   587  #include <c_asm.h>
   588  #include <machine/builtins.h>
   589  #elif defined(__MWERKS__)
   590  /* CodeWarrior - troubles ? */
   591  #pragma gcc_extensions
   592  #elif defined(__SNC__)
   593  /* Sony PS3 - troubles ? */
   594  #elif defined(__hppa__) || defined(__hppa)
   595  #include <machine/inline.h>
   596  #else
   597  #error Unsupported C compiler, please use GNU C 4.4 or newer
   598  #endif /* Compiler */
   599  
   600  #if !defined(__noop) && !defined(_MSC_VER)
   601  #define __noop                                                                 \
   602    do {                                                                         \
   603    } while (0)
   604  #endif /* __noop */
   605  
   606  #if defined(__fallthrough) &&                                                  \
   607      (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__))
   608  #undef __fallthrough
   609  #endif /* __fallthrough workaround for MinGW */
   610  
   611  #ifndef __fallthrough
   612  #if defined(__cplusplus) && (__has_cpp_attribute(fallthrough) &&               \
   613                               (!defined(__clang__) || __clang__ > 4)) ||        \
   614      __cplusplus >= 201703L
   615  #define __fallthrough [[fallthrough]]
   616  #elif __GNUC_PREREQ(8, 0) && defined(__cplusplus) && __cplusplus >= 201103L
   617  #define __fallthrough [[fallthrough]]
   618  #elif __GNUC_PREREQ(7, 0) &&                                                   \
   619      (!defined(__LCC__) || (__LCC__ == 124 && __LCC_MINOR__ >= 12) ||           \
   620       (__LCC__ == 125 && __LCC_MINOR__ >= 5) || (__LCC__ >= 126))
   621  #define __fallthrough __attribute__((__fallthrough__))
   622  #elif defined(__clang__) && defined(__cplusplus) && __cplusplus >= 201103L &&  \
   623      __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
   624  #define __fallthrough [[clang::fallthrough]]
   625  #else
   626  #define __fallthrough
   627  #endif
   628  #endif /* __fallthrough */
   629  
   630  #ifndef __unreachable
   631  #if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable)
   632  #define __unreachable() __builtin_unreachable()
   633  #elif defined(_MSC_VER)
   634  #define __unreachable() __assume(0)
   635  #else
   636  #define __unreachable()                                                        \
   637    do {                                                                         \
   638    } while (1)
   639  #endif
   640  #endif /* __unreachable */
   641  
   642  #ifndef __prefetch
   643  #if defined(__GNUC__) || defined(__clang__) || __has_builtin(__builtin_prefetch)
   644  #define __prefetch(ptr) __builtin_prefetch(ptr)
   645  #else
   646  #define __prefetch(ptr)                                                        \
   647    do {                                                                         \
   648      (void)(ptr);                                                               \
   649    } while (0)
   650  #endif
   651  #endif /* __prefetch */
   652  
   653  #ifndef offsetof
   654  #define offsetof(type, member) __builtin_offsetof(type, member)
   655  #endif /* offsetof */
   656  
   657  #ifndef container_of
   658  #define container_of(ptr, type, member)                                        \
   659    ((type *)((char *)(ptr)-offsetof(type, member)))
   660  #endif /* container_of */
   661  
   662  /*----------------------------------------------------------------------------*/
   663  
   664  #ifndef __always_inline
   665  #if defined(__GNUC__) || __has_attribute(__always_inline__)
   666  #define __always_inline __inline __attribute__((__always_inline__))
   667  #elif defined(_MSC_VER)
   668  #define __always_inline __forceinline
   669  #else
   670  #define __always_inline
   671  #endif
   672  #endif /* __always_inline */
   673  
   674  #ifndef __noinline
   675  #if defined(__GNUC__) || __has_attribute(__noinline__)
   676  #define __noinline __attribute__((__noinline__))
   677  #elif defined(_MSC_VER)
   678  #define __noinline __declspec(noinline)
   679  #else
   680  #define __noinline
   681  #endif
   682  #endif /* __noinline */
   683  
   684  #ifndef __must_check_result
   685  #if defined(__GNUC__) || __has_attribute(__warn_unused_result__)
   686  #define __must_check_result __attribute__((__warn_unused_result__))
   687  #else
   688  #define __must_check_result
   689  #endif
   690  #endif /* __must_check_result */
   691  
   692  #ifndef __nothrow
   693  #if defined(__cplusplus)
   694  #if __cplusplus < 201703L
   695  #define __nothrow throw()
   696  #else
   697  #define __nothrow noexcept(true)
   698  #endif /* __cplusplus */
   699  #elif defined(__GNUC__) || __has_attribute(__nothrow__)
   700  #define __nothrow __attribute__((__nothrow__))
   701  #elif defined(_MSC_VER) && defined(__cplusplus)
   702  #define __nothrow __declspec(nothrow)
   703  #else
   704  #define __nothrow
   705  #endif
   706  #endif /* __nothrow */
   707  
   708  #ifndef __hidden
   709  #if defined(__GNUC__) || __has_attribute(__visibility__)
   710  #define __hidden __attribute__((__visibility__("hidden")))
   711  #else
   712  #define __hidden
   713  #endif
   714  #endif /* __hidden */
   715  
   716  #ifndef __optimize
   717  #if defined(__OPTIMIZE__)
   718  #if (defined(__GNUC__) && !defined(__clang__)) || __has_attribute(__optimize__)
   719  #define __optimize(ops) __attribute__((__optimize__(ops)))
   720  #else
   721  #define __optimize(ops)
   722  #endif
   723  #else
   724  #define __optimize(ops)
   725  #endif
   726  #endif /* __optimize */
   727  
   728  #ifndef __hot
        /* __hot: marks frequently executed functions.
         * Only active for optimized builds (__OPTIMIZE__ defined); otherwise it
         * expands to nothing. Selection order:
         *  - Elbrus (__e2k__): hot attribute plus __optimize(3);
         *  - clang on Linux WITHOUT the hot attribute but with the section
         *    attribute: fall back to placing the function in a dedicated section;
         *  - LCC: hot attribute combined with its own optimize options;
         *  - GCC or any compiler reporting the hot attribute: hot + __optimize("O3");
         *  - otherwise: just __optimize("O3").
         * The clang fallback must test `__hot__` (with both trailing underscores):
         * a misspelled attribute name is never reported as supported, which would
         * force the section fallback even when the real attribute is available. */
   729  #if defined(__OPTIMIZE__)
   730  #if defined(__e2k__)
   731  #define __hot __attribute__((__hot__)) __optimize(3)
   732  #elif defined(__clang__) && !__has_attribute(__hot__) &&                       \
   733      __has_attribute(__section__) &&                                            \
   734      (defined(__linux__) || defined(__gnu_linux__))
   735  /* just put frequently used functions in separate section */
   736  #define __hot __attribute__((__section__("text.hot"))) __optimize("O3")
   737  #elif defined(__LCC__)
   738  #define __hot __attribute__((__hot__, __optimize__("Ofast,O4")))
   739  #elif defined(__GNUC__) || __has_attribute(__hot__)
   740  #define __hot __attribute__((__hot__)) __optimize("O3")
   741  #else
   742  #define __hot __optimize("O3")
   743  #endif
   744  #else
   745  #define __hot
   746  #endif
   747  #endif /* __hot */
   748  
   749  #ifndef __cold
        /* __cold: marks rarely executed functions.
         * Only active for optimized builds (__OPTIMIZE__ defined); otherwise it
         * expands to nothing. Selection order:
         *  - Elbrus (__e2k__): cold attribute plus __optimize(1);
         *  - clang on Linux without the cold attribute but with the section
         *    attribute: place the function in a dedicated section;
         *  - LCC: cold attribute combined with its own optimize option;
         *  - GCC or any compiler reporting the cold attribute: cold + __optimize("Os");
         *  - otherwise: just __optimize("Os").
         * Every branch must define __cold itself: defining another macro here
         * would leave __cold undefined and clash with that macro's definition. */
   750  #if defined(__OPTIMIZE__)
   751  #if defined(__e2k__)
   752  #define __cold __attribute__((__cold__)) __optimize(1)
   753  #elif defined(__clang__) && !__has_attribute(cold) &&                          \
   754      __has_attribute(__section__) &&                                            \
   755      (defined(__linux__) || defined(__gnu_linux__))
   756  /* just put infrequently used functions in separate section */
   757  #define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os")
   758  #elif defined(__LCC__)
   759  #define __cold __attribute__((__cold__, __optimize__("Osize")))
   760  #elif defined(__GNUC__) || __has_attribute(cold)
   761  #define __cold __attribute__((__cold__)) __optimize("Os")
   762  #else
   763  #define __cold __optimize("Os")
   764  #endif
   765  #else
   766  #define __cold
   767  #endif
   768  #endif /* __cold */
   769  
   770  #ifndef __flatten
        /* __flatten: applies the flatten function attribute, but only in
         * optimized builds (__OPTIMIZE__ defined) on compilers that provide it;
         * expands to nothing otherwise. */
   771  #if defined(__OPTIMIZE__) && (defined(__GNUC__) || __has_attribute(__flatten__))
   772  #define __flatten __attribute__((__flatten__))
   773  #else
   774  #define __flatten
   775  #endif
   776  #endif /* __flatten */
   777  
   778  #ifndef likely
        /* likely(cond): branch-prediction hint that `cond` is usually true.
         * The condition is normalized to 0/1 with `!!` in both variants, so the
         * macro always yields a boolean-like int. The builtin is not used under
         * Coverity (__COVERITY__ defined). */
   779  #if (defined(__GNUC__) || __has_builtin(__builtin_expect)) &&                  \
   780      !defined(__COVERITY__)
   781  #define likely(cond) __builtin_expect(!!(cond), 1)
   782  #else
   783  #define likely(x) (!!(x))
   784  #endif
   785  #endif /* likely */
   786  
   787  #ifndef unlikely
        /* unlikely(cond): branch-prediction hint that `cond` is usually false.
         * The condition is normalized to 0/1 with `!!` in both variants. The
         * builtin is not used under Coverity (__COVERITY__ defined). */
   788  #if (defined(__GNUC__) || __has_builtin(__builtin_expect)) &&                  \
   789      !defined(__COVERITY__)
   790  #define unlikely(cond) __builtin_expect(!!(cond), 0)
   791  #else
   792  #define unlikely(x) (!!(x))
   793  #endif
   794  #endif /* unlikely */
   795  
   796  #ifndef __anonymous_struct_extension__
        /* Prefix for anonymous struct members: expands to GCC's __extension__
         * keyword on GCC-compatible compilers and to nothing elsewhere. */
   797  #if defined(__GNUC__)
   798  #define __anonymous_struct_extension__ __extension__
   799  #else
   800  #define __anonymous_struct_extension__
   801  #endif
   802  #endif /* __anonymous_struct_extension__ */
   803  
   804  #ifndef expect_with_probability
        /* expect_with_probability(expr, value, prob): forwards to
         * __builtin_expect_with_probability when it is available (reported by
         * the compiler or GCC >= 9.0); otherwise evaluates to plain `(expr)` and
         * the `value`/`prob` arguments are dropped without being evaluated. */
   805  #if defined(__builtin_expect_with_probability) ||                              \
   806      __has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0)
   807  #define expect_with_probability(expr, value, prob)                             \
   808    __builtin_expect_with_probability(expr, value, prob)
   809  #else
   810  #define expect_with_probability(expr, value, prob) (expr)
   811  #endif
   812  #endif /* expect_with_probability */
   813  
   814  #ifndef MDBX_WEAK_IMPORT_ATTRIBUTE
        /* Attribute for weakly imported symbols. Preference order:
         *  1. an externally supplied WEAK_IMPORT_ATTRIBUTE macro;
         *  2. both `weak` and `weak_import` attributes when the compiler has both;
         *  3. just `weak` when available (or GCC >= 4 targeting ELF);
         *  4. nothing. */
   815  #ifdef WEAK_IMPORT_ATTRIBUTE
   816  #define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE
   817  #elif __has_attribute(__weak__) && __has_attribute(__weak_import__)
   818  #define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__, __weak_import__))
   819  #elif __has_attribute(__weak__) ||                                             \
   820      (defined(__GNUC__) && __GNUC__ >= 4 && defined(__ELF__))
   821  #define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__))
   822  #else
   823  #define MDBX_WEAK_IMPORT_ATTRIBUTE
   824  #endif
   825  #endif /* MDBX_WEAK_IMPORT_ATTRIBUTE */
   826  
   827  /*----------------------------------------------------------------------------*/
   828  
   829  #if defined(MDBX_USE_VALGRIND)
        /* Valgrind integration.
         * With MDBX_USE_VALGRIND the real client-request header is included and
         * only the two *_ADDR_ERROR_REPORTING_IN_RANGE requests are stubbed out
         * when the installed header does not provide them.
         * Without it (and unless RUNNING_ON_VALGRIND is already defined) every
         * request used by the code becomes a no-op; the CHECK_* requests and
         * RUNNING_ON_VALGRIND evaluate to 0. */
   830  #include <valgrind/memcheck.h>
   831  #ifndef VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
   832  /* LY: available since Valgrind 3.10 */
   833  #define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
   834  #define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
   835  #endif
   836  #elif !defined(RUNNING_ON_VALGRIND)
   837  #define VALGRIND_CREATE_MEMPOOL(h, r, z)
   838  #define VALGRIND_DESTROY_MEMPOOL(h)
   839  #define VALGRIND_MEMPOOL_TRIM(h, a, s)
   840  #define VALGRIND_MEMPOOL_ALLOC(h, a, s)
   841  #define VALGRIND_MEMPOOL_FREE(h, a)
   842  #define VALGRIND_MEMPOOL_CHANGE(h, a, b, s)
   843  #define VALGRIND_MAKE_MEM_NOACCESS(a, s)
   844  #define VALGRIND_MAKE_MEM_DEFINED(a, s)
   845  #define VALGRIND_MAKE_MEM_UNDEFINED(a, s)
   846  #define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
   847  #define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
   848  #define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a, s) (0)
   849  #define VALGRIND_CHECK_MEM_IS_DEFINED(a, s) (0)
   850  #define RUNNING_ON_VALGRIND (0)
   851  #endif /* MDBX_USE_VALGRIND */
   852  
   853  #ifdef __SANITIZE_ADDRESS__
        /* AddressSanitizer integration: use the real poisoning interface when the
         * build is instrumented; otherwise provide stand-ins that only evaluate
         * their arguments (cast to void) and do nothing else. */
   854  #include <sanitizer/asan_interface.h>
   855  #elif !defined(ASAN_POISON_MEMORY_REGION)
   856  #define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
   857  #define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
   858  #endif /* __SANITIZE_ADDRESS__ */
   859  
   860  /*----------------------------------------------------------------------------*/
   861  
   862  #ifndef ARRAY_LENGTH
        /* ARRAY_LENGTH(array): number of elements of a true array.
         * The C++ variant goes through a helper template that only accepts array
         * references; the C variant divides sizeof the array by sizeof its first
         * element.
         * NOTE(review): in the C variant the argument is substituted without
         * parentheses, so pass only a plain array name or member expression. */
   863  #ifdef __cplusplus
   864  template <typename T, size_t N> char (&__ArraySizeHelper(T (&array)[N]))[N];
   865  #define ARRAY_LENGTH(array) (sizeof(::__ArraySizeHelper(array)))
   866  #else
   867  #define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0]))
   868  #endif
   869  #endif /* ARRAY_LENGTH */
   870  
   871  #ifndef ARRAY_END
        /* ARRAY_END(array): address one past the last element of the array.
         * The argument is substituted without parentheses here as well. */
   872  #define ARRAY_END(array) (&array[ARRAY_LENGTH(array)])
   873  #endif /* ARRAY_END */
   874  
   875  #define CONCAT(a, b) a##b
        /* CONCAT pastes its arguments as written; XCONCAT adds one level of
         * indirection so that macro arguments are expanded before pasting. */
   876  #define XCONCAT(a, b) CONCAT(a, b)
   877  
        /* MDBX_TETRAD(a, b, c, d): packs four values into one 32-bit word with
         * `a` in bits 31..24, `b` in 23..16, `c` in 15..8 and `d` in the lowest
         * bits. Only the first three arguments are cast to uint32_t. */
   878  #define MDBX_TETRAD(a, b, c, d)                                                \
   879    ((uint32_t)(a) << 24 | (uint32_t)(b) << 16 | (uint32_t)(c) << 8 | (d))
   880  
        /* MDBX_STRING_TETRAD(str): packs the first four characters of `str`.
         * The argument is substituted without parentheses and must provide at
         * least four elements. */
   881  #define MDBX_STRING_TETRAD(str) MDBX_TETRAD(str[0], str[1], str[2], str[3])
   882  
        /* FIXME: string literal "FIXME: <file>, <line>" for the place of use. */
   883  #define FIXME "FIXME: " __FILE__ ", " MDBX_STRINGIFY(__LINE__)
   884  
   885  #ifndef STATIC_ASSERT_MSG
        /* STATIC_ASSERT_MSG(expr, msg): compile-time assertion.
         * Picks, in order: the `static_assert` macro, an existing _STATIC_ASSERT
         * macro, MSVC's _STATIC_ASSERT from <crtdbg.h>, C11 _Static_assert, and
         * finally a `switch` with duplicate `case` labels, which fails to
         * compile when `expr` is 0. The _STATIC_ASSERT variants and the switch
         * fallback ignore `msg`; the switch fallback is a statement and thus is
         * usable only inside a function body. */
   886  #if defined(static_assert)
   887  #define STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)
   888  #elif defined(_STATIC_ASSERT)
   889  #define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr)
   890  #elif defined(_MSC_VER)
   891  #include <crtdbg.h>
   892  #define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr)
   893  #elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) ||            \
   894      __has_feature(c_static_assert)
   895  #define STATIC_ASSERT_MSG(expr, msg) _Static_assert(expr, msg)
   896  #else
   897  #define STATIC_ASSERT_MSG(expr, msg)                                           \
   898    switch (0) {                                                                 \
   899    case 0:                                                                      \
   900    case (expr):;                                                                \
   901    }
   902  #endif
   903  #endif /* STATIC_ASSERT_MSG */
   904  
   905  #ifndef STATIC_ASSERT
        /* STATIC_ASSERT(expr): STATIC_ASSERT_MSG with the stringified expression
         * used as the message. */
   906  #define STATIC_ASSERT(expr) STATIC_ASSERT_MSG(expr, #expr)
   907  #endif
   908  
   909  #ifndef __Wpedantic_format_voidptr
        /* Identity helper that converts any object pointer to `const void *` by
         * passing it through a typed parameter. Judging by the name it is meant
         * for "%p" arguments under -Wpedantic -- presumably; confirm at call
         * sites. The same-named macro below lets `#ifndef` detect the helper. */
   910  MDBX_MAYBE_UNUSED MDBX_PURE_FUNCTION static __inline const void *
   911  __Wpedantic_format_voidptr(const void *ptr) {
   912    return ptr;
   913  }
   914  #define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG)
   915  #endif /* __Wpedantic_format_voidptr */
   916  
   917  #if defined(__GNUC__) && !__GNUC_PREREQ(4, 2)
   918  /* libmdbx has not been tested with compilers older than GCC 4.2.
   919   * You may ignore this warning at your own risk, but in that case
   920   * please do not raise issues that relate ONLY to old compilers.
   921   */
   922  #warning "libmdbx required GCC >= 4.2"
   923  #endif
   924  
   925  #if defined(__clang__) && !__CLANG_PREREQ(3, 8)
   926  /* libmdbx has not been tested with CLANG older than 3.8.
   927   * You may ignore this warning at your own risk, but in that case
   928   * please do not raise issues that relate ONLY to old compilers.
   929   */
   930  #warning "libmdbx required CLANG >= 3.8"
   931  #endif
   932  
   933  #if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12)
   934  /* libmdbx has not been tested with anything older than glibc 2.12.
   935   * You may ignore this warning at your own risk, but in that case
   936   * please do not raise issues that relate ONLY to old systems.
   937   */
   938  #warning "libmdbx was only tested with GLIBC >= 2.12."
   939  #endif
   940  
   941  #ifdef __SANITIZE_THREAD__
   942  #warning                                                                       \
   943      "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues."
   944  #endif /* __SANITIZE_THREAD__ */
   945  
   946  #if __has_warning("-Wnested-anon-types")
   947  #if defined(__clang__)
   948  #pragma clang diagnostic ignored "-Wnested-anon-types"
   949  #elif defined(__GNUC__)
   950  #pragma GCC diagnostic ignored "-Wnested-anon-types"
   951  #else
   952  #pragma warning disable "nested-anon-types"
   953  #endif
   954  #endif /* -Wnested-anon-types */
   955  
   956  #if __has_warning("-Wconstant-logical-operand")
   957  #if defined(__clang__)
   958  #pragma clang diagnostic ignored "-Wconstant-logical-operand"
   959  #elif defined(__GNUC__)
   960  #pragma GCC diagnostic ignored "-Wconstant-logical-operand"
   961  #else
   962  #pragma warning disable "constant-logical-operand"
   963  #endif
   964  #endif /* -Wconstant-logical-operand */
   965  
   966  #if defined(__LCC__) && (__LCC__ <= 121)
   967  /* bug #2798 */
   968  #pragma diag_suppress alignment_reduction_ignored
   969  #elif defined(__ICC)
   970  #pragma warning(disable : 3453 1366)
   971  #elif __has_warning("-Walignment-reduction-ignored")
   972  #if defined(__clang__)
   973  #pragma clang diagnostic ignored "-Walignment-reduction-ignored"
   974  #elif defined(__GNUC__)
   975  #pragma GCC diagnostic ignored "-Walignment-reduction-ignored"
   976  #else
   977  #pragma warning disable "alignment-reduction-ignored"
   978  #endif
   979  #endif /* -Walignment-reduction-ignored */
   980  
   981  #ifndef MDBX_EXCLUDE_FOR_GPROF
        /* MDBX_EXCLUDE_FOR_GPROF: when ENABLE_GPROF is defined, attaches the
         * no_instrument_function and no_profile_instrument_function attributes
         * to a function; expands to nothing otherwise. */
   982  #ifdef ENABLE_GPROF
   983  #define MDBX_EXCLUDE_FOR_GPROF                                                 \
   984    __attribute__((__no_instrument_function__,                                   \
   985                   __no_profile_instrument_function__))
   986  #else
   987  #define MDBX_EXCLUDE_FOR_GPROF
   988  #endif /* ENABLE_GPROF */
   989  #endif /* MDBX_EXCLUDE_FOR_GPROF */
   990  
   991  #ifdef __cplusplus
   992  extern "C" {
   993  #endif
   994  
   995  /* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
   996  
   997  /*
   998   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
   999   * and other libmdbx authors: please see AUTHORS file.
  1000   * All rights reserved.
  1001   *
  1002   * Redistribution and use in source and binary forms, with or without
  1003   * modification, are permitted only as authorized by the OpenLDAP
  1004   * Public License.
  1005   *
  1006   * A copy of this license is available in the file LICENSE in the
  1007   * top-level directory of the distribution or, alternatively, at
  1008   * <http://www.OpenLDAP.org/license.html>.
  1009   */
  1010  
  1011  
  1012  /*----------------------------------------------------------------------------*/
  1013  /* C11 Atomics */
  1014  
  1015  #if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
        /* Selects the atomics backend and defines MDBX_HAVE_C11ATOMICS when a
         * standard atomics header is used:
         *  - C++ with a <cstdatomic> header present;
         *  - C11 (or the c_atomic extension) without __STDC_NO_ATOMICS__, on
         *    GCC >= 4.9, clang >= 3.8 or any non-GNU compiler: <stdatomic.h>;
         *  - other GCC/clang: nothing is included here (presumably compiler
         *    builtins are used by later code -- confirm);
         *  - MSVC: Interlocked intrinsics, with related warnings silenced;
         *  - Apple: <libkern/OSAtomic.h>;
         *  - anything else is a hard error.
         * NOTE(review): the standard C++ header is <atomic>; <cstdatomic> looks
         * like a pre-standard name, so the first branch is probably rarely
         * taken -- confirm. */
  1016  #include <cstdatomic>
  1017  #define MDBX_HAVE_C11ATOMICS
  1018  #elif !defined(__cplusplus) &&                                                 \
  1019      (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) &&              \
  1020      !defined(__STDC_NO_ATOMICS__) &&                                           \
  1021      (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) ||                            \
  1022       !(defined(__GNUC__) || defined(__clang__)))
  1023  #include <stdatomic.h>
  1024  #define MDBX_HAVE_C11ATOMICS
  1025  #elif defined(__GNUC__) || defined(__clang__)
  1026  #elif defined(_MSC_VER)
  1027  #pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
  1028  #pragma warning(disable : 4133) /* 'function': incompatible types - from       \
  1029                                     'size_t' to 'LONGLONG' */
  1030  #pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to     \
  1031                                     'std::size_t', possible loss of data */
  1032  #pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to     \
  1033                                     'long', possible loss of data */
  1034  #pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
  1035  #pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
  1036  #elif defined(__APPLE__)
  1037  #include <libkern/OSAtomic.h>
  1038  #else
  1039  #error FIXME atomic-ops
  1040  #endif
  1041  
  1042  /*----------------------------------------------------------------------------*/
  1043  /* Memory/Compiler barriers, cache coherence */
  1044  
  1045  #if __has_include(<sys/cachectl.h>)
  1046  #include <sys/cachectl.h>
  1047  #elif defined(__mips) || defined(__mips__) || defined(__mips64) ||             \
  1048      defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) ||            \
  1049      defined(__MWERKS__) || defined(__sgi)
  1050  /* MIPS should have explicit cache control */
  1051  #include <sys/cachectl.h>
  1052  #endif
  1053  
  1054  MDBX_MAYBE_UNUSED static __inline void osal_compiler_barrier(void) {
          /* Compiler-level barrier: emits the toolchain-specific construct that
           * stops the compiler from moving memory accesses across this point.
           * For GCC/clang this is an empty asm statement with a "memory"
           * clobber, which generates no instructions. The branch order matters
           * because some compilers also define the macros of others; an unknown
           * toolchain is a compile-time error. */
  1055  #if defined(__clang__) || defined(__GNUC__)
  1056    __asm__ __volatile__("" ::: "memory");
  1057  #elif defined(_MSC_VER)
  1058    _ReadWriteBarrier();
  1059  #elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
  1060    __memory_barrier();
  1061  #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
  1062    __compiler_barrier();
  1063  #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) &&       \
  1064      (defined(HP_IA64) || defined(__ia64))
  1065    _Asm_sched_fence(/* LY: no-arg meaning 'all expect ALU', e.g. 0x3D3D */);
  1066  #elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) ||             \
  1067      defined(__ppc64__) || defined(__powerpc64__)
  1068    __fence();
  1069  #else
  1070  #error "Could not guess the kind of compiler, please report to us."
  1071  #endif
  1072  }
  1073  
  1074  MDBX_MAYBE_UNUSED static __inline void osal_memory_barrier(void) {
          /* Memory fence. Preference order:
           *  1. C11 atomic_thread_fence with sequentially-consistent ordering
           *     (when MDBX_HAVE_C11ATOMICS is defined);
           *  2. the __ATOMIC_SEQ_CST builtins (clang and GCC spell them
           *     differently);
           *  3. the legacy __sync_synchronize() builtin;
           *  4. MemoryBarrier() on Windows;
           *  5. vendor intrinsics for Intel, Sun, HP-UX/IA64 and AIX/PowerPC.
           * An unknown toolchain is a compile-time error. */
  1075  #ifdef MDBX_HAVE_C11ATOMICS
  1076    atomic_thread_fence(memory_order_seq_cst);
  1077  #elif defined(__ATOMIC_SEQ_CST)
  1078  #ifdef __clang__
  1079    __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
  1080  #else
  1081    __atomic_thread_fence(__ATOMIC_SEQ_CST);
  1082  #endif
  1083  #elif defined(__clang__) || defined(__GNUC__)
  1084    __sync_synchronize();
  1085  #elif defined(_WIN32) || defined(_WIN64)
  1086    MemoryBarrier();
  1087  #elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
  1088  #if defined(__ia32__)
  1089    _mm_mfence();
  1090  #else
  1091    __mf();
  1092  #endif
  1093  #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
  1094    __machine_rw_barrier();
  1095  #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) &&       \
  1096      (defined(HP_IA64) || defined(__ia64))
  1097    _Asm_mf();
  1098  #elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) ||             \
  1099      defined(__ppc64__) || defined(__powerpc64__)
  1100    __lwsync();
  1101  #else
  1102  #error "Could not guess the kind of compiler, please report to us."
  1103  #endif
  1104  }
  1105  
  1106  /*----------------------------------------------------------------------------*/
  1107  /* system-depended definitions */
  1108  
  1109  #if defined(_WIN32) || defined(_WIN64)
  1110  #define HAVE_SYS_STAT_H
        /* Windows flavour of the OS abstraction types. */
  1111  #define HAVE_SYS_TYPES_H
  1112  typedef HANDLE osal_thread_t;
  1113  typedef unsigned osal_thread_key_t;
        /* POSIX-style failure marker for mapping calls; on Windows it is NULL. */
  1114  #define MAP_FAILED NULL
        /* HIGH_DWORD(v): upper 32 bits of `v` as a DWORD, or 0 when the type of
         * `v` is not wider than 4 bytes. */
  1115  #define HIGH_DWORD(v) ((DWORD)((sizeof(v) > 4) ? ((uint64_t)(v) >> 32) : 0))
        /* Calling convention and result type of a thread start routine. */
  1116  #define THREAD_CALL WINAPI
  1117  #define THREAD_RESULT DWORD
        /* Condition pair: one mutex handle plus two event handles. */
  1118  typedef struct {
  1119    HANDLE mutex;
  1120    HANDLE event[2];
  1121  } osal_condpair_t;
  1122  typedef CRITICAL_SECTION osal_fastmutex_t;
  1123  
  1124  #if !defined(_MSC_VER) && !defined(__try)
        /* Non-MSVC Windows compilers: `__try` expands to nothing, so the guarded
         * block runs unconditionally, and `__except(...)` becomes `if (false)`,
         * so the handler block is compiled but never executed. */
  1125  #define __try
  1126  #define __except(COND) if (false)
  1127  #endif /* stub for MSVC's __try/__except */
  1128  
  1129  #if MDBX_WITHOUT_MSVC_CRT
  1130  
  1131  #ifndef osal_malloc
  1132  static inline void *osal_malloc(size_t bytes) {
          /* CRT-free allocation: take `bytes` bytes from the process heap.
           * Returns whatever HeapAlloc returns. */
          HANDLE const process_heap = GetProcessHeap();
  1133    return HeapAlloc(process_heap, 0, bytes);
  1134  }
  1135  #endif /* osal_malloc */
  1136  
  1137  #ifndef osal_calloc
  1138  static inline void *osal_calloc(size_t nelem, size_t size) {
          /* CRT-free calloc: allocates a zero-filled array of `nelem` elements
           * of `size` bytes each from the process heap.
           * Returns NULL when the allocation fails or when `nelem * size` does
           * not fit in size_t. Without that check the product would wrap around
           * and a too-small block would be returned for a huge request. */
          if (size != 0 && nelem > (size_t)-1 / size)
            return NULL;
  1139    return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, nelem * size);
  1140  }
  1141  #endif /* osal_calloc */
  1142  
  1143  #ifndef osal_realloc
  1144  static inline void *osal_realloc(void *ptr, size_t bytes) {
          /* CRT-free realloc: a NULL `ptr` is served as a fresh allocation,
           * anything else is resized through HeapReAlloc. */
          HANDLE const process_heap = GetProcessHeap();
  1145    if (!ptr)
  1146      return HeapAlloc(process_heap, 0, bytes);
          return HeapReAlloc(process_heap, 0, ptr, bytes);
  1147  }
  1148  #endif /* osal_realloc */
  1149  
  1150  #ifndef osal_free
  1151  static inline void osal_free(void *ptr) {
          /* CRT-free release: hand the block back to the process heap. */
          HANDLE const process_heap = GetProcessHeap();
          HeapFree(process_heap, 0, ptr);
        }
  1152  #endif /* osal_free */
  1153  
  1154  #else /* MDBX_WITHOUT_MSVC_CRT */
  1155  
  1156  #define osal_malloc malloc
        /* With the MSVC CRT available the osal_* allocation names are plain
         * aliases of the CRT functions; strdup maps to the CRT's _strdup. */
  1157  #define osal_calloc calloc
  1158  #define osal_realloc realloc
  1159  #define osal_free free
  1160  #define osal_strdup _strdup
  1161  
  1162  #endif /* MDBX_WITHOUT_MSVC_CRT */
  1163  
  1164  #ifndef snprintf
        /* Windows: map snprintf/vsnprintf to the underscore-prefixed variants
         * unless they already exist as macros.
         * NOTE(review): _snprintf/_vsnprintf are documented to differ from the
         * C99 functions on truncation (return value and NUL termination) --
         * confirm that callers do not rely on C99 behaviour. */
  1165  #define snprintf _snprintf /* ntdll */
  1166  #endif
  1167  
  1168  #ifndef vsnprintf
  1169  #define vsnprintf _vsnprintf /* ntdll */
  1170  #endif
  1171  
  1172  MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
  1173                                      size_t src_n);
  1174  
  1175  #else /*----------------------------------------------------------------------*/
  1176  
  1177  typedef pthread_t osal_thread_t;
        /* POSIX flavour of the OS abstraction types: thin aliases of pthread
         * types and of the libc allocation functions. */
  1178  typedef pthread_key_t osal_thread_key_t;
        /* Windows-style marker of an invalid handle, defined here as -1. */
  1179  #define INVALID_HANDLE_VALUE (-1)
        /* Calling convention (none) and result type of a thread start routine. */
  1180  #define THREAD_CALL
  1181  #define THREAD_RESULT void *
        /* Condition pair: one mutex plus two condition variables. */
  1182  typedef struct {
  1183    pthread_mutex_t mutex;
  1184    pthread_cond_t cond[2];
  1185  } osal_condpair_t;
  1186  typedef pthread_mutex_t osal_fastmutex_t;
  1187  #define osal_malloc malloc
  1188  #define osal_calloc calloc
  1189  #define osal_realloc realloc
  1190  #define osal_free free
  1191  #define osal_strdup strdup
  1192  #endif /* Platform */
  1193  
  1194  #if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size)
        /* Provides malloc_usable_size() where the platform spells it
         * differently: malloc_size() on Apple and _msize() with the MSVC CRT.
         * On other platforms not listed here the name stays undefined. */
  1195  /* malloc_usable_size() already provided */
  1196  #elif defined(__APPLE__)
  1197  #define malloc_usable_size(ptr) malloc_size(ptr)
  1198  #elif defined(_MSC_VER) && !MDBX_WITHOUT_MSVC_CRT
  1199  #define malloc_usable_size(ptr) _msize(ptr)
  1200  #endif /* malloc_usable_size */
  1201  
  1202  /*----------------------------------------------------------------------------*/
  1203  /* OS abstraction layer stuff */
  1204  
  1205  /* Get the size of a memory page for the system.
  1206   * This is the basic size that the platform's memory manager uses, and is
  1207   * fundamental to the use of memory-mapped files. */
  1208  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
  1209  osal_syspagesize(void) {
          /* Ask the OS for its page size: GetSystemInfo() on Windows,
           * sysconf(_SC_PAGE_SIZE) elsewhere. The value is converted to size_t
           * as is, without any error checking. */
  1210  #if defined(_WIN32) || defined(_WIN64)
  1211    SYSTEM_INFO sysinfo;
  1212    GetSystemInfo(&sysinfo);
  1213    const size_t pagesize = sysinfo.dwPageSize;
  1214  #else
  1215    const size_t pagesize = sysconf(_SC_PAGE_SIZE);
  1216  #endif
          return pagesize;
  1217  }
  1218  
  1219  #if defined(_WIN32) || defined(_WIN64)
        /* Character type of file-system path names: wide characters on Windows,
         * plain `char` everywhere else. */
  1220  typedef wchar_t pathchar_t;
  1221  #else
  1222  typedef char pathchar_t;
  1223  #endif
  1224  
  1225  typedef struct osal_mmap_param {
          /* Descriptor of one memory mapping and the file behind it.
           * The anonymous union offers three typed views of the same base
           * address: untyped, byte-wise, and as a lock-info structure. */
  1226    union {
  1227      void *address;
  1228      uint8_t *dxb;
  1229      struct MDBX_lockinfo *lck;
  1230    };
  1231    mdbx_filehandle_t fd;
  1232    size_t limit;   /* mapping length, but NOT a size of file nor DB */
  1233    size_t current; /* mapped region size, i.e. the size of file and DB */
  1234    uint64_t filesize /* in-process cache of a file size */;
  1235  #if defined(_WIN32) || defined(_WIN64)
  1236    HANDLE section; /* memory-mapped section handle */
  1237  #endif
  1238  } osal_mmap_t;
  1239  
  1240  typedef union bin128 {
          /* 128-bit value that can be accessed either as two 64-bit words
           * (x, y) or as four 32-bit words (a, b, c, d) sharing the same bytes. */
  1241    __anonymous_struct_extension__ struct { uint64_t x, y; };
  1242    __anonymous_struct_extension__ struct { uint32_t a, b, c, d; };
  1243  } bin128_t;
  1244  
  1245  #if defined(_WIN32) || defined(_WIN64)
        /* Windows-only reader/writer lock storage: either a pair of volatile
         * reader/writer counters or the native RTL_SRWLOCK, overlaid in one
         * union. Presumably the counters back a fallback implementation for
         * systems without native SRW locks -- confirm against the users. */
  1246  typedef union osal_srwlock {
  1247    __anonymous_struct_extension__ struct {
  1248      long volatile readerCount;
  1249      long volatile writerCount;
  1250    };
  1251    RTL_SRWLOCK native;
  1252  } osal_srwlock_t;
  1253  #endif /* Windows */
  1254  
  1255  #ifndef __cplusplus
  1256  
  1257  /*----------------------------------------------------------------------------*/
  1258  /* libc compatibility stuff */
  1259  
  1260  #if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) &&                           \
  1261      (defined(_GNU_SOURCE) || defined(_BSD_SOURCE))
        /* Use the libc asprintf/vasprintf directly when available; otherwise
         * declare the library's own replacements.
         * NOTE(review): the condition above requires __GLIBC__ to be UNDEFINED
         * while also testing a glibc version. If __GLIBC_PREREQ evaluates to 0
         * without glibc (its definition is outside this view), the condition can
         * never hold and the replacements are always used. Possibly
         * `defined(__GLIBC__)` was intended -- confirm before changing, since
         * the definitions elsewhere must then be guarded accordingly. */
  1262  #define osal_asprintf asprintf
  1263  #define osal_vasprintf vasprintf
  1264  #else
  1265  MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC
  1266      MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...);
  1267  MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, va_list ap);
  1268  #endif
  1269  
  1270  #if !defined(MADV_DODUMP) && defined(MADV_CORE)
        /* Map the MADV_DODUMP / MADV_DONTDUMP names onto MADV_CORE /
         * MADV_NOCORE on systems that only provide the latter pair. */
  1271  #define MADV_DODUMP MADV_CORE
  1272  #endif /* MADV_CORE -> MADV_DODUMP */
  1273  
  1274  #if !defined(MADV_DONTDUMP) && defined(MADV_NOCORE)
  1275  #define MADV_DONTDUMP MADV_NOCORE
  1276  #endif /* MADV_NOCORE -> MADV_DONTDUMP */
  1277  
  1278  MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
  1279  MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
  1280  
  1281  /* max bytes to write in one call */
  1282  #if defined(_WIN32) || defined(_WIN64)
        /* Upper bound for a single write call: 0x01000000 bytes (16 MiB) on
         * Windows and 0x3fff0000 bytes (1 GiB minus 64 KiB) elsewhere. */
  1283  #define MAX_WRITE UINT32_C(0x01000000)
  1284  #else
  1285  #define MAX_WRITE UINT32_C(0x3fff0000)
  1286  #endif
  1287  
  1288  #if defined(__linux__) || defined(__gnu_linux__)
  1289  MDBX_INTERNAL_VAR uint32_t linux_kernel_version;
  1290  MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
  1291  #endif /* Linux */
  1292  
  1293  #ifndef osal_strdup
  1294  LIBMDBX_API char *osal_strdup(const char *str);
  1295  #endif
  1296  
  1297  MDBX_MAYBE_UNUSED static __inline int osal_get_errno(void) {
          /* Last system error code of the calling thread: GetLastError() on
           * Windows (converted to int), `errno` everywhere else. */
  1298  #if defined(_WIN32) || defined(_WIN64)
  1299    const DWORD last_error = GetLastError();
          return last_error;
  1300  #else
  1301    return errno;
  1302  #endif
  1304  }
  1305  
  1306  #ifndef osal_memalign_alloc
  1307  MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes,
  1308                                             void **result);
  1309  #endif
  1310  #ifndef osal_memalign_free
  1311  MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr);
  1312  #endif
  1313  
  1314  MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair);
  1315  MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair);
  1316  MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair);
  1317  MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair,
  1318                                              bool part);
  1319  MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, bool part);
  1320  MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair);
  1321  
  1322  MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex);
  1323  MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex);
  1324  MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
  1325  MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
  1326  
  1327  MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
  1328                                      int iovcnt, uint64_t offset,
  1329                                      size_t expected_written);
  1330  MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
  1331                                    uint64_t offset);
  1332  MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
  1333                                     size_t count, uint64_t offset);
  1334  MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf,
  1335                                    size_t count);
  1336  
  1337  MDBX_INTERNAL_FUNC int
  1338  osal_thread_create(osal_thread_t *thread,
  1339                     THREAD_RESULT(THREAD_CALL *start_routine)(void *),
  1340                     void *arg);
  1341  MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread);
  1342  
  1343  enum osal_syncmode_bits {
          /* Flags for the sync functions; the non-zero values are distinct bits
           * (1, 2, 4) so they can be OR-ed together. Judging by the names they
           * select data, file-size and I/O-queue flushing -- presumably;
           * confirm against the implementations. */
  1344    MDBX_SYNC_NONE = 0,
  1345    MDBX_SYNC_DATA = 1,
  1346    MDBX_SYNC_SIZE = 2,
  1347    MDBX_SYNC_IODQ = 4
  1348  };
  1349  
  1350  MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd,
  1351                                    const enum osal_syncmode_bits mode_bits);
  1352  MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length);
  1353  MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
  1354  MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
  1355  
  1356  enum osal_openfile_purpose {
          /* Intent codes passed as the first argument of osal_openfile().
           * The values are sequential (0..5), i.e. mutually exclusive choices,
           * not bit flags. Judging by the names: DXB_* open the data file in
           * different modes, LCK the lock file, COPY a copy target and DELETE a
           * file to be removed -- presumably; confirm in osal_openfile(). */
  1357    MDBX_OPEN_DXB_READ = 0,
  1358    MDBX_OPEN_DXB_LAZY = 1,
  1359    MDBX_OPEN_DXB_DSYNC = 2,
  1360    MDBX_OPEN_LCK = 3,
  1361    MDBX_OPEN_COPY = 4,
  1362    MDBX_OPEN_DELETE = 5
  1363  };
  1364  
  1365  MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
  1366                                       const MDBX_env *env,
  1367                                       const pathchar_t *pathname,
  1368                                       mdbx_filehandle_t *fd,
  1369                                       mdbx_mode_t unix_mode_bits);
  1370  MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd);
  1371  MDBX_INTERNAL_FUNC int osal_removefile(const pathchar_t *pathname);
  1372  MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname);
  1373  MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd);
  1374  MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait);
  1375  
  1376  #define MMAP_OPTION_TRUNCATE 1
        /* Bit flags (1 and 2); presumably for the `options` argument of
         * osal_mmap() -- confirm in its implementation. */
  1377  #define MMAP_OPTION_SEMAPHORE 2
  1378  MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
  1379                                   const size_t must, const size_t limit,
  1380                                   const unsigned options);
  1381  MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map);
  1382  #define MDBX_MRESIZE_MAY_MOVE 0x00000100
        /* Bit flags (0x100 and 0x200); presumably combined into the `flags`
         * argument of osal_mresize() -- confirm in its implementation. */
  1383  #define MDBX_MRESIZE_MAY_UNMAP 0x00000200
  1384  MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
  1385                                      size_t size, size_t limit);
  1386  #if defined(_WIN32) || defined(_WIN64)
  1387  typedef struct {
          /* Array of Windows handles with two counters and inline room for 31
           * entries. Used by the suspend/resume-threads functions declared
           * right after it; presumably `limit` is the capacity and `count` the
           * number of used slots -- confirm in the implementation. */
  1388    unsigned limit, count;
  1389    HANDLE handles[31];
  1390  } mdbx_handle_array_t;
  1391  MDBX_INTERNAL_FUNC int
  1392  osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
  1393  MDBX_INTERNAL_FUNC int
  1394  osal_resume_threads_after_remap(mdbx_handle_array_t *array);
  1395  #endif /* Windows */
  1396  MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
  1397                                    size_t length,
  1398                                    enum osal_syncmode_bits mode_bits);
  1399  MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
  1400                                              const pathchar_t *pathname,
  1401                                              int err);
  1402  
  1403  MDBX_MAYBE_UNUSED static __inline uint32_t osal_getpid(void) {
          /* Identifier of the current process as a 32-bit unsigned value.
           * The static assertions guarantee that the platform's process-id
           * types are not wider than the result. */
  1404    STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t));
  1405  #if defined(_WIN32) || defined(_WIN64)
  1406    const uint32_t pid = GetCurrentProcessId();
  1407  #else
  1408    STATIC_ASSERT(sizeof(pid_t) <= sizeof(uint32_t));
  1409    const uint32_t pid = getpid();
  1410  #endif
          return pid;
  1411  }
  1412  
  1413  MDBX_MAYBE_UNUSED static __inline uintptr_t osal_thread_self(void) {
          /* Identifier of the calling thread converted to uintptr_t:
           * GetCurrentThreadId() on Windows, pthread_self() elsewhere. The
           * static assertion guarantees that the cast loses no bits. */
  1414  #if defined(_WIN32) || defined(_WIN64)
  1415    const mdbx_tid_t self = GetCurrentThreadId();
  1416  #else
  1417    const mdbx_tid_t self = pthread_self();
  1418  #endif
  1419    STATIC_ASSERT(sizeof(uintptr_t) >= sizeof(self));
  1421    return (uintptr_t)self;
  1422  }
  1423  
  1424  #if !defined(_WIN32) && !defined(_WIN64)
  1425  #if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC)
        /* Android/Bionic builds use an out-of-line check defined elsewhere; on
         * every other non-Windows platform the check is a stub that always
         * returns 0. */
  1426  MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void);
  1427  #else
  1428  static __inline int osal_check_tid4bionic(void) { return 0; }
  1429  #endif /* __ANDROID_API__ || ANDROID) || BIONIC */
  1430  
  1431  MDBX_MAYBE_UNUSED static __inline int
  1432  osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
          /* Lock `mutex`, but run the Bionic thread-id check first: a non-zero
           * result of that check is returned as is and the mutex is left
           * untouched; otherwise the result of pthread_mutex_lock() is
           * returned. */
  1433    const int tid_check = osal_check_tid4bionic();
          if (unlikely(tid_check))
            return tid_check;
  1434    return pthread_mutex_lock(mutex);
  1435  }
  1436  #endif /* !Windows */
  1437  
  1438  MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
  1439  MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
  1440  MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
  1441  
  1442  MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
  1443  /*----------------------------------------------------------------------------*/
  1444  /* lck stuff */
  1445  
  1446  /// \brief Initialization of synchronization primitives linked with MDBX_env
  1447  ///   instance both in LCK-file and within the current process.
  1448  /// \param
  1449  ///   global_uniqueness_flag = true - denotes that there are no other processes
  1450  ///     working with DB and LCK-file. Thus the function MUST initialize
  1451  ///     shared synchronization objects in memory-mapped LCK-file.
  1452  ///   global_uniqueness_flag = false - denotes that at least one process is
  1453  ///     already working with DB and LCK-file, including the case when DB
  1454  ///     has already been opened in the current process. Thus the function
  1455  ///     MUST NOT initialize shared synchronization objects in memory-mapped
  1456  ///     LCK-file that are already in use.
  1457  /// \return Error code or zero on success.
  1458  MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env,
  1459                                       MDBX_env *inprocess_neighbor,
  1460                                       int global_uniqueness_flag);
  1461  
  1462  /// \brief Disconnects from shared interprocess objects and destructs
  1463  ///   synchronization objects linked with MDBX_env instance
  1464  ///   within the current process.
  1465  /// \param
  1466  ///   inprocess_neighbor = NULL - if the current process does not have other
  1467  ///     instances of MDBX_env linked with the DB being closed.
  1468  ///     Thus the function MUST check for other processes working with DB or
  1469  ///     LCK-file, and keep or destroy shared synchronization objects in
  1470  ///     memory-mapped LCK-file depending on the result.
  1471  ///   inprocess_neighbor = not-NULL - pointer to another instance of MDBX_env
  1472  ///     (any one of them, if there are several) working with DB or LCK-file within the
  1473  ///     current process. Thus the function MUST NOT try to acquire exclusive
  1474  ///     lock and/or try to destruct shared synchronization objects linked with
  1475  ///     DB or LCK-file. Moreover, the implementation MUST ensure correct work
  1476  ///     of other instances of MDBX_env within the current process, e.g.
  1477  ///     restore POSIX-fcntl locks after the closing of file descriptors.
  1478  /// \return Error code (MDBX_PANIC) or zero on success.
  1479  MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env,
  1480                                          MDBX_env *inprocess_neighbor);
  1481  
  1482  /// \brief Connects to shared interprocess locking objects and tries to acquire
  1483  ///   the maximum lock level (shared if exclusive is not available)
  1484  ///   Depending on implementation or/and platform (Windows) this function may
  1485  ///   acquire the non-OS super-level lock (e.g. for shared synchronization
  1486  ///   objects initialization), which will be downgraded to OS-exclusive or
  1487  ///   shared via explicit calling of osal_lck_downgrade().
  1488  /// \return
  1489  ///   MDBX_RESULT_TRUE (-1) - if an exclusive lock was acquired and thus
  1490  ///     the current process is the first and only after the last use of DB.
  1491  ///   MDBX_RESULT_FALSE (0) - if a shared lock was acquired and thus
  1492  ///     DB has already been opened and now is used by other processes.
  1493  ///   Otherwise (not 0 and not -1) - error code.
  1494  MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env);
  1495  
  1496  /// \brief Downgrades the level of initially acquired lock to
  1497  ///   operational level specified by argument. The reasons for such a downgrade:
  1498  ///    - unblocking of other processes that are waiting for access, i.e.
  1499  ///      if (env->me_flags & MDBX_EXCLUSIVE) != 0, then other processes
  1500  ///      should be made aware that access is unavailable rather than
  1501  ///      wait for it.
  1502  ///    - freeing locks that interfere with file operations (especially on Windows)
  1503  ///   (env->me_flags & MDBX_EXCLUSIVE) == 0 - downgrade to shared lock.
  1504  ///   (env->me_flags & MDBX_EXCLUSIVE) != 0 - downgrade to exclusive
  1505  ///   operational lock.
  1506  /// \return Error code or zero on success
  1507  MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env);
  1508  
  1509  /// \brief Locks LCK-file or/and table of readers for (de)registering.
  1510  /// \return Error code or zero on success
  1511  MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env);
  1512  
  1513  /// \brief Unlocks LCK-file or/and table of readers after (de)registering.
  1514  MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env);
  1515  
  1516  /// \brief Acquires lock for DB change (on writing transaction start)
  1517  ///   Reading transactions will not be blocked.
  1518  ///   Declared as LIBMDBX_API because it is used in mdbx_chk.
  1519  /// \return Error code or zero on success
  1520  LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait);
  1521  
  1522  /// \brief Releases lock once DB changes are made (after writing transaction
  1523  ///   has finished).
  1524  ///   Declared as LIBMDBX_API because it is used in mdbx_chk.
  1525  LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env);
  1526  
  1527  /// \brief Sets alive-flag of reader presence (indicative lock) for PID of
  1528  ///   the current process. The function does no more than needed for
  1529  ///   the correct working of osal_rpid_check() in other processes.
  1530  /// \return Error code or zero on success
  1531  MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env);
  1532  
  1533  /// \brief Resets alive-flag of reader presence (indicative lock)
  1534  ///   for PID of the current process. The function does no more than needed
  1535  ///   for the correct working of osal_rpid_check() in other processes.
  1536  /// \return Error code or zero on success
  1537  MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env);
  1538  
  1539  /// \brief Checks for reading process status with the given pid with help of
  1540  ///   alive-flag of presence (indicative lock) or using another way.
  1541  /// \return
  1542  ///   MDBX_RESULT_TRUE (-1) - if the reader process with the given PID is alive
  1543  ///     and working with DB (indicative lock is present).
  1544  ///   MDBX_RESULT_FALSE (0) - if the reader process with the given PID is absent
  1545  ///     or not working with DB (indicative lock is not present).
  1546  ///   Otherwise (not 0 and not -1) - error code.
  1547  MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
  1548  
  1549  #if defined(_WIN32) || defined(_WIN64)
  1550  
        /* OSAL_MB2WIDE(FROM, TO): converts the multibyte C-string FROM into a
         * wide-character string and stores the resulting pointer in TO.
         *  - The destination buffer comes from _alloca(), so it lives in the
         *    stack frame of the function that expands the macro and must not be
         *    used after that function returns.
         *  - On failure the macro executes `return ERROR_INVALID_NAME;` in the
         *    ENCLOSING function: when the first osal_mb2w() call yields a length
         *    below 1 or above INT16_MAX, or when the second call does not
         *    return to_wlen + 1. It is therefore usable only inside functions
         *    whose return type can carry that error code.
         *  - NOTE(review): presumably osal_mb2w(nullptr, 0, ...) only measures
         *    the required length -- confirm against its definition. */
  1551  #define OSAL_MB2WIDE(FROM, TO)                                                 \
  1552    do {                                                                         \
  1553      const char *const from_tmp = (FROM);                                       \
  1554      const size_t from_mblen = strlen(from_tmp);                                \
  1555      const size_t to_wlen = osal_mb2w(nullptr, 0, from_tmp, from_mblen);        \
  1556      if (to_wlen < 1 || to_wlen > /* MAX_PATH */ INT16_MAX)                     \
  1557        return ERROR_INVALID_NAME;                                               \
  1558      wchar_t *const to_tmp = _alloca((to_wlen + 1) * sizeof(wchar_t));          \
  1559      if (to_wlen + 1 !=                                                         \
  1560          osal_mb2w(to_tmp, to_wlen + 1, from_tmp, from_mblen + 1))              \
  1561        return ERROR_INVALID_NAME;                                               \
  1562      (TO) = to_tmp;                                                             \
  1563    } while (0)
  1564  
  1565  typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *);
  1566  MDBX_INTERNAL_VAR osal_srwlock_t_function osal_srwlock_Init,
  1567      osal_srwlock_AcquireShared, osal_srwlock_ReleaseShared,
  1568      osal_srwlock_AcquireExclusive, osal_srwlock_ReleaseExclusive;
  1569  
  1570  #if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */
  1571  typedef enum _FILE_INFO_BY_HANDLE_CLASS {
  1572    FileBasicInfo,
  1573    FileStandardInfo,
  1574    FileNameInfo,
  1575    FileRenameInfo,
  1576    FileDispositionInfo,
  1577    FileAllocationInfo,
  1578    FileEndOfFileInfo,
  1579    FileStreamInfo,
  1580    FileCompressionInfo,
  1581    FileAttributeTagInfo,
  1582    FileIdBothDirectoryInfo,
  1583    FileIdBothDirectoryRestartInfo,
  1584    FileIoPriorityHintInfo,
  1585    FileRemoteProtocolInfo,
  1586    MaximumFileInfoByHandleClass
  1587  } FILE_INFO_BY_HANDLE_CLASS,
  1588      *PFILE_INFO_BY_HANDLE_CLASS;
  1589  
  1590  typedef struct _FILE_END_OF_FILE_INFO {
  1591    LARGE_INTEGER EndOfFile;
  1592  } FILE_END_OF_FILE_INFO, *PFILE_END_OF_FILE_INFO;
  1593  
  1594  #define REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK 0x00000001
  1595  #define REMOTE_PROTOCOL_INFO_FLAG_OFFLINE 0x00000002
  1596  
  1597  typedef struct _FILE_REMOTE_PROTOCOL_INFO {
  1598    USHORT StructureVersion;
  1599    USHORT StructureSize;
  1600    DWORD Protocol;
  1601    USHORT ProtocolMajorVersion;
  1602    USHORT ProtocolMinorVersion;
  1603    USHORT ProtocolRevision;
  1604    USHORT Reserved;
  1605    DWORD Flags;
  1606    struct {
  1607      DWORD Reserved[8];
  1608    } GenericReserved;
  1609    struct {
  1610      DWORD Reserved[16];
  1611    } ProtocolSpecificReserved;
  1612  } FILE_REMOTE_PROTOCOL_INFO, *PFILE_REMOTE_PROTOCOL_INFO;
  1613  
  1614  #endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */
  1615  
  1616  typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)(
  1617      _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
  1618      _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize);
  1619  MDBX_INTERNAL_VAR MDBX_GetFileInformationByHandleEx
  1620      mdbx_GetFileInformationByHandleEx;
  1621  
  1622  typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)(
  1623      _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer,
  1624      _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber,
  1625      _Out_opt_ LPDWORD lpMaximumComponentLength,
  1626      _Out_opt_ LPDWORD lpFileSystemFlags,
  1627      _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize);
  1628  MDBX_INTERNAL_VAR MDBX_GetVolumeInformationByHandleW
  1629      mdbx_GetVolumeInformationByHandleW;
  1630  
  1631  typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile,
  1632                                                        _Out_ LPWSTR lpszFilePath,
  1633                                                        _In_ DWORD cchFilePath,
  1634                                                        _In_ DWORD dwFlags);
  1635  MDBX_INTERNAL_VAR MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW;
  1636  
        /* Function-pointer type and variable for the Win32
         * SetFileInformationByHandle() API.
         * lpFileInformation is an INPUT buffer (the call reads the new file
         * information from it), hence it is annotated _In_ rather than _Out_. */
  1637  typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)(
  1638      _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
  1639      _In_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize);
  1640  MDBX_INTERNAL_VAR MDBX_SetFileInformationByHandle
  1641      mdbx_SetFileInformationByHandle;
  1642  
  1643  typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)(
  1644      IN HANDLE FileHandle, IN OUT HANDLE Event,
  1645      IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext,
  1646      OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode,
  1647      IN OUT PVOID InputBuffer, IN ULONG InputBufferLength,
  1648      OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength);
  1649  MDBX_INTERNAL_VAR MDBX_NtFsControlFile mdbx_NtFsControlFile;
  1650  
  1651  typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void);
  1652  MDBX_INTERNAL_VAR MDBX_GetTickCount64 mdbx_GetTickCount64;
  1653  
  1654  #if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8
  1655  typedef struct _WIN32_MEMORY_RANGE_ENTRY {
  1656    PVOID VirtualAddress;
  1657    SIZE_T NumberOfBytes;
  1658  } WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY;
  1659  #endif /* Windows 8.x */
  1660  
  1661  typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)(
  1662      HANDLE hProcess, ULONG_PTR NumberOfEntries,
  1663      PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags);
  1664  MDBX_INTERNAL_VAR MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory;
  1665  
  1666  typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT;
  1667  
  1668  typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle,
  1669                                                IN PLARGE_INTEGER NewSectionSize);
  1670  MDBX_INTERNAL_VAR MDBX_NtExtendSection mdbx_NtExtendSection;
  1671  
        /* Reports whether the process appears to be running under Wine.
         * The test is simply whether the mdbx_NtExtendSection function pointer
         * is null; nothing else is inspected.
         * NOTE(review): presumably that pointer stays null when ntdll does not
         * export NtExtendSection -- confirm where it is initialised. */
  1672  static __inline bool mdbx_RunningUnderWine(void) {
  1673    return !mdbx_NtExtendSection;
  1674  }
  1675  
  1676  typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey,
  1677                                             LPCSTR lpValue, DWORD dwFlags,
  1678                                             LPDWORD pdwType, PVOID pvData,
  1679                                             LPDWORD pcbData);
  1680  MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
  1681  
  1682  NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
  1683  
  1684  #endif /* Windows */
  1685  
  1686  #endif /* !__cplusplus */
  1687  
  1688  /*----------------------------------------------------------------------------*/
  1689  
  1690  #if defined(_MSC_VER) && _MSC_VER >= 1900
  1691  /* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros
  1692   * for internal format-args checker. */
  1693  #undef PRIuPTR
  1694  #undef PRIiPTR
  1695  #undef PRIdPTR
  1696  #undef PRIxPTR
  1697  #define PRIuPTR "Iu"
  1698  #define PRIiPTR "Ii"
  1699  #define PRIdPTR "Id"
  1700  #define PRIxPTR "Ix"
  1701  #define PRIuSIZE "zu"
  1702  #define PRIiSIZE "zi"
  1703  #define PRIdSIZE "zd"
  1704  #define PRIxSIZE "zx"
  1705  #endif /* fix PRI*PTR for _MSC_VER */
  1706  
  1707  #ifndef PRIuSIZE
  1708  #define PRIuSIZE PRIuPTR
  1709  #define PRIiSIZE PRIiPTR
  1710  #define PRIdSIZE PRIdPTR
  1711  #define PRIxSIZE PRIxPTR
  1712  #endif /* PRI*SIZE macros for MSVC */
  1713  
  1714  #ifdef _MSC_VER
  1715  #pragma warning(pop)
  1716  #endif
  1717  
  1718  #define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY)
  1719  #if defined(xMDBX_TOOLS)
  1720  extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
  1721  #endif
  1722  
  1723  /*******************************************************************************
  1724   *******************************************************************************
  1725   *******************************************************************************
  1726   *
  1727   *
  1728   *         ####   #####    #####     #     ####   #    #   ####
  1729   *        #    #  #    #     #       #    #    #  ##   #  #
  1730   *        #    #  #    #     #       #    #    #  # #  #   ####
  1731   *        #    #  #####      #       #    #    #  #  # #       #
  1732   *        #    #  #          #       #    #    #  #   ##  #    #
  1733   *         ####   #          #       #     ####   #    #   ####
  1734   *
  1735   *
  1736   */
  1737  
  1738  /** \defgroup build_option Build options
  1739   * The libmdbx build options.
  1740   @{ */
  1741  
  1742  /** Using fcntl(F_FULLFSYNC) with 5-10 times slowdown */
  1743  #define MDBX_OSX_WANNA_DURABILITY 0
  1744  /** Using fsync() with a chance of data loss on power failure */
  1745  #define MDBX_OSX_WANNA_SPEED 1
  1746  
  1747  #ifndef MDBX_OSX_SPEED_INSTEADOF_DURABILITY
  1748  /** Choices \ref MDBX_OSX_WANNA_DURABILITY or \ref MDBX_OSX_WANNA_SPEED
  1749   * for OSX & iOS */
  1750  #define MDBX_OSX_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY
  1751  #endif /* MDBX_OSX_SPEED_INSTEADOF_DURABILITY */
  1752  
  1753  /** Controls PID checking to detect reuse of a DB environment after fork() */
  1754  #ifndef MDBX_ENV_CHECKPID
  1755  #if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64)
  1756  /* PID check could be omitted:
  1757   *  - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork()
  1758   *    mapped pages will not be available for child process.
  1759   *  - on Windows, where fork() is not available. */
  1760  #define MDBX_ENV_CHECKPID 0
  1761  #else
  1762  #define MDBX_ENV_CHECKPID 1
  1763  #endif
  1764  #define MDBX_ENV_CHECKPID_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
  1765  #else
  1766  #define MDBX_ENV_CHECKPID_CONFIG MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
  1767  #endif /* MDBX_ENV_CHECKPID */
  1768  
  1769  /** Controls checking of the transaction owner thread to guard against misuse
  1770   * of transactions from other threads. */
  1771  #ifndef MDBX_TXN_CHECKOWNER
  1772  #define MDBX_TXN_CHECKOWNER 1
  1773  #define MDBX_TXN_CHECKOWNER_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
  1774  #else
  1775  #define MDBX_TXN_CHECKOWNER_CONFIG MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
  1776  #endif /* MDBX_TXN_CHECKOWNER */
  1777  
  1778  /** Does a system have battery-backed Real-Time Clock or just a fake. */
  1779  #ifndef MDBX_TRUST_RTC
  1780  #if defined(__linux__) || defined(__gnu_linux__) || defined(__NetBSD__) ||     \
  1781      defined(__OpenBSD__)
  1782  #define MDBX_TRUST_RTC 0 /* a lot of embedded systems have a fake RTC */
  1783  #else
  1784  #define MDBX_TRUST_RTC 1
  1785  #endif
  1786  #define MDBX_TRUST_RTC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TRUST_RTC)
  1787  #else
  1788  #define MDBX_TRUST_RTC_CONFIG MDBX_STRINGIFY(MDBX_TRUST_RTC)
  1789  #endif /* MDBX_TRUST_RTC */
  1790  
  1791  /** Controls online database auto-compactification during write-transactions. */
  1792  #ifndef MDBX_ENABLE_REFUND
  1793  #define MDBX_ENABLE_REFUND 1
  1794  #elif !(MDBX_ENABLE_REFUND == 0 || MDBX_ENABLE_REFUND == 1)
  1795  #error MDBX_ENABLE_REFUND must be defined as 0 or 1
  1796  #endif /* MDBX_ENABLE_REFUND */
  1797  
  1798  /** Controls gathering statistics for page operations. */
  1799  #ifndef MDBX_ENABLE_PGOP_STAT
  1800  #define MDBX_ENABLE_PGOP_STAT 1
  1801  #elif !(MDBX_ENABLE_PGOP_STAT == 0 || MDBX_ENABLE_PGOP_STAT == 1)
  1802  #error MDBX_ENABLE_PGOP_STAT must be defined as 0 or 1
  1803  #endif /* MDBX_ENABLE_PGOP_STAT */
  1804  
  1805  /** Enables chunking of long lists of retired pages during the commit of huge
  1806   * transactions, to avoid using sequences of pages. */
  1807  #ifndef MDBX_ENABLE_BIGFOOT
  1808  #if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
  1809  #define MDBX_ENABLE_BIGFOOT 1
  1810  #else
  1811  #define MDBX_ENABLE_BIGFOOT 0
  1812  #endif
  1813  #elif !(MDBX_ENABLE_BIGFOOT == 0 || MDBX_ENABLE_BIGFOOT == 1)
  1814  #error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
  1815  #endif /* MDBX_ENABLE_BIGFOOT */
  1816  
  1817  /** Controls use of POSIX madvise() hints and friends. */
  1818  #ifndef MDBX_ENABLE_MADVISE
  1819  #define MDBX_ENABLE_MADVISE 1
  1820  #elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
  1821  #error MDBX_ENABLE_MADVISE must be defined as 0 or 1
  1822  #endif /* MDBX_ENABLE_MADVISE */
  1823  
  1824  /** Disables some checks, reducing both the overhead and the probability of
  1825   * detecting database corruption to values closer to those of LMDB. */
  1826  #ifndef MDBX_DISABLE_VALIDATION
  1827  #define MDBX_DISABLE_VALIDATION 0
  1828  #elif !(MDBX_DISABLE_VALIDATION == 0 || MDBX_DISABLE_VALIDATION == 1)
  1829  #error MDBX_DISABLE_VALIDATION must be defined as 0 or 1
  1830  #endif /* MDBX_DISABLE_VALIDATION */
  1831  
  1832  #ifndef MDBX_PNL_PREALLOC_FOR_RADIXSORT
  1833  #define MDBX_PNL_PREALLOC_FOR_RADIXSORT 1
  1834  #elif !(MDBX_PNL_PREALLOC_FOR_RADIXSORT == 0 ||                                \
  1835          MDBX_PNL_PREALLOC_FOR_RADIXSORT == 1)
  1836  #error MDBX_PNL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
  1837  #endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */
  1838  
  1839  #ifndef MDBX_DPL_PREALLOC_FOR_RADIXSORT
  1840  #define MDBX_DPL_PREALLOC_FOR_RADIXSORT 1
  1841  #elif !(MDBX_DPL_PREALLOC_FOR_RADIXSORT == 0 ||                                \
  1842          MDBX_DPL_PREALLOC_FOR_RADIXSORT == 1)
  1843  #error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
  1844  #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
  1845  
  1846  /** Basically, this build-option is for TODO. Guess it should be replaced
  1847   * with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
  1848   *  0/OFF = Don't track dirty pages at all and don't spill them.
  1849   *          This should be the default on Linux and maybe other systems
  1850   *          (not sure: Darwin/OSX, FreeBSD, Windows 10) where the kernel provides
  1851   *          proper LRU tracking and on-demand async writing.
  1852   *  1/ON  = Lite tracking of dirty pages but with LRU labels and explicit
  1853   *          spilling with msync(MS_ASYNC). */
  1854  #ifndef MDBX_FAKE_SPILL_WRITEMAP
  1855  #if defined(__linux__) || defined(__gnu_linux__)
  1856  #define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
  1857  #else
  1858  #define MDBX_FAKE_SPILL_WRITEMAP 0
  1859  #endif
  1860  #elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
  1861  #error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
  1862  #endif /* MDBX_FAKE_SPILL_WRITEMAP */
  1863  
  1864  /** Controls sort order of internal page number lists.
  1865   * This is a mostly experimental/advanced option, not for regular MDBX users.
  1866   * \warning The database format depends on this option, and libmdbx builds with
  1867   * different values of this option are incompatible. */
  1868  #ifndef MDBX_PNL_ASCENDING
  1869  #define MDBX_PNL_ASCENDING 0
  1870  #elif !(MDBX_PNL_ASCENDING == 0 || MDBX_PNL_ASCENDING == 1)
  1871  #error MDBX_PNL_ASCENDING must be defined as 0 or 1
  1872  #endif /* MDBX_PNL_ASCENDING */
  1873  
  1874  /** Avoid dependence on the MSVC CRT and use ntdll.dll instead. */
  1875  #ifndef MDBX_WITHOUT_MSVC_CRT
  1876  #define MDBX_WITHOUT_MSVC_CRT 1
  1877  #elif !(MDBX_WITHOUT_MSVC_CRT == 0 || MDBX_WITHOUT_MSVC_CRT == 1)
  1878  #error MDBX_WITHOUT_MSVC_CRT must be defined as 0 or 1
  1879  #endif /* MDBX_WITHOUT_MSVC_CRT */
  1880  
  1881  /** Size of the buffer used while copying an environment/database file. */
  1882  #ifndef MDBX_ENVCOPY_WRITEBUF
  1883  #define MDBX_ENVCOPY_WRITEBUF 1048576u
  1884  #elif MDBX_ENVCOPY_WRITEBUF < 65536u || MDBX_ENVCOPY_WRITEBUF > 1073741824u || \
  1885      MDBX_ENVCOPY_WRITEBUF % 65536u
  1886  #error MDBX_ENVCOPY_WRITEBUF must be defined in range 65536..1073741824 and be multiple of 65536
  1887  #endif /* MDBX_ENVCOPY_WRITEBUF */
  1888  
  1889  /** Forces assertion checking */
  1890  #ifndef MDBX_FORCE_ASSERTIONS
  1891  #define MDBX_FORCE_ASSERTIONS 0
  1892  #elif !(MDBX_FORCE_ASSERTIONS == 0 || MDBX_FORCE_ASSERTIONS == 1)
  1893  #error MDBX_FORCE_ASSERTIONS must be defined as 0 or 1
  1894  #endif /* MDBX_FORCE_ASSERTIONS */
  1895  
  1896  /** Presumed malloc size overhead for each allocation
  1897   * to adjust allocations to be more aligned. */
  1898  #ifndef MDBX_ASSUME_MALLOC_OVERHEAD
  1899  #ifdef __SIZEOF_POINTER__
  1900  #define MDBX_ASSUME_MALLOC_OVERHEAD (__SIZEOF_POINTER__ * 2u)
  1901  #else
  1902  #define MDBX_ASSUME_MALLOC_OVERHEAD (sizeof(void *) * 2u)
  1903  #endif
  1904  #elif MDBX_ASSUME_MALLOC_OVERHEAD < 0 || MDBX_ASSUME_MALLOC_OVERHEAD > 64 ||   \
  1905      MDBX_ASSUME_MALLOC_OVERHEAD % 4
  1906  #error MDBX_ASSUME_MALLOC_OVERHEAD must be defined in range 0..64 and be multiple of 4
  1907  #endif /* MDBX_ASSUME_MALLOC_OVERHEAD */
  1908  
  1909  /** If defined then enables integration with Valgrind,
  1910   * a memory analyzing tool. */
  1911  #ifndef MDBX_USE_VALGRIND
  1912  #endif /* MDBX_USE_VALGRIND */
  1913  
  1914  /** If defined, enables the use of C11 atomics;
  1915   *  otherwise their availability is detected automatically. */
  1916  #ifndef MDBX_HAVE_C11ATOMICS
  1917  #endif /* MDBX_HAVE_C11ATOMICS */
  1918  
  1919  //------------------------------------------------------------------------------
  1920  
  1921  /** Win32 File Locking API for \ref MDBX_LOCKING */
  1922  #define MDBX_LOCKING_WIN32FILES -1
  1923  
  1924  /** SystemV IPC semaphores for \ref MDBX_LOCKING */
  1925  #define MDBX_LOCKING_SYSV 5
  1926  
  1927  /** POSIX-1 Shared anonymous semaphores for \ref MDBX_LOCKING */
  1928  #define MDBX_LOCKING_POSIX1988 1988
  1929  
  1930  /** POSIX-2001 Shared Mutexes for \ref MDBX_LOCKING */
  1931  #define MDBX_LOCKING_POSIX2001 2001
  1932  
  1933  /** POSIX-2008 Robust Mutexes for \ref MDBX_LOCKING */
  1934  #define MDBX_LOCKING_POSIX2008 2008
  1935  
  1936  /** BeOS Benaphores, aka Futexes for \ref MDBX_LOCKING */
  1937  #define MDBX_LOCKING_BENAPHORE 1995
  1938  
  1939  /** Advanced: Chooses the locking implementation (autodetection by default). */
  1940  #if defined(_WIN32) || defined(_WIN64)
  1941  #define MDBX_LOCKING MDBX_LOCKING_WIN32FILES
  1942  #else
  1943  #ifndef MDBX_LOCKING
  1944  #if defined(_POSIX_THREAD_PROCESS_SHARED) &&                                   \
  1945      _POSIX_THREAD_PROCESS_SHARED >= 200112L && !defined(__FreeBSD__)
  1946  
  1947  /* Some platforms define the EOWNERDEAD error code even though they
  1948   * don't support Robust Mutexes. If in doubt, compile with -DMDBX_LOCKING=2001. */
  1949  #if defined(EOWNERDEAD) && _POSIX_THREAD_PROCESS_SHARED >= 200809L &&          \
  1950      ((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) &&                            \
  1951        _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) ||                                \
  1952       (defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) &&                            \
  1953        _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) ||                                \
  1954       defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) &&     \
  1955      (!defined(__GLIBC__) ||                                                    \
  1956       __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */)
  1957  #define MDBX_LOCKING MDBX_LOCKING_POSIX2008
  1958  #else
  1959  #define MDBX_LOCKING MDBX_LOCKING_POSIX2001
  1960  #endif
  1961  #elif defined(__sun) || defined(__SVR4) || defined(__svr4__)
  1962  #define MDBX_LOCKING MDBX_LOCKING_POSIX1988
  1963  #else
  1964  #define MDBX_LOCKING MDBX_LOCKING_SYSV
  1965  #endif
  1966  #define MDBX_LOCKING_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_LOCKING)
  1967  #else
  1968  #define MDBX_LOCKING_CONFIG MDBX_STRINGIFY(MDBX_LOCKING)
  1969  #endif /* MDBX_LOCKING */
  1970  #endif /* !Windows */
  1971  
  1972  /** Advanced: Using POSIX OFD-locks (autodetection by default). */
  1973  #ifndef MDBX_USE_OFDLOCKS
  1974  #if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) &&   \
  1975      !defined(MDBX_SAFE4QEMU) &&                                                \
  1976      !defined(__sun) /* OFD-lock are broken on Solaris */
  1977  #define MDBX_USE_OFDLOCKS 1
  1978  #else
  1979  #define MDBX_USE_OFDLOCKS 0
  1980  #endif
  1981  #define MDBX_USE_OFDLOCKS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
  1982  #else
  1983  #define MDBX_USE_OFDLOCKS_CONFIG MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
  1984  #endif /* MDBX_USE_OFDLOCKS */
  1985  
  1986  /** Advanced: Using sendfile() syscall (autodetection by default). */
  1987  #ifndef MDBX_USE_SENDFILE
  1988  #if ((defined(__linux__) || defined(__gnu_linux__)) &&                         \
  1989       !defined(__ANDROID_API__)) ||                                             \
  1990      (defined(__ANDROID_API__) && __ANDROID_API__ >= 21)
  1991  #define MDBX_USE_SENDFILE 1
  1992  #else
  1993  #define MDBX_USE_SENDFILE 0
  1994  #endif
  1995  #endif /* MDBX_USE_SENDFILE */
  1996  
  1997  /** Advanced: Using copy_file_range() syscall (autodetection by default). */
  1998  #ifndef MDBX_USE_COPYFILERANGE
  1999  #if __GLIBC_PREREQ(2, 27) && defined(_GNU_SOURCE)
  2000  #define MDBX_USE_COPYFILERANGE 1
  2001  #else
  2002  #define MDBX_USE_COPYFILERANGE 0
  2003  #endif
  2004  #endif /* MDBX_USE_COPYFILERANGE */
  2005  
  2006  /** Advanced: Using sync_file_range() syscall (autodetection by default). */
  2007  #ifndef MDBX_USE_SYNCFILERANGE
  2008  #if ((defined(__linux__) || defined(__gnu_linux__)) &&                         \
  2009       defined(SYNC_FILE_RANGE_WRITE) && !defined(__ANDROID_API__)) ||           \
  2010      (defined(__ANDROID_API__) && __ANDROID_API__ >= 26)
  2011  #define MDBX_USE_SYNCFILERANGE 1
  2012  #else
  2013  #define MDBX_USE_SYNCFILERANGE 0
  2014  #endif
  2015  #endif /* MDBX_USE_SYNCFILERANGE */
  2016  
  2017  //------------------------------------------------------------------------------
  2018  
  2019  #ifndef MDBX_CPU_WRITEBACK_INCOHERENT
  2020  #if defined(__ia32__) || defined(__e2k__) || defined(__hppa) ||                \
  2021      defined(__hppa__) || defined(DOXYGEN)
  2022  #define MDBX_CPU_WRITEBACK_INCOHERENT 0
  2023  #else
  2024  #define MDBX_CPU_WRITEBACK_INCOHERENT 1
  2025  #endif
  2026  #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
  2027  
  2028  #ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
  2029  #ifdef __OpenBSD__
  2030  #define MDBX_MMAP_INCOHERENT_FILE_WRITE 1
  2031  #else
  2032  #define MDBX_MMAP_INCOHERENT_FILE_WRITE 0
  2033  #endif
  2034  #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  2035  
  2036  #ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
  2037  #if defined(__mips) || defined(__mips__) || defined(__mips64) ||               \
  2038      defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) ||            \
  2039      defined(__MWERKS__) || defined(__sgi)
  2040  /* MIPS has cache coherency issues. */
  2041  #define MDBX_MMAP_INCOHERENT_CPU_CACHE 1
  2042  #else
  2043  /* LY: assume no relevant mmap/dcache issues. */
  2044  #define MDBX_MMAP_INCOHERENT_CPU_CACHE 0
  2045  #endif
  2046  #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
  2047  
        /** Build option MDBX_64BIT_ATOMIC. When the builder does not set it, it
         * defaults to 1 if MDBX_WORDBITS >= 64 (or under DOXYGEN) and to 0
         * otherwise. MDBX_64BIT_ATOMIC_CONFIG is the stringified value, prefixed
         * with "AUTO=" when the default was chosen here.
         * NOTE(review): presumably tells whether 64-bit loads/stores are atomic
         * on the target -- confirm at the points of use. */
  2048  #ifndef MDBX_64BIT_ATOMIC
  2049  #if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
  2050  #define MDBX_64BIT_ATOMIC 1
  2051  #else
  2052  #define MDBX_64BIT_ATOMIC 0
  2053  #endif
  2054  #define MDBX_64BIT_ATOMIC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
  2055  #else
  2056  #define MDBX_64BIT_ATOMIC_CONFIG MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
  2057  #endif /* MDBX_64BIT_ATOMIC */
  2058  
  2059  #ifndef MDBX_64BIT_CAS
  2060  #if defined(ATOMIC_LLONG_LOCK_FREE)
  2061  #if ATOMIC_LLONG_LOCK_FREE > 1
  2062  #define MDBX_64BIT_CAS 1
  2063  #else
  2064  #define MDBX_64BIT_CAS 0
  2065  #endif
  2066  #elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
  2067  #if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
  2068  #define MDBX_64BIT_CAS 1
  2069  #else
  2070  #define MDBX_64BIT_CAS 0
  2071  #endif
  2072  #elif defined(__CLANG_ATOMIC_LLONG_LOCK_FREE)
  2073  #if __CLANG_ATOMIC_LLONG_LOCK_FREE > 1
  2074  #define MDBX_64BIT_CAS 1
  2075  #else
  2076  #define MDBX_64BIT_CAS 0
  2077  #endif
  2078  #elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
  2079  #define MDBX_64BIT_CAS 1
  2080  #else
  2081  #define MDBX_64BIT_CAS MDBX_64BIT_ATOMIC
  2082  #endif
  2083  #define MDBX_64BIT_CAS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_CAS)
  2084  #else
  2085  #define MDBX_64BIT_CAS_CONFIG MDBX_STRINGIFY(MDBX_64BIT_CAS)
  2086  #endif /* MDBX_64BIT_CAS */
  2087  
  2088  #ifndef MDBX_UNALIGNED_OK
  2089  #if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) ||                 \
  2090      defined(ENABLE_UBSAN)
  2091  #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
  2092  #elif defined(__ARM_FEATURE_UNALIGNED)
  2093  #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
  2094  #elif defined(__e2k__) || defined(__elbrus__)
  2095  #if __iset__ > 4
  2096  #define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
  2097  #else
  2098  #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
  2099  #endif
  2100  #elif defined(__ia32__)
  2101  #define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
  2102  #elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0)
  2103  /* expecting the optimizer to do its job well; this also
  2104   * hushes false-positives from UBSAN (undefined behaviour sanitizer) */
  2105  #define MDBX_UNALIGNED_OK 0
  2106  #else
  2107  #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
  2108  #endif
  2109  #elif MDBX_UNALIGNED_OK == 1
  2110  #undef MDBX_UNALIGNED_OK
  2111  #define MDBX_UNALIGNED_OK 32 /* any unaligned access allowed */
  2112  #endif                       /* MDBX_UNALIGNED_OK */
  2113  
        /** Cache line size assumed by the library unless overridden at build
         * time: SYSTEM_CACHE_ALIGNMENT_SIZE when that macro is defined, 128 on
         * IA-64, and 64 otherwise.
         * NOTE(review): presumably expressed in bytes -- confirm at the points
         * of use. */
  2114  #ifndef MDBX_CACHELINE_SIZE
  2115  #if defined(SYSTEM_CACHE_ALIGNMENT_SIZE)
  2116  #define MDBX_CACHELINE_SIZE SYSTEM_CACHE_ALIGNMENT_SIZE
  2117  #elif defined(__ia64__) || defined(__ia64) || defined(_M_IA64)
  2118  #define MDBX_CACHELINE_SIZE 128
  2119  #else
  2120  #define MDBX_CACHELINE_SIZE 64
  2121  #endif
  2122  #endif /* MDBX_CACHELINE_SIZE */
  2123  
  2124  /** @} end of build options */
  2125  /*******************************************************************************
  2126   *******************************************************************************
  2127   ******************************************************************************/
  2128  
  2129  #ifndef DOXYGEN
  2130  
  2131  /* In case the MDBX_DEBUG is undefined set it corresponding to NDEBUG */
  2132  #ifndef MDBX_DEBUG
  2133  #ifdef NDEBUG
  2134  #define MDBX_DEBUG 0
  2135  #else
  2136  #define MDBX_DEBUG 1
  2137  #endif
  2138  #endif /* MDBX_DEBUG */
  2139  
  2140  #else
  2141  
  2142  /* !!! Actually this is a fake definitions for Doxygen !!! */
  2143  
  2144  /** Controls enabling of debugging features.
  2145   *
  2146   *  - `MDBX_DEBUG = 0` (by default) Disables any debugging features at all,
  2147   *                     including logging and assertion controls.
  2148   *                     Logging level and corresponding debug flags changing
  2149   *                     by \ref mdbx_setup_debug() will not have effect.
  2150   *  - `MDBX_DEBUG > 0` Enables code for the debugging features (logging,
  2151   *                     assertions checking and internal audit).
  2152   *                     Simultaneously sets the default logging level
  2153   *                     to the `MDBX_DEBUG` value.
  2154   *                     Also enables \ref MDBX_DBG_AUDIT if `MDBX_DEBUG >= 2`.
  2155   *
  2156   * \ingroup build_option */
  2157  #define MDBX_DEBUG 0...7
  2158  
  2159  /** Disables using of GNU libc extensions. */
  2160  #define MDBX_DISABLE_GNU_SOURCE 0 or 1
  2161  
  2162  #endif /* DOXYGEN */
  2163  
  2164  /* Undefine the NDEBUG if debugging is enforced by MDBX_DEBUG */
  2165  #if MDBX_DEBUG
  2166  #undef NDEBUG
  2167  #endif
  2168  
  2169  /*----------------------------------------------------------------------------*/
  2170  /* Atomics */
  2171  
  2172  enum MDBX_memory_order { /* ordering selector taken by the atomic_* helpers */
  2173    mo_Relaxed,            /* memory_order_relaxed when C11 atomics are used */
  2174    mo_AcquireRelease      /* release for stores, acquire for loads (C11) */
  2175    /* , mo_SequentialConsistency */
  2176  };
  2177  
  2178  typedef union { /* 32-bit cell with a plain and an optional C11-atomic view */
  2179    volatile uint32_t weak; /* plain volatile view (used e.g. by non-C11 paths) */
  2180  #ifdef MDBX_HAVE_C11ATOMICS
  2181    volatile _Atomic uint32_t c11a; /* same storage seen as a C11 atomic */
  2182  #endif /* MDBX_HAVE_C11ATOMICS */
  2183  } MDBX_atomic_uint32_t;
  2184  
  2185  typedef union { /* 64-bit counterpart of MDBX_atomic_uint32_t */
  2186    volatile uint64_t weak; /* plain volatile view of the whole 64-bit value */
  2187  #if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC)
  2188    volatile _Atomic uint64_t c11a; /* C11 atomic view of the same storage */
  2189  #endif
  2190  #if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC
  2191    __anonymous_struct_extension__ struct { /* the value as two 32-bit halves */
  2192  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  2193      MDBX_atomic_uint32_t low, high; /* little-endian: low half comes first */
  2194  #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  2195      MDBX_atomic_uint32_t high, low; /* big-endian: high half comes first */
  2196  #else
  2197  #error "FIXME: Unsupported byte order"
  2198  #endif /* __BYTE_ORDER__ */
  2199    };
  2200  #endif
  2201  } MDBX_atomic_uint64_t;
  2202  
  2203  #ifdef MDBX_HAVE_C11ATOMICS
  2204  
  2205  /* Crutches for C11 atomic compiler's bugs: pick the member for loads (_ro) and stores (_rw) */
  2206  #if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127
  2207  #define MDBX_c11a_ro(type, ptr) (&(ptr)->weak)
  2208  #define MDBX_c11a_rw(type, ptr) (&(ptr)->weak)
  2209  #elif defined(__clang__) && __clang__ < 8 /* NOTE(review): __clang__ is not a version number, so this likely matches every clang; was __clang_major__ intended? confirm */
  2210  #define MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a) /* the cast target carries no const qualifier */
  2211  #define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
  2212  #else
  2213  #define MDBX_c11a_ro(type, ptr) (&(ptr)->c11a)
  2214  #define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
  2215  #endif /* Crutches for C11 atomic compiler's bugs */
  2216  
  2217  #define mo_c11_store(fence)                                                    \
  2218    (((fence) == mo_Relaxed)          ? memory_order_relaxed                     \
  2219     : ((fence) == mo_AcquireRelease) ? memory_order_release                     \
  2220                                      : memory_order_seq_cst)
  2221  #define mo_c11_load(fence)                                                     \
  2222    (((fence) == mo_Relaxed)          ? memory_order_relaxed                     \
  2223     : ((fence) == mo_AcquireRelease) ? memory_order_acquire                     \
  2224                                      : memory_order_seq_cst)
  2225  
  2226  #endif /* MDBX_HAVE_C11ATOMICS */
  2227  
  2228  #ifndef __cplusplus
  2229  
  2230  #ifdef MDBX_HAVE_C11ATOMICS /* osal_memory_fence(order, write): `write` selects store vs load ordering */
  2231  #define osal_memory_fence(order, write)                                        \
  2232    atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order))
  2233  #else /* MDBX_HAVE_C11ATOMICS: compiler barrier, plus a memory barrier for qualifying writes */
  2234  #define osal_memory_fence(order, write)                                        \
  2235    do {                                                                         \
  2236      osal_compiler_barrier();                                                   \
  2237      if ((write) && (order) > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed       \
  2238                                                              : mo_AcquireRelease)) \
  2239        osal_memory_barrier();                                                   \
  2240    } while (0)
  2241  #endif /* MDBX_HAVE_C11ATOMICS (arguments are parenthesized so expression arguments expand safely) */
  2242  
  2243  #if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__)
  2244  #define atomic_store32(p, value, order)                                        \
  2245    ({                                                                           \
  2246      const uint32_t value_to_store = (value);                                   \
  2247      atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store,           \
  2248                            mo_c11_store(order));                                \
  2249      value_to_store;                                                            \
  2250    })
  2251  #define atomic_load32(p, order)                                                \
  2252    atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order))
  2253  #define atomic_store64(p, value, order)                                        \
  2254    ({                                                                           \
  2255      const uint64_t value_to_store = (value);                                   \
  2256      atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store,           \
  2257                            mo_c11_store(order));                                \
  2258      value_to_store;                                                            \
  2259    })
  2260  #define atomic_load64(p, order)                                                \
  2261    atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order))
  2262  #endif /* LCC && MDBX_HAVE_C11ATOMICS */
  2263  
  2264  #ifndef atomic_store32 /* function form; skipped when a macro of this name was defined above */
  2265  MDBX_MAYBE_UNUSED static __always_inline uint32_t
  2266  atomic_store32(MDBX_atomic_uint32_t *p, const uint32_t value,
  2267                 enum MDBX_memory_order order) {
  2268    STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4); /* union must stay one 32-bit word */
  2269  #ifdef MDBX_HAVE_C11ATOMICS
  2270    assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p)));
  2271    atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order));
  2272  #else  /* MDBX_HAVE_C11ATOMICS */
  2273    if (order != mo_Relaxed) /* compiler barrier ahead of the store unless relaxed */
  2274      osal_compiler_barrier();
  2275    p->weak = value; /* plain volatile store */
  2276    osal_memory_fence(order, true); /* write-side fence after the store */
  2277  #endif /* MDBX_HAVE_C11ATOMICS */
  2278    return value; /* returns the value just stored */
  2279  }
  2280  #endif /* atomic_store32 */
  2281  
  2282  #ifndef atomic_load32 /* function form; skipped when a macro of this name was defined above */
  2283  MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
  2284      const volatile MDBX_atomic_uint32_t *p, enum MDBX_memory_order order) {
  2285    STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4); /* union must stay one 32-bit word */
  2286  #ifdef MDBX_HAVE_C11ATOMICS
  2287    assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p)));
  2288    return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order));
  2289  #else  /* MDBX_HAVE_C11ATOMICS */
  2290    osal_memory_fence(order, false); /* read-side fence before the load */
  2291    const uint32_t value = p->weak; /* plain volatile load */
  2292    if (order != mo_Relaxed) /* compiler barrier after the load unless relaxed */
  2293      osal_compiler_barrier();
  2294    return value;
  2295  #endif /* MDBX_HAVE_C11ATOMICS */
  2296  }
  2297  #endif /* atomic_load32 */
  2298  
  2299  #endif /* !__cplusplus */
  2300  
  2301  /*----------------------------------------------------------------------------*/
  2302  /* Basic constants and types */
  2303  
  2304  /* A stamp that identifies a file as an MDBX file.
  2305   * There's nothing special about this value other than that it is easily
  2306   * recognizable, and it will reflect any byte order mismatches. */
  2307  #define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11)
  2308  
  2309  /* FROZEN: The version number for a database's datafile format. */
  2310  #define MDBX_DATA_VERSION 3
  2311  /* The version number for a database's lockfile format. */
  2312  #define MDBX_LOCK_VERSION 4
  2313  
  2314  /* handle for the DB used to track free pages. */
  2315  #define FREE_DBI 0
  2316  /* handle for the default DB. */
  2317  #define MAIN_DBI 1
  2318  /* Number of DBs in metapage (free and main) - also hardcoded elsewhere */
  2319  #define CORE_DBS 2
  2320  
  2321  /* Number of meta pages - also hardcoded elsewhere */
  2322  #define NUM_METAS 3
  2323  
  2324  /* A page number in the database.
  2325   *
  2326   * MDBX uses 32 bit for page numbers. This limits database
  2327   * size up to 2^44 bytes, in case of 4K pages. */
  2328  typedef uint32_t pgno_t;
  2329  typedef MDBX_atomic_uint32_t atomic_pgno_t;
  2330  #define PRIaPGNO PRIu32
  2331  #define MAX_PAGENO UINT32_C(0x7FFFffff)
  2332  #define MIN_PAGENO NUM_METAS
  2333  
  2334  #define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000)
  2335  
  2336  /* A transaction ID. */
  2337  typedef uint64_t txnid_t;
  2338  typedef MDBX_atomic_uint64_t atomic_txnid_t;
  2339  #define PRIaTXN PRIi64
  2340  #define MIN_TXNID UINT64_C(1)
  2341  #define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1)
  2342  #define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1)
  2343  #define INVALID_TXNID UINT64_MAX
  2344  /* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
  2345   * #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */
  2346  #ifndef xMDBX_TXNID_STEP
  2347  #if MDBX_64BIT_CAS
  2348  #define xMDBX_TXNID_STEP 1u
  2349  #else
  2350  #define xMDBX_TXNID_STEP 2u
  2351  #endif
  2352  #endif /* xMDBX_TXNID_STEP */
  2353  
  2354  /* Used for offsets within a single page.
  2355   * Since memory pages are typically 4 or 8KB in size, 12-13 bits,
  2356   * this is plenty. */
  2357  typedef uint16_t indx_t;
  2358  
  2359  #define MEGABYTE ((size_t)1 << 20)
  2360  
  2361  /*----------------------------------------------------------------------------*/
  2362  /* Core structures for database and shared memory (i.e. format definition) */
  2363  #pragma pack(push, 4)
  2364  
  2365  /* Information about a single database in the environment. */
  2366  typedef struct MDBX_db {
  2367    uint16_t md_flags;        /* see mdbx_dbi_open */
  2368    uint16_t md_depth;        /* depth of this tree */
  2369    uint32_t md_xsize;        /* key-size for MDBX_DUPFIXED (LEAF2 pages); aliased as mm_psize (page size) for mm_dbs[FREE_DBI] */
  2370    pgno_t md_root;           /* the root page of this tree */
  2371    pgno_t md_branch_pages;   /* number of internal pages */
  2372    pgno_t md_leaf_pages;     /* number of leaf pages */
  2373    pgno_t md_overflow_pages; /* number of overflow pages */
  2374    uint64_t md_seq;          /* table sequence counter */
  2375    uint64_t md_entries;      /* number of data items */
  2376    uint64_t md_mod_txnid;    /* txnid of last committed modification */
  2377  } MDBX_db;
  2378  
  2379  /* database size-related parameters */
  2380  typedef struct MDBX_geo {
  2381    uint16_t grow_pv;   /* datafile growth step as a 16-bit packed (exponential
  2382                             quantized) value */
  2383    uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed
  2384                             (exponential quantized) value */
  2385    pgno_t lower;       /* minimal size of datafile in pages */
  2386    pgno_t upper;       /* maximal size of datafile in pages */
  2387    pgno_t now;         /* current size of datafile in pages */
  2388    pgno_t next;        /* first unused page in the datafile,
  2389                           but actually the file may be shorter. */
  2390  } MDBX_geo;
  2391  
  2392  /* Meta page content.
  2393   * A meta page is the start point for accessing a database snapshot.
  2394   * Pages 0-2 are meta pages (NUM_METAS is 3). TODO confirm how a txn picks one. */
  2395  typedef struct MDBX_meta {
  2396    /* Stamp identifying this as an MDBX file.
  2397     * It must be set to MDBX_MAGIC with MDBX_DATA_VERSION. */
  2398    uint32_t mm_magic_and_version[2];
  2399  
  2400    /* txnid that committed this page, the first of a two-phase-update pair */
  2401    union {
  2402      MDBX_atomic_uint32_t mm_txnid_a[2];
  2403      uint64_t unsafe_txnid;
  2404    };
  2405  
  2406    uint16_t mm_extra_flags;  /* extra DB flags, zero (nothing) for now */
  2407    uint8_t mm_validator_id;  /* ID of checksum and page validation method,
  2408                               * zero (nothing) for now */
  2409    uint8_t mm_extra_pagehdr; /* extra bytes in the page header,
  2410                               * zero (nothing) for now */
  2411  
  2412    MDBX_geo mm_geo; /* database size-related parameters */
  2413  
  2414    MDBX_db mm_dbs[CORE_DBS]; /* first is free space, 2nd is main db */
  2415                              /* The size of pages used in this DB */
  2416  #define mm_psize mm_dbs[FREE_DBI].md_xsize
  2417    MDBX_canary mm_canary;
  2418  
  2419  #define MDBX_DATASIGN_NONE 0u
  2420  #define MDBX_DATASIGN_WEAK 1u
  2421  #define SIGN_IS_STEADY(sign) ((sign) > MDBX_DATASIGN_WEAK)
  2422  #define META_IS_STEADY(meta)                                                   \
  2423    SIGN_IS_STEADY(unaligned_peek_u64_volatile(4, (meta)->mm_sign))
  2424    union {
  2425      uint32_t mm_sign[2];
  2426      uint64_t unsafe_sign;
  2427    };
  2428  
  2429    /* txnid that committed this page, the second of a two-phase-update pair */
  2430    MDBX_atomic_uint32_t mm_txnid_b[2];
  2431  
  2432    /* Number of non-meta pages which were put in GC after COW. May be 0 in case
  2433     * DB was previously handled by libmdbx without corresponding feature.
  2434     * This value in couple with mr_snapshot_pages_retired allows fast estimation
  2435     * of "how much reader is restraining GC recycling". */
  2436    uint32_t mm_pages_retired[2];
  2437  
  2438    /* The analogue /proc/sys/kernel/random/boot_id or similar to determine
  2439     * whether the system was rebooted after the last use of the database files.
  2440     * If there was no reboot, then there is no need to rollback to the last
  2441     * steady sync point. Zeros mean that no relevant information is available
  2442     * from the system. */
  2443    bin128_t mm_bootid;
  2444  
  2445  } MDBX_meta;
  2446  
  2447  #pragma pack(1)
  2448  
  2449  /* Common header for all page types. The page type depends on mp_flags.
  2450   *
  2451   * P_BRANCH and P_LEAF pages have unsorted 'MDBX_node's at the end, with
  2452   * sorted mp_ptrs[] entries referring to them. Exception: P_LEAF2 pages
  2453   * omit mp_ptrs and pack sorted MDBX_DUPFIXED values after the page header.
  2454   *
  2455   * P_OVERFLOW records occupy one or more contiguous pages where only the
  2456   * first has a page header. They hold the real data of F_BIGDATA nodes.
  2457   *
  2458   * P_SUBP sub-pages are small leaf "pages" with duplicate data.
  2459   * A node with flag F_DUPDATA but not F_SUBDATA contains a sub-page.
  2460   * (Duplicate data can also go in sub-databases, which use normal pages.)
  2461   *
  2462   * P_META pages contain MDBX_meta, the start point of an MDBX snapshot.
  2463   *
  2464   * Each non-metapage below MDBX_meta.mm_geo.next is reachable exactly once
  2465   * in the snapshot: Either used by a database or listed in a GC record. */
  2466  typedef struct MDBX_page {
  2467    union {
  2468  #define IS_FROZEN(txn, p) ((p)->mp_txnid < (txn)->mt_txnid)
  2469  #define IS_SPILLED(txn, p) ((p)->mp_txnid == (txn)->mt_txnid)
  2470  #define IS_SHADOWED(txn, p) ((p)->mp_txnid > (txn)->mt_txnid)
  2471  #define IS_VALID(txn, p) ((p)->mp_txnid <= (txn)->mt_front)
  2472  #define IS_MODIFIABLE(txn, p) ((p)->mp_txnid == (txn)->mt_front)
  2473      uint64_t
  2474          mp_txnid; /* txnid which created this page, maybe zero in legacy DB */
  2475      struct MDBX_page *mp_next; /* for in-memory list of freed pages */
  2476    };
  2477    uint16_t mp_leaf2_ksize;   /* key size if this is a LEAF2 page */
  2478  #define P_BRANCH 0x01u       /* branch page */
  2479  #define P_LEAF 0x02u         /* leaf page */
  2480  #define P_OVERFLOW 0x04u     /* overflow page */
  2481  #define P_META 0x08u         /* meta page */
  2482  #define P_LEGACY_DIRTY 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */
  2483  #define P_BAD P_LEGACY_DIRTY /* explicit flag for invalid/bad page */
  2484  #define P_LEAF2 0x20u        /* for MDBX_DUPFIXED records */
  2485  #define P_SUBP 0x40u         /* for MDBX_DUPSORT sub-pages */
  2486  #define P_SPILLED 0x2000u    /* spilled in parent txn */
  2487  #define P_LOOSE 0x4000u      /* page was dirtied then freed, can be reused */
  2488  #define P_FROZEN 0x8000u     /* used for retire page with known status */
  2489  #define P_ILL_BITS                                                             \
  2490    ((uint16_t) ~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED))
  2491    uint16_t mp_flags;
  2492    union {
  2493      uint32_t mp_pages; /* number of overflow pages */
  2494      __anonymous_struct_extension__ struct {
  2495        indx_t mp_lower; /* lower bound of free space */
  2496        indx_t mp_upper; /* upper bound of free space */
  2497      };
  2498    };
  2499    pgno_t mp_pgno; /* page number */
  2500  
  2501  #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
  2502      (!defined(__cplusplus) && defined(_MSC_VER))
  2503    indx_t mp_ptrs[] /* dynamic size */;
  2504  #endif /* C99 */
  2505  } MDBX_page;
  2506  
  2507  #define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags)
  2508  
  2509  /* Drop legacy P_DIRTY flag for sub-pages for compatibility */
  2510  #define PAGETYPE_COMPAT(p)                                                     \
  2511    (unlikely(PAGETYPE_WHOLE(p) & P_SUBP)                                        \
  2512         ? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY)                        \
  2513         : PAGETYPE_WHOLE(p))
  2514  
  2515  /* Size of the page header, excluding dynamic data at the end */
  2516  #define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
  2517  
  2518  #pragma pack(pop)
  2519  
  2520  #if MDBX_ENABLE_PGOP_STAT
  2521  /* Overall statistics of page operations across all (running, completed and
  2522   * aborted) transactions */
  2523  typedef struct {
  2524    MDBX_atomic_uint64_t newly;   /* Quantity of new pages added */
  2525    MDBX_atomic_uint64_t cow;     /* Quantity of pages copied for update */
  2526    MDBX_atomic_uint64_t clone;   /* Quantity of parent's dirty pages cloned
  2527                                     for nested transactions */
  2528    MDBX_atomic_uint64_t split;   /* Page splits */
  2529    MDBX_atomic_uint64_t merge;   /* Page merges */
  2530    MDBX_atomic_uint64_t spill;   /* Quantity of spilled dirty pages */
  2531    MDBX_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */
  2532    MDBX_atomic_uint64_t
  2533        wops; /* Number of explicit write operations (not pages) to a disk */
  2534    MDBX_atomic_uint64_t
  2535        gcrtime; /* Time spent reading/searching GC (aka FreeDB). The
  2536                    unit/scale is platform-dependent, see osal_monotime(). */
  2537  } MDBX_pgop_stat_t;
  2538  #endif /* MDBX_ENABLE_PGOP_STAT */
  2539  
  2540  #if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
  2541  #define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
  2542  typedef void osal_ipclock_t;
  2543  #elif MDBX_LOCKING == MDBX_LOCKING_SYSV
  2544  
  2545  #define MDBX_CLOCK_SIGN UINT32_C(0xF18D)
  2546  typedef mdbx_pid_t osal_ipclock_t;
  2547  #ifndef EOWNERDEAD
  2548  #define EOWNERDEAD MDBX_RESULT_TRUE
  2549  #endif
  2550  
  2551  #elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 ||                                \
  2552      MDBX_LOCKING == MDBX_LOCKING_POSIX2008
  2553  #define MDBX_CLOCK_SIGN UINT32_C(0x8017)
  2554  typedef pthread_mutex_t osal_ipclock_t;
  2555  #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
  2556  #define MDBX_CLOCK_SIGN UINT32_C(0xFC29)
  2557  typedef sem_t osal_ipclock_t;
  2558  #else
  2559  #error "FIXME"
  2560  #endif /* MDBX_LOCKING */
  2561  
  2562  #if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus)
  2563  MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc);
  2564  MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc);
  2565  #endif /* MDBX_LOCKING */
  2566  
  2567  /* Reader Lock Table
  2568   *
  2569   * Readers don't acquire any locks for their data access. Instead, they
  2570   * simply record their transaction ID in the reader table. The reader
  2571   * mutex is needed just to find an empty slot in the reader table. The
  2572   * slot's address is saved in thread-specific data so that subsequent
  2573   * read transactions started by the same thread need no further locking to
  2574   * proceed.
  2575   *
  2576   * If MDBX_NOTLS is set, the slot address is not saved in thread-specific data.
  2577   * No reader table is used if the database is on a read-only filesystem.
  2578   *
  2579   * Since the database uses multi-version concurrency control, readers don't
  2580   * actually need any locking. This table is used to keep track of which
  2581   * readers are using data from which old transactions, so that we'll know
  2582   * when a particular old transaction is no longer in use. Old transactions
  2583   * that have discarded any data pages can then have those pages reclaimed
  2584   * for use by a later write transaction.
  2585   *
  2586   * The lock table is constructed such that reader slots are aligned with the
  2587   * processor's cache line size. Any slot is only ever used by one thread.
  2588   * This alignment guarantees that there will be no contention or cache
  2589   * thrashing as threads update their own slot info, and also eliminates
  2590   * any need for locking when accessing a slot.
  2591   *
  2592   * A writer thread will scan every slot in the table to determine the oldest
  2593   * outstanding reader transaction. Any freed pages older than this will be
  2594   * reclaimed by the writer. The writer doesn't use any locks when scanning
  2595   * this table. This means that there's no guarantee that the writer will
  2596   * see the most up-to-date reader info, but that's not required for correct
  2597   * operation - all we need is to know the upper bound on the oldest reader,
  2598   * we don't care at all about the newest reader. So the only consequence of
  2599   * reading stale information here is that old pages might hang around a
  2600   * while longer before being reclaimed. That's actually good anyway, because
  2601   * the longer we delay reclaiming old pages, the more likely it is that a
  2602   * string of contiguous pages can be found after coalescing old pages from
  2603   * many old transactions together. */
  2604  
  2605  /* The actual reader record, with cacheline padding. */
  2606  typedef struct MDBX_reader {
  2607    /* Current Transaction ID when this transaction began, or (txnid_t)-1.
  2608     * Multiple readers that start at the same time will probably have the
  2609     * same ID here. Again, it's not important to exclude them from
  2610     * anything; all we need to know is which version of the DB they
  2611     * started from so we can avoid overwriting any data used in that
  2612     * particular version. */
  2613    MDBX_atomic_uint64_t /* txnid_t */ mr_txnid;
  2614  
  2615    /* The information we store in a single slot of the reader table.
  2616     * In addition to a transaction ID, we also record the process and
  2617     * thread ID that owns a slot, so that we can detect stale information,
  2618     * e.g. threads or processes that went away without cleaning up.
  2619     *
  2620     * NOTE: We currently don't check for stale records.
  2621     * We simply re-init the table when we know that we're the only process
  2622     * opening the lock file. */
  2623  
  2624    /* The thread ID of the thread owning this txn. */
  2625    MDBX_atomic_uint64_t mr_tid;
  2626  
  2627    /* The process ID of the process owning this reader txn. */
  2628    MDBX_atomic_uint32_t mr_pid;
  2629  
  2630    /* The number of pages used in the reader's MVCC snapshot,
  2631     * i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */
  2632    atomic_pgno_t mr_snapshot_pages_used;
  2633    /* Number of retired pages at the time this reader starts transaction. So,
  2634     * at any time the difference mm_pages_retired - mr_snapshot_pages_retired
  2635     * will give the number of pages which this reader restraining from reuse. */
  2636    MDBX_atomic_uint64_t mr_snapshot_pages_retired;
  2637  } MDBX_reader;
  2638  
  2639  /* The header for the reader table (a memory-mapped lock file). */
  2640  typedef struct MDBX_lockinfo {
  2641    /* Stamp identifying this as an MDBX file.
  2642     * It must be set to MDBX_MAGIC with MDBX_LOCK_VERSION. */
  2643    uint64_t mti_magic_and_version;
  2644  
  2645    /* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */
  2646    uint32_t mti_os_and_format;
  2647  
  2648    /* Flags with which the environment was opened. */
  2649    MDBX_atomic_uint32_t mti_envmode;
  2650  
  2651    /* Threshold of un-synced-with-disk pages for auto-sync feature,
  2652     * zero means no-threshold, i.e. auto-sync is disabled. */
  2653    atomic_pgno_t mti_autosync_threshold;
  2654  
  2655    /* Low 32-bit of txnid with which meta-pages was synced,
  2656     * i.e. for sync-polling in the MDBX_NOMETASYNC mode. */
  2657    MDBX_atomic_uint32_t mti_meta_sync_txnid;
  2658  
  2659    /* Period for timed auto-sync feature, i.e. at every steady checkpoint
  2660     * the mti_unsynced_timeout is set to the current_time + mti_autosync_period.
  2661     * The time value is represented in a suitable system-dependent form, for
  2662     * example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC).
  2663     * Zero means timed auto-sync is disabled. */
  2664    MDBX_atomic_uint64_t mti_autosync_period;
  2665  
  2666    /* Marker to distinguish uniqueness of DB/CLK. */
  2667    MDBX_atomic_uint64_t mti_bait_uniqueness;
  2668  
  2669    MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
  2670  
  2671  #if MDBX_ENABLE_PGOP_STAT
  2672    /* Statistics of costly ops of all (running, completed and aborted)
  2673     * transactions */
  2674    MDBX_pgop_stat_t mti_pgop_stat;
  2675  #endif /* MDBX_ENABLE_PGOP_STAT*/
  2676  
  2677    MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
  2678  
  2679    /* Write transaction lock. */
  2680  #if MDBX_LOCKING > 0
  2681    osal_ipclock_t mti_wlock;
  2682  #endif /* MDBX_LOCKING > 0 */
  2683  
  2684    atomic_txnid_t mti_oldest_reader;
  2685  
  2686    /* Timestamp of the last steady sync. Value is represented in a suitable
  2687     * system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
  2688     * clock_gettime(CLOCK_MONOTONIC). */
  2689    MDBX_atomic_uint64_t mti_sync_timestamp;
  2690  
  2691    /* Number of un-synced-with-disk pages for auto-sync feature. */
  2692    atomic_pgno_t mti_unsynced_pages;
  2693  
  2694    /* Number of page which was discarded last time by madvise(MADV_FREE). */
  2695    atomic_pgno_t mti_discarded_tail;
  2696  
  2697    /* Timestamp of the last readers check. */
  2698    MDBX_atomic_uint64_t mti_reader_check_timestamp;
  2699  
  2700    /* Shared anchor for tracking readahead edge and enabled/disabled status. */
  2701    pgno_t mti_readahead_anchor;
  2702  
  2703    MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
  2704  
  2705    /* Readers registration lock. */
  2706  #if MDBX_LOCKING > 0
  2707    osal_ipclock_t mti_rlock;
  2708  #endif /* MDBX_LOCKING > 0 */
  2709  
  2710    /* The number of slots that have been used in the reader table.
  2711     * This always records the maximum count, it is not decremented
  2712     * when readers release their slots. */
  2713    MDBX_atomic_uint32_t mti_numreaders;
  2714    MDBX_atomic_uint32_t mti_readers_refresh_flag;
  2715  
  2716  #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
  2717      (!defined(__cplusplus) && defined(_MSC_VER))
  2718    MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
  2719    MDBX_reader mti_readers[] /* dynamic size */;
  2720  #endif /* C99 */
  2721  } MDBX_lockinfo;
  2722  
  2723  /* Lockfile format signature: version, features and field layout */
  2724  #define MDBX_LOCK_FORMAT                                                       \
  2725    (MDBX_CLOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 +              \
  2726     (unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 +             \
  2727     (unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 +                 \
  2728     (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 +                    \
  2729     (unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29)
  2730  
  2731  #define MDBX_DATA_MAGIC                                                        \
  2732    ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION)
  2733  
  2734  #define MDBX_DATA_MAGIC_LEGACY_COMPAT                                          \
  2735    ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2)
  2736  
  2737  #define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255)
  2738  
  2739  #define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION)
  2740  
  2741  /* The maximum size of a database page.
  2742   *
  2743   * It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper.
  2744   *
  2745   * MDBX will use database pages < OS pages if needed.
  2746   * That causes more I/O in write transactions: The OS must
  2747   * know (read) the whole page before writing a partial page.
  2748   *
  2749   * Note that we don't currently support Huge pages. On Linux,
  2750   * regular data files cannot use Huge pages, and in general
  2751   * Huge pages aren't actually pageable. We rely on the OS
  2752   * demand-pager to read our data and page it out when memory
  2753   * pressure from other processes is high. So until OSs have
  2754   * actual paging support for Huge pages, they're not viable. */
  2755  #define MAX_PAGESIZE MDBX_MAX_PAGESIZE
  2756  #define MIN_PAGESIZE MDBX_MIN_PAGESIZE
  2757  
        /* Smallest map size: MIN_PAGENO pages of the smallest page size. */
  2758  #define MIN_MAPSIZE (MIN_PAGESIZE * MIN_PAGENO)
        /* Upper bound of the map size for 32-bit builds; the Windows value is
         * lower than the one used on other platforms. */
  2759  #if defined(_WIN32) || defined(_WIN64)
  2760  #define MAX_MAPSIZE32 UINT32_C(0x38000000)
  2761  #else
  2762  #define MAX_MAPSIZE32 UINT32_C(0x7f000000)
  2763  #endif
        /* Upper bound of the map size for 64-bit builds: (MAX_PAGENO + 1) pages
         * of the largest page size, computed in 64-bit arithmetic. */
  2764  #define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MAX_PAGESIZE)
  2765  
        /* Select the effective map-size limit and the page-list length limit
         * (MDBX_PGL_LIMIT) according to the word size of the build. */
  2766  #if MDBX_WORDBITS >= 64
  2767  #define MAX_MAPSIZE MAX_MAPSIZE64
  2768  #define MDBX_PGL_LIMIT ((size_t)MAX_PAGENO)
  2769  #else
  2770  #define MAX_MAPSIZE MAX_MAPSIZE32
  2771  #define MDBX_PGL_LIMIT (MAX_MAPSIZE32 / MIN_PAGESIZE)
  2772  #endif /* MDBX_WORDBITS */
  2773  
        /* Hard upper bound for the number of reader slots.
         * NOTE(review): 32767 looks like INT16_MAX -- confirm why this value. */
  2774  #define MDBX_READERS_LIMIT 32767
        /* Threshold used to choose radix sort.
         * NOTE(review): units and the comparison direction are defined by the
         * sorting code, which is outside this section. */
  2775  #define MDBX_RADIXSORT_THRESHOLD 333
  2776  
  2777  /*----------------------------------------------------------------------------*/
  2778  
  2779  /* A PNL is a Page Number List, a sorted array of IDs.
  2780   * The first element of the array is a counter for how many actual page-numbers
  2781   * are in the list. By default PNLs are sorted in descending order, which allows
  2782   * cutting off the page with the lowest pgno (at the tail) by just truncating
  2783   * the list. The sort order of PNLs is controlled by the MDBX_PNL_ASCENDING
         * build option. */
  2784  typedef pgno_t *MDBX_PNL;
  2785  
        /* Ordering predicates for two neighbouring PNL items, where `first`
         * precedes `last` in the array. ORDERED is the strict expected order;
         * DISORDERED is its exact negation (equal values count as disordered). */
  2786  #if MDBX_PNL_ASCENDING
  2787  #define MDBX_PNL_ORDERED(first, last) ((first) < (last))
  2788  #define MDBX_PNL_DISORDERED(first, last) ((first) >= (last))
  2789  #else
  2790  #define MDBX_PNL_ORDERED(first, last) ((first) > (last))
  2791  #define MDBX_PNL_DISORDERED(first, last) ((first) <= (last))
  2792  #endif
  2793  
  2794  /* List of transaction IDs (txnid_t); used only for
         * MDBX_txn.tw.lifo_reclaimed. */
  2795  typedef txnid_t *MDBX_TXL;
  2796  
  2797  /* A Dirty-Page list item is a pgno/pointer pair.
         * The trailing union lets the two bit-fields be accessed together as a
         * single `extra` word. */
  2798  typedef struct MDBX_dp {
  2799    MDBX_page *ptr;
  2800    pgno_t pgno;
  2801    union {
  2802      unsigned extra;
  2803      __anonymous_struct_extension__ struct {
  2804        unsigned multi : 1;
  2805        unsigned lru : 31;
  2806      };
  2807    };
  2808  } MDBX_dp;
  2809  
  2810  /* A DPL (dirty-page list) is a sorted array of MDBX_dp items.
         * The header below is followed by the items themselves; the flexible
         * array member is declared only for C99+ compilers or MSVC in C mode. */
  2811  typedef struct MDBX_dpl {
  2812    unsigned sorted;
  2813    unsigned length;
  2814    unsigned pages_including_loose; /* number of pages, not of entries. */
  2815    unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
  2816  #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
  2817      (!defined(__cplusplus) && defined(_MSC_VER))
  2818    MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
  2819  #endif
  2820  } MDBX_dpl;
  2821  
  2822  /* PNL sizes */
  2823  #define MDBX_PNL_GRANULATE 1024
        /* Initial item count: one granule minus 2 items and minus the assumed
         * malloc overhead expressed in pgno_t units. */
  2824  #define MDBX_PNL_INITIAL                                                       \
  2825    (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
  2826  
        /* TXL sizes, computed the same way but in txnid_t units. */
  2827  #define MDBX_TXL_GRANULATE 32
  2828  #define MDBX_TXL_INITIAL                                                       \
  2829    (MDBX_TXL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
  2830  #define MDBX_TXL_MAX                                                           \
  2831    ((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
  2832  
        /* PNL accessors. `pl` points at the length slot: pl[-1] holds the
         * allocated length, pl[0] the number of items, pl[1..size] the items.
         * All of these are lvalue-capable macros and evaluate `pl` as written
         * (MDBX_PNL_LAST and MDBX_PNL_END evaluate it twice). */
  2833  #define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
  2834  #define MDBX_PNL_SIZE(pl) ((pl)[0])
  2835  #define MDBX_PNL_FIRST(pl) ((pl)[1])
  2836  #define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
  2837  #define MDBX_PNL_BEGIN(pl) (&(pl)[1])
  2838  #define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
  2839  
        /* Item with the lowest / highest page number, depending on sort order. */
  2840  #if MDBX_PNL_ASCENDING
  2841  #define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
  2842  #define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl)
  2843  #else
  2844  #define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl)
  2845  #define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
  2846  #endif
  2847  
        /* Size in bytes of the length slot plus all items (pl[-1] not counted). */
  2848  #define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
  2849  #define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
  2850  
  2851  /*----------------------------------------------------------------------------*/
  2852  /* Internal structures */
  2853  
  2854  /* Auxiliary DB info.
  2855   * The information here is mostly static/read-only. There is
  2856   * only a single copy of this record in the environment.
         * It bundles the name, the key/data comparators and the accepted
         * key/value length ranges of one database. */
  2857  typedef struct MDBX_dbx {
  2858    MDBX_val md_name;                /* name of the database */
  2859    MDBX_cmp_func *md_cmp;           /* function for comparing keys */
  2860    MDBX_cmp_func *md_dcmp;          /* function for comparing data items */
  2861    size_t md_klen_min, md_klen_max; /* min/max key length for the database */
  2862    size_t md_vlen_min,
  2863        md_vlen_max; /* min/max value/data length for the database */
  2864  } MDBX_dbx;
  2865  
        /* Compact summary of the NUM_METAS meta pages: four byte-sized state
         * fields plus the txnid of each meta. The TROIKA_* macros below take a
         * pointer to this struct and extract bit groups:
         *   TROIKA_HAVE_STEADY  - low 3 bits of `fsm`;
         *   TROIKA_STRICT_VALID - bit 6 (64) of `tail_and_flags`;
         *   TROIKA_VALID        - bit 7 (128) of `tail_and_flags`;
         *   TROIKA_TAIL         - low 2 bits of `tail_and_flags`.
         * NOTE(review): the meaning of each bit group is defined by the code
         * that fills the struct, which is outside this section. */
  2866  typedef struct troika {
  2867    uint8_t fsm, recent, prefer_steady, tail_and_flags;
  2868  #define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
  2869  #define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
  2870  #define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
  2871  #define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3)
  2872    txnid_t txnid[NUM_METAS];
  2873  } meta_troika_t;
  2874  
  2875  /* A database transaction.
  2876   * Every operation requires a transaction handle.
         *
         * The common part is followed by an anonymous union: `to` is used by
         * read transactions and `tw` by write transactions. */
  2877  struct MDBX_txn {
  2878  #define MDBX_MT_SIGNATURE UINT32_C(0x93D53A31)
  2879    uint32_t mt_signature;
  2880  
  2881    /* Transaction Flags */
  2882    /* mdbx_txn_begin() flags */
  2883  #define MDBX_TXN_RO_BEGIN_FLAGS (MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE)
  2884  #define MDBX_TXN_RW_BEGIN_FLAGS                                                \
  2885    (MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY)
  2886    /* Additional flag for sync_locked() */
  2887  #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
  2888  
          /* Internal state flags kept in mt_flags alongside the begin-flags. */
  2889  #define TXN_FLAGS                                                              \
  2890    (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS |     \
  2891     MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
  2892  
          /* Compile-time check: state flags must not share bits with the
           * begin-flags, and none of them may collide with MDBX_SHRINK_ALLOWED. */
  2893  #if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) ||       \
  2894      ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) &         \
  2895       MDBX_SHRINK_ALLOWED)
  2896  #error "Oops, some txn flags overlapped or wrong"
  2897  #endif
  2898    uint32_t mt_flags;
  2899  
  2900    MDBX_txn *mt_parent; /* parent of a nested txn */
  2901    /* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */
  2902    MDBX_txn *mt_child;
  2903    MDBX_geo mt_geo;
  2904    /* next unallocated page */
  2905  #define mt_next_pgno mt_geo.next
  2906    /* corresponding to the current size of datafile */
  2907  #define mt_end_pgno mt_geo.now
  2908  
  2909    /* The ID of this transaction. IDs are integers incrementing from
  2910     * INITIAL_TXNID. Only committed write transactions increment the ID. If a
  2911     * transaction aborts, the ID may be re-used by the next writer. */
  2912    txnid_t mt_txnid;
  2913    txnid_t mt_front;
  2914  
  2915    MDBX_env *mt_env; /* the DB environment */
  2916    /* Array of records for each DB known in the environment. */
  2917    MDBX_dbx *mt_dbxs;
  2918    /* Array of MDBX_db records for each known DB */
  2919    MDBX_db *mt_dbs;
  2920    /* Array of sequence numbers for each DB handle */
  2921    MDBX_atomic_uint32_t *mt_dbiseqs;
  2922  
  2923    /* Transaction DBI Flags */
  2924  #define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
  2925  #define DBI_STALE MDBX_DBI_STALE /* Named-DB record is older than txnID */
  2926  #define DBI_FRESH MDBX_DBI_FRESH /* Named-DB handle opened in this txn */
  2927  #define DBI_CREAT MDBX_DBI_CREAT /* Named-DB handle created in this txn */
  2928  #define DBI_VALID 0x10           /* DB handle is valid, see also DB_VALID */
  2929  #define DBI_USRVALID 0x20        /* As DB_VALID, but not set for FREE_DBI */
  2930  #define DBI_AUDITED 0x40         /* Internal flag for accounting during audit */
  2931    /* Array of flags for each DB */
  2932    uint8_t *mt_dbistate;
  2933    /* Number of DB records in use, or 0 when the txn is finished.
  2934     * This number only ever increments until the txn finishes; we
  2935     * don't decrement it when individual DB handles are closed. */
  2936    MDBX_dbi mt_numdbs;
  2937    size_t mt_owner; /* thread ID that owns this transaction */
  2938    MDBX_canary mt_canary;
  2939    void *mt_userctx; /* User-settable context */
  2940    MDBX_cursor **mt_cursors;
  2941  
  2942    union {
  2943      struct {
  2944        /* For read txns: This thread/txn's reader table slot, or NULL. */
  2945        MDBX_reader *reader;
  2946      } to;
  2947      struct {
  2948        meta_troika_t troika;
  2949        /* In write txns, array of cursors for each DB */
              /* NOTE(review): the comment above does not describe the member that
               * follows; it looks like a leftover that belongs to mt_cursors. */
  2950        pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
  2951        txnid_t last_reclaimed;   /* ID of last used record */
  2952  #if MDBX_ENABLE_REFUND
  2953        pgno_t loose_refund_wl /* FIXME: describe */;
  2954  #endif /* MDBX_ENABLE_REFUND */
  2955        /* dirtylist room: Dirty array size - dirty pages visible to this txn.
  2956         * Includes ancestor txns' dirty pages not hidden by other txns'
  2957         * dirty/spilled pages. Thus commit(nested txn) has room to merge
  2958         * dirtylist into mt_parent after freeing hidden mt_parent pages. */
  2959        unsigned dirtyroom;
  2960        /* a sequence to spilling dirty page with LRU policy */
  2961        unsigned dirtylru;
  2962        /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
  2963        MDBX_dpl *dirtylist;
  2964        /* The list of reclaimed txns from GC */
  2965        MDBX_TXL lifo_reclaimed;
  2966        /* The list of pages that became unused during this transaction. */
  2967        MDBX_PNL retired_pages;
  2968        /* The list of loose pages that became unused and may be reused
  2969         * in this transaction, linked through `mp_next`. */
  2970        MDBX_page *loose_pages;
  2971        /* Number of loose pages (tw.loose_pages) */
  2972        unsigned loose_count;
  2973        unsigned spill_least_removed;
  2974        /* The sorted list of dirty pages we temporarily wrote to disk
  2975         * because the dirty list was full. page numbers in here are
  2976         * shifted left by 1, deleted slots have the LSB set. */
  2977        MDBX_PNL spill_pages;
  2978      } tw;
  2979    };
  2980  };
  2981  
        /* Depth of the per-cursor page stack (size of MDBX_cursor.mc_pg and
         * MDBX_cursor.mc_ki): 32 entries on 64-bit builds, 24 otherwise. */
  2982  #if MDBX_WORDBITS >= 64
  2983  #define CURSOR_STACK 32
  2984  #else
  2985  #define CURSOR_STACK 24
  2986  #endif
  2987  
        /* Forward declaration: MDBX_cursor only holds a pointer to it. */
  2988  struct MDBX_xcursor;
  2989  
  2990  /* Cursors are used for all DB operations.
  2991   * A cursor holds a path of (page pointer, key index) from the DB
  2992   * root to a position in the DB, plus other state. MDBX_DUPSORT
  2993   * cursors include an xcursor to the current data item. Write txns
  2994   * track their cursors and keep them up to date when data moves.
  2995   * Exception: An xcursor's pointer to a P_SUBP page can be stale.
  2996   * (A node with F_DUPDATA but no F_SUBDATA contains a subpage). */
  2997  struct MDBX_cursor {
          /* Values stored in mc_signature. */
  2998  #define MDBX_MC_LIVE UINT32_C(0xFE05D5B1)
  2999  #define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047)
  3000  #define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7)
  3001    uint32_t mc_signature;
  3002    /* The database handle this cursor operates on */
  3003    MDBX_dbi mc_dbi;
  3004    /* Next cursor on this DB in this txn */
  3005    MDBX_cursor *mc_next;
  3006    /* Backup of the original cursor if this cursor is a shadow */
  3007    MDBX_cursor *mc_backup;
  3008    /* Context used for databases with MDBX_DUPSORT, otherwise NULL */
  3009    struct MDBX_xcursor *mc_xcursor;
  3010    /* The transaction that owns this cursor */
  3011    MDBX_txn *mc_txn;
  3012    /* The database record for this cursor */
  3013    MDBX_db *mc_db;
  3014    /* The database auxiliary record for this cursor */
  3015    MDBX_dbx *mc_dbx;
  3016    /* The mt_dbistate for this database */
  3017    uint8_t *mc_dbistate;
  3018    uint8_t mc_snum; /* number of pushed pages */
  3019    uint8_t mc_top;  /* index of top page, normally mc_snum-1 */
  3020  
  3021    /* Cursor state flags. */
  3022  #define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */
  3023  #define C_EOF 0x02         /* No more data */
  3024  #define C_SUB 0x04         /* Cursor is a sub-cursor */
  3025  #define C_DEL 0x08         /* last op was a cursor_del */
  3026  #define C_UNTRACK 0x10     /* Un-track cursor when closing */
  3027  #define C_RECLAIMING 0x20  /* GC lookup is prohibited */
  3028  #define C_GCFREEZE 0x40    /* reclaimed_pglist must not be updated */
  3029    uint8_t mc_flags;        /* see mdbx_cursor */
  3030  
  3031    /* Cursor checking flags. */
  3032  #define CC_BRANCH 0x01    /* same as P_BRANCH for CHECK_LEAF_TYPE() */
  3033  #define CC_LEAF 0x02      /* same as P_LEAF for CHECK_LEAF_TYPE() */
  3034  #define CC_OVERFLOW 0x04  /* same as P_OVERFLOW for CHECK_LEAF_TYPE() */
  3035  #define CC_UPDATING 0x08  /* update/rebalance pending */
  3036  #define CC_SKIPORD 0x10   /* don't check keys ordering */
  3037  #define CC_LEAF2 0x20     /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
  3038  #define CC_RETIRING 0x40  /* refs to child pages may be invalid */
  3039  #define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
  3040    uint8_t mc_checking;    /* page checking level */
  3041  
          /* Parallel arrays: entry i is the page at depth i and the index of the
           * current key within that page. */
  3042    MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
  3043    indx_t mc_ki[CURSOR_STACK];     /* stack of page indices */
  3044  };
  3045  
        /* True when the page-type bits of `mp` (PAGETYPE_WHOLE) equal the
         * corresponding CC_BRANCH/CC_LEAF/CC_OVERFLOW/CC_LEAF2 bits stored in
         * mc->mc_checking: the XOR is zero within that mask only on a match. */
  3046  #define CHECK_LEAF_TYPE(mc, mp)                                                \
  3047    (((PAGETYPE_WHOLE(mp) ^ (mc)->mc_checking) &                                 \
  3048      (CC_BRANCH | CC_LEAF | CC_OVERFLOW | CC_LEAF2)) == 0)
  3049  
  3050  /* Context for sorted-dup records.
  3051   * We could have gone to a fully recursive design, with arbitrarily
  3052   * deep nesting of sub-databases. But for now we only handle these
  3053   * levels - main DB, optional sub-DB, sorted-duplicate DB.
         *
         * The nested cursor is embedded by value together with its own
         * MDBX_db and MDBX_dbx records. */
  3054  typedef struct MDBX_xcursor {
  3055    /* A sub-cursor for traversing the Dup DB */
  3056    MDBX_cursor mx_cursor;
  3057    /* The database record for this Dup DB */
  3058    MDBX_db mx_db;
  3059    /* The auxiliary DB record for this Dup DB */
  3060    MDBX_dbx mx_dbx;
  3061  } MDBX_xcursor;
  3062  
        /* An outer cursor stored together with its inner (sorted-dup) context
         * and a user context pointer. `outer` is the first member, so a pointer
         * to the couple is also a pointer to the outer MDBX_cursor. */
  3063  typedef struct MDBX_cursor_couple {
  3064    MDBX_cursor outer;
  3065    void *mc_userctx; /* User-settable context */
  3066    MDBX_xcursor inner;
  3067  } MDBX_cursor_couple;
  3068  
  3069  /* The database environment.
         * The members are grouped into a mostly static part, a mostly volatile
         * part, optional debugging members and a trailing stub for the lck-less
         * mode. Several members exist only under specific build options. */
  3070  struct MDBX_env {
  3071    /* ----------------------------------------------------- mostly static part */
  3072  #define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
  3073    MDBX_atomic_uint32_t me_signature;
  3074    /* Failed to update the meta page. Probably an I/O error. */
  3075  #define MDBX_FATAL_ERROR UINT32_C(0x80000000)
  3076    /* Some fields are initialized. */
  3077  #define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
  3078    /* me_txkey is set */
  3079  #define MDBX_ENV_TXKEY UINT32_C(0x10000000)
  3080    /* Legacy MDBX_MAPASYNC (prior v0.9) */
  3081  #define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000)
  3082    /* Legacy MDBX_COALESCE (prior v0.12) */
  3083  #define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000)
          /* Bits of me_flags that are internal state rather than user options. */
  3084  #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
  3085    uint32_t me_flags;
  3086    osal_mmap_t me_dxb_mmap; /* The main data file */
  3087  #define me_map me_dxb_mmap.dxb
  3088  #define me_lazy_fd me_dxb_mmap.fd
  3089    mdbx_filehandle_t me_dsync_fd;
  3090    osal_mmap_t me_lck_mmap; /* The lock file */
  3091  #define me_lfd me_lck_mmap.fd
  3092    struct MDBX_lockinfo *me_lck;
  3093  
  3094    unsigned me_psize;        /* DB page size, initialized from me_os_psize */
  3095    unsigned me_leaf_nodemax; /* max size of a leaf-node */
  3096    uint8_t me_psize2log;     /* log2 of DB page size */
  3097    int8_t me_stuck_meta; /* recovery-only: target meta page or less than zero */
  3098    uint16_t me_merge_threshold,
  3099        me_merge_threshold_gc;  /* pages emptier than this are candidates for
  3100                                   merging */
  3101    unsigned me_os_psize;       /* OS page size, from osal_syspagesize() */
  3102    unsigned me_maxreaders;     /* size of the reader table */
  3103    MDBX_dbi me_maxdbs;         /* size of the DB table */
  3104    uint32_t me_pid;            /* process ID of this env */
  3105    osal_thread_key_t me_txkey; /* thread-key for readers */
  3106    pathchar_t *me_pathname;    /* path to the DB files */
  3107    void *me_pbuf;              /* scratch area for DUPSORT put() */
  3108    MDBX_txn *me_txn0;          /* preallocated write transaction */
  3109  
  3110    MDBX_dbx *me_dbxs;                /* array of static DB info */
  3111    uint16_t *me_dbflags;             /* array of flags from MDBX_db.md_flags */
  3112    MDBX_atomic_uint32_t *me_dbiseqs; /* array of dbi sequence numbers */
  3113    unsigned
  3114        me_maxgc_ov1page;    /* Number of pgno_t fit in a single overflow page */
  3115    uint32_t me_live_reader; /* have liveness lock in reader table */
  3116    void *me_userctx;        /* User-settable context */
  3117    MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */
  3118  
          /* Tunable runtime options. `flags.all` overlays the `non_auto`
           * bit-fields so that they can be read or reset as one word. */
  3119    struct {
  3120      unsigned dp_reserve_limit;
  3121      unsigned rp_augment_limit;
  3122      unsigned dp_limit;
  3123      unsigned dp_initial;
  3124      uint8_t dp_loose_limit;
  3125      uint8_t spill_max_denominator;
  3126      uint8_t spill_min_denominator;
  3127      uint8_t spill_parent4child_denominator;
  3128      unsigned merge_threshold_16dot16_percent;
  3129      union {
  3130        unsigned all;
  3131        /* tracks options with non-auto values but tuned by user */
  3132        struct {
  3133          unsigned dp_limit : 1;
  3134        } non_auto;
  3135      } flags;
  3136    } me_options;
  3137  
  3138    /* struct me_dbgeo used for accepting db-geo params from user for the new
  3139     * database creation, i.e. when mdbx_env_set_geometry() was called before
  3140     * mdbx_env_open(). */
  3141    struct {
  3142      size_t lower;  /* minimal size of datafile */
  3143      size_t upper;  /* maximal size of datafile */
  3144      size_t now;    /* current size of datafile */
  3145      size_t grow;   /* step to grow datafile */
  3146      size_t shrink; /* threshold to shrink datafile */
  3147    } me_dbgeo;
  3148  
          /* Present only when SysV IPC locking is selected at build time. */
  3149  #if MDBX_LOCKING == MDBX_LOCKING_SYSV
  3150    union {
  3151      key_t key;
  3152      int semid;
  3153    } me_sysv_ipc;
  3154  #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */
  3155  
  3156    MDBX_env *me_lcklist_next;
  3157  
  3158    /* --------------------------------------------------- mostly volatile part */
  3159  
  3160    MDBX_txn *me_txn; /* current write transaction */
  3161    osal_fastmutex_t me_dbi_lock;
  3162    MDBX_dbi me_numdbs; /* number of DBs opened */
  3163  
  3164    MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */
  3165    unsigned me_dp_reserve_len;
  3166    /* PNL of pages that became unused in a write txn */
  3167    MDBX_PNL me_retired_pages;
  3168  
          /* The remap guard is an SRW lock on Windows and a fast mutex elsewhere. */
  3169  #if defined(_WIN32) || defined(_WIN64)
  3170    osal_srwlock_t me_remap_guard;
  3171    /* Workaround for LockFileEx and WriteFile multithread bug */
  3172    CRITICAL_SECTION me_windowsbug_lock;
  3173  #else
  3174    osal_fastmutex_t me_remap_guard;
  3175  #endif
  3176  
  3177    /* -------------------------------------------------------------- debugging */
  3178  
  3179  #if MDBX_DEBUG
  3180    MDBX_assert_func *me_assert_func; /*  Callback for assertion failures */
  3181  #endif
  3182  #ifdef MDBX_USE_VALGRIND
  3183    int me_valgrind_handle;
  3184  #endif
  3185  #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
  3186    pgno_t me_poison_edge;
  3187  #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
  3188  
          /* xMDBX_DEBUG_SPILLING defaults to 0 (off) unless set by the build. */
  3189  #ifndef xMDBX_DEBUG_SPILLING
  3190  #define xMDBX_DEBUG_SPILLING 0
  3191  #endif
  3192  #if xMDBX_DEBUG_SPILLING == 2
  3193    unsigned debug_dirtied_est, debug_dirtied_act;
  3194  #endif /* xMDBX_DEBUG_SPILLING */
  3195  
  3196    /* ------------------------------------------------- stub for lck-less mode */
          /* Sized as sizeof(MDBX_lockinfo) plus MDBX_CACHELINE_SIZE - 1 bytes,
           * divided (rounding down) into 64-bit atomic words. */
  3197    MDBX_atomic_uint64_t
  3198        x_lckless_stub[(sizeof(MDBX_lockinfo) + MDBX_CACHELINE_SIZE - 1) /
  3199                       sizeof(MDBX_atomic_uint64_t)];
  3200  };
  3201  
  3202  #ifndef __cplusplus
  3203  /*----------------------------------------------------------------------------*/
  3204  /* Debug and Logging stuff */
  3205  
  3206  #define MDBX_RUNTIME_FLAGS_INIT                                                \
  3207    ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT
  3208  
        /* Global debug/logging state; only declared here, defined elsewhere.
         * runtime_flags holds MDBX_DBG_* bits, loglevel is compared against
         * MDBX_LOG_* levels by LOG_ENABLED(), and debug_logger is a logging
         * callback pointer. */
  3209  extern uint8_t runtime_flags;
  3210  extern uint8_t loglevel;
  3211  extern MDBX_debug_func *debug_logger;
  3212  
  3213  MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny) {
  3214  #if MDBX_DEBUG
  3215    if (MDBX_DBG_JITTER & runtime_flags)
  3216      osal_jitter(tiny);
  3217  #else
  3218    (void)tiny;
  3219  #endif
  3220  }
  3221  
        /* printf-style logging entry points. `level` is an MDBX_LOG_* value,
         * `function` and `line` identify the call site (the logging macros pass
         * NULL/0 for continuation output), and `fmt` plus the variadic tail or
         * `args` form the message.
         * The format attribute MDBX_PRINTF_ARGS(4, 5) appears both before and
         * after the declarator of debug_log(). */
  3222  MDBX_INTERNAL_FUNC void MDBX_PRINTF_ARGS(4, 5)
  3223      debug_log(int level, const char *function, int line, const char *fmt, ...)
  3224          MDBX_PRINTF_ARGS(4, 5);
  3225  MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
  3226                                       const char *fmt, va_list args);
  3227  
  3228  #if MDBX_DEBUG
  3229  #define LOG_ENABLED(msg) unlikely(msg <= loglevel)
  3230  #define AUDIT_ENABLED() unlikely((runtime_flags & MDBX_DBG_AUDIT))
  3231  #else /* MDBX_DEBUG */
  3232  #define LOG_ENABLED(msg) (msg < MDBX_LOG_VERBOSE && msg <= loglevel)
  3233  #define AUDIT_ENABLED() (0)
  3234  #endif /* MDBX_DEBUG */
  3235  
        /* ASSERT_ENABLED(): whether the internal assertions are checked.
         * Always on with MDBX_FORCE_ASSERTIONS; in MDBX_DEBUG builds it follows
         * the MDBX_DBG_ASSERT bit of runtime_flags; otherwise always off. */
  3236  #if MDBX_FORCE_ASSERTIONS
  3237  #define ASSERT_ENABLED() (1)
  3238  #elif MDBX_DEBUG
  3239  #define ASSERT_ENABLED() likely((runtime_flags & MDBX_DBG_ASSERT))
  3240  #else
  3241  #define ASSERT_ENABLED() (0)
  3242  #endif /* assertions */
  3243  
        /* Level-specific logging macros. Each one checks LOG_ENABLED() for its
         * level and then forwards to debug_log() with the call site taken from
         * __func__ and __LINE__. All of them require at least one argument after
         * `fmt`, because __VA_ARGS__ is used after a comma.
         *
         * DEBUG_EXTRA passes `fmt` unchanged (no newline is appended). */
  3244  #define DEBUG_EXTRA(fmt, ...)                                                  \
  3245    do {                                                                         \
  3246      if (LOG_ENABLED(MDBX_LOG_EXTRA))                                           \
  3247        debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__);         \
  3248    } while (0)
  3249  
        /* Like DEBUG_EXTRA, but passes NULL/0 instead of the call site. */
  3250  #define DEBUG_EXTRA_PRINT(fmt, ...)                                            \
  3251    do {                                                                         \
  3252      if (LOG_ENABLED(MDBX_LOG_EXTRA))                                           \
  3253        debug_log(MDBX_LOG_EXTRA, NULL, 0, fmt, __VA_ARGS__);                    \
  3254    } while (0)
  3255  
        /* The macros below append "\n" to `fmt` by string-literal concatenation,
         * therefore `fmt` must be a string literal. */
  3256  #define TRACE(fmt, ...)                                                        \
  3257    do {                                                                         \
  3258      if (LOG_ENABLED(MDBX_LOG_TRACE))                                           \
  3259        debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  3260    } while (0)
  3261  
  3262  #define DEBUG(fmt, ...)                                                        \
  3263    do {                                                                         \
  3264      if (LOG_ENABLED(MDBX_LOG_DEBUG))                                           \
  3265        debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  3266    } while (0)
  3267  
  3268  #define VERBOSE(fmt, ...)                                                      \
  3269    do {                                                                         \
  3270      if (LOG_ENABLED(MDBX_LOG_VERBOSE))                                         \
  3271        debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__);  \
  3272    } while (0)
  3273  
  3274  #define NOTICE(fmt, ...)                                                       \
  3275    do {                                                                         \
  3276      if (LOG_ENABLED(MDBX_LOG_NOTICE))                                          \
  3277        debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__);   \
  3278    } while (0)
  3279  
  3280  #define WARNING(fmt, ...)                                                      \
  3281    do {                                                                         \
  3282      if (LOG_ENABLED(MDBX_LOG_WARN))                                            \
  3283        debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__);     \
  3284    } while (0)
  3285  
  3286  #undef ERROR /* wingdi.h                                                       \
  3287    (a public Windows header) already defines ERROR; drop that definition. */
  3288  
        /* Logs at MDBX_LOG_ERROR when that level is enabled; "\n" is appended to
         * `fmt`, which therefore must be a string literal. */
  3289  #define ERROR(fmt, ...)                                                        \
  3290    do {                                                                         \
  3291      if (LOG_ENABLED(MDBX_LOG_ERROR))                                           \
  3292        debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  3293    } while (0)
  3294  
  3295  #define FATAL(fmt, ...)                                                        \
  3296    debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
  3297  
        /* Always-on check: when `expr` evaluates to false, reports `msg` with the
         * call site through mdbx_assert_fail(). `expr` is evaluated exactly once. */
  3298  #define ENSURE_MSG(env, expr, msg)                                             \
  3299    do {                                                                         \
  3300      if (unlikely(!(expr)))                                                     \
  3301        mdbx_assert_fail(env, msg, __func__, __LINE__);                          \
  3302    } while (0)
  3303  
        /* ENSURE_MSG() with the stringified expression as the message. */
  3304  #define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
  3305  
  3306  /* assert(3) variant in environment context.
         * `expr` is evaluated only when ASSERT_ENABLED() is true, so it must not
         * carry side effects the program depends on. */
  3307  #define eASSERT(env, expr)                                                     \
  3308    do {                                                                         \
  3309      if (ASSERT_ENABLED())                                                      \
  3310        ENSURE(env, expr);                                                       \
  3311    } while (0)
  3312  
  3313  /* assert(3) variant in cursor context */
  3314  #define cASSERT(mc, expr) eASSERT((mc)->mc_txn->mt_env, expr)
  3315  
  3316  /* assert(3) variant in transaction context */
  3317  #define tASSERT(txn, expr) eASSERT((txn)->mt_env, expr)
  3318  
        /* Outside of the tools (xMDBX_TOOLS not defined) the standard assert() is
         * replaced by eASSERT() with a NULL environment. */
  3319  #ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */
  3320  #undef assert
  3321  #define assert(expr) eASSERT(NULL, expr)
  3322  #endif
  3323  
  3324  /*----------------------------------------------------------------------------*/
  3325  /* Cache coherence and mmap invalidation */
  3326  
        /* Expands to osal_memory_barrier() when the build declares CPU write-back
         * as incoherent (MDBX_CPU_WRITEBACK_INCOHERENT), otherwise to
         * osal_compiler_barrier(). */
  3327  #if MDBX_CPU_WRITEBACK_INCOHERENT
  3328  #define osal_flush_incoherent_cpu_writeback() osal_memory_barrier()
  3329  #else
  3330  #define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier()
  3331  #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
  3332  
  3333  MDBX_MAYBE_UNUSED static __inline void
  3334  osal_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) {
  3335  #if MDBX_MMAP_INCOHERENT_FILE_WRITE
  3336    char *const begin = (char *)(-pagesize & (intptr_t)addr);
  3337    char *const end =
  3338        (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1));
  3339    int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0;
  3340    eASSERT(nullptr, err == 0);
  3341    (void)err;
  3342  #else
  3343    (void)pagesize;
  3344  #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  3345  
  3346  #if MDBX_MMAP_INCOHERENT_CPU_CACHE
  3347  #ifdef DCACHE
  3348    /* MIPS has cache coherency issues.
  3349     * Note: for any nbytes >= on-chip cache size, entire is flushed. */
  3350    cacheflush(addr, nbytes, DCACHE);
  3351  #else
  3352  #error "Oops, cacheflush() not available"
  3353  #endif /* DCACHE */
  3354  #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
  3355  
  3356  #if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE
  3357    (void)addr;
  3358    (void)nbytes;
  3359  #endif
  3360  }
  3361  
  3362  /*----------------------------------------------------------------------------*/
  3363  /* Internal prototypes */
  3364  
        /* Prototypes of internal helpers that are defined elsewhere.
         * NOTE(review): the descriptions are inferred from the names and the
         * parameter lists only -- confirm against the definitions. */

        /* Reader-table cleanup; `dead` is an output pointer. */
  3365  MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked,
  3366                                              int *dead);
        /* Register / unregister a thread key together with a [begin, end) range
         * of reader slots. */
  3367  MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
  3368                                    MDBX_reader *end);
  3369  MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
  3370  
        /* Library-wide constructor/destructor and the per-thread destructor. */
  3371  MDBX_INTERNAL_FUNC void global_ctor(void);
  3372  MDBX_INTERNAL_FUNC void global_dtor(void);
  3373  MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
  3374  
  3375  #endif /* !__cplusplus */
  3376  
        /* True for any return code other than MDBX_RESULT_TRUE and
         * MDBX_RESULT_FALSE. Note that `rc` is evaluated twice. */
  3377  #define MDBX_IS_ERROR(rc)                                                      \
  3378    ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
  3379  
  3380  /* Internal error codes, not exposed outside libmdbx */
  3381  #define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
  3382  
  3383  /* Debugging output value of a cursor DBI: Negative in a sub-cursor.
         * Yields mc_dbi as an int, negated when C_SUB is set in mc_flags. */
  3384  #define DDBI(mc)                                                               \
  3385    (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi)
  3386  
  3387  /* Key size which fits in a DKBUF (debug key buffer). */
  3388  #define DKBUF_MAX 511
        /* DKBUF declares the local buffer `_kbuf`, which DKEY and DVAL rely on.
         * The buffer is split into two halves of DKBUF_MAX * 2 + 1 bytes each:
         * DKEY dumps into the first half and DVAL into the second, so a key and a
         * value can be formatted for the same log call. */
  3389  #define DKBUF char _kbuf[DKBUF_MAX * 4 + 2]
  3390  #define DKEY(x) mdbx_dump_val(x, _kbuf, DKBUF_MAX * 2 + 1)
  3391  #define DVAL(x) mdbx_dump_val(x, _kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1)
  3392  
        /* Debug-only variants: outside MDBX_DEBUG builds no buffer is declared
         * and both dump macros yield the literal "-" without evaluating `x`. */
  3393  #if MDBX_DEBUG
  3394  #define DKBUF_DEBUG DKBUF
  3395  #define DKEY_DEBUG(x) DKEY(x)
  3396  #define DVAL_DEBUG(x) DVAL(x)
  3397  #else
  3398  #define DKBUF_DEBUG ((void)(0))
  3399  #define DKEY_DEBUG(x) ("-")
  3400  #define DVAL_DEBUG(x) ("-")
  3401  #endif
  3402  
  3403  /* An invalid page number.
  3404   * Mainly used to denote an empty tree.
         * All bits of pgno_t are set. */
  3405  #define P_INVALID (~(pgno_t)0)
  3406  
  3407  /* Test if the flags f are set in a flag word w.
         * True only when every bit of f is set; f is evaluated twice. */
  3408  #define F_ISSET(w, f) (((w) & (f)) == (f))
  3409  
  3410  /* Round n up to an even number.
         * The arithmetic is carried out in at least unsigned long. */
  3411  #define EVEN(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */
  3412  
  3413  /* Default size of memory map.
  3414   * This is certainly too small for any actual applications. Apps should
  3415   * always set the size explicitly using mdbx_env_set_geometry(). */
  3416  #define DEFAULT_MAPSIZE MEGABYTE
  3417  
  3418  /* Number of slots in the reader table.
  3419   * This value was chosen somewhat arbitrarily. 61 is a prime number,
  3420   * and that many readers plus a couple of mutexes fit into a single 4KB page.
  3421   * Applications should set the table size using mdbx_env_set_maxreaders(). */
  3422  #define DEFAULT_READERS 61
  3423  
        /* Page-type predicates: each tests one P_* bit in p->mp_flags.
         * IS_LEAF2 and IS_OVERFLOW are additionally wrapped in unlikely(). */
  3424  /* Test if a page is a leaf page */
  3425  #define IS_LEAF(p) (((p)->mp_flags & P_LEAF) != 0)
  3426  /* Test if a page is a LEAF2 page */
  3427  #define IS_LEAF2(p) unlikely(((p)->mp_flags & P_LEAF2) != 0)
  3428  /* Test if a page is a branch page */
  3429  #define IS_BRANCH(p) (((p)->mp_flags & P_BRANCH) != 0)
  3430  /* Test if a page is an overflow page */
  3431  #define IS_OVERFLOW(p) unlikely(((p)->mp_flags & P_OVERFLOW) != 0)
  3432  /* Test if a page is a sub page */
  3433  #define IS_SUBP(p) (((p)->mp_flags & P_SUBP) != 0)
  3434  
  3435  /* Header for a single key/data pair within a page.
  3436   * Used in pages of type P_BRANCH and P_LEAF without P_LEAF2.
  3437   * We guarantee 2-byte alignment for 'MDBX_node's.
  3438   *
  3439   * Leaf node flags describe node contents.  F_BIGDATA says the node's
  3440   * data part is the page number of an overflow page with actual data.
  3441   * F_DUPDATA and F_SUBDATA can be combined giving duplicate data in
  3442   * a sub-page/sub-database, and named databases (just F_SUBDATA).
         *
         * The member order is mirrored between little-endian and other targets.
         * mn_dsize and mn_pgno32 share the same 32-bit storage. */
  3443  typedef struct MDBX_node {
  3444  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  3445    union {
  3446      uint32_t mn_dsize;
  3447      uint32_t mn_pgno32;
  3448    };
  3449    uint8_t mn_flags; /* see mdbx_node flags */
  3450    uint8_t mn_extra;
  3451    uint16_t mn_ksize; /* key size */
  3452  #else
  3453    uint16_t mn_ksize; /* key size */
  3454    uint8_t mn_extra;
  3455    uint8_t mn_flags; /* see mdbx_node flags */
  3456    union {
  3457      uint32_t mn_pgno32;
  3458      uint32_t mn_dsize;
  3459    };
  3460  #endif /* __BYTE_ORDER__ */
  3461  
  3462    /* mdbx_node Flags */
  3463  #define F_BIGDATA 0x01 /* data put on overflow page */
  3464  #define F_SUBDATA 0x02 /* data is a sub-database */
  3465  #define F_DUPDATA 0x04 /* data has duplicates */
  3466  
  3467    /* valid flags for mdbx_node_add() */
  3468  #define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND)
  3469  
          /* The flexible array member is declared only for C99+ compilers or
           * MSVC in C mode. */
  3470  #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
  3471      (!defined(__cplusplus) && defined(_MSC_VER))
  3472    uint8_t mn_data[] /* key and data are appended here */;
  3473  #endif /* C99 */
  3474  } MDBX_node;
  3475  
/* Per-database flags grouped as "persistent".
 * NOTE(review): presumably these are the flags stored with the database
 * itself - confirm against the code that writes them. */
#define DB_PERSISTENT_FLAGS                                                    \
  (MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED |          \
   MDBX_INTEGERDUP | MDBX_REVERSEDUP)

/* mdbx_dbi_open() flags */
#define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE)

#define DB_VALID 0x8000 /* DB handle is valid, for me_dbflags */
#define DB_INTERNAL_FLAGS DB_VALID

/* Compile-time sanity checks: internal flags must not share bits with the
 * usable ones, and every persistent flag must also be a usable one. */
#if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS
#error "Oops, some flags overlapped or wrong"
#endif
#if DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS
#error "Oops, some flags overlapped or wrong"
#endif

/* max number of pages to commit in one writev() call;
 * lowered to IOV_MAX when the platform defines a smaller limit */
#define MDBX_COMMIT_PAGES 64
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
#undef MDBX_COMMIT_PAGES
#define MDBX_COMMIT_PAGES IOV_MAX
#endif
  3499  
/*
 *                /
 *                | -1, a < b
 * CMP2INT(a,b) = <  0, a == b
 *                |  1, a > b
 *                \
 *
 * Both variants may evaluate each argument more than once, so the arguments
 * must be free of side effects.
 */
#ifndef __e2k__
/* LY: fast enough on most systems */
#define CMP2INT(a, b) (((b) > (a)) ? -1 : (a) > (b))
#else
/* LY: more parallelable on VLIW Elbrus */
#define CMP2INT(a, b) (((a) > (b)) - ((b) > (a)))
#endif

/* Do not spill pages to disk if txn is getting full, may fail instead */
#define MDBX_NOSPILL 0x8000
  3517  
  3518  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
  3519  int64pgno(int64_t i64) {
  3520    if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1))
  3521      return (pgno_t)i64;
  3522    return (i64 < (int64_t)MIN_PAGENO) ? MIN_PAGENO : MAX_PAGENO;
  3523  }
  3524  
  3525  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
  3526  pgno_add(size_t base, size_t augend) {
  3527    assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO);
  3528    return int64pgno(base + augend);
  3529  }
  3530  
  3531  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
  3532  pgno_sub(size_t base, size_t subtrahend) {
  3533    assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 &&
  3534           subtrahend < MAX_PAGENO);
  3535    return int64pgno(base - subtrahend);
  3536  }
  3537  
  3538  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline bool
  3539  is_powerof2(size_t x) {
  3540    return (x & (x - 1)) == 0;
  3541  }
  3542  
  3543  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t
  3544  floor_powerof2(size_t value, size_t granularity) {
  3545    assert(is_powerof2(granularity));
  3546    return value & ~(granularity - 1);
  3547  }
  3548  
  3549  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t
  3550  ceil_powerof2(size_t value, size_t granularity) {
  3551    return floor_powerof2(value + granularity - 1, granularity);
  3552  }
  3553  
  3554  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
  3555  log2n_powerof2(size_t value) {
  3556    assert(value > 0 && value < INT32_MAX && is_powerof2(value));
  3557    assert((value & -(int32_t)value) == value);
  3558  #if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
  3559    return __builtin_ctzl(value);
  3560  #elif defined(_MSC_VER)
  3561    unsigned long index;
  3562    _BitScanForward(&index, (unsigned long)value);
  3563    return index;
  3564  #else
  3565    static const uint8_t debruijn_ctz32[32] = {
  3566        0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
  3567        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  3568    return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
  3569  #endif
  3570  }
  3571  
/* Only a subset of the mdbx_env flags can be changed
 * at runtime. Changing other flags requires closing the
 * environment and re-opening it with the new flags. */
#define ENV_CHANGEABLE_FLAGS                                                   \
  (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC |             \
   MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE |           \
   MDBX_VALIDATION)
/* The complementary set: flags that are not part of the changeable group. */
#define ENV_CHANGELESS_FLAGS                                                   \
  (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
   MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)
/* Union of both groups. */
#define ENV_USABLE_FLAGS (ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS)
  3583  
  3584  #if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS
  3585  MDBX_MAYBE_UNUSED static void static_checks(void) {
  3586    STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI,
  3587                      "Oops, MDBX_MAX_DBI or CORE_DBS?");
  3588    STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) ==
  3589                          ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) &
  3590                           (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)),
  3591                      "Oops, some flags overlapped or wrong");
  3592    STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0,
  3593                      "Oops, some flags overlapped or wrong");
  3594  }
  3595  #endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */
  3596  
  3597  #ifdef __cplusplus
  3598  }
  3599  #endif
  3600  
/* Tracing wrappers around the ASAN_*_MEMORY_REGION macros: each one first
 * logs the address, the size and the source line through TRACE() and then
 * forwards to the underlying macro. `addr` and `size` appear twice in the
 * expansion, so the arguments must be free of side effects. */

/* Log, then poison the region [addr, addr + size). */
#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size)                             \
  do {                                                                         \
    TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr),               \
          (size_t)(size), __LINE__);                                           \
    ASAN_POISON_MEMORY_REGION(addr, size);                                     \
  } while (0)

/* Log, then unpoison the region [addr, addr + size). */
#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size)                           \
  do {                                                                         \
    TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr),             \
          (size_t)(size), __LINE__);                                           \
    ASAN_UNPOISON_MEMORY_REGION(addr, size);                                   \
  } while (0)
  3614  
  3615  #if defined(_WIN32) || defined(_WIN64)
  3616  /*
  3617   * POSIX getopt for Windows
  3618   *
  3619   * AT&T Public License
  3620   *
  3621   * Code given out at the 1985 UNIFORUM conference in Dallas.
  3622   */
  3623  
  3624  /*----------------------------------------------------------------------------*/
/* Microsoft compiler generates a lot of warnings for self includes... */
  3626  
  3627  #ifdef _MSC_VER
  3628  #pragma warning(push, 1)
  3629  #pragma warning(disable : 4548) /* expression before comma has no effect;      \
  3630                                     expected expression with side - effect */
  3631  #pragma warning(disable : 4530) /* C++ exception handler used, but unwind      \
  3632                                   * semantics are not enabled. Specify /EHsc */
  3633  #pragma warning(disable : 4577) /* 'noexcept' used with no exception handling  \
  3634                                   * mode specified; termination on exception is \
  3635                                   * not guaranteed. Specify /EHsc */
  3636  #if !defined(_CRT_SECURE_NO_WARNINGS)
  3637  #define _CRT_SECURE_NO_WARNINGS
  3638  #endif
  3639  #endif /* _MSC_VER (warnings) */
  3640  
  3641  #include <stdio.h>
  3642  #include <string.h>
  3643  
  3644  #ifdef _MSC_VER
  3645  #pragma warning(pop)
  3646  #endif
  3647  /*----------------------------------------------------------------------------*/
  3648  
  3649  #ifndef NULL
  3650  #define NULL 0
  3651  #endif
  3652  
  3653  #ifndef EOF
  3654  #define EOF (-1)
  3655  #endif
  3656  
  3657  int optind = 1;
  3658  int optopt;
  3659  char *optarg;
  3660  
  3661  int getopt(int argc, char *const argv[], const char *opts) {
  3662    static int sp = 1;
  3663    int c;
  3664    const char *cp;
  3665  
  3666    if (sp == 1) {
  3667      if (optind >= argc || argv[optind][0] != '-' || argv[optind][1] == '\0')
  3668        return EOF;
  3669      else if (strcmp(argv[optind], "--") == 0) {
  3670        optind++;
  3671        return EOF;
  3672      }
  3673    }
  3674    optopt = c = argv[optind][sp];
  3675    if (c == ':' || (cp = strchr(opts, c)) == NULL) {
  3676      fprintf(stderr, "%s: %s -- %c\n", argv[0], "illegal option", c);
  3677      if (argv[optind][++sp] == '\0') {
  3678        optind++;
  3679        sp = 1;
  3680      }
  3681      return '?';
  3682    }
  3683    if (*++cp == ':') {
  3684      if (argv[optind][sp + 1] != '\0')
  3685        optarg = &argv[optind++][sp + 1];
  3686      else if (++optind >= argc) {
  3687        fprintf(stderr, "%s: %s -- %c\n", argv[0], "option requires an argument",
  3688                c);
  3689        sp = 1;
  3690        return '?';
  3691      } else
  3692        optarg = argv[optind++];
  3693      sp = 1;
  3694    } else {
  3695      if (argv[optind][++sp] == '\0') {
  3696        sp = 1;
  3697        optind++;
  3698      }
  3699      optarg = NULL;
  3700    }
  3701    return c;
  3702  }
  3703  
  3704  static volatile BOOL user_break;
  3705  static BOOL WINAPI ConsoleBreakHandlerRoutine(DWORD dwCtrlType) {
  3706    (void)dwCtrlType;
  3707    user_break = true;
  3708    return true;
  3709  }
  3710  
  3711  #else /* WINDOWS */
  3712  
  3713  static volatile sig_atomic_t user_break;
  3714  static void signal_handler(int sig) {
  3715    (void)sig;
  3716    user_break = 1;
  3717  }
  3718  
  3719  #endif /* !WINDOWS */
  3720  
  3721  static void print_stat(MDBX_stat *ms) {
  3722    printf("  Pagesize: %u\n", ms->ms_psize);
  3723    printf("  Tree depth: %u\n", ms->ms_depth);
  3724    printf("  Branch pages: %" PRIu64 "\n", ms->ms_branch_pages);
  3725    printf("  Leaf pages: %" PRIu64 "\n", ms->ms_leaf_pages);
  3726    printf("  Overflow pages: %" PRIu64 "\n", ms->ms_overflow_pages);
  3727    printf("  Entries: %" PRIu64 "\n", ms->ms_entries);
  3728  }
  3729  
  3730  static void usage(const char *prog) {
  3731    fprintf(stderr,
  3732            "usage: %s [-V] [-q] [-e] [-f[f[f]]] [-r[r]] [-a|-s name] dbpath\n"
  3733            "  -V\t\tprint version and exit\n"
  3734            "  -q\t\tbe quiet\n"
  3735            "  -p\t\tshow statistics of page operations for current session\n"
  3736            "  -e\t\tshow whole DB info\n"
  3737            "  -f\t\tshow GC info\n"
  3738            "  -r\t\tshow readers\n"
  3739            "  -a\t\tprint stat of main DB and all subDBs\n"
  3740            "  -s name\tprint stat of only the specified named subDB\n"
  3741            "  \t\tby default print stat of only the main DB\n",
  3742            prog);
  3743    exit(EXIT_FAILURE);
  3744  }
  3745  
  3746  static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid,
  3747                              mdbx_tid_t thread, uint64_t txnid, uint64_t lag,
  3748                              size_t bytes_used, size_t bytes_retained) {
  3749    (void)ctx;
  3750    if (num == 1)
  3751      printf("Reader Table\n"
  3752             "   #\tslot\t%6s %*s %20s %10s %13s %13s\n",
  3753             "pid", (int)sizeof(size_t) * 2, "thread", "txnid", "lag", "used",
  3754             "retained");
  3755  
  3756    printf(" %3d)\t[%d]\t%6" PRIdSIZE " %*" PRIxPTR, num, slot, (size_t)pid,
  3757           (int)sizeof(size_t) * 2, (uintptr_t)thread);
  3758    if (txnid)
  3759      printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag,
  3760             bytes_used / 1048576.0, bytes_retained / 1048576.0);
  3761    else
  3762      printf(" %20s %10s %13s %13s\n", "-", "0", "0", "0");
  3763  
  3764    return user_break ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
  3765  }
  3766  
  3767  static const char *prog;
  3768  static bool quiet = false;
  3769  static void error(const char *func, int rc) {
  3770    if (!quiet)
  3771      fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc,
  3772              mdbx_strerror(rc));
  3773  }
  3774  
  3775  int mdbx_stat(int argc, char *argv[]) {
  3776    int opt, rc;
  3777    MDBX_env *env;
  3778    MDBX_txn *txn;
  3779    MDBX_dbi dbi;
  3780    MDBX_envinfo mei;
  3781    prog = argv[0];
  3782    char *envname;
  3783    char *subname = nullptr;
  3784    bool alldbs = false, envinfo = false, pgop = false;
  3785    int freinfo = 0, rdrinfo = 0;
  3786  
  3787    if (argc < 2)
  3788      usage(prog);
  3789  
  3790    while ((opt = getopt(argc, argv,
  3791                         "V"
  3792                         "q"
  3793                         "p"
  3794                         "a"
  3795                         "e"
  3796                         "f"
  3797                         "n"
  3798                         "r"
  3799                         "s:")) != EOF) {
  3800      switch (opt) {
  3801      case 'V':
  3802        printf("mdbx_stat version %d.%d.%d.%d\n"
  3803               " - source: %s %s, commit %s, tree %s\n"
  3804               " - anchor: %s\n"
  3805               " - build: %s for %s by %s\n"
  3806               " - flags: %s\n"
  3807               " - options: %s\n",
  3808               mdbx_version.major, mdbx_version.minor, mdbx_version.release,
  3809               mdbx_version.revision, mdbx_version.git.describe,
  3810               mdbx_version.git.datetime, mdbx_version.git.commit,
  3811               mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime,
  3812               mdbx_build.target, mdbx_build.compiler, mdbx_build.flags,
  3813               mdbx_build.options);
  3814        return EXIT_SUCCESS;
  3815      case 'q':
  3816        quiet = true;
  3817        break;
  3818      case 'p':
  3819        pgop = true;
  3820        break;
  3821      case 'a':
  3822        if (subname)
  3823          usage(prog);
  3824        alldbs = true;
  3825        break;
  3826      case 'e':
  3827        envinfo = true;
  3828        break;
  3829      case 'f':
  3830        freinfo += 1;
  3831        break;
  3832      case 'n':
  3833        break;
  3834      case 'r':
  3835        rdrinfo += 1;
  3836        break;
  3837      case 's':
  3838        if (alldbs)
  3839          usage(prog);
  3840        subname = optarg;
  3841        break;
  3842      default:
  3843        usage(prog);
  3844      }
  3845    }
  3846  
  3847    if (optind != argc - 1)
  3848      usage(prog);
  3849  
  3850  #if defined(_WIN32) || defined(_WIN64)
  3851    SetConsoleCtrlHandler(ConsoleBreakHandlerRoutine, true);
  3852  #else
  3853  #ifdef SIGPIPE
  3854    signal(SIGPIPE, signal_handler);
  3855  #endif
  3856  #ifdef SIGHUP
  3857    signal(SIGHUP, signal_handler);
  3858  #endif
  3859    signal(SIGINT, signal_handler);
  3860    signal(SIGTERM, signal_handler);
  3861  #endif /* !WINDOWS */
  3862  
  3863    envname = argv[optind];
  3864    envname = argv[optind];
  3865    if (!quiet) {
  3866      printf("mdbx_stat %s (%s, T-%s)\nRunning for %s...\n",
  3867             mdbx_version.git.describe, mdbx_version.git.datetime,
  3868             mdbx_version.git.tree, envname);
  3869      fflush(nullptr);
  3870    }
  3871  
  3872    rc = mdbx_env_create(&env);
  3873    if (unlikely(rc != MDBX_SUCCESS)) {
  3874      error("mdbx_env_create", rc);
  3875      return EXIT_FAILURE;
  3876    }
  3877  
  3878    if (alldbs || subname) {
  3879      rc = mdbx_env_set_maxdbs(env, 2);
  3880      if (unlikely(rc != MDBX_SUCCESS)) {
  3881        error("mdbx_env_set_maxdbs", rc);
  3882        goto env_close;
  3883      }
  3884    }
  3885  
  3886    rc = mdbx_env_open(env, envname, MDBX_RDONLY, 0);
  3887    if (unlikely(rc != MDBX_SUCCESS)) {
  3888      error("mdbx_env_open", rc);
  3889      goto env_close;
  3890    }
  3891  
  3892    rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
  3893    if (unlikely(rc != MDBX_SUCCESS)) {
  3894      error("mdbx_txn_begin", rc);
  3895      goto txn_abort;
  3896    }
  3897  
  3898    if (envinfo || freinfo || pgop) {
  3899      rc = mdbx_env_info_ex(env, txn, &mei, sizeof(mei));
  3900      if (unlikely(rc != MDBX_SUCCESS)) {
  3901        error("mdbx_env_info_ex", rc);
  3902        goto txn_abort;
  3903      }
  3904    } else {
  3905      /* LY: zap warnings from gcc */
  3906      memset(&mei, 0, sizeof(mei));
  3907    }
  3908  
  3909    if (pgop) {
  3910      printf("Page Operations (for current session):\n");
  3911      printf("      New: %8" PRIu64 "\t// quantity of a new pages added\n",
  3912             mei.mi_pgop_stat.newly);
  3913      printf("      CoW: %8" PRIu64
  3914             "\t// quantity of pages copied for altering\n",
  3915             mei.mi_pgop_stat.cow);
  3916      printf("    Clone: %8" PRIu64 "\t// quantity of parent's dirty pages "
  3917             "clones for nested transactions\n",
  3918             mei.mi_pgop_stat.clone);
  3919      printf("    Split: %8" PRIu64
  3920             "\t// page splits during insertions or updates\n",
  3921             mei.mi_pgop_stat.split);
  3922      printf("    Merge: %8" PRIu64
  3923             "\t// page merges during deletions or updates\n",
  3924             mei.mi_pgop_stat.merge);
  3925      printf("    Spill: %8" PRIu64 "\t// quantity of spilled/ousted `dirty` "
  3926             "pages during large transactions\n",
  3927             mei.mi_pgop_stat.spill);
  3928      printf("  Unspill: %8" PRIu64 "\t// quantity of unspilled/redone `dirty` "
  3929             "pages during large transactions\n",
  3930             mei.mi_pgop_stat.unspill);
  3931      printf("      WOP: %8" PRIu64
  3932             "\t// number of explicit write operations (not a pages) to a disk\n",
  3933             mei.mi_pgop_stat.wops);
  3934    }
  3935  
  3936    if (envinfo) {
  3937      printf("Environment Info\n");
  3938      printf("  Pagesize: %u\n", mei.mi_dxb_pagesize);
  3939      if (mei.mi_geo.lower != mei.mi_geo.upper) {
  3940        printf("  Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64
  3941               "/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 " pages (+%" PRIu64
  3942               "/-%" PRIu64 ")\n",
  3943               mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow,
  3944               mei.mi_geo.shrink, mei.mi_geo.lower / mei.mi_dxb_pagesize,
  3945               mei.mi_geo.upper / mei.mi_dxb_pagesize,
  3946               mei.mi_geo.grow / mei.mi_dxb_pagesize,
  3947               mei.mi_geo.shrink / mei.mi_dxb_pagesize);
  3948        printf("  Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n",
  3949               mei.mi_mapsize, mei.mi_mapsize / mei.mi_dxb_pagesize);
  3950        printf("  Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n",
  3951               mei.mi_geo.current, mei.mi_geo.current / mei.mi_dxb_pagesize);
  3952  #if defined(_WIN32) || defined(_WIN64)
  3953        if (mei.mi_geo.shrink && mei.mi_geo.current != mei.mi_geo.upper)
  3954          printf("                    WARNING: Due Windows system limitations a "
  3955                 "file couldn't\n                    be truncated while database "
  3956                 "is opened. So, the size of\n                    database file "
  3957                 "may by large than the database itself,\n                    "
  3958                 "until it will be closed or reopened in read-write mode.\n");
  3959  #endif
  3960      } else {
  3961        printf("  Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n",
  3962               mei.mi_geo.current, mei.mi_geo.current / mei.mi_dxb_pagesize);
  3963      }
  3964      printf("  Last transaction ID: %" PRIu64 "\n", mei.mi_recent_txnid);
  3965      printf("  Latter reader transaction ID: %" PRIu64 " (%" PRIi64 ")\n",
  3966             mei.mi_latter_reader_txnid,
  3967             mei.mi_latter_reader_txnid - mei.mi_recent_txnid);
  3968      printf("  Max readers: %u\n", mei.mi_maxreaders);
  3969      printf("  Number of reader slots uses: %u\n", mei.mi_numreaders);
  3970    }
  3971  
  3972    if (rdrinfo) {
  3973      rc = mdbx_reader_list(env, reader_list_func, nullptr);
  3974      if (MDBX_IS_ERROR(rc)) {
  3975        error("mdbx_reader_list", rc);
  3976        goto txn_abort;
  3977      }
  3978      if (rc == MDBX_RESULT_TRUE)
  3979        printf("Reader Table is empty\n");
  3980      else if (rc == MDBX_SUCCESS && rdrinfo > 1) {
  3981        int dead;
  3982        rc = mdbx_reader_check(env, &dead);
  3983        if (MDBX_IS_ERROR(rc)) {
  3984          error("mdbx_reader_check", rc);
  3985          goto txn_abort;
  3986        }
  3987        if (rc == MDBX_RESULT_TRUE) {
  3988          printf("  %d stale readers cleared.\n", dead);
  3989          rc = mdbx_reader_list(env, reader_list_func, nullptr);
  3990          if (rc == MDBX_RESULT_TRUE)
  3991            printf("  Now Reader Table is empty\n");
  3992        } else
  3993          printf("  No stale readers.\n");
  3994      }
  3995      if (!(subname || alldbs || freinfo))
  3996        goto txn_abort;
  3997    }
  3998  
  3999    if (freinfo) {
  4000      printf("Garbage Collection\n");
  4001      dbi = 0;
  4002      MDBX_cursor *cursor;
  4003      rc = mdbx_cursor_open(txn, dbi, &cursor);
  4004      if (unlikely(rc != MDBX_SUCCESS)) {
  4005        error("mdbx_cursor_open", rc);
  4006        goto txn_abort;
  4007      }
  4008  
  4009      MDBX_stat mst;
  4010      rc = mdbx_dbi_stat(txn, dbi, &mst, sizeof(mst));
  4011      if (unlikely(rc != MDBX_SUCCESS)) {
  4012        error("mdbx_dbi_stat", rc);
  4013        goto txn_abort;
  4014      }
  4015      print_stat(&mst);
  4016  
  4017      pgno_t pages = 0, *iptr;
  4018      pgno_t reclaimable = 0;
  4019      MDBX_val key, data;
  4020      while (MDBX_SUCCESS ==
  4021             (rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT))) {
  4022        if (user_break) {
  4023          rc = MDBX_EINTR;
  4024          break;
  4025        }
  4026        iptr = data.iov_base;
  4027        const pgno_t number = *iptr++;
  4028  
  4029        pages += number;
  4030        if (envinfo && mei.mi_latter_reader_txnid > *(txnid_t *)key.iov_base)
  4031          reclaimable += number;
  4032  
  4033        if (freinfo > 1) {
  4034          char *bad = "";
  4035          pgno_t prev =
  4036              MDBX_PNL_ASCENDING ? NUM_METAS - 1 : (pgno_t)mei.mi_last_pgno + 1;
  4037          pgno_t span = 1;
  4038          for (unsigned i = 0; i < number; ++i) {
  4039            pgno_t pg = iptr[i];
  4040            if (MDBX_PNL_DISORDERED(prev, pg))
  4041              bad = " [bad sequence]";
  4042            prev = pg;
  4043            while (i + span < number &&
  4044                   iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pg, span)
  4045                                                         : pgno_sub(pg, span)))
  4046              ++span;
  4047          }
  4048          printf("    Transaction %" PRIaTXN ", %" PRIaPGNO
  4049                 " pages, maxspan %" PRIaPGNO "%s\n",
  4050                 *(txnid_t *)key.iov_base, number, span, bad);
  4051          if (freinfo > 2) {
  4052            for (unsigned i = 0; i < number; i += span) {
  4053              const pgno_t pg = iptr[i];
  4054              for (span = 1;
  4055                   i + span < number &&
  4056                   iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pg, span)
  4057                                                         : pgno_sub(pg, span));
  4058                   ++span)
  4059                ;
  4060              if (span > 1)
  4061                printf("     %9" PRIaPGNO "[%" PRIaPGNO "]\n", pg, span);
  4062              else
  4063                printf("     %9" PRIaPGNO "\n", pg);
  4064            }
  4065          }
  4066        }
  4067      }
  4068      mdbx_cursor_close(cursor);
  4069      cursor = nullptr;
  4070  
  4071      switch (rc) {
  4072      case MDBX_SUCCESS:
  4073      case MDBX_NOTFOUND:
  4074        break;
  4075      case MDBX_EINTR:
  4076        if (!quiet)
  4077          fprintf(stderr, "Interrupted by signal/user\n");
  4078        goto txn_abort;
  4079      default:
  4080        error("mdbx_cursor_get", rc);
  4081        goto txn_abort;
  4082      }
  4083  
  4084      if (envinfo) {
  4085        uint64_t value = mei.mi_mapsize / mei.mi_dxb_pagesize;
  4086        double percent = value / 100.0;
  4087        printf("Page Usage\n");
  4088        printf("  Total: %" PRIu64 " 100%%\n", value);
  4089  
  4090        value = mei.mi_geo.current / mei.mi_dxb_pagesize;
  4091        printf("  Backed: %" PRIu64 " %.1f%%\n", value, value / percent);
  4092  
  4093        value = mei.mi_last_pgno + 1;
  4094        printf("  Allocated: %" PRIu64 " %.1f%%\n", value, value / percent);
  4095  
  4096        value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1);
  4097        printf("  Remained: %" PRIu64 " %.1f%%\n", value, value / percent);
  4098  
  4099        value = mei.mi_last_pgno + 1 - pages;
  4100        printf("  Used: %" PRIu64 " %.1f%%\n", value, value / percent);
  4101  
  4102        value = pages;
  4103        printf("  GC: %" PRIu64 " %.1f%%\n", value, value / percent);
  4104  
  4105        value = pages - reclaimable;
  4106        printf("  Retained: %" PRIu64 " %.1f%%\n", value, value / percent);
  4107  
  4108        value = reclaimable;
  4109        printf("  Reclaimable: %" PRIu64 " %.1f%%\n", value, value / percent);
  4110  
  4111        value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1) +
  4112                reclaimable;
  4113        printf("  Available: %" PRIu64 " %.1f%%\n", value, value / percent);
  4114      } else
  4115        printf("  GC: %" PRIaPGNO " pages\n", pages);
  4116    }
  4117  
  4118    rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &dbi);
  4119    if (unlikely(rc != MDBX_SUCCESS)) {
  4120      error("mdbx_dbi_open", rc);
  4121      goto txn_abort;
  4122    }
  4123  
  4124    MDBX_stat mst;
  4125    rc = mdbx_dbi_stat(txn, dbi, &mst, sizeof(mst));
  4126    if (unlikely(rc != MDBX_SUCCESS)) {
  4127      error("mdbx_dbi_stat", rc);
  4128      goto txn_abort;
  4129    }
  4130    printf("Status of %s\n", subname ? subname : "Main DB");
  4131    print_stat(&mst);
  4132  
  4133    if (alldbs) {
  4134      MDBX_cursor *cursor;
  4135      rc = mdbx_cursor_open(txn, dbi, &cursor);
  4136      if (unlikely(rc != MDBX_SUCCESS)) {
  4137        error("mdbx_cursor_open", rc);
  4138        goto txn_abort;
  4139      }
  4140  
  4141      MDBX_val key;
  4142      while (MDBX_SUCCESS ==
  4143             (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) {
  4144        MDBX_dbi subdbi;
  4145        if (memchr(key.iov_base, '\0', key.iov_len))
  4146          continue;
  4147        subname = osal_malloc(key.iov_len + 1);
  4148        memcpy(subname, key.iov_base, key.iov_len);
  4149        subname[key.iov_len] = '\0';
  4150        rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &subdbi);
  4151        if (rc == MDBX_SUCCESS)
  4152          printf("Status of %s\n", subname);
  4153        osal_free(subname);
  4154        if (unlikely(rc != MDBX_SUCCESS)) {
  4155          if (rc == MDBX_INCOMPATIBLE)
  4156            continue;
  4157          error("mdbx_dbi_open", rc);
  4158          goto txn_abort;
  4159        }
  4160  
  4161        rc = mdbx_dbi_stat(txn, subdbi, &mst, sizeof(mst));
  4162        if (unlikely(rc != MDBX_SUCCESS)) {
  4163          error("mdbx_dbi_stat", rc);
  4164          goto txn_abort;
  4165        }
  4166        print_stat(&mst);
  4167  
  4168        rc = mdbx_dbi_close(env, subdbi);
  4169        if (unlikely(rc != MDBX_SUCCESS)) {
  4170          error("mdbx_dbi_close", rc);
  4171          goto txn_abort;
  4172        }
  4173      }
  4174      mdbx_cursor_close(cursor);
  4175      cursor = nullptr;
  4176    }
  4177  
  4178    switch (rc) {
  4179    case MDBX_SUCCESS:
  4180    case MDBX_NOTFOUND:
  4181      break;
  4182    case MDBX_EINTR:
  4183      if (!quiet)
  4184        fprintf(stderr, "Interrupted by signal/user\n");
  4185      break;
  4186    default:
  4187      if (unlikely(rc != MDBX_SUCCESS))
  4188        error("mdbx_cursor_get", rc);
  4189    }
  4190  
  4191    mdbx_dbi_close(env, dbi);
  4192  txn_abort:
  4193    mdbx_txn_abort(txn);
  4194  env_close:
  4195    mdbx_env_close(env);
  4196  
  4197    return rc ? EXIT_FAILURE : EXIT_SUCCESS;
  4198  }