github.com/moontrade/mdbx-go@v0.4.0/mdbx_chk.c (about)

     1  /* mdbx_chk.c - memory-mapped database check tool */
     2  
     3  /*
     4   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
     5   * and other libmdbx authors: please see AUTHORS file.
     6   * All rights reserved.
     7   *
     8   * Redistribution and use in source and binary forms, with or without
     9   * modification, are permitted only as authorized by the OpenLDAP
    10   * Public License.
    11   *
    12   * A copy of this license is available in the file LICENSE in the
    13   * top-level directory of the distribution or, alternatively, at
    14   * <http://www.OpenLDAP.org/license.html>. */
    15  
    16  #ifdef _MSC_VER
    17  #if _MSC_VER > 1800
    18  #pragma warning(disable : 4464) /* relative include path contains '..' */
    19  #endif
    20  #pragma warning(disable : 4996) /* The POSIX name is deprecated... */
    21  #endif                          /* _MSC_VER (warnings) */
    22  
    23  #define xMDBX_TOOLS /* Avoid using internal eASSERT() */
    24  /*
    25   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
    26   * and other libmdbx authors: please see AUTHORS file.
    27   * All rights reserved.
    28   *
    29   * Redistribution and use in source and binary forms, with or without
    30   * modification, are permitted only as authorized by the OpenLDAP
    31   * Public License.
    32   *
    33   * A copy of this license is available in the file LICENSE in the
    34   * top-level directory of the distribution or, alternatively, at
    35   * <http://www.OpenLDAP.org/license.html>. */
    36  
    37  #define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
    38  #ifdef MDBX_CONFIG_H
    39  #include MDBX_CONFIG_H
    40  #endif
    41  
    42  #define LIBMDBX_INTERNALS
    43  #ifdef xMDBX_TOOLS
    44  #define MDBX_DEPRECATED
    45  #endif /* xMDBX_TOOLS */
    46  
    47  #ifdef xMDBX_ALLOY
    48  /* Amalgamated build */
    49  #define MDBX_INTERNAL_FUNC static
    50  #define MDBX_INTERNAL_VAR static
    51  #else
    52  /* Non-amalgamated build */
    53  #define MDBX_INTERNAL_FUNC
    54  #define MDBX_INTERNAL_VAR extern
    55  #endif /* xMDBX_ALLOY */
    56  
    57  /*----------------------------------------------------------------------------*/
    58  
    59  /** Disables using GNU/Linux libc extensions.
    60   * \ingroup build_option
    61   * \note This option can't be moved to options.h, since the dependent
    62   * control macros/defines must be prepared before options.h is included. */
    63  #ifndef MDBX_DISABLE_GNU_SOURCE
    64  #define MDBX_DISABLE_GNU_SOURCE 0
    65  #endif
    66  #if MDBX_DISABLE_GNU_SOURCE
    67  #undef _GNU_SOURCE
    68  #elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE)
    69  #define _GNU_SOURCE
    70  #endif /* MDBX_DISABLE_GNU_SOURCE */
    71  
    72  /* Should be defined before any includes */
    73  #if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) &&                \
    74      !defined(ANDROID)
    75  #define _FILE_OFFSET_BITS 64
    76  #endif
    77  
    78  #ifdef __APPLE__
    79  #define _DARWIN_C_SOURCE
    80  #endif
    81  
    82  #ifdef _MSC_VER
    83  #if _MSC_FULL_VER < 190024234
    84  /* Actually libmdbx was not tested with compilers older than 19.00.24234 (Visual
    85   * Studio 2015 Update 3). But you could remove this #error and try to continue
    86   * at your own risk. In such a case, please don't raise issues related ONLY to
    87   * old compilers.
    88   *
    89   * NOTE:
    90   *   Unfortunately, there are several different builds of "Visual Studio" that
    91   *   are called "Visual Studio 2015 Update 3".
    92   *
    93   *   The 190024234 is used here because it is the minimal version of Visual
    94   *   Studio that was used to build and test libmdbx in recent years. Soon this
    95   *   value will be increased to 19.0.24241.7, since building and testing with
    96   *   "Visual Studio 2015" will be performed only at https://ci.appveyor.com.
    97   *
    98   *   Please ask Microsoft (but not us) for information about version differences
    99   *   and how to and where you can obtain the latest "Visual Studio 2015" build
   100   *   with all fixes.
   101   */
   102  #error                                                                         \
   103      "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required."
   104  #endif
   105  #ifndef _CRT_SECURE_NO_WARNINGS
   106  #define _CRT_SECURE_NO_WARNINGS
   107  #endif /* _CRT_SECURE_NO_WARNINGS */
   108  #if _MSC_VER > 1800
   109  #pragma warning(disable : 4464) /* relative include path contains '..' */
   110  #endif
   111  #if _MSC_VER > 1913
   112  #pragma warning(disable : 5045) /* Compiler will insert Spectre mitigation...  \
   113                                   */
   114  #endif
   115  #if _MSC_VER > 1914
   116  #pragma warning(                                                               \
   117      disable : 5105) /* winbase.h(9531): warning C5105: macro expansion         \
   118                         producing 'defined' has undefined behavior */
   119  #endif
   120  #pragma warning(disable : 4710) /* 'xyz': function not inlined */
   121  #pragma warning(disable : 4711) /* function 'xyz' selected for automatic       \
   122                                     inline expansion */
   123  #pragma warning(                                                               \
   124      disable : 4201) /* nonstandard extension used : nameless struct / union */
   125  #pragma warning(disable : 4702) /* unreachable code */
   126  #pragma warning(disable : 4706) /* assignment within conditional expression */
   127  #pragma warning(disable : 4127) /* conditional expression is constant */
   128  #pragma warning(disable : 4324) /* 'xyz': structure was padded due to          \
   129                                     alignment specifier */
   130  #pragma warning(disable : 4310) /* cast truncates constant value */
   131  #pragma warning(                                                               \
   132      disable : 4820) /* bytes padding added after data member for alignment */
   133  #pragma warning(disable : 4548) /* expression before comma has no effect;      \
   134                                     expected expression with side - effect */
   135  #pragma warning(disable : 4366) /* the result of the unary '&' operator may be \
   136                                     unaligned */
   137  #pragma warning(disable : 4200) /* nonstandard extension used: zero-sized      \
   138                                     array in struct/union */
   139  #pragma warning(disable : 4204) /* nonstandard extension used: non-constant    \
   140                                     aggregate initializer */
   141  #pragma warning(                                                               \
   142      disable : 4505) /* unreferenced local function has been removed */
   143  #endif              /* _MSC_VER (warnings) */
   144  
   145  #if defined(__GNUC__) && __GNUC__ < 9
   146  #pragma GCC diagnostic ignored "-Wattributes"
   147  #endif /* GCC < 9 */
   148  
   149  #if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) &&    \
   150      !defined(__USE_MINGW_ANSI_STDIO)
   151  #define __USE_MINGW_ANSI_STDIO 1
   152  #endif /* __USE_MINGW_ANSI_STDIO */
   153  
   154  #include "mdbx.h"
   155  /*
   156   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
   157   * and other libmdbx authors: please see AUTHORS file.
   158   * All rights reserved.
   159   *
   160   * Redistribution and use in source and binary forms, with or without
   161   * modification, are permitted only as authorized by the OpenLDAP
   162   * Public License.
   163   *
   164   * A copy of this license is available in the file LICENSE in the
   165   * top-level directory of the distribution or, alternatively, at
   166   * <http://www.OpenLDAP.org/license.html>.
   167   */
   168  
   169  
   170  /*----------------------------------------------------------------------------*/
   171  /* The Microsoft compiler generates a lot of warnings for its own includes... */
   172  
   173  #ifdef _MSC_VER
   174  #pragma warning(push, 1)
   175  #pragma warning(disable : 4548) /* expression before comma has no effect;      \
   176                                     expected expression with side - effect */
   177  #pragma warning(disable : 4530) /* C++ exception handler used, but unwind      \
   178                                   * semantics are not enabled. Specify /EHsc */
   179  #pragma warning(disable : 4577) /* 'noexcept' used with no exception handling  \
   180                                   * mode specified; termination on exception is \
   181                                   * not guaranteed. Specify /EHsc */
   182  #endif                          /* _MSC_VER (warnings) */
   183  
   184  #if defined(_WIN32) || defined(_WIN64)
   185  #if !defined(_CRT_SECURE_NO_WARNINGS)
   186  #define _CRT_SECURE_NO_WARNINGS
   187  #endif /* _CRT_SECURE_NO_WARNINGS */
   188  #if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY &&             \
   189      !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT
   190  #define _NO_CRT_STDIO_INLINE
   191  #endif
   192  #elif !defined(_POSIX_C_SOURCE)
   193  #define _POSIX_C_SOURCE 200809L
   194  #endif /* Windows */
   195  
   196  /*----------------------------------------------------------------------------*/
   197  /* basic C99 includes */
   198  #include <inttypes.h>
   199  #include <stddef.h>
   200  #include <stdint.h>
   201  #include <stdlib.h>
   202  
   203  #include <assert.h>
   204  #include <fcntl.h>
   205  #include <limits.h>
   206  #include <stdio.h>
   207  #include <string.h>
   208  #include <time.h>
   209  
   210  #if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
   211  #error                                                                         \
   212      "Sanity checking failed: Two's complement, reasonably sized integer types"
   213  #endif
   214  
   215  #ifndef SSIZE_MAX
   216  #define SSIZE_MAX INTPTR_MAX
   217  #endif
   218  
   219  #if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
   220  #define MDBX_WORDBITS 64 /* uintptr_t or unsigned long exceeds 32 bits */
   221  #else
   222  #define MDBX_WORDBITS 32 /* both limits fit into 32 bits */
   223  #endif /* MDBX_WORDBITS */
   224  
   225  /*----------------------------------------------------------------------------*/
   226  /* feature testing */
   227  
   228  #ifndef __has_warning
   229  #define __has_warning(x) (0)
   230  #endif
   231  
   232  #ifndef __has_include
   233  #define __has_include(x) (0)
   234  #endif
   235  
   236  #ifndef __has_feature
   237  #define __has_feature(x) (0)
   238  #endif
   239  
   240  #ifndef __has_extension
   241  #define __has_extension(x) (0)
   242  #endif
   243  
   244  #if __has_feature(thread_sanitizer)
   245  #define __SANITIZE_THREAD__ 1
   246  #endif
   247  
   248  #if __has_feature(address_sanitizer)
   249  #define __SANITIZE_ADDRESS__ 1
   250  #endif
   251  
   252  #ifndef __GNUC_PREREQ /* non-zero when the GCC version is >= maj.min */
   253  #if defined(__GNUC__) && defined(__GNUC_MINOR__)
   254  #define __GNUC_PREREQ(maj, min)                                                \
   255    ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
   256  #else
   257  #define __GNUC_PREREQ(maj, min) (0) /* GCC version macros are not defined */
   258  #endif
   259  #endif /* __GNUC_PREREQ */
   260  
   261  #ifndef __CLANG_PREREQ /* non-zero when the clang version is >= maj.min */
   262  #ifdef __clang__
   263  #define __CLANG_PREREQ(maj, min)                                               \
   264    ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min))
   265  #else
   266  #define __CLANG_PREREQ(maj, min) (0) /* __clang__ is not defined */
   267  #endif
   268  #endif /* __CLANG_PREREQ */
   269  
   270  #ifndef __GLIBC_PREREQ /* non-zero when the glibc version is >= maj.min */
   271  #if defined(__GLIBC__) && defined(__GLIBC_MINOR__)
   272  #define __GLIBC_PREREQ(maj, min)                                               \
   273    ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
   274  #else
   275  #define __GLIBC_PREREQ(maj, min) (0) /* glibc version macros are not defined */
   276  #endif
   277  #endif /* __GLIBC_PREREQ */
   278  
   279  /*----------------------------------------------------------------------------*/
   280  /* C11' alignas() */
   281  
   282  #if __has_include(<stdalign.h>)
   283  #include <stdalign.h>
   284  #endif
   285  #if defined(alignas) || defined(__cplusplus)
   286  #define MDBX_ALIGNAS(N) alignas(N) /* alignas macro is defined, or C++ */
   287  #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
   288  #define MDBX_ALIGNAS(N) _Alignas(N) /* C11 keyword */
   289  #elif defined(_MSC_VER)
   290  #define MDBX_ALIGNAS(N) __declspec(align(N)) /* MSVC spelling */
   291  #elif __has_attribute(__aligned__) || defined(__GNUC__)
   292  #define MDBX_ALIGNAS(N) __attribute__((__aligned__(N))) /* GNU attribute */
   293  #else
   294  #error "FIXME: Required alignas() or equivalent."
   295  #endif /* MDBX_ALIGNAS */
   296  
   297  /*----------------------------------------------------------------------------*/
   298  /* Systems macros and includes */
   299  
   300  #ifndef __extern_C
   301  #ifdef __cplusplus
   302  #define __extern_C extern "C"
   303  #else
   304  #define __extern_C
   305  #endif
   306  #endif /* __extern_C */
   307  
   308  #if !defined(nullptr) && !defined(__cplusplus) ||                              \
   309      (__cplusplus < 201103L && !defined(_MSC_VER))
   310  #define nullptr NULL /* C, and pre-C++11 on non-MSVC compilers */
   311  #endif /* NOTE(review): '&&' binds tighter than '||', so the !defined(nullptr) guard covers only the first alternative */
   312  
   313  #if defined(__APPLE__) || defined(_DARWIN_C_SOURCE)
   314  #include <AvailabilityMacros.h>
   315  #include <TargetConditionals.h>
   316  #ifndef MAC_OS_X_VERSION_MIN_REQUIRED
   317  #define MAC_OS_X_VERSION_MIN_REQUIRED 1070 /* Mac OS X 10.7, 2011 */
   318  #endif
   319  #endif /* Apple OSX & iOS */
   320  
   321  #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) ||     \
   322      defined(__BSD__) || defined(__bsdi__) || defined(__DragonFly__) ||         \
   323      defined(__APPLE__) || defined(__MACH__)
   324  #include <sys/cdefs.h>
   325  #include <sys/mount.h>
   326  #include <sys/sysctl.h>
   327  #include <sys/types.h>
   328  #if defined(__FreeBSD__) || defined(__DragonFly__)
   329  #include <vm/vm_param.h>
   330  #elif defined(__OpenBSD__) || defined(__NetBSD__)
   331  #include <uvm/uvm_param.h>
   332  #else
   333  #define SYSCTL_LEGACY_NONCONST_MIB
   334  #endif
   335  #ifndef __MACH__
   336  #include <sys/vmmeter.h>
   337  #endif
   338  #else
   339  #include <malloc.h>
   340  #if !(defined(__sun) || defined(__SVR4) || defined(__svr4__) ||                \
   341        defined(_WIN32) || defined(_WIN64))
   342  #include <mntent.h>
   343  #endif /* !Solaris */
   344  #endif /* !xBSD */
   345  
   346  #if defined(__FreeBSD__) || __has_include(<malloc_np.h>)
   347  #include <malloc_np.h>
   348  #endif
   349  
   350  #if defined(__APPLE__) || defined(__MACH__) || __has_include(<malloc/malloc.h>)
   351  #include <malloc/malloc.h>
   352  #endif /* MacOS */
   353  
   354  #if defined(__MACH__)
   355  #include <mach/host_info.h>
   356  #include <mach/mach_host.h>
   357  #include <mach/mach_port.h>
   358  #include <uuid/uuid.h>
   359  #endif
   360  
   361  #if defined(__linux__) || defined(__gnu_linux__)
   362  #include <sched.h>
   363  #include <sys/sendfile.h>
   364  #include <sys/statfs.h>
   365  #endif /* Linux */
   366  
   367  #ifndef _XOPEN_SOURCE
   368  #define _XOPEN_SOURCE 0
   369  #endif
   370  
   371  #ifndef _XOPEN_SOURCE_EXTENDED
   372  #define _XOPEN_SOURCE_EXTENDED 0
   373  #else
   374  #include <utmpx.h>
   375  #endif /* _XOPEN_SOURCE_EXTENDED */
   376  
   377  #if defined(__sun) || defined(__SVR4) || defined(__svr4__)
   378  #include <kstat.h>
   379  #include <sys/mnttab.h>
   380  /* On Solaris, it's easier to add a missing prototype rather than find a
   381   * combination of #defines that break nothing. */
   382  __extern_C key_t ftok(const char *, int);
   383  #endif /* SunOS/Solaris */
   384  
   385  #if defined(_WIN32) || defined(_WIN64) /*-------------------------------------*/
   386  
   387  #ifndef _WIN32_WINNT
   388  #define _WIN32_WINNT 0x0601 /* Windows 7 */
   389  #elif _WIN32_WINNT < 0x0500
   390  #error At least 'Windows 2000' API is required for libmdbx.
   391  #endif /* _WIN32_WINNT */
   392  #if (defined(__MINGW32__) || defined(__MINGW64__)) &&                          \
   393      !defined(__USE_MINGW_ANSI_STDIO)
   394  #define __USE_MINGW_ANSI_STDIO 1
   395  #endif /* MinGW */
   396  #ifndef WIN32_LEAN_AND_MEAN
   397  #define WIN32_LEAN_AND_MEAN
   398  #endif /* WIN32_LEAN_AND_MEAN */
   399  #include <excpt.h>
   400  #include <tlhelp32.h>
   401  #include <windows.h>
   402  #include <winnt.h>
   403  #include <winternl.h>
   404  
   405  #else /*----------------------------------------------------------------------*/
   406  
   407  #include <unistd.h>
   408  #if !defined(_POSIX_MAPPED_FILES) || _POSIX_MAPPED_FILES < 1
   409  #error "libmdbx requires the _POSIX_MAPPED_FILES feature"
   410  #endif /* _POSIX_MAPPED_FILES */
   411  
   412  #include <pthread.h>
   413  #include <semaphore.h>
   414  #include <signal.h>
   415  #include <sys/file.h>
   416  #include <sys/ipc.h>
   417  #include <sys/mman.h>
   418  #include <sys/param.h>
   419  #include <sys/stat.h>
   420  #include <sys/statvfs.h>
   421  #include <sys/uio.h>
   422  
   423  #endif /*---------------------------------------------------------------------*/
   424  
   425  #if defined(__ANDROID_API__) || defined(ANDROID)
   426  #include <android/log.h>
   427  #if __ANDROID_API__ >= 21
   428  #include <sys/sendfile.h>
   429  #endif
   430  #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS != MDBX_WORDBITS
   431  #error "_FILE_OFFSET_BITS != MDBX_WORDBITS" (_FILE_OFFSET_BITS != MDBX_WORDBITS)
   432  #elif defined(__FILE_OFFSET_BITS) && __FILE_OFFSET_BITS != MDBX_WORDBITS
   433  #error "__FILE_OFFSET_BITS != MDBX_WORDBITS" (__FILE_OFFSET_BITS != MDBX_WORDBITS)
   434  #endif
   435  #endif /* Android */
   436  
   437  #if defined(HAVE_SYS_STAT_H) || __has_include(<sys/stat.h>)
   438  #include <sys/stat.h>
   439  #endif
   440  #if defined(HAVE_SYS_TYPES_H) || __has_include(<sys/types.h>)
   441  #include <sys/types.h>
   442  #endif
   443  #if defined(HAVE_SYS_FILE_H) || __has_include(<sys/file.h>)
   444  #include <sys/file.h>
   445  #endif
   446  
   447  /*----------------------------------------------------------------------------*/
   448  /* Byteorder */
   449  
   450  #if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
   451      defined(i486) || defined(__i486) || defined(__i486__) ||                   \
   452      defined(i586) || defined(__i586) || defined(__i586__) || defined(i686) ||  \
   453      defined(__i686) || defined(__i686__) || defined(_M_IX86) ||                \
   454      defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) ||            \
   455      defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) ||          \
   456      defined(__amd64__) || defined(__amd64) || defined(_M_X64) ||               \
   457      defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
   458  #ifndef __ia32__
   459  /* LY: define neutral __ia32__ for x86 and x86-64 */
   460  #define __ia32__ 1
   461  #endif /* __ia32__ */
   462  #if !defined(__amd64__) &&                                                     \
   463      (defined(__x86_64) || defined(__x86_64__) || defined(__amd64) ||           \
   464       defined(_M_X64) || defined(_M_AMD64))
   465  /* LY: define trusty __amd64__ for all AMD64/x86-64 arch */
   466  #define __amd64__ 1
   467  #endif /* __amd64__ */
   468  #endif /* all x86 */
   469  
   470  #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) ||           \
   471      !defined(__ORDER_BIG_ENDIAN__)
   472  
   473  #if defined(__GLIBC__) || defined(__GNU_LIBRARY__) ||                          \
   474      defined(__ANDROID_API__) || defined(HAVE_ENDIAN_H) || __has_include(<endian.h>)
   475  #include <endian.h>
   476  #elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) ||       \
   477      defined(HAVE_MACHINE_ENDIAN_H) || __has_include(<machine/endian.h>)
   478  #include <machine/endian.h>
   479  #elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>)
   480  #include <sys/isa_defs.h>
   481  #elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) ||             \
   482      (__has_include(<sys/types.h>) && __has_include(<sys/endian.h>))
   483  #include <sys/endian.h>
   484  #include <sys/types.h>
   485  #elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) ||   \
   486      defined(__NetBSD__) || defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>)
   487  #include <sys/param.h>
   488  #endif /* OS */
   489  
   490  #if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
   491  #define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
   492  #define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
   493  #define __BYTE_ORDER__ __BYTE_ORDER
   494  #elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
   495  #define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN
   496  #define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN
   497  #define __BYTE_ORDER__ _BYTE_ORDER
   498  #else
   499  #define __ORDER_LITTLE_ENDIAN__ 1234
   500  #define __ORDER_BIG_ENDIAN__ 4321
   501  
   502  #if defined(__LITTLE_ENDIAN__) ||                                              \
   503      (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) ||                      \
   504      defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) ||    \
   505      defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) ||            \
   506      defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) ||                \
   507      defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) ||   \
   508      defined(__BFIN__) || defined(__ia64__) || defined(_IA64) ||                \
   509      defined(__IA64__) || defined(__ia64) || defined(_M_IA64) ||                \
   510      defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) ||        \
   511      defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) ||              \
   512      defined(__WINDOWS__)
   513  #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
   514  
   515  #elif defined(__BIG_ENDIAN__) ||                                               \
   516      (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) ||                      \
   517      defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) ||    \
   518      defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) ||            \
   519      defined(__m68k__) || defined(M68000) || defined(__hppa__) ||               \
   520      defined(__hppa) || defined(__HPPA__) || defined(__sparc__) ||              \
   521      defined(__sparc) || defined(__370__) || defined(__THW_370__) ||            \
   522      defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__)
   523  #define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__
   524  
   525  #else
   526  #error __BYTE_ORDER__ should be defined.
   527  #endif /* Arch */
   528  
   529  #endif
   530  #endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */
   531  
   532  /*----------------------------------------------------------------------------*/
   533  /* Availability of CMOV or equivalent */
   534  
   535  #ifndef MDBX_HAVE_CMOV /* auto-detected only when not already defined */
   536  #if defined(__e2k__)
   537  #define MDBX_HAVE_CMOV 1
   538  #elif defined(__thumb2__) || defined(__thumb2)
   539  #define MDBX_HAVE_CMOV 1
   540  #elif defined(__thumb__) || defined(__thumb) || defined(__TARGET_ARCH_THUMB)
   541  #define MDBX_HAVE_CMOV 0 /* Thumb targets not matched by the Thumb-2 check above */
   542  #elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) ||          \
   543      defined(__aarch64) || defined(__arm__) || defined(__arm) ||                \
   544      defined(__CC_ARM)
   545  #define MDBX_HAVE_CMOV 1
   546  #elif (defined(__riscv__) || defined(__riscv64)) &&                            \
   547      (defined(__riscv_b) || defined(__riscv_bitmanip))
   548  #define MDBX_HAVE_CMOV 1
   549  #elif defined(i686) || defined(__i686) || defined(__i686__) ||                 \
   550      (defined(_M_IX86) && _M_IX86 > 600) || defined(__x86_64) ||                \
   551      defined(__x86_64__) || defined(__amd64__) || defined(__amd64) ||           \
   552      defined(_M_X64) || defined(_M_AMD64)
   553  #define MDBX_HAVE_CMOV 1
   554  #else
   555  #define MDBX_HAVE_CMOV 0 /* unrecognized target: assume unavailable */
   556  #endif
   557  #endif /* MDBX_HAVE_CMOV */
   558  
   559  /*----------------------------------------------------------------------------*/
   560  /* Compiler's includes for builtins/intrinsics */
   561  
   562  #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
   563  #include <intrin.h>
   564  #elif __GNUC_PREREQ(4, 4) || defined(__clang__)
   565  #if defined(__e2k__)
   566  #include <e2kintrin.h>
   567  #include <x86intrin.h>
   568  #endif /* __e2k__ */
   569  #if defined(__ia32__)
   570  #include <cpuid.h>
   571  #include <x86intrin.h>
   572  #endif /* __ia32__ */
   573  #ifdef __ARM_NEON
   574  #include <arm_neon.h>
   575  #endif
   576  #elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
   577  #include <mbarrier.h>
   578  #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) &&       \
   579      (defined(HP_IA64) || defined(__ia64))
   580  #include <machine/sys/inline.h>
   581  #elif defined(__IBMC__) && defined(__powerpc)
   582  #include <atomic.h>
   583  #elif defined(_AIX)
   584  #include <builtins.h>
   585  #include <sys/atomic_op.h>
   586  #elif (defined(__osf__) && defined(__DECC)) || defined(__alpha)
   587  #include <c_asm.h>
   588  #include <machine/builtins.h>
   589  #elif defined(__MWERKS__)
   590  /* CodeWarrior - troubles ? */
   591  #pragma gcc_extensions
   592  #elif defined(__SNC__)
   593  /* Sony PS3 - troubles ? */
   594  #elif defined(__hppa__) || defined(__hppa)
   595  #include <machine/inline.h>
   596  #else
   597  #error Unsupported C compiler, please use GNU C 4.4 or newer
   598  #endif /* Compiler */
   599  
   600  #if !defined(__noop) && !defined(_MSC_VER) /* empty statement; not defined here for MSVC (presumably it has its own __noop) */
   601  #define __noop                                                                 \
   602    do {                                                                         \
   603    } while (0)
   604  #endif /* __noop */
   605  
   606  #if defined(__fallthrough) &&                                                  \
   607      (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__))
   608  #undef __fallthrough
   609  #endif /* __fallthrough workaround for MinGW */
   610  
   611  #ifndef __fallthrough /* annotation for intentional switch-case fall-through */
   612  #if defined(__cplusplus) && (__has_cpp_attribute(fallthrough) &&               \
   613                               (!defined(__clang__) || __clang__ > 4)) ||        \
   614      __cplusplus >= 201703L
   615  #define __fallthrough [[fallthrough]] /* NOTE(review): '__clang__ > 4' above was probably meant to be '__clang_major__ > 4' - confirm */
   616  #elif __GNUC_PREREQ(8, 0) && defined(__cplusplus) && __cplusplus >= 201103L
   617  #define __fallthrough [[fallthrough]]
   618  #elif __GNUC_PREREQ(7, 0) &&                                                   \
   619      (!defined(__LCC__) || (__LCC__ == 124 && __LCC_MINOR__ >= 12) ||           \
   620       (__LCC__ == 125 && __LCC_MINOR__ >= 5) || (__LCC__ >= 126))
   621  #define __fallthrough __attribute__((__fallthrough__))
   622  #elif defined(__clang__) && defined(__cplusplus) && __cplusplus >= 201103L &&  \
   623      __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
   624  #define __fallthrough [[clang::fallthrough]]
   625  #else
   626  #define __fallthrough /* no suitable annotation: expands to nothing */
   627  #endif
   628  #endif /* __fallthrough */
   629  
   630  #ifndef __unreachable /* marks a code path as never executed */
   631  #if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable)
   632  #define __unreachable() __builtin_unreachable()
   633  #elif defined(_MSC_VER)
   634  #define __unreachable() __assume(0)
   635  #else /* no compiler hint available: fall back to an endless loop */
   636  #define __unreachable()                                                        \
   637    do {                                                                         \
   638    } while (1)
   639  #endif
   640  #endif /* __unreachable */
   641  
   642  #ifndef __prefetch /* prefetch hint for the memory at ptr */
   643  #if defined(__GNUC__) || defined(__clang__) || __has_builtin(__builtin_prefetch)
   644  #define __prefetch(ptr) __builtin_prefetch(ptr)
   645  #else /* no builtin: only evaluate ptr and discard it */
   646  #define __prefetch(ptr)                                                        \
   647    do {                                                                         \
   648      (void)(ptr);                                                               \
   649    } while (0)
   650  #endif
   651  #endif /* __prefetch */
   652  
   653  #ifndef offsetof /* fallback when offsetof is not already a macro */
   654  #define offsetof(type, member) __builtin_offsetof(type, member)
   655  #endif /* offsetof */
   656  
   657  #ifndef container_of /* pointer to the enclosing 'type' from a pointer to its 'member' */
   658  #define container_of(ptr, type, member)                                        \
   659    ((type *)((char *)(ptr)-offsetof(type, member)))
   660  #endif /* container_of */
   661  
   662  /*----------------------------------------------------------------------------*/
   663  
   664  #ifndef __always_inline /* forced-inlining function qualifier */
   665  #if defined(__GNUC__) || __has_attribute(__always_inline__)
   666  #define __always_inline __inline __attribute__((__always_inline__))
   667  #elif defined(_MSC_VER)
   668  #define __always_inline __forceinline
   669  #else
   670  #define __always_inline /* unsupported: expands to nothing */
   671  #endif
   672  #endif /* __always_inline */
   673  
   674  #ifndef __noinline /* qualifier that forbids inlining of a function */
   675  #if defined(__GNUC__) || __has_attribute(__noinline__)
   676  #define __noinline __attribute__((__noinline__))
   677  #elif defined(_MSC_VER)
   678  #define __noinline __declspec(noinline)
   679  #else
   680  #define __noinline /* unsupported: expands to nothing */
   681  #endif
   682  #endif /* __noinline */
   683  
   684  #ifndef __must_check_result /* asks the compiler to warn on an ignored result */
   685  #if defined(__GNUC__) || __has_attribute(__warn_unused_result__)
   686  #define __must_check_result __attribute__((__warn_unused_result__))
   687  #else
   688  #define __must_check_result /* unsupported: expands to nothing */
   689  #endif
   690  #endif /* __must_check_result */
   691  
   692  #ifndef __nothrow /* declares that a function does not throw */
   693  #if defined(__cplusplus)
   694  #if __cplusplus < 201703L
   695  #define __nothrow throw()
   696  #else
   697  #define __nothrow noexcept(true)
   698  #endif /* __cplusplus */
   699  #elif defined(__GNUC__) || __has_attribute(__nothrow__)
   700  #define __nothrow __attribute__((__nothrow__))
   701  #elif defined(_MSC_VER) && defined(__cplusplus) /* NOTE(review): never taken, C++ is already handled by the first branch */
   702  #define __nothrow __declspec(nothrow)
   703  #else
   704  #define __nothrow /* unsupported: expands to nothing */
   705  #endif
   706  #endif /* __nothrow */
   707  
   708  #ifndef __hidden /* gives a symbol "hidden" visibility where supported */
   709  #if defined(__GNUC__) || __has_attribute(__visibility__)
   710  #define __hidden __attribute__((__visibility__("hidden")))
   711  #else
   712  #define __hidden /* unsupported: expands to nothing */
   713  #endif
   714  #endif /* __hidden */
   715  
   716  #ifndef __optimize /* per-function optimization options via the 'optimize' attribute */
   717  #if defined(__OPTIMIZE__)
   718  #if (defined(__GNUC__) && !defined(__clang__)) || __has_attribute(__optimize__)
   719  #define __optimize(ops) __attribute__((__optimize__(ops)))
   720  #else
   721  #define __optimize(ops) /* attribute unavailable: expands to nothing */
   722  #endif
   723  #else
   724  #define __optimize(ops) /* __OPTIMIZE__ not defined: expands to nothing */
   725  #endif
   726  #endif /* __optimize */
   727  
   728  #ifndef __hot /* marks frequently used functions; empty without __OPTIMIZE__ */
   729  #if defined(__OPTIMIZE__)
   730  #if defined(__e2k__)
   731  #define __hot __attribute__((__hot__)) __optimize(3)
   732  #elif defined(__clang__) && !__has_attribute(__hot__) &&                       \
   733      __has_attribute(__section__) &&                                            \
   734      (defined(__linux__) || defined(__gnu_linux__))
   735  /* just put frequently used functions in separate section */
   736  #define __hot __attribute__((__section__("text.hot"))) __optimize("O3")
   737  #elif defined(__LCC__)
   738  #define __hot __attribute__((__hot__, __optimize__("Ofast,O4")))
   739  #elif defined(__GNUC__) || __has_attribute(__hot__)
   740  #define __hot __attribute__((__hot__)) __optimize("O3")
   741  #else
   742  #define __hot __optimize("O3")
   743  #endif
   744  #else
   745  #define __hot
   746  #endif
   747  #endif /* __hot */
   748  
   749  #ifndef __cold
   750  #if defined(__OPTIMIZE__)
   751  #if defined(__e2k__)
   752  #define __cold __attribute__((__cold__)) __optimize(1)
   753  #elif defined(__clang__) && !__has_attribute(cold) &&                          \
   754      __has_attribute(__section__) &&                                            \
   755      (defined(__linux__) || defined(__gnu_linux__))
   756  /* just put infrequently used functions in separate section */
   757  #define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os")
   758  #elif defined(__LCC__)
   759  #define __hot __attribute__((__cold__, __optimize__("Osize")))
   760  #elif defined(__GNUC__) || __has_attribute(cold)
   761  #define __cold __attribute__((__cold__)) __optimize("Os")
   762  #else
   763  #define __cold __optimize("Os")
   764  #endif
   765  #else
   766  #define __cold
   767  #endif
   768  #endif /* __cold */
   769  
/* __flatten: maps to the GNU `flatten` attribute in optimized builds when
 * the attribute is available; expands to nothing otherwise. */
#ifndef __flatten
#if defined(__OPTIMIZE__) && (defined(__GNUC__) || __has_attribute(__flatten__))
#define __flatten __attribute__((__flatten__))
#else
#define __flatten
#endif
#endif /* __flatten */
   777  
   778  #ifndef likely
   779  #if (defined(__GNUC__) || __has_builtin(__builtin_expect)) &&                  \
   780      !defined(__COVERITY__)
   781  #define likely(cond) __builtin_expect(!!(cond), 1)
   782  #else
   783  #define likely(x) (!!(x))
   784  #endif
   785  #endif /* likely */
   786  
   787  #ifndef unlikely
   788  #if (defined(__GNUC__) || __has_builtin(__builtin_expect)) &&                  \
   789      !defined(__COVERITY__)
   790  #define unlikely(cond) __builtin_expect(!!(cond), 0)
   791  #else
   792  #define unlikely(x) (!!(x))
   793  #endif
   794  #endif /* unlikely */
   795  
/* __anonymous_struct_extension__: prefix placed before anonymous structs.
 * Expands to __extension__ on GNU-compatible compilers and to nothing
 * elsewhere. */
#ifndef __anonymous_struct_extension__
#if defined(__GNUC__)
#define __anonymous_struct_extension__ __extension__
#else
#define __anonymous_struct_extension__
#endif
#endif /* __anonymous_struct_extension__ */
   803  
/* expect_with_probability(expr, value, prob): wraps
 * __builtin_expect_with_probability when the compiler provides it;
 * otherwise evaluates to `expr` and ignores the hint arguments. */
#ifndef expect_with_probability
#if defined(__builtin_expect_with_probability) ||                              \
    __has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0)
#define expect_with_probability(expr, value, prob)                             \
  __builtin_expect_with_probability(expr, value, prob)
#else
#define expect_with_probability(expr, value, prob) (expr)
#endif
#endif /* expect_with_probability */
   813  
/* MDBX_WEAK_IMPORT_ATTRIBUTE: attribute for weakly-imported symbols.
 * Prefers a pre-existing WEAK_IMPORT_ATTRIBUTE macro, then
 * weak + weak_import, then plain weak (also assumed for GCC >= 4 on ELF),
 * and finally expands to nothing. */
#ifndef MDBX_WEAK_IMPORT_ATTRIBUTE
#ifdef WEAK_IMPORT_ATTRIBUTE
#define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE
#elif __has_attribute(__weak__) && __has_attribute(__weak_import__)
#define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__, __weak_import__))
#elif __has_attribute(__weak__) ||                                             \
    (defined(__GNUC__) && __GNUC__ >= 4 && defined(__ELF__))
#define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__))
#else
#define MDBX_WEAK_IMPORT_ATTRIBUTE
#endif
#endif /* MDBX_WEAK_IMPORT_ATTRIBUTE */
   826  
   827  /*----------------------------------------------------------------------------*/
   828  
/* Valgrind integration.
 * With MDBX_USE_VALGRIND the real <valgrind/memcheck.h> is used, and the two
 * address-error-reporting macros are stubbed if the header lacks them.
 * Without it (and when RUNNING_ON_VALGRIND is not already defined) every
 * client-request macro used here is replaced by an empty or zero stub. */
#if defined(MDBX_USE_VALGRIND)
#include <valgrind/memcheck.h>
#ifndef VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
/* LY: available since Valgrind 3.10 */
#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#endif
#elif !defined(RUNNING_ON_VALGRIND)
#define VALGRIND_CREATE_MEMPOOL(h, r, z)
#define VALGRIND_DESTROY_MEMPOOL(h)
#define VALGRIND_MEMPOOL_TRIM(h, a, s)
#define VALGRIND_MEMPOOL_ALLOC(h, a, s)
#define VALGRIND_MEMPOOL_FREE(h, a)
#define VALGRIND_MEMPOOL_CHANGE(h, a, b, s)
#define VALGRIND_MAKE_MEM_NOACCESS(a, s)
#define VALGRIND_MAKE_MEM_DEFINED(a, s)
#define VALGRIND_MAKE_MEM_UNDEFINED(a, s)
#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a, s) (0)
#define VALGRIND_CHECK_MEM_IS_DEFINED(a, s) (0)
#define RUNNING_ON_VALGRIND (0)
#endif /* MDBX_USE_VALGRIND */
   852  
/* AddressSanitizer integration: use the real interface when building with
 * ASAN; otherwise provide poison/unpoison stubs that only evaluate their
 * arguments (cast to void) and do nothing else. */
#ifdef __SANITIZE_ADDRESS__
#include <sanitizer/asan_interface.h>
#elif !defined(ASAN_POISON_MEMORY_REGION)
#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#endif /* __SANITIZE_ADDRESS__ */
   859  
   860  /*----------------------------------------------------------------------------*/
   861  
   862  #ifndef ARRAY_LENGTH
   863  #ifdef __cplusplus
   864  template <typename T, size_t N> char (&__ArraySizeHelper(T (&array)[N]))[N];
   865  #define ARRAY_LENGTH(array) (sizeof(::__ArraySizeHelper(array)))
   866  #else
   867  #define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0]))
   868  #endif
   869  #endif /* ARRAY_LENGTH */
   870  
   871  #ifndef ARRAY_END
   872  #define ARRAY_END(array) (&array[ARRAY_LENGTH(array)])
   873  #endif /* ARRAY_END */
   874  
/* CONCAT pastes its arguments verbatim; XCONCAT adds one level of
 * indirection so that macro arguments are expanded before pasting. */
#define CONCAT(a, b) a##b
#define XCONCAT(a, b) CONCAT(a, b)
   877  
   878  #define MDBX_TETRAD(a, b, c, d)                                                \
   879    ((uint32_t)(a) << 24 | (uint32_t)(b) << 16 | (uint32_t)(c) << 8 | (d))
   880  
   881  #define MDBX_STRING_TETRAD(str) MDBX_TETRAD(str[0], str[1], str[2], str[3])
   882  
/* FIXME: string literal naming the current file and line, built with
 * MDBX_STRINGIFY (defined elsewhere). */
#define FIXME "FIXME: " __FILE__ ", " MDBX_STRINGIFY(__LINE__)
   884  
/* STATIC_ASSERT_MSG(expr, msg): compile-time assertion.
 * Picks the first available mechanism: the static_assert macro, the MSVC CRT
 * _STATIC_ASSERT (message dropped), C11 _Static_assert, and finally a
 * switch with duplicate `case 0` labels that fails to compile when `expr`
 * is zero. Note the fallback is a statement, so it is only usable inside a
 * function body. */
#ifndef STATIC_ASSERT_MSG
#if defined(static_assert)
#define STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)
#elif defined(_STATIC_ASSERT)
#define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr)
#elif defined(_MSC_VER)
#include <crtdbg.h>
#define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr)
#elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) ||            \
    __has_feature(c_static_assert)
#define STATIC_ASSERT_MSG(expr, msg) _Static_assert(expr, msg)
#else
#define STATIC_ASSERT_MSG(expr, msg)                                           \
  switch (0) {                                                                 \
  case 0:                                                                      \
  case (expr):;                                                                \
  }
#endif
#endif /* STATIC_ASSERT_MSG */
   904  
/* STATIC_ASSERT(expr): STATIC_ASSERT_MSG with the stringified expression
 * as its message. */
#ifndef STATIC_ASSERT
#define STATIC_ASSERT(expr) STATIC_ASSERT_MSG(expr, #expr)
#endif
   908  
/* __Wpedantic_format_voidptr(ptr): identity helper that returns its argument
 * as `const void *`. Judging by the name, it is meant for passing pointers
 * to "%p" without -Wpedantic format warnings (NOTE(review): confirm against
 * call sites). The same-named macro makes the `#ifndef` guard effective. */
#ifndef __Wpedantic_format_voidptr
MDBX_MAYBE_UNUSED MDBX_PURE_FUNCTION static __inline const void *
__Wpedantic_format_voidptr(const void *ptr) {
  return ptr;
}
#define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG)
#endif /* __Wpedantic_format_voidptr */
   916  
#if defined(__GNUC__) && !__GNUC_PREREQ(4, 2)
/* libmdbx has not been tested with compilers older than GCC 4.2.
 * You may ignore this warning at your own risk.
 * In that case, please do not raise issues that relate ONLY to old
 * compilers.
 */
#warning "libmdbx required GCC >= 4.2"
#endif
   924  
#if defined(__clang__) && !__CLANG_PREREQ(3, 8)
/* libmdbx has not been tested with Clang older than 3.8.
 * You may ignore this warning at your own risk.
 * In that case, please do not raise issues that relate ONLY to old
 * compilers.
 */
#warning "libmdbx required CLANG >= 3.8"
#endif
   932  
#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12)
/* libmdbx has not been tested with anything older than glibc 2.12.
 * You may ignore this warning at your own risk.
 * In that case, please do not raise issues that relate ONLY to old
 * systems.
 */
#warning "libmdbx was only tested with GLIBC >= 2.12."
#endif
   940  
/* Emit a build-time warning when compiling under ThreadSanitizer. */
#ifdef __SANITIZE_THREAD__
#warning                                                                       \
    "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues."
#endif /* __SANITIZE_THREAD__ */
   945  
/* Silence -Wnested-anon-types where the compiler knows that warning,
 * using the pragma spelling appropriate to the compiler. */
#if __has_warning("-Wnested-anon-types")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wnested-anon-types"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wnested-anon-types"
#else
#pragma warning disable "nested-anon-types"
#endif
#endif /* -Wnested-anon-types */
   955  
/* Silence -Wconstant-logical-operand where the compiler knows that warning,
 * using the pragma spelling appropriate to the compiler. */
#if __has_warning("-Wconstant-logical-operand")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wconstant-logical-operand"
#else
#pragma warning disable "constant-logical-operand"
#endif
#endif /* -Wconstant-logical-operand */
   965  
/* Silence "alignment reduction ignored" diagnostics: a dedicated pragma for
 * LCC up to version 121, numeric warning IDs for the Intel compiler, and the
 * generic -W switch for any other compiler that recognizes it. */
#if defined(__LCC__) && (__LCC__ <= 121)
/* bug #2798 */
#pragma diag_suppress alignment_reduction_ignored
#elif defined(__ICC)
#pragma warning(disable : 3453 1366)
#elif __has_warning("-Walignment-reduction-ignored")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Walignment-reduction-ignored"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Walignment-reduction-ignored"
#else
#pragma warning disable "alignment-reduction-ignored"
#endif
#endif /* -Walignment-reduction-ignored */
   980  
/* MDBX_EXCLUDE_FOR_GPROF: when ENABLE_GPROF is defined, excludes the
 * annotated function from instrumentation; otherwise expands to nothing. */
#ifndef MDBX_EXCLUDE_FOR_GPROF
#ifdef ENABLE_GPROF
#define MDBX_EXCLUDE_FOR_GPROF                                                 \
  __attribute__((__no_instrument_function__,                                   \
                 __no_profile_instrument_function__))
#else
#define MDBX_EXCLUDE_FOR_GPROF
#endif /* ENABLE_GPROF */
#endif /* MDBX_EXCLUDE_FOR_GPROF */
   990  
   991  #ifdef __cplusplus
   992  extern "C" {
   993  #endif
   994  
   995  /* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
   996  
   997  /*
   998   * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
   999   * and other libmdbx authors: please see AUTHORS file.
  1000   * All rights reserved.
  1001   *
  1002   * Redistribution and use in source and binary forms, with or without
  1003   * modification, are permitted only as authorized by the OpenLDAP
  1004   * Public License.
  1005   *
  1006   * A copy of this license is available in the file LICENSE in the
  1007   * top-level directory of the distribution or, alternatively, at
  1008   * <http://www.OpenLDAP.org/license.html>.
  1009   */
  1010  
  1011  
  1012  /*----------------------------------------------------------------------------*/
  1013  /* C11 Atomics */
  1014  
/* Select the atomics backend.
 * MDBX_HAVE_C11ATOMICS is defined only when a standard atomics header is
 * included (<cstdatomic> for C++, <stdatomic.h> for C11 with a sufficiently
 * recent GCC/clang or any other compiler). */
#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
#include <cstdatomic>
#define MDBX_HAVE_C11ATOMICS
#elif !defined(__cplusplus) &&                                                 \
    (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) &&              \
    !defined(__STDC_NO_ATOMICS__) &&                                           \
    (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) ||                            \
     !(defined(__GNUC__) || defined(__clang__)))
#include <stdatomic.h>
#define MDBX_HAVE_C11ATOMICS
#elif defined(__GNUC__) || defined(__clang__)
/* Intentionally empty: no header is needed for GCC/clang here. */
#elif defined(_MSC_VER)
#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
#pragma warning(disable : 4133) /* 'function': incompatible types - from       \
                                   'size_t' to 'LONGLONG' */
#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to     \
                                   'std::size_t', possible loss of data */
#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to     \
                                   'long', possible loss of data */
#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
#elif defined(__APPLE__)
#include <libkern/OSAtomic.h>
#else
#error FIXME atomic-ops
#endif
  1041  
  1042  /*----------------------------------------------------------------------------*/
  1043  /* Memory/Compiler barriers, cache coherence */
  1044  
  1045  #if __has_include(<sys/cachectl.h>)
  1046  #include <sys/cachectl.h>
  1047  #elif defined(__mips) || defined(__mips__) || defined(__mips64) ||             \
  1048      defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) ||            \
  1049      defined(__MWERKS__) || defined(__sgi)
  1050  /* MIPS should have explicit cache control */
  1051  #include <sys/cachectl.h>
  1052  #endif
  1053  
/* Issues a compiler-level barrier using the construct appropriate to the
 * detected compiler. The branch order matters: GCC/clang and MSVC are tested
 * before the Intel compiler because the latter may mimic both. Compilation
 * fails with #error when no known compiler is detected. */
MDBX_MAYBE_UNUSED static __inline void osal_compiler_barrier(void) {
#if defined(__clang__) || defined(__GNUC__)
  __asm__ __volatile__("" ::: "memory");
#elif defined(_MSC_VER)
  _ReadWriteBarrier();
#elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
  __memory_barrier();
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
  __compiler_barrier();
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) &&       \
    (defined(HP_IA64) || defined(__ia64))
  _Asm_sched_fence(/* LY: no-arg meaning 'all expect ALU', e.g. 0x3D3D */);
#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) ||             \
    defined(__ppc64__) || defined(__powerpc64__)
  __fence();
#else
#error "Could not guess the kind of compiler, please report to us."
#endif
}
  1073  
/* Issues a full memory fence.
 * Preference order: C11 atomic_thread_fence(seq_cst), the __atomic/__c11
 * builtins when __ATOMIC_SEQ_CST is defined, the legacy __sync_synchronize
 * builtin, then platform/compiler-specific intrinsics. Compilation fails
 * with #error when none applies. */
MDBX_MAYBE_UNUSED static __inline void osal_memory_barrier(void) {
#ifdef MDBX_HAVE_C11ATOMICS
  atomic_thread_fence(memory_order_seq_cst);
#elif defined(__ATOMIC_SEQ_CST)
#ifdef __clang__
  __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
#else
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
#endif
#elif defined(__clang__) || defined(__GNUC__)
  __sync_synchronize();
#elif defined(_WIN32) || defined(_WIN64)
  MemoryBarrier();
#elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
#if defined(__ia32__)
  _mm_mfence();
#else
  __mf();
#endif
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
  __machine_rw_barrier();
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) &&       \
    (defined(HP_IA64) || defined(__ia64))
  _Asm_mf();
#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) ||             \
    defined(__ppc64__) || defined(__powerpc64__)
  __lwsync();
#else
#error "Could not guess the kind of compiler, please report to us."
#endif
}
  1105  
  1106  /*----------------------------------------------------------------------------*/
  1107  /* system-depended definitions */
  1108  
  1109  #if defined(_WIN32) || defined(_WIN64)
/* Windows flavour of the OSAL basic types and helper macros. */
#define HAVE_SYS_STAT_H
#define HAVE_SYS_TYPES_H
typedef HANDLE osal_thread_t;
typedef unsigned osal_thread_key_t;
/* Gives Windows code the POSIX-style MAP_FAILED name. */
#define MAP_FAILED NULL
/* Upper 32 bits of `v`, or 0 when `v` is not wider than 4 bytes. */
#define HIGH_DWORD(v) ((DWORD)((sizeof(v) > 4) ? ((uint64_t)(v) >> 32) : 0))
/* Calling convention and result type of a thread start routine. */
#define THREAD_CALL WINAPI
#define THREAD_RESULT DWORD
/* One mutex handle plus two event handles. */
typedef struct {
  HANDLE mutex;
  HANDLE event[2];
} osal_condpair_t;
typedef CRITICAL_SECTION osal_fastmutex_t;
  1123  
/* On non-MSVC Windows compilers, make __try a no-op and turn the __except
 * block into dead code (`if (false)`) so SEH-style source still compiles. */
#if !defined(_MSC_VER) && !defined(__try)
#define __try
#define __except(COND) if (false)
#endif /* stub for MSVC's __try/__except */
  1128  
  1129  #if MDBX_WITHOUT_MSVC_CRT
  1130  
  1131  #ifndef osal_malloc
  1132  static inline void *osal_malloc(size_t bytes) {
  1133    return HeapAlloc(GetProcessHeap(), 0, bytes);
  1134  }
  1135  #endif /* osal_malloc */
  1136  
  1137  #ifndef osal_calloc
  1138  static inline void *osal_calloc(size_t nelem, size_t size) {
  1139    return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, nelem * size);
  1140  }
  1141  #endif /* osal_calloc */
  1142  
  1143  #ifndef osal_realloc
  1144  static inline void *osal_realloc(void *ptr, size_t bytes) {
  1145    return ptr ? HeapReAlloc(GetProcessHeap(), 0, ptr, bytes)
  1146               : HeapAlloc(GetProcessHeap(), 0, bytes);
  1147  }
  1148  #endif /* osal_realloc */
  1149  
  1150  #ifndef osal_free
  1151  static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
  1152  #endif /* osal_free */
  1153  
  1154  #else /* MDBX_WITHOUT_MSVC_CRT */
  1155  
/* With the MSVC CRT available, the OSAL allocator names are plain aliases
 * of the CRT functions. */
#define osal_malloc malloc
#define osal_calloc calloc
#define osal_realloc realloc
#define osal_free free
#define osal_strdup _strdup
  1161  
  1162  #endif /* MDBX_WITHOUT_MSVC_CRT */
  1163  
/* Map the standard names onto the underscore-prefixed variants unless a
 * macro with the standard name already exists.
 * NOTE(review): _snprintf/_vsnprintf are documented by Microsoft as not
 * guaranteeing NUL-termination on truncation, unlike C99 snprintf — callers
 * should be checked for reliance on termination. */
#ifndef snprintf
#define snprintf _snprintf /* ntdll */
#endif

#ifndef vsnprintf
#define vsnprintf _vsnprintf /* ntdll */
#endif
  1171  
/* Declaration only (Windows builds). Judging by the name and signature it
 * converts a multibyte string `src` of `src_n` bytes into the wide-character
 * buffer `dst` of `dst_n` elements — TODO confirm against the definition. */
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
                                    size_t src_n);
  1174  
  1175  #else /*----------------------------------------------------------------------*/
  1176  
/* POSIX flavour of the OSAL basic types and helper macros. */
typedef pthread_t osal_thread_t;
typedef pthread_key_t osal_thread_key_t;
/* Gives POSIX code the Windows-style INVALID_HANDLE_VALUE name. */
#define INVALID_HANDLE_VALUE (-1)
/* Calling convention (none) and result type of a thread start routine. */
#define THREAD_CALL
#define THREAD_RESULT void *
/* One mutex plus two condition variables. */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond[2];
} osal_condpair_t;
typedef pthread_mutex_t osal_fastmutex_t;
/* The OSAL allocator names are plain aliases of the libc functions. */
#define osal_malloc malloc
#define osal_calloc calloc
#define osal_realloc realloc
#define osal_free free
#define osal_strdup strdup
  1192  #endif /* Platform */
  1193  
/* Provide malloc_usable_size() where the platform has an equivalent under a
 * different name. On platforms matching none of the branches the name is
 * left undefined. */
#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size)
/* malloc_usable_size() already provided */
#elif defined(__APPLE__)
#define malloc_usable_size(ptr) malloc_size(ptr)
#elif defined(_MSC_VER) && !MDBX_WITHOUT_MSVC_CRT
#define malloc_usable_size(ptr) _msize(ptr)
#endif /* malloc_usable_size */
  1201  
  1202  /*----------------------------------------------------------------------------*/
  1203  /* OS abstraction layer stuff */
  1204  
  1205  /* Get the size of a memory page for the system.
  1206   * This is the basic size that the platform's memory manager uses, and is
  1207   * fundamental to the use of memory-mapped files. */
  1208  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
  1209  osal_syspagesize(void) {
  1210  #if defined(_WIN32) || defined(_WIN64)
  1211    SYSTEM_INFO si;
  1212    GetSystemInfo(&si);
  1213    return si.dwPageSize;
  1214  #else
  1215    return sysconf(_SC_PAGE_SIZE);
  1216  #endif
  1217  }
  1218  
/* Character type used for file-system paths: wide characters on Windows,
 * plain char elsewhere. */
#if defined(_WIN32) || defined(_WIN64)
typedef wchar_t pathchar_t;
#else
typedef char pathchar_t;
#endif
  1224  
/* Describes one memory mapping together with its backing file handle. */
typedef struct osal_mmap_param {
  /* Three typed views of the same mapping pointer. */
  union {
    void *address;
    uint8_t *dxb;
    struct MDBX_lockinfo *lck;
  };
  mdbx_filehandle_t fd;
  size_t limit;   /* mapping length, but NOT a size of file nor DB */
  size_t current; /* mapped region size, i.e. the size of file and DB */
  uint64_t filesize /* in-process cache of a file size */;
#if defined(_WIN32) || defined(_WIN64)
  HANDLE section; /* memory-mapped section handle */
#endif
} osal_mmap_t;
  1239  
/* 128-bit value addressable either as two 64-bit words (x, y) or as four
 * 32-bit words (a, b, c, d) sharing the same storage. */
typedef union bin128 {
  __anonymous_struct_extension__ struct { uint64_t x, y; };
  __anonymous_struct_extension__ struct { uint32_t a, b, c, d; };
} bin128_t;
  1244  
#if defined(_WIN32) || defined(_WIN64)
/* Windows-only reader/writer lock storage: either a pair of volatile
 * counters or the native RTL_SRWLOCK, overlaid in one union. Which view is
 * in use is decided by code outside this chunk. */
typedef union osal_srwlock {
  __anonymous_struct_extension__ struct {
    long volatile readerCount;
    long volatile writerCount;
  };
  RTL_SRWLOCK native;
} osal_srwlock_t;
#endif /* Windows */
  1254  
  1255  #ifndef __cplusplus
  1256  
  1257  /*----------------------------------------------------------------------------*/
  1258  /* libc compatibility stuff */
  1259  
  1260  #if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) &&                           \
  1261      (defined(_GNU_SOURCE) || defined(_BSD_SOURCE))
  1262  #define osal_asprintf asprintf
  1263  #define osal_vasprintf vasprintf
  1264  #else
  1265  MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC
  1266      MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...);
  1267  MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, va_list ap);
  1268  #endif
  1269  
/* Provide the MADV_DODUMP / MADV_DONTDUMP names on systems that only define
 * MADV_CORE / MADV_NOCORE. */
#if !defined(MADV_DODUMP) && defined(MADV_CORE)
#define MADV_DODUMP MADV_CORE
#endif /* MADV_CORE -> MADV_DODUMP */

#if !defined(MADV_DONTDUMP) && defined(MADV_NOCORE)
#define MADV_DONTDUMP MADV_NOCORE
#endif /* MADV_NOCORE -> MADV_DONTDUMP */
  1277  
/* Declarations only; both are defined outside this chunk. The names suggest
 * they inject timing jitter for testing — TODO confirm at the definitions. */
MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
  1280  
/* Maximum number of bytes to write in one call:
 * 0x01000000 (16 MiB) on Windows, 0x3fff0000 elsewhere. */
#if defined(_WIN32) || defined(_WIN64)
#define MAX_WRITE UINT32_C(0x01000000)
#else
#define MAX_WRITE UINT32_C(0x3fff0000)
#endif
  1287  
/* Linux-only globals, defined outside this chunk. How the kernel version is
 * encoded in the 32-bit value is not visible here. */
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t linux_kernel_version;
MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
  1292  
/* When no platform branch aliased osal_strdup to a libc function (the
 * CRT-free Windows build), declare it as a library-provided function. */
#ifndef osal_strdup
LIBMDBX_API char *osal_strdup(const char *str);
#endif
  1296  
  1297  MDBX_MAYBE_UNUSED static __inline int osal_get_errno(void) {
  1298  #if defined(_WIN32) || defined(_WIN64)
  1299    DWORD rc = GetLastError();
  1300  #else
  1301    int rc = errno;
  1302  #endif
  1303    return rc;
  1304  }
  1305  
/* Aligned allocation pair, declared unless already provided as macros.
 * osal_memalign_alloc returns an int status and stores the block through
 * `result`; the exact error-code convention is defined elsewhere. */
#ifndef osal_memalign_alloc
MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes,
                                           void **result);
#endif
#ifndef osal_memalign_free
MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr);
#endif
  1313  
/* Operations on osal_condpair_t (a mutex with two condition slots).
 * Declarations only; `part` selects one of the two slots — presumably
 * indexing cond[]/event[] — TODO confirm at the definitions. */
MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair,
                                            bool part);
MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, bool part);
MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair);
  1321  
/* Lifecycle and lock/unlock operations on osal_fastmutex_t.
 * Declarations only; each returns an int status. */
MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
  1326  
/* File I/O primitives on an mdbx_filehandle_t. Declarations only; each
 * returns an int status. The p-prefixed variants take an explicit 64-bit
 * file offset; osal_write takes none. */
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
                                    int iovcnt, uint64_t offset,
                                    size_t expected_written);
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
                                  uint64_t offset);
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
                                   size_t count, uint64_t offset);
MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf,
                                  size_t count);
  1336  
/* Thread creation and joining. Declarations only. The start routine uses
 * the platform-specific THREAD_CALL convention and THREAD_RESULT type. */
MDBX_INTERNAL_FUNC int
osal_thread_create(osal_thread_t *thread,
                   THREAD_RESULT(THREAD_CALL *start_routine)(void *),
                   void *arg);
MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread);
  1342  
  1343  enum osal_syncmode_bits {
  1344    MDBX_SYNC_NONE = 0,
  1345    MDBX_SYNC_DATA = 1,
  1346    MDBX_SYNC_SIZE = 2,
  1347    MDBX_SYNC_IODQ = 4
  1348  };
  1349  
/* Sync, truncate, seek and size queries on an mdbx_filehandle_t.
 * Declarations only; each returns an int status. osal_filesize reports the
 * size through `length`. */
MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd,
                                  const enum osal_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length);
MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
  1355  
  1356  enum osal_openfile_purpose {
  1357    MDBX_OPEN_DXB_READ = 0,
  1358    MDBX_OPEN_DXB_LAZY = 1,
  1359    MDBX_OPEN_DXB_DSYNC = 2,
  1360    MDBX_OPEN_LCK = 3,
  1361    MDBX_OPEN_COPY = 4,
  1362    MDBX_OPEN_DELETE = 5
  1363  };
  1364  
/* File open/close/remove helpers. Declarations only; each returns an int
 * status. osal_openfile stores the opened handle through `fd`; paths use
 * the platform path character type. */
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
                                     const MDBX_env *env,
                                     const pathchar_t *pathname,
                                     mdbx_filehandle_t *fd,
                                     mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int osal_removefile(const pathchar_t *pathname);
MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname);
MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait);
  1375  
/* Memory-mapping API. Declarations only; each returns an int status.
 * MMAP_OPTION_* are bit values for osal_mmap()'s `options`; the
 * MDBX_MRESIZE_* bits are presumably combined into osal_mresize()'s `flags`
 * — TODO confirm at the definition. */
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
                                 const size_t must, const size_t limit,
                                 const unsigned options);
MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map);
#define MDBX_MRESIZE_MAY_MOVE 0x00000100
#define MDBX_MRESIZE_MAY_UNMAP 0x00000200
MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
                                    size_t size, size_t limit);
#if defined(_WIN32) || defined(_WIN64)
/* Windows-only: a counted array of thread handles used by the
 * suspend/resume pair declared below. The in-struct storage holds 31
 * handles; `limit` and `count` are maintained by code outside this chunk. */
typedef struct {
  unsigned limit, count;
  HANDLE handles[31];
} mdbx_handle_array_t;
MDBX_INTERNAL_FUNC int
osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
MDBX_INTERNAL_FUNC int
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
#endif /* Windows */
/* Declarations only; each returns an int status. osal_msync takes a byte
 * range within `map` and the sync mode bits; the meaning of `err` in
 * osal_check_fs_rdonly is not visible here. */
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
                                  size_t length,
                                  enum osal_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
                                            const pathchar_t *pathname,
                                            int err);
  1402  
  1403  MDBX_MAYBE_UNUSED static __inline uint32_t osal_getpid(void) {
  1404    STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t));
  1405  #if defined(_WIN32) || defined(_WIN64)
  1406    return GetCurrentProcessId();
  1407  #else
  1408    STATIC_ASSERT(sizeof(pid_t) <= sizeof(uint32_t));
  1409    return getpid();
  1410  #endif
  1411  }
  1412  
  1413  MDBX_MAYBE_UNUSED static __inline uintptr_t osal_thread_self(void) {
  1414    mdbx_tid_t thunk;
  1415    STATIC_ASSERT(sizeof(uintptr_t) >= sizeof(thunk));
  1416  #if defined(_WIN32) || defined(_WIN64)
  1417    thunk = GetCurrentThreadId();
  1418  #else
  1419    thunk = pthread_self();
  1420  #endif
  1421    return (uintptr_t)thunk;
  1422  }
  1423  
  1424  #if !defined(_WIN32) && !defined(_WIN64)
/* On Android/Bionic the check is a real function defined elsewhere;
 * on every other non-Windows platform it is an inline stub returning 0. */
#if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC)
MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void);
#else
static __inline int osal_check_tid4bionic(void) { return 0; }
#endif /* __ANDROID_API__ || ANDROID) || BIONIC */
  1430  
  1431  MDBX_MAYBE_UNUSED static __inline int
  1432  osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
  1433    int err = osal_check_tid4bionic();
  1434    return unlikely(err) ? err : pthread_mutex_lock(mutex);
  1435  }
  1436  #endif /* !Windows */
  1437  
/* Monotonic-time helpers and boot-ID query. Declarations only.
 * The two conversion functions translate between a 64-bit monotime value
 * and a 32-bit "16dot16" value (by the name, a 16.16 fixed-point number of
 * seconds — TODO confirm at the definitions). */
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);

MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
  1443  /*----------------------------------------------------------------------------*/
  1444  /* lck stuff */
  1445  
/// \brief Initializes the synchronization primitives linked with an MDBX_env
///   instance, both in the LCK-file and within the current process.
/// \param
///   global_uniqueness_flag = true - denotes that there are no other processes
///     working with DB and LCK-file. Thus the function MUST initialize
///     shared synchronization objects in memory-mapped LCK-file.
///   global_uniqueness_flag = false - denotes that at least one process is
///     already working with DB and LCK-file, including the case when DB
///     has already been opened in the current process. Thus the function
///     MUST NOT initialize shared synchronization objects in memory-mapped
///     LCK-file that are already in use.
/// \note The flag is declared as `int` and is documented here with boolean
///   values.
/// \return Error code or zero on success.
MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env,
                                     MDBX_env *inprocess_neighbor,
                                     int global_uniqueness_flag);
  1461  
  1462  /// \brief Disconnects from shared interprocess objects and destroys
  1463  ///   synchronization objects linked with MDBX_env instance
  1464  ///   within the current process.
  1465  /// \param
  1466  ///   inprocess_neighbor = NULL - if the current process does not have other
  1467  ///     instances of MDBX_env linked with the DB being closed.
  1468  ///     Thus the function MUST check for other processes working with DB or
  1469  ///     LCK-file, and keep or destroy shared synchronization objects in
  1470  ///     memory-mapped LCK-file depending on the result.
  1471  ///   inprocess_neighbor = not-NULL - pointer to another instance of MDBX_env
  1472  ///     (any one, if there are several) working with DB or LCK-file within the
  1473  ///     current process. Thus the function MUST NOT try to acquire exclusive
  1474  ///     lock and/or try to destroy shared synchronization objects linked with
  1475  ///     DB or LCK-file. Moreover, the implementation MUST ensure correct work
  1476  ///     of other instances of MDBX_env within the current process, e.g.
  1477  ///     restore POSIX-fcntl locks after the closing of file descriptors.
  1478  /// \return Error code (MDBX_PANIC) or zero on success.
  1479  MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env,
  1480                                          MDBX_env *inprocess_neighbor);
  1481  
  1482  /// \brief Connects to shared interprocess locking objects and tries to acquire
  1483  ///   the maximum lock level (shared if exclusive is not available)
  1484  ///   Depending on implementation or/and platform (Windows) this function may
  1485  ///   acquire the non-OS super-level lock (e.g. for shared synchronization
  1486  ///   objects initialization), which will be downgraded to OS-exclusive or
  1487  ///   shared via explicit calling of osal_lck_downgrade().
  1488  /// \return
  1489  ///   MDBX_RESULT_TRUE (-1) - if an exclusive lock was acquired and thus
  1490  ///     the current process is the first and only after the last use of DB.
  1491  ///   MDBX_RESULT_FALSE (0) - if a shared lock was acquired and thus
  1492  ///     DB has already been opened and now is used by other processes.
  1493  ///   Otherwise (not 0 and not -1) - error code.
  1494  MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env);
  1495  
  1496  /// \brief Downgrades the level of initially acquired lock to
  1497  ///   operational level specified by argument. The reasons for such a downgrade:
  1498  ///    - unblocking of other processes that are waiting for access, i.e.
  1499  ///      if (env->me_flags & MDBX_EXCLUSIVE) != 0, then other processes
  1500  ///      should be made aware that access is unavailable rather than
  1501  ///      wait for it.
  1502  ///    - freeing locks that interfere with file operations (notably on Windows)
  1503  ///   (env->me_flags & MDBX_EXCLUSIVE) == 0 - downgrade to shared lock.
  1504  ///   (env->me_flags & MDBX_EXCLUSIVE) != 0 - downgrade to exclusive
  1505  ///   operational lock.
  1506  /// \return Error code or zero on success
  1507  MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env);
  1508  
  1509  /// \brief Locks LCK-file or/and table of readers for (de)registering.
  1510  /// \return Error code or zero on success
  1511  MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env);
  1512  
  1513  /// \brief Unlocks LCK-file or/and table of readers after (de)registering.
  1514  MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env);
  1515  
  1516  /// \brief Acquires lock for DB change (on writing transaction start)
  1517  ///   Reading transactions will not be blocked.
  1518  ///   Declared as LIBMDBX_API because it is used in mdbx_chk.
  1519  /// \return Error code or zero on success
  1520  LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait);
  1521  
  1522  /// \brief Releases lock once DB changes are made (after writing transaction
  1523  ///   has finished).
  1524  ///   Declared as LIBMDBX_API because it is used in mdbx_chk.
  1525  LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env);
  1526  
  1527  /// \brief Sets alive-flag of reader presence (indicative lock) for PID of
  1528  ///   the current process. The function does no more than needed for
  1529  ///   the correct working of osal_rpid_check() in other processes.
  1530  /// \return Error code or zero on success
  1531  MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env);
  1532  
  1533  /// \brief Resets alive-flag of reader presence (indicative lock)
  1534  ///   for PID of the current process. The function does no more than needed
  1535  ///   for the correct working of osal_rpid_check() in other processes.
  1536  /// \return Error code or zero on success
  1537  MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env);
  1538  
  1539  /// \brief Checks for reading process status with the given pid with help of
  1540  ///   alive-flag of presence (indicative lock) or using another way.
  1541  /// \return
  1542  ///   MDBX_RESULT_TRUE (-1) - if the reader process with the given PID is alive
  1543  ///     and working with DB (indicative lock is present).
  1544  ///   MDBX_RESULT_FALSE (0) - if the reader process with the given PID is absent
  1545  ///     or not working with DB (indicative lock is not present).
  1546  ///   Otherwise (not 0 and not -1) - error code.
  1547  MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
  1548  
  1549  #if defined(_WIN32) || defined(_WIN64)
  1550  
  1551  #define OSAL_MB2WIDE(FROM, TO) /* TO = stack buffer filled from FROM */        \
  1552    do {                                                                         \
  1553      const char *const from_tmp = (FROM); /* evaluates FROM exactly once */     \
  1554      const size_t from_mblen = strlen(from_tmp);                                \
  1555      const size_t to_wlen = osal_mb2w(nullptr, 0, from_tmp, from_mblen);        \
  1556      if (to_wlen < 1 || to_wlen > /* MAX_PATH */ INT16_MAX)                     \
  1557        return ERROR_INVALID_NAME; /* exits the ENCLOSING function */            \
  1558      wchar_t *const to_tmp = _alloca((to_wlen + 1) * sizeof(wchar_t));          \
  1559      if (to_wlen + 1 != /* source length here includes the NUL */               \
  1560          osal_mb2w(to_tmp, to_wlen + 1, from_tmp, from_mblen + 1))              \
  1561        return ERROR_INVALID_NAME; /* exits the ENCLOSING function */            \
  1562      (TO) = to_tmp; /* _alloca() memory: do not free() */                       \
  1563    } while (0)
  1564  
  1565  typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *);
  1566  MDBX_INTERNAL_VAR osal_srwlock_t_function osal_srwlock_Init,
  1567      osal_srwlock_AcquireShared, osal_srwlock_ReleaseShared,
  1568      osal_srwlock_AcquireExclusive, osal_srwlock_ReleaseExclusive;
  1569  
  1570  #if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */
  1571  typedef enum _FILE_INFO_BY_HANDLE_CLASS {
  1572    FileBasicInfo,
  1573    FileStandardInfo,
  1574    FileNameInfo,
  1575    FileRenameInfo,
  1576    FileDispositionInfo,
  1577    FileAllocationInfo,
  1578    FileEndOfFileInfo,
  1579    FileStreamInfo,
  1580    FileCompressionInfo,
  1581    FileAttributeTagInfo,
  1582    FileIdBothDirectoryInfo,
  1583    FileIdBothDirectoryRestartInfo,
  1584    FileIoPriorityHintInfo,
  1585    FileRemoteProtocolInfo,
  1586    MaximumFileInfoByHandleClass
  1587  } FILE_INFO_BY_HANDLE_CLASS,
  1588      *PFILE_INFO_BY_HANDLE_CLASS;
  1589  
  1590  typedef struct _FILE_END_OF_FILE_INFO {
  1591    LARGE_INTEGER EndOfFile;
  1592  } FILE_END_OF_FILE_INFO, *PFILE_END_OF_FILE_INFO;
  1593  
  1594  #define REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK 0x00000001
  1595  #define REMOTE_PROTOCOL_INFO_FLAG_OFFLINE 0x00000002
  1596  
  1597  typedef struct _FILE_REMOTE_PROTOCOL_INFO {
  1598    USHORT StructureVersion;
  1599    USHORT StructureSize;
  1600    DWORD Protocol;
  1601    USHORT ProtocolMajorVersion;
  1602    USHORT ProtocolMinorVersion;
  1603    USHORT ProtocolRevision;
  1604    USHORT Reserved;
  1605    DWORD Flags;
  1606    struct {
  1607      DWORD Reserved[8];
  1608    } GenericReserved;
  1609    struct {
  1610      DWORD Reserved[16];
  1611    } ProtocolSpecificReserved;
  1612  } FILE_REMOTE_PROTOCOL_INFO, *PFILE_REMOTE_PROTOCOL_INFO;
  1613  
  1614  #endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */
  1615  
  1616  typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)(
  1617      _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
  1618      _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize);
  1619  MDBX_INTERNAL_VAR MDBX_GetFileInformationByHandleEx
  1620      mdbx_GetFileInformationByHandleEx;
  1621  
  1622  typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)(
  1623      _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer,
  1624      _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber,
  1625      _Out_opt_ LPDWORD lpMaximumComponentLength,
  1626      _Out_opt_ LPDWORD lpFileSystemFlags,
  1627      _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize);
  1628  MDBX_INTERNAL_VAR MDBX_GetVolumeInformationByHandleW
  1629      mdbx_GetVolumeInformationByHandleW;
  1630  
  1631  typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile,
  1632                                                        _Out_ LPWSTR lpszFilePath,
  1633                                                        _In_ DWORD cchFilePath,
  1634                                                        _In_ DWORD dwFlags);
  1635  MDBX_INTERNAL_VAR MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW;
  1636  
  1637  typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)(
  1638      _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
  1639      _In_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize);
        /* Function-pointer type mirroring Win32 SetFileInformationByHandle().
         * For this API lpFileInformation is an INPUT buffer (the call reads the
         * new file information from it), hence the _In_ annotation. The variable
         * below holds the pointer; where it gets assigned is outside this
         * section. */
  1640  MDBX_INTERNAL_VAR MDBX_SetFileInformationByHandle
  1641      mdbx_SetFileInformationByHandle;
  1642  
  1643  typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)(
  1644      IN HANDLE FileHandle, IN OUT HANDLE Event,
  1645      IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext,
  1646      OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode,
  1647      IN OUT PVOID InputBuffer, IN ULONG InputBufferLength,
  1648      OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength);
  1649  MDBX_INTERNAL_VAR MDBX_NtFsControlFile mdbx_NtFsControlFile;
  1650  
  1651  typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void);
  1652  MDBX_INTERNAL_VAR MDBX_GetTickCount64 mdbx_GetTickCount64;
  1653  
  1654  #if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8
  1655  typedef struct _WIN32_MEMORY_RANGE_ENTRY {
  1656    PVOID VirtualAddress;
  1657    SIZE_T NumberOfBytes;
  1658  } WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY;
  1659  #endif /* Windows 8.x */
  1660  
  1661  typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)(
  1662      HANDLE hProcess, ULONG_PTR NumberOfEntries,
  1663      PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags);
  1664  MDBX_INTERNAL_VAR MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory;
  1665  
  1666  typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT;
  1667  
  1668  typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle,
  1669                                                IN PLARGE_INTEGER NewSectionSize);
  1670  MDBX_INTERNAL_VAR MDBX_NtExtendSection mdbx_NtExtendSection;
  1671  
  1672  static __inline bool mdbx_RunningUnderWine(void) {
          /* Reports Wine exactly when the mdbx_NtExtendSection pointer is null
           * (how that pointer gets resolved is outside this section). */
  1673    return mdbx_NtExtendSection ? false : true;
  1674  }
  1675  
  1676  typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey,
  1677                                             LPCSTR lpValue, DWORD dwFlags,
  1678                                             LPDWORD pdwType, PVOID pvData,
  1679                                             LPDWORD pcbData);
  1680  MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
  1681  
  1682  NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
  1683  
  1684  #endif /* Windows */
  1685  
  1686  #endif /* !__cplusplus */
  1687  
  1688  /*----------------------------------------------------------------------------*/
  1689  
  1690  #if defined(_MSC_VER) && _MSC_VER >= 1900
  1691  /* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros
  1692   * for internal format-args checker. */
  1693  #undef PRIuPTR
  1694  #undef PRIiPTR
  1695  #undef PRIdPTR
  1696  #undef PRIxPTR
  1697  #define PRIuPTR "Iu"
  1698  #define PRIiPTR "Ii"
  1699  #define PRIdPTR "Id"
  1700  #define PRIxPTR "Ix"
  1701  #define PRIuSIZE "zu"
  1702  #define PRIiSIZE "zi"
  1703  #define PRIdSIZE "zd"
  1704  #define PRIxSIZE "zx"
  1705  #endif /* fix PRI*PTR for _MSC_VER */
  1706  
  1707  #ifndef PRIuSIZE
  1708  #define PRIuSIZE PRIuPTR
  1709  #define PRIiSIZE PRIiPTR
  1710  #define PRIdSIZE PRIdPTR
  1711  #define PRIxSIZE PRIxPTR
  1712  #endif /* PRI*SIZE macros for MSVC */
  1713  
  1714  #ifdef _MSC_VER
  1715  #pragma warning(pop)
  1716  #endif
  1717  
  1718  #define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY)
  1719  #if defined(xMDBX_TOOLS)
  1720  extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
  1721  #endif
  1722  
  1723  /*******************************************************************************
  1724   *******************************************************************************
  1725   *******************************************************************************
  1726   *
  1727   *
  1728   *         ####   #####    #####     #     ####   #    #   ####
  1729   *        #    #  #    #     #       #    #    #  ##   #  #
  1730   *        #    #  #    #     #       #    #    #  # #  #   ####
  1731   *        #    #  #####      #       #    #    #  #  # #       #
  1732   *        #    #  #          #       #    #    #  #   ##  #    #
  1733   *         ####   #          #       #     ####   #    #   ####
  1734   *
  1735   *
  1736   */
  1737  
  1738  /** \defgroup build_option Build options
  1739   * The libmdbx build options.
  1740   @{ */
  1741  
  1742  /** Using fcntl(F_FULLFSYNC) with 5-10 times slowdown */
  1743  #define MDBX_OSX_WANNA_DURABILITY 0
  1744  /** Using fsync() with chance of data loss on power failure */
  1745  #define MDBX_OSX_WANNA_SPEED 1
  1746  
  1747  #ifndef MDBX_OSX_SPEED_INSTEADOF_DURABILITY
  1748  /** Chooses \ref MDBX_OSX_WANNA_DURABILITY or \ref MDBX_OSX_WANNA_SPEED
  1749   * for OSX & iOS */
  1750  #define MDBX_OSX_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY
  1751  #endif /* MDBX_OSX_SPEED_INSTEADOF_DURABILITY */
  1752  
  1753  /** Controls checking PID against reuse DB environment after the fork() */
  1754  #ifndef MDBX_ENV_CHECKPID
  1755  #if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64)
  1756  /* PID check could be omitted:
  1757   *  - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork()
  1758   *    mapped pages will not be available for child process.
  1759   *  - on Windows, where fork() is not available. */
  1760  #define MDBX_ENV_CHECKPID 0
  1761  #else
  1762  #define MDBX_ENV_CHECKPID 1
  1763  #endif
  1764  #define MDBX_ENV_CHECKPID_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
  1765  #else
  1766  #define MDBX_ENV_CHECKPID_CONFIG MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
  1767  #endif /* MDBX_ENV_CHECKPID */
  1768  
  1769  /** Controls checking transaction owner thread against misuse transactions from
  1770   * other threads. */
  1771  #ifndef MDBX_TXN_CHECKOWNER
  1772  #define MDBX_TXN_CHECKOWNER 1
  1773  #define MDBX_TXN_CHECKOWNER_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
  1774  #else
  1775  #define MDBX_TXN_CHECKOWNER_CONFIG MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
  1776  #endif /* MDBX_TXN_CHECKOWNER */
  1777  
  1778  /** Does a system have battery-backed Real-Time Clock or just a fake. */
  1779  #ifndef MDBX_TRUST_RTC
  1780  #if defined(__linux__) || defined(__gnu_linux__) || defined(__NetBSD__) ||     \
  1781      defined(__OpenBSD__)
  1782  #define MDBX_TRUST_RTC 0 /* a lot of embedded systems have a fake RTC */
  1783  #else
  1784  #define MDBX_TRUST_RTC 1
  1785  #endif
  1786  #define MDBX_TRUST_RTC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TRUST_RTC)
  1787  #else
  1788  #define MDBX_TRUST_RTC_CONFIG MDBX_STRINGIFY(MDBX_TRUST_RTC)
  1789  #endif /* MDBX_TRUST_RTC */
  1790  
  1791  /** Controls online database auto-compactification during write-transactions. */
  1792  #ifndef MDBX_ENABLE_REFUND
  1793  #define MDBX_ENABLE_REFUND 1
  1794  #elif !(MDBX_ENABLE_REFUND == 0 || MDBX_ENABLE_REFUND == 1)
  1795  #error MDBX_ENABLE_REFUND must be defined as 0 or 1
  1796  #endif /* MDBX_ENABLE_REFUND */
  1797  
  1798  /** Controls gathering statistics for page operations. */
  1799  #ifndef MDBX_ENABLE_PGOP_STAT
  1800  #define MDBX_ENABLE_PGOP_STAT 1
  1801  #elif !(MDBX_ENABLE_PGOP_STAT == 0 || MDBX_ENABLE_PGOP_STAT == 1)
  1802  #error MDBX_ENABLE_PGOP_STAT must be defined as 0 or 1
  1803  #endif /* MDBX_ENABLE_PGOP_STAT */
  1804  
  1805  /** Enables chunking of long lists of retired pages during huge transaction
  1806   * commits to avoid using sequences of pages. */
  1807  #ifndef MDBX_ENABLE_BIGFOOT
  1808  #if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
  1809  #define MDBX_ENABLE_BIGFOOT 1
  1810  #else
  1811  #define MDBX_ENABLE_BIGFOOT 0
  1812  #endif
  1813  #elif !(MDBX_ENABLE_BIGFOOT == 0 || MDBX_ENABLE_BIGFOOT == 1)
  1814  #error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
  1815  #endif /* MDBX_ENABLE_BIGFOOT */
  1816  
  1817  /** Controls use of POSIX madvise() hints and friends. */
  1818  #ifndef MDBX_ENABLE_MADVISE
  1819  #define MDBX_ENABLE_MADVISE 1
  1820  #elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
  1821  #error MDBX_ENABLE_MADVISE must be defined as 0 or 1
  1822  #endif /* MDBX_ENABLE_MADVISE */
  1823  
  1824  /** Disables some checks to reduce the overhead and the detection probability
  1825   * of database corruption to values closer to those of LMDB. */
  1826  #ifndef MDBX_DISABLE_VALIDATION
  1827  #define MDBX_DISABLE_VALIDATION 0
  1828  #elif !(MDBX_DISABLE_VALIDATION == 0 || MDBX_DISABLE_VALIDATION == 1)
  1829  #error MDBX_DISABLE_VALIDATION must be defined as 0 or 1
  1830  #endif /* MDBX_DISABLE_VALIDATION */
  1831  
  1832  #ifndef MDBX_PNL_PREALLOC_FOR_RADIXSORT
  1833  #define MDBX_PNL_PREALLOC_FOR_RADIXSORT 1
  1834  #elif !(MDBX_PNL_PREALLOC_FOR_RADIXSORT == 0 ||                                \
  1835          MDBX_PNL_PREALLOC_FOR_RADIXSORT == 1)
  1836  #error MDBX_PNL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
  1837  #endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */
  1838  
  1839  #ifndef MDBX_DPL_PREALLOC_FOR_RADIXSORT
  1840  #define MDBX_DPL_PREALLOC_FOR_RADIXSORT 1
  1841  #elif !(MDBX_DPL_PREALLOC_FOR_RADIXSORT == 0 ||                                \
  1842          MDBX_DPL_PREALLOC_FOR_RADIXSORT == 1)
  1843  #error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
  1844  #endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
  1845  
  1846  /** Basically, this build-option is for TODO. Guess it should be replaced
  1847   * with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
  1848   *  0/OFF = Don't track dirty pages at all and don't spill them.
  1849   *          This should be by-default on Linux and may-be other systems
  1850   *          (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
  1851   *          properly LRU tracking and async writing on-demand.
  1852   *  1/ON  = Lite tracking of dirty pages but with LRU labels and explicit
  1853   *          spilling with msync(MS_ASYNC). */
  1854  #ifndef MDBX_FAKE_SPILL_WRITEMAP
  1855  #if defined(__linux__) || defined(__gnu_linux__)
  1856  #define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
  1857  #else
  1858  #define MDBX_FAKE_SPILL_WRITEMAP 0
  1859  #endif
  1860  #elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
  1861  #error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
  1862  #endif /* MDBX_FAKE_SPILL_WRITEMAP */
  1863  
  1864  /** Controls sort order of internal page number lists.
  1865   * This is a mostly experimental/advanced option, not for regular MDBX users.
  1866   * \warning The database format depends on this option, and libmdbx builds
  1867   * with different option values are incompatible. */
  1868  #ifndef MDBX_PNL_ASCENDING
  1869  #define MDBX_PNL_ASCENDING 0
  1870  #elif !(MDBX_PNL_ASCENDING == 0 || MDBX_PNL_ASCENDING == 1)
  1871  #error MDBX_PNL_ASCENDING must be defined as 0 or 1
  1872  #endif /* MDBX_PNL_ASCENDING */
  1873  
  1874  /** Avoid dependence from MSVC CRT and use ntdll.dll instead. */
  1875  #ifndef MDBX_WITHOUT_MSVC_CRT
  1876  #define MDBX_WITHOUT_MSVC_CRT 1
  1877  #elif !(MDBX_WITHOUT_MSVC_CRT == 0 || MDBX_WITHOUT_MSVC_CRT == 1)
  1878  #error MDBX_WITHOUT_MSVC_CRT must be defined as 0 or 1
  1879  #endif /* MDBX_WITHOUT_MSVC_CRT */
  1880  
  1881  /** Size of buffer used during copying an environment/database file. */
  1882  #ifndef MDBX_ENVCOPY_WRITEBUF
  1883  #define MDBX_ENVCOPY_WRITEBUF 1048576u
  1884  #elif MDBX_ENVCOPY_WRITEBUF < 65536u || MDBX_ENVCOPY_WRITEBUF > 1073741824u || \
  1885      MDBX_ENVCOPY_WRITEBUF % 65536u
  1886  #error MDBX_ENVCOPY_WRITEBUF must be defined in range 65536..1073741824 and be multiple of 65536
  1887  #endif /* MDBX_ENVCOPY_WRITEBUF */
  1888  
  1889  /** Forces assertion checking */
  1890  #ifndef MDBX_FORCE_ASSERTIONS
  1891  #define MDBX_FORCE_ASSERTIONS 0
  1892  #elif !(MDBX_FORCE_ASSERTIONS == 0 || MDBX_FORCE_ASSERTIONS == 1)
  1893  #error MDBX_FORCE_ASSERTIONS must be defined as 0 or 1
  1894  #endif /* MDBX_FORCE_ASSERTIONS */
  1895  
  1896  /** Presumed malloc size overhead for each allocation
  1897   * to adjust allocations to be more aligned. */
  1898  #ifndef MDBX_ASSUME_MALLOC_OVERHEAD
  1899  #ifdef __SIZEOF_POINTER__
  1900  #define MDBX_ASSUME_MALLOC_OVERHEAD (__SIZEOF_POINTER__ * 2u)
  1901  #else
  1902  #define MDBX_ASSUME_MALLOC_OVERHEAD (sizeof(void *) * 2u)
  1903  #endif
  1904  #elif MDBX_ASSUME_MALLOC_OVERHEAD < 0 || MDBX_ASSUME_MALLOC_OVERHEAD > 64 ||   \
  1905      MDBX_ASSUME_MALLOC_OVERHEAD % 4
  1906  #error MDBX_ASSUME_MALLOC_OVERHEAD must be defined in range 0..64 and be multiple of 4
  1907  #endif /* MDBX_ASSUME_MALLOC_OVERHEAD */
  1908  
  1909  /** If defined then enables integration with Valgrind,
  1910   * a memory analyzing tool. */
  1911  #ifndef MDBX_USE_VALGRIND
  1912  #endif /* MDBX_USE_VALGRIND */
  1913  
  1914  /** If defined then enables use C11 atomics,
  1915   *  otherwise detects ones availability automatically. */
  1916  #ifndef MDBX_HAVE_C11ATOMICS
  1917  #endif /* MDBX_HAVE_C11ATOMICS */
  1918  
  1919  //------------------------------------------------------------------------------
  1920  
  1921  /** Win32 File Locking API for \ref MDBX_LOCKING */
  1922  #define MDBX_LOCKING_WIN32FILES -1
  1923  
  1924  /** SystemV IPC semaphores for \ref MDBX_LOCKING */
  1925  #define MDBX_LOCKING_SYSV 5
  1926  
  1927  /** POSIX-1 Shared anonymous semaphores for \ref MDBX_LOCKING */
  1928  #define MDBX_LOCKING_POSIX1988 1988
  1929  
  1930  /** POSIX-2001 Shared Mutexes for \ref MDBX_LOCKING */
  1931  #define MDBX_LOCKING_POSIX2001 2001
  1932  
  1933  /** POSIX-2008 Robust Mutexes for \ref MDBX_LOCKING */
  1934  #define MDBX_LOCKING_POSIX2008 2008
  1935  
  1936  /** BeOS Benaphores, aka Futexes for \ref MDBX_LOCKING */
  1937  #define MDBX_LOCKING_BENAPHORE 1995
  1938  
  1939  /** Advanced: Chooses the locking implementation (autodetection by default). */
  1940  #if defined(_WIN32) || defined(_WIN64)
  1941  #define MDBX_LOCKING MDBX_LOCKING_WIN32FILES
  1942  #else
  1943  #ifndef MDBX_LOCKING
  1944  #if defined(_POSIX_THREAD_PROCESS_SHARED) &&                                   \
  1945      _POSIX_THREAD_PROCESS_SHARED >= 200112L && !defined(__FreeBSD__)
  1946  
  1947  /* Some platforms define the EOWNERDEAD error code even though they don't
  1948   * support Robust Mutexes. If in doubt, compile with -DMDBX_LOCKING=2001. */
  1949  #if defined(EOWNERDEAD) && _POSIX_THREAD_PROCESS_SHARED >= 200809L &&          \
  1950      ((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) &&                            \
  1951        _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) ||                                \
  1952       (defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) &&                            \
  1953        _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) ||                                \
  1954       defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) &&     \
  1955      (!defined(__GLIBC__) ||                                                    \
  1956       __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */)
  1957  #define MDBX_LOCKING MDBX_LOCKING_POSIX2008
  1958  #else
  1959  #define MDBX_LOCKING MDBX_LOCKING_POSIX2001
  1960  #endif
  1961  #elif defined(__sun) || defined(__SVR4) || defined(__svr4__)
  1962  #define MDBX_LOCKING MDBX_LOCKING_POSIX1988
  1963  #else
  1964  #define MDBX_LOCKING MDBX_LOCKING_SYSV
  1965  #endif
  1966  #define MDBX_LOCKING_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_LOCKING)
  1967  #else
  1968  #define MDBX_LOCKING_CONFIG MDBX_STRINGIFY(MDBX_LOCKING)
  1969  #endif /* MDBX_LOCKING */
  1970  #endif /* !Windows */
  1971  
  1972  /** Advanced: Using POSIX OFD-locks (autodetection by default). */
  1973  #ifndef MDBX_USE_OFDLOCKS
  1974  #if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) &&   \
  1975      !defined(MDBX_SAFE4QEMU) &&                                                \
  1976      !defined(__sun) /* OFD-lock are broken on Solaris */
  1977  #define MDBX_USE_OFDLOCKS 1
  1978  #else
  1979  #define MDBX_USE_OFDLOCKS 0
  1980  #endif
  1981  #define MDBX_USE_OFDLOCKS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
  1982  #else
  1983  #define MDBX_USE_OFDLOCKS_CONFIG MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
  1984  #endif /* MDBX_USE_OFDLOCKS */
  1985  
  1986  /** Advanced: Using sendfile() syscall (autodetection by default). */
  1987  #ifndef MDBX_USE_SENDFILE
  1988  #if ((defined(__linux__) || defined(__gnu_linux__)) &&                         \
  1989       !defined(__ANDROID_API__)) ||                                             \
  1990      (defined(__ANDROID_API__) && __ANDROID_API__ >= 21)
  1991  #define MDBX_USE_SENDFILE 1
  1992  #else
  1993  #define MDBX_USE_SENDFILE 0
  1994  #endif
  1995  #endif /* MDBX_USE_SENDFILE */
  1996  
  1997  /** Advanced: Using copy_file_range() syscall (autodetection by default). */
  1998  #ifndef MDBX_USE_COPYFILERANGE
  1999  #if __GLIBC_PREREQ(2, 27) && defined(_GNU_SOURCE)
  2000  #define MDBX_USE_COPYFILERANGE 1
  2001  #else
  2002  #define MDBX_USE_COPYFILERANGE 0
  2003  #endif
  2004  #endif /* MDBX_USE_COPYFILERANGE */
  2005  
  2006  /** Advanced: Using sync_file_range() syscall (autodetection by default). */
  2007  #ifndef MDBX_USE_SYNCFILERANGE
  2008  #if ((defined(__linux__) || defined(__gnu_linux__)) &&                         \
  2009       defined(SYNC_FILE_RANGE_WRITE) && !defined(__ANDROID_API__)) ||           \
  2010      (defined(__ANDROID_API__) && __ANDROID_API__ >= 26)
  2011  #define MDBX_USE_SYNCFILERANGE 1
  2012  #else
  2013  #define MDBX_USE_SYNCFILERANGE 0
  2014  #endif
  2015  #endif /* MDBX_USE_SYNCFILERANGE */
  2016  
  2017  //------------------------------------------------------------------------------
  2018  
  2019  #ifndef MDBX_CPU_WRITEBACK_INCOHERENT
  2020  #if defined(__ia32__) || defined(__e2k__) || defined(__hppa) ||                \
  2021      defined(__hppa__) || defined(DOXYGEN)
  2022  #define MDBX_CPU_WRITEBACK_INCOHERENT 0
  2023  #else
  2024  #define MDBX_CPU_WRITEBACK_INCOHERENT 1
  2025  #endif
  2026  #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
  2027  
  2028  #ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
  2029  #ifdef __OpenBSD__
  2030  #define MDBX_MMAP_INCOHERENT_FILE_WRITE 1
  2031  #else
  2032  #define MDBX_MMAP_INCOHERENT_FILE_WRITE 0
  2033  #endif
  2034  #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  2035  
  2036  #ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
  2037  #if defined(__mips) || defined(__mips__) || defined(__mips64) ||               \
  2038      defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) ||            \
  2039      defined(__MWERKS__) || defined(__sgi)
  2040  /* MIPS has cache coherency issues. */
  2041  #define MDBX_MMAP_INCOHERENT_CPU_CACHE 1
  2042  #else
  2043  /* LY: assume no relevant mmap/dcache issues. */
  2044  #define MDBX_MMAP_INCOHERENT_CPU_CACHE 0
  2045  #endif
  2046  #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
  2047  
  2048  #ifndef MDBX_64BIT_ATOMIC
  2049  #if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
  2050  #define MDBX_64BIT_ATOMIC 1
  2051  #else
  2052  #define MDBX_64BIT_ATOMIC 0
  2053  #endif
  2054  #define MDBX_64BIT_ATOMIC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
  2055  #else
  2056  #define MDBX_64BIT_ATOMIC_CONFIG MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
  2057  #endif /* MDBX_64BIT_ATOMIC */
  2058  
  2059  #ifndef MDBX_64BIT_CAS
        /* Auto-detection, used only when the build did not preset the option:
         *  - if ATOMIC_LLONG_LOCK_FREE, else __GCC_ATOMIC_LLONG_LOCK_FREE, else
         *    __CLANG_ATOMIC_LLONG_LOCK_FREE is defined, the result is 1 only
         *    when that macro is > 1, otherwise 0;
         *  - with none of them defined, MSVC, Apple and Doxygen builds get 1;
         *  - anything else falls back to the value of MDBX_64BIT_ATOMIC.
         * MDBX_64BIT_CAS_CONFIG carries an "AUTO=" prefix in the auto case. */
  2060  #if defined(ATOMIC_LLONG_LOCK_FREE)
  2061  #if ATOMIC_LLONG_LOCK_FREE > 1
  2062  #define MDBX_64BIT_CAS 1
  2063  #else
  2064  #define MDBX_64BIT_CAS 0
  2065  #endif
  2066  #elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
  2067  #if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
  2068  #define MDBX_64BIT_CAS 1
  2069  #else
  2070  #define MDBX_64BIT_CAS 0
  2071  #endif
  2072  #elif defined(__CLANG_ATOMIC_LLONG_LOCK_FREE)
  2073  #if __CLANG_ATOMIC_LLONG_LOCK_FREE > 1
  2074  #define MDBX_64BIT_CAS 1
  2075  #else
  2076  #define MDBX_64BIT_CAS 0
  2077  #endif
  2078  #elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
  2079  #define MDBX_64BIT_CAS 1
  2080  #else
  2081  #define MDBX_64BIT_CAS MDBX_64BIT_ATOMIC
  2082  #endif
  2083  #define MDBX_64BIT_CAS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_CAS)
  2084  #else
  2085  #define MDBX_64BIT_CAS_CONFIG MDBX_STRINGIFY(MDBX_64BIT_CAS)
  2086  #endif /* MDBX_64BIT_CAS */
  2087  
  2088  #ifndef MDBX_UNALIGNED_OK
        /* Value meanings, as used by the branches below: 0 = no unaligned access,
         * 4 = unaligned access is OK for 32-bit words, 8 = OK for 64-bit words,
         * 32 = any unaligned access. A preset value of 1 is remapped to 32 by
         * the final #elif of this block. */
  2089  #if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) ||                 \
  2090      defined(ENABLE_UBSAN)
  2091  #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
  2092  #elif defined(__ARM_FEATURE_UNALIGNED)
  2093  #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
  2094  #elif defined(__e2k__) || defined(__elbrus__)
  2095  #if __iset__ > 4
  2096  #define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
  2097  #else
  2098  #define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
  2099  #endif
  2100  #elif defined(__ia32__)
  2101  #define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
  2102  #elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0)
  2103  /* expecting the compiler to optimize this well; this also
  2104   * hushes false-positives from UBSAN (undefined behaviour sanitizer) */
  2105  #define MDBX_UNALIGNED_OK 0
  2106  #else
  2107  #define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
  2108  #endif
  2109  #elif MDBX_UNALIGNED_OK == 1
  2110  #undef MDBX_UNALIGNED_OK
  2111  #define MDBX_UNALIGNED_OK 32 /* any unaligned access allowed */
  2112  #endif                       /* MDBX_UNALIGNED_OK */
  2113  
  2114  #ifndef MDBX_CACHELINE_SIZE
  2115  #if defined(SYSTEM_CACHE_ALIGNMENT_SIZE)
  2116  #define MDBX_CACHELINE_SIZE SYSTEM_CACHE_ALIGNMENT_SIZE
  2117  #elif defined(__ia64__) || defined(__ia64) || defined(_M_IA64)
  2118  #define MDBX_CACHELINE_SIZE 128
  2119  #else
  2120  #define MDBX_CACHELINE_SIZE 64
  2121  #endif
  2122  #endif /* MDBX_CACHELINE_SIZE */
  2123  
  2124  /** @} end of build options */
  2125  /*******************************************************************************
  2126   *******************************************************************************
  2127   ******************************************************************************/
  2128  
#ifndef DOXYGEN

/* If MDBX_DEBUG is not defined, derive it from NDEBUG:
 * 0 when NDEBUG is defined, 1 otherwise. */
#ifndef MDBX_DEBUG
#ifdef NDEBUG
#define MDBX_DEBUG 0
#else
#define MDBX_DEBUG 1
#endif
#endif /* MDBX_DEBUG */

#else

/* !!! These are fake definitions, intended only for Doxygen !!! */

/** Controls enabling of debugging features.
 *
 *  - `MDBX_DEBUG = 0` (by default) Disables any debugging features at all,
 *                     including logging and assertion controls.
 *                     Changing the logging level and corresponding debug
 *                     flags by \ref mdbx_setup_debug() will have no effect.
 *  - `MDBX_DEBUG > 0` Enables code for the debugging features (logging,
 *                     assertions checking and internal audit).
 *                     Simultaneously sets the default logging level
 *                     to the `MDBX_DEBUG` value.
 *                     Also enables \ref MDBX_DBG_AUDIT if `MDBX_DEBUG >= 2`.
 *
 * \ingroup build_option */
#define MDBX_DEBUG 0...7

/** Disables using of GNU libc extensions. */
#define MDBX_DISABLE_GNU_SOURCE 0 or 1

#endif /* DOXYGEN */

/* Undefine NDEBUG if debugging is enforced by a non-zero MDBX_DEBUG */
#if MDBX_DEBUG
#undef NDEBUG
#endif
  2168  
  2169  /*----------------------------------------------------------------------------*/
  2170  /* Atomics */
  2171  
/* Memory ordering requested from the atomic helpers below.
 * When C11 atomics are available, mo_c11_store()/mo_c11_load() translate
 * mo_Relaxed to memory_order_relaxed, and mo_AcquireRelease to
 * memory_order_release (for stores) or memory_order_acquire (for loads). */
enum MDBX_memory_order {
  mo_Relaxed,
  mo_AcquireRelease
  /* , mo_SequentialConsistency */
};
  2177  
/* 32-bit atomic cell. Both members overlay the same storage:
 * `weak` is a plain volatile view, `c11a` is the C11 _Atomic view which
 * exists only when MDBX_HAVE_C11ATOMICS is defined. */
typedef union {
  volatile uint32_t weak;
#ifdef MDBX_HAVE_C11ATOMICS
  volatile _Atomic uint32_t c11a;
#endif /* MDBX_HAVE_C11ATOMICS */
} MDBX_atomic_uint32_t;
  2184  
/* 64-bit atomic cell. All members overlay the same storage:
 *  - `weak` is a plain volatile view;
 *  - `c11a` is the C11 _Atomic view, present when C11 atomics are available
 *    and either MDBX_64BIT_CAS or MDBX_64BIT_ATOMIC is non-zero;
 *  - `low`/`high` are the two 32-bit halves, present when C11 atomics are
 *    missing or either MDBX_64BIT_CAS or MDBX_64BIT_ATOMIC is zero. Their
 *    declaration order follows the byte order, so `low` always overlays the
 *    least significant 32 bits. */
typedef union {
  volatile uint64_t weak;
#if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC)
  volatile _Atomic uint64_t c11a;
#endif
#if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC
  __anonymous_struct_extension__ struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    MDBX_atomic_uint32_t low, high;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    MDBX_atomic_uint32_t high, low;
#else
#error "FIXME: Unsupported byte order"
#endif /* __BYTE_ORDER__ */
  };
#endif
} MDBX_atomic_uint64_t;
  2202  
#ifdef MDBX_HAVE_C11ATOMICS

/* Crutches for C11 atomic compiler's bugs.
 *
 * MDBX_c11a_ro(type, ptr) yields the pointer handed to atomic loads and
 * MDBX_c11a_rw(type, ptr) the pointer handed to atomic stores:
 *  - old LCC on e2k: fall back to the plain `weak` member;
 *  - the clang branch: cast the read-only pointer to a non-const
 *    `volatile _Atomic(type) *`;
 *  - otherwise: use the `c11a` member directly.
 *
 * NOTE(review): `__clang__` is normally just a "compiler is clang" marker
 * (defined as 1) and does not carry a version, so `__clang__ < 8` would hold
 * for every clang; `__clang_major__` looks like what was intended. Confirm
 * before changing, since vendor clang builds number versions differently. */
#if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127
#define MDBX_c11a_ro(type, ptr) (&(ptr)->weak)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->weak)
#elif defined(__clang__) && __clang__ < 8
#define MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
#else
#define MDBX_c11a_ro(type, ptr) (&(ptr)->c11a)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
#endif /* Crutches for C11 atomic compiler's bugs */

/* Translate enum MDBX_memory_order into a C11 memory_order for a store:
 * relaxed -> relaxed, acquire-release -> release, anything else -> seq_cst. */
#define mo_c11_store(fence)                                                    \
  (((fence) == mo_Relaxed)          ? memory_order_relaxed                     \
   : ((fence) == mo_AcquireRelease) ? memory_order_release                     \
                                    : memory_order_seq_cst)
/* Translate enum MDBX_memory_order into a C11 memory_order for a load:
 * relaxed -> relaxed, acquire-release -> acquire, anything else -> seq_cst. */
#define mo_c11_load(fence)                                                     \
  (((fence) == mo_Relaxed)          ? memory_order_relaxed                     \
   : ((fence) == mo_AcquireRelease) ? memory_order_acquire                     \
                                    : memory_order_seq_cst)

#endif /* MDBX_HAVE_C11ATOMICS */
  2227  
  2228  #ifndef __cplusplus
  2229  
  2230  #ifdef MDBX_HAVE_C11ATOMICS
  2231  #define osal_memory_fence(order, write)                                        \
  2232    atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order))
  2233  #else /* MDBX_HAVE_C11ATOMICS */
  2234  #define osal_memory_fence(order, write)                                        \
  2235    do {                                                                         \
  2236      osal_compiler_barrier();                                                   \
  2237      if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed           \
  2238                                                          : mo_AcquireRelease))  \
  2239        osal_memory_barrier();                                                   \
  2240    } while (0)
  2241  #endif /* MDBX_HAVE_C11ATOMICS */
  2242  
/* For the LCC compiler with C11 atomics the 32/64-bit store and load helpers
 * are provided as macros instead of inline functions. The store macros use
 * a statement expression: `value` is evaluated once into a local, stored with
 * the ordering from mo_c11_store(order), and the stored value is the result.
 * The load macros yield the value loaded with mo_c11_load(order). */
#if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__)
#define atomic_store32(p, value, order)                                        \
  ({                                                                           \
    const uint32_t value_to_store = (value);                                   \
    atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store,           \
                          mo_c11_store(order));                                \
    value_to_store;                                                            \
  })
#define atomic_load32(p, order)                                                \
  atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order))
#define atomic_store64(p, value, order)                                        \
  ({                                                                           \
    const uint64_t value_to_store = (value);                                   \
    atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store,           \
                          mo_c11_store(order));                                \
    value_to_store;                                                            \
  })
#define atomic_load64(p, order)                                                \
  atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order))
#endif /* LCC && MDBX_HAVE_C11ATOMICS */
  2263  
#ifndef atomic_store32
/* Stores `value` into the 32-bit atomic cell `*p` with the requested memory
 * ordering and returns `value`.
 *
 * With C11 atomics this is atomic_store_explicit() using the ordering from
 * mo_c11_store(order). Otherwise the value is written through the volatile
 * `weak` member: a compiler barrier precedes the write unless the order is
 * mo_Relaxed, and osal_memory_fence(order, true) follows it. */
MDBX_MAYBE_UNUSED static __always_inline uint32_t
atomic_store32(MDBX_atomic_uint32_t *p, const uint32_t value,
               enum MDBX_memory_order order) {
  /* the union must not be wider than the value it wraps */
  STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4);
#ifdef MDBX_HAVE_C11ATOMICS
  assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p)));
  atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order));
#else  /* MDBX_HAVE_C11ATOMICS */
  if (order != mo_Relaxed)
    osal_compiler_barrier();
  p->weak = value;
  osal_memory_fence(order, true);
#endif /* MDBX_HAVE_C11ATOMICS */
  return value;
}
#endif /* atomic_store32 */
  2281  
#ifndef atomic_load32
/* Loads and returns the value of the 32-bit atomic cell `*p` with the
 * requested memory ordering.
 *
 * With C11 atomics this is atomic_load_explicit() using the ordering from
 * mo_c11_load(order). Otherwise osal_memory_fence(order, false) is issued
 * first, the value is read through the volatile `weak` member, and a compiler
 * barrier follows the read unless the order is mo_Relaxed. */
MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
    const volatile MDBX_atomic_uint32_t *p, enum MDBX_memory_order order) {
  /* the union must not be wider than the value it wraps */
  STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4);
#ifdef MDBX_HAVE_C11ATOMICS
  assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p)));
  return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order));
#else  /* MDBX_HAVE_C11ATOMICS */
  osal_memory_fence(order, false);
  const uint32_t value = p->weak;
  if (order != mo_Relaxed)
    osal_compiler_barrier();
  return value;
#endif /* MDBX_HAVE_C11ATOMICS */
}
#endif /* atomic_load32 */
  2298  
  2299  #endif /* !__cplusplus */
  2300  
  2301  /*----------------------------------------------------------------------------*/
  2302  /* Basic constants and types */
  2303  
/* A stamp that identifies a file as an MDBX file.
 * There's nothing special about this value other than that it is easily
 * recognizable, and it will reflect any byte order mismatches.
 * The value occupies 56 bits; MDBX_DATA_MAGIC and MDBX_LOCK_MAGIC shift it
 * left by 8 and put version information into the low byte. */
#define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11)

/* FROZEN: The version number for a database's datafile format. */
#define MDBX_DATA_VERSION 3
/* The version number for a database's lockfile format. */
#define MDBX_LOCK_VERSION 4

/* Handle for the DB used to track free pages. */
#define FREE_DBI 0
/* Handle for the default (main) DB. */
#define MAIN_DBI 1
/* Number of DBs in a meta page (free and main) - also hardcoded elsewhere */
#define CORE_DBS 2

/* Number of meta pages - also hardcoded elsewhere */
#define NUM_METAS 3
  2323  
/* A page number in the database.
 *
 * MDBX uses 32 bit for page numbers. This limits database
 * size up to 2^44 bytes, in case of 4K pages.
 *
 * NOTE(review): MAX_PAGENO below is 2^31-1, so with 4K pages the addressable
 * size is just under 2^43 bytes; the 2^44 figure corresponds to the full
 * 32-bit range - confirm which one is meant.
 *
 * PRIaPGNO is the printf format for pgno_t. MIN_PAGENO is the first page
 * number after the NUM_METAS meta pages. */
typedef uint32_t pgno_t;
typedef MDBX_atomic_uint32_t atomic_pgno_t;
#define PRIaPGNO PRIu32
#define MAX_PAGENO UINT32_C(0x7FFFffff)
#define MIN_PAGENO NUM_METAS
  2333  
/* Upper bound for valid 64-bit transaction IDs: MAX_TXNID is this threshold
 * minus one, i.e. values with all upper 32 bits set are not valid txnids. */
#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000)

/* A transaction ID.
 *
 * PRIaTXN is the printf format for txnid_t.
 * NOTE(review): txnid_t is unsigned (uint64_t) while PRIaTXN is the signed
 * PRIi64 format - confirm this is intentional (for instance, INVALID_TXNID
 * would then print as -1).
 *
 * INITIAL_TXNID is MIN_TXNID advanced by NUM_METAS - 1. */
typedef uint64_t txnid_t;
typedef MDBX_atomic_uint64_t atomic_txnid_t;
#define PRIaTXN PRIi64
#define MIN_TXNID UINT64_C(1)
#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1)
#define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1)
#define INVALID_TXNID UINT64_MAX
/* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
 * #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */
/* Unless preset, the txnid step is 1 when 64-bit CAS is available
 * (MDBX_64BIT_CAS is non-zero) and 2 otherwise. */
#ifndef xMDBX_TXNID_STEP
#if MDBX_64BIT_CAS
#define xMDBX_TXNID_STEP 1u
#else
#define xMDBX_TXNID_STEP 2u
#endif
#endif /* xMDBX_TXNID_STEP */
  2353  
/* Used for offsets within a single page.
 * Since memory pages are typically 4 or 8KB in size, i.e. offsets need
 * 12-13 bits, a 16-bit type is plenty. */
typedef uint16_t indx_t;

/* One mebibyte (2^20 bytes) as a size_t. */
#define MEGABYTE ((size_t)1 << 20)
  2360  
  2361  /*----------------------------------------------------------------------------*/
  2362  /* Core structures for database and shared memory (i.e. format definition) */
  2363  #pragma pack(push, 4)
  2364  
/* Information about a single database in the environment.
 * This structure is part of the format definition (it is embedded into
 * MDBX_meta as mm_dbs[]), so the field order and sizes must not change. */
typedef struct MDBX_db {
  uint16_t md_flags;        /* see mdbx_dbi_open */
  uint16_t md_depth;        /* depth of this tree */
  uint32_t md_xsize;        /* key-size for MDBX_DUPFIXED (LEAF2 pages);
                               for FREE_DBI inside a meta page this slot
                               holds the page size, see mm_psize */
  pgno_t md_root;           /* the root page of this tree */
  pgno_t md_branch_pages;   /* number of internal pages */
  pgno_t md_leaf_pages;     /* number of leaf pages */
  pgno_t md_overflow_pages; /* number of overflow pages */
  uint64_t md_seq;          /* table sequence counter */
  uint64_t md_entries;      /* number of data items */
  uint64_t md_mod_txnid;    /* txnid of last committed modification */
} MDBX_db;
  2378  
/* Database size-related parameters (geometry).
 * Embedded into MDBX_meta as mm_geo; all page counts are in units of
 * database pages. */
typedef struct MDBX_geo {
  uint16_t grow_pv;   /* datafile growth step as a 16-bit packed (exponential
                           quantized) value */
  uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed
                           (exponential quantized) value */
  pgno_t lower;       /* minimal size of datafile in pages */
  pgno_t upper;       /* maximal size of datafile in pages */
  pgno_t now;         /* current size of datafile in pages */
  pgno_t next;        /* first unused page in the datafile,
                         but actually the file may be shorter. */
} MDBX_geo;
  2391  
/* Meta page content.
 * A meta page is the start point for accessing a database snapshot.
 * There are NUM_METAS meta pages, and MIN_PAGENO (the first non-meta page
 * number) equals NUM_METAS.
 *
 * NOTE(review): this comment used to say "Pages 0-1 are meta pages.
 * Transaction N writes meta page (N % 2)", which describes a two-meta layout
 * and does not match NUM_METAS == 3 - confirm the actual rotation rule. */
typedef struct MDBX_meta {
  /* Stamp identifying this as an MDBX file.
   * It must be set to MDBX_MAGIC with MDBX_DATA_VERSION. */
  uint32_t mm_magic_and_version[2];

  /* txnid that committed this page, the first of a two-phase-update pair
   * (the second one is mm_txnid_b below) */
  union {
    MDBX_atomic_uint32_t mm_txnid_a[2];
    uint64_t unsafe_txnid;
  };

  uint16_t mm_extra_flags;  /* extra DB flags, zero (nothing) for now */
  uint8_t mm_validator_id;  /* ID of checksum and page validation method,
                             * zero (nothing) for now */
  uint8_t mm_extra_pagehdr; /* extra bytes in the page header,
                             * zero (nothing) for now */

  MDBX_geo mm_geo; /* database size-related parameters */

  MDBX_db mm_dbs[CORE_DBS]; /* first is free space, 2nd is main db */
                            /* The size of pages used in this DB is kept
                             * in the md_xsize slot of the FREE_DBI record */
#define mm_psize mm_dbs[FREE_DBI].md_xsize
  MDBX_canary mm_canary;

  /* Data signature: values above MDBX_DATASIGN_WEAK mark the meta page
   * as "steady" (see SIGN_IS_STEADY / META_IS_STEADY). */
#define MDBX_DATASIGN_NONE 0u
#define MDBX_DATASIGN_WEAK 1u
#define SIGN_IS_STEADY(sign) ((sign) > MDBX_DATASIGN_WEAK)
#define META_IS_STEADY(meta)                                                   \
  SIGN_IS_STEADY(unaligned_peek_u64_volatile(4, (meta)->mm_sign))
  union {
    uint32_t mm_sign[2];
    uint64_t unsafe_sign;
  };

  /* txnid that committed this page, the second of a two-phase-update pair */
  MDBX_atomic_uint32_t mm_txnid_b[2];

  /* Number of non-meta pages which were put in GC after COW. May be 0 in case
   * DB was previously handled by libmdbx without corresponding feature.
   * This value, coupled with mr_snapshot_pages_retired, allows fast estimation
   * of "how much reader is restraining GC recycling". */
  uint32_t mm_pages_retired[2];

  /* The analogue of /proc/sys/kernel/random/boot_id or similar, used to
   * determine whether the system was rebooted after the last use of the
   * database files. If there was no reboot, then there is no need to rollback
   * to the last steady sync point. Zeros mean that no relevant information is
   * available from the system. */
  bin128_t mm_bootid;

} MDBX_meta;
  2446  
  2447  #pragma pack(1)
  2448  
/* Common header for all page types. The page type depends on mp_flags.
 *
 * P_BRANCH and P_LEAF pages have unsorted 'MDBX_node's at the end, with
 * sorted mp_ptrs[] entries referring to them. Exception: P_LEAF2 pages
 * omit mp_ptrs and pack sorted MDBX_DUPFIXED values after the page header.
 *
 * P_OVERFLOW records occupy one or more contiguous pages where only the
 * first has a page header. They hold the real data of F_BIGDATA nodes.
 *
 * P_SUBP sub-pages are small leaf "pages" with duplicate data.
 * A node with flag F_DUPDATA but not F_SUBDATA contains a sub-page.
 * (Duplicate data can also go in sub-databases, which use normal pages.)
 *
 * P_META pages contain MDBX_meta, the start point of an MDBX snapshot.
 *
 * Each non-metapage up to MDBX_meta.mm_last_pg is reachable exactly once
 * in the snapshot: Either used by a database or listed in a GC record.
 * NOTE(review): MDBX_meta as declared in this file has no mm_last_pg field;
 * presumably mm_geo.next is meant - confirm. */
typedef struct MDBX_page {
  union {
    /* Page state checks, comparing the page's mp_txnid with the txnid
     * (mt_txnid) or the front (mt_front) of the given transaction. */
#define IS_FROZEN(txn, p) ((p)->mp_txnid < (txn)->mt_txnid)
#define IS_SPILLED(txn, p) ((p)->mp_txnid == (txn)->mt_txnid)
#define IS_SHADOWED(txn, p) ((p)->mp_txnid > (txn)->mt_txnid)
#define IS_VALID(txn, p) ((p)->mp_txnid <= (txn)->mt_front)
#define IS_MODIFIABLE(txn, p) ((p)->mp_txnid == (txn)->mt_front)
    uint64_t
        mp_txnid; /* txnid which created this page, maybe zero in legacy DB */
    struct MDBX_page *mp_next; /* for in-memory list of freed pages */
  };
  uint16_t mp_leaf2_ksize;   /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01u       /* branch page */
#define P_LEAF 0x02u         /* leaf page */
#define P_OVERFLOW 0x04u     /* overflow page */
#define P_META 0x08u         /* meta page */
#define P_LEGACY_DIRTY 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */
#define P_BAD P_LEGACY_DIRTY /* explicit flag for invalid/bad page */
#define P_LEAF2 0x20u        /* for MDBX_DUPFIXED records */
#define P_SUBP 0x40u         /* for MDBX_DUPSORT sub-pages */
#define P_SPILLED 0x2000u    /* spilled in parent txn */
#define P_LOOSE 0x4000u      /* page was dirtied then freed, can be reused */
#define P_FROZEN 0x8000u     /* used for retire page with known status */
  /* Mask of every flag bit other than
   * P_BRANCH, P_LEAF, P_LEAF2, P_OVERFLOW and P_SPILLED */
#define P_ILL_BITS                                                             \
  ((uint16_t) ~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED))
  uint16_t mp_flags;
  union {
    uint32_t mp_pages; /* number of overflow pages */
    __anonymous_struct_extension__ struct {
      indx_t mp_lower; /* lower bound of free space */
      indx_t mp_upper; /* upper bound of free space */
    };
  };
  pgno_t mp_pgno; /* page number */

  /* The flexible array member is declared only for C99 and later,
   * or for MSVC in C mode. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
    (!defined(__cplusplus) && defined(_MSC_VER))
  indx_t mp_ptrs[] /* dynamic size */;
#endif /* C99 */
} MDBX_page;
  2506  
/* The low 8 bits of mp_flags, i.e. all of the page-type bits */
#define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags)

/* Drop the P_SUBP and legacy P_DIRTY flags for sub-pages, for compatibility;
 * other pages yield PAGETYPE_WHOLE() unchanged. */
#define PAGETYPE_COMPAT(p)                                                     \
  (unlikely(PAGETYPE_WHOLE(p) & P_SUBP)                                        \
       ? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY)                        \
       : PAGETYPE_WHOLE(p))

/* Size of the page header, excluding dynamic data at the end */
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
  2517  
  2518  #pragma pack(pop)
  2519  
#if MDBX_ENABLE_PGOP_STAT
/* Statistics of page operations, accumulated over all (running, completed
 * and aborted) transactions. Declared only when MDBX_ENABLE_PGOP_STAT is
 * non-zero; embedded into MDBX_lockinfo as mti_pgop_stat. */
typedef struct {
  MDBX_atomic_uint64_t newly;   /* Quantity of a new pages added */
  MDBX_atomic_uint64_t cow;     /* Quantity of pages copied for update */
  MDBX_atomic_uint64_t clone;   /* Quantity of parent's dirty pages clones
                                   for nested transactions */
  MDBX_atomic_uint64_t split;   /* Page splits */
  MDBX_atomic_uint64_t merge;   /* Page merges */
  MDBX_atomic_uint64_t spill;   /* Quantity of spilled dirty pages */
  MDBX_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */
  MDBX_atomic_uint64_t
      wops; /* Number of explicit write operations (not a pages) to a disk */
  MDBX_atomic_uint64_t
      gcrtime; /* Time spent reading/searching GC (aka FreeDB). The
                  unit/scale is platform-dependent, see osal_monotime(). */
} MDBX_pgop_stat_t;
#endif /* MDBX_ENABLE_PGOP_STAT */
  2539  
/* Select the inter-process lock type (osal_ipclock_t) and its signature
 * (MDBX_CLOCK_SIGN, which is mixed into MDBX_LOCK_FORMAT) according to the
 * MDBX_LOCKING build option. An unknown MDBX_LOCKING value is a build
 * error. */
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
typedef void osal_ipclock_t;
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV

#define MDBX_CLOCK_SIGN UINT32_C(0xF18D)
typedef mdbx_pid_t osal_ipclock_t;
/* Provide a substitute when the platform does not define EOWNERDEAD */
#ifndef EOWNERDEAD
#define EOWNERDEAD MDBX_RESULT_TRUE
#endif

#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 ||                                \
    MDBX_LOCKING == MDBX_LOCKING_POSIX2008
#define MDBX_CLOCK_SIGN UINT32_C(0x8017)
typedef pthread_mutex_t osal_ipclock_t;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
#define MDBX_CLOCK_SIGN UINT32_C(0xFC29)
typedef sem_t osal_ipclock_t;
#else
#error "FIXME"
#endif /* MDBX_LOCKING */

/* Helpers for the lock object itself; declared only for locking modes
 * numbered above MDBX_LOCKING_SYSV and only when compiling as C. */
#if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus)
MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc);
MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc);
#endif /* MDBX_LOCKING */
  2566  
  2567  /* Reader Lock Table
  2568   *
  2569   * Readers don't acquire any locks for their data access. Instead, they
  2570   * simply record their transaction ID in the reader table. The reader
  2571   * mutex is needed just to find an empty slot in the reader table. The
  2572   * slot's address is saved in thread-specific data so that subsequent
  2573   * read transactions started by the same thread need no further locking to
  2574   * proceed.
  2575   *
  2576   * If MDBX_NOTLS is set, the slot address is not saved in thread-specific data.
  2577   * No reader table is used if the database is on a read-only filesystem.
  2578   *
  2579   * Since the database uses multi-version concurrency control, readers don't
  2580   * actually need any locking. This table is used to keep track of which
  2581   * readers are using data from which old transactions, so that we'll know
  2582   * when a particular old transaction is no longer in use. Old transactions
  2583   * that have discarded any data pages can then have those pages reclaimed
  2584   * for use by a later write transaction.
  2585   *
  2586   * The lock table is constructed such that reader slots are aligned with the
  2587   * processor's cache line size. Any slot is only ever used by one thread.
  2588   * This alignment guarantees that there will be no contention or cache
  2589   * thrashing as threads update their own slot info, and also eliminates
  2590   * any need for locking when accessing a slot.
  2591   *
  2592   * A writer thread will scan every slot in the table to determine the oldest
  2593   * outstanding reader transaction. Any freed pages older than this will be
  2594   * reclaimed by the writer. The writer doesn't use any locks when scanning
  2595   * this table. This means that there's no guarantee that the writer will
  2596   * see the most up-to-date reader info, but that's not required for correct
  2597   * operation - all we need is to know the upper bound on the oldest reader,
  2598   * we don't care at all about the newest reader. So the only consequence of
  2599   * reading stale information here is that old pages might hang around a
  2600   * while longer before being reclaimed. That's actually good anyway, because
  2601   * the longer we delay reclaiming old pages, the more likely it is that a
  2602   * string of contiguous pages can be found after coalescing old pages from
  2603   * many old transactions together. */
  2604  
/* The actual reader record: one slot of the reader table
 * (MDBX_lockinfo.mti_readers[]). Its size and the offset of
 * mr_snapshot_pages_used are mixed into MDBX_LOCK_FORMAT. */
typedef struct MDBX_reader {
  /* Current Transaction ID when this transaction began, or (txnid_t)-1.
   * Multiple readers that start at the same time will probably have the
   * same ID here. Again, it's not important to exclude them from
   * anything; all we need to know is which version of the DB they
   * started from so we can avoid overwriting any data used in that
   * particular version. */
  MDBX_atomic_uint64_t /* txnid_t */ mr_txnid;

  /* The information we store in a single slot of the reader table.
   * In addition to a transaction ID, we also record the process and
   * thread ID that owns a slot, so that we can detect stale information,
   * e.g. threads or processes that went away without cleaning up.
   *
   * NOTE: We currently don't check for stale records.
   * We simply re-init the table when we know that we're the only process
   * opening the lock file. */

  /* The thread ID of the thread owning this txn. */
  MDBX_atomic_uint64_t mr_tid;

  /* The process ID of the process owning this reader txn. */
  MDBX_atomic_uint32_t mr_pid;

  /* The number of pages used in the reader's MVCC snapshot,
   * i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */
  atomic_pgno_t mr_snapshot_pages_used;
  /* Number of retired pages at the time this reader starts its transaction.
   * So, at any time the difference
   * mm_pages_retired - mr_snapshot_pages_retired
   * gives the number of pages which this reader is restraining from reuse. */
  MDBX_atomic_uint64_t mr_snapshot_pages_retired;
} MDBX_reader;
  2638  
/* The header for the reader table (a memory-mapped lock file).
 * The MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) markers start a new cache line for
 * the group of fields that follows each of them. Offsets of
 * mti_oldest_reader, mti_numreaders and mti_readers are mixed into
 * MDBX_LOCK_FORMAT. */
typedef struct MDBX_lockinfo {
  /* Stamp identifying this as an MDBX file.
   * It must be set to MDBX_MAGIC with MDBX_LOCK_VERSION. */
  uint64_t mti_magic_and_version;

  /* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */
  uint32_t mti_os_and_format;

  /* Flags with which the environment was opened. */
  MDBX_atomic_uint32_t mti_envmode;

  /* Threshold of un-synced-with-disk pages for auto-sync feature,
   * zero means no-threshold, i.e. auto-sync is disabled. */
  atomic_pgno_t mti_autosync_threshold;

  /* Low 32-bit of txnid with which meta-pages was synced,
   * i.e. for sync-polling in the MDBX_NOMETASYNC mode. */
  MDBX_atomic_uint32_t mti_meta_sync_txnid;

  /* Period for timed auto-sync feature, i.e. at the every steady checkpoint
   * the mti_unsynced_timeout sets to the current_time + mti_autosync_period.
   * The time value is represented in a suitable system-dependent form, for
   * example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC).
   * Zero means timed auto-sync is disabled. */
  MDBX_atomic_uint64_t mti_autosync_period;

  /* Marker to distinguish uniqueness of DB/CLK. */
  MDBX_atomic_uint64_t mti_bait_uniqueness;

  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/

#if MDBX_ENABLE_PGOP_STAT
  /* Statistics of costly ops of all (running, completed and aborted)
   * transactions */
  MDBX_pgop_stat_t mti_pgop_stat;
#endif /* MDBX_ENABLE_PGOP_STAT*/

  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/

  /* Write transaction lock. */
#if MDBX_LOCKING > 0
  osal_ipclock_t mti_wlock;
#endif /* MDBX_LOCKING > 0 */

  atomic_txnid_t mti_oldest_reader;

  /* Timestamp of the last steady sync. Value is represented in a suitable
   * system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
   * clock_gettime(CLOCK_MONOTONIC). */
  MDBX_atomic_uint64_t mti_sync_timestamp;

  /* Number of un-synced-with-disk pages for auto-sync feature. */
  atomic_pgno_t mti_unsynced_pages;

  /* Number of page which was discarded last time by madvise(MADV_FREE). */
  atomic_pgno_t mti_discarded_tail;

  /* Timestamp of the last readers check. */
  MDBX_atomic_uint64_t mti_reader_check_timestamp;

  /* Shared anchor for tracking readahead edge and enabled/disabled status. */
  pgno_t mti_readahead_anchor;

  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/

  /* Readers registration lock. */
#if MDBX_LOCKING > 0
  osal_ipclock_t mti_rlock;
#endif /* MDBX_LOCKING > 0 */

  /* The number of slots that have been used in the reader table.
   * This always records the maximum count, it is not decremented
   * when readers release their slots. */
  MDBX_atomic_uint32_t mti_numreaders;
  MDBX_atomic_uint32_t mti_readers_refresh_flag;

  /* The reader table itself; the flexible array member is declared only
   * for C99 and later, or for MSVC in C mode. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
    (!defined(__cplusplus) && defined(_MSC_VER))
  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
  MDBX_reader mti_readers[] /* dynamic size */;
#endif /* C99 */
} MDBX_lockinfo;
  2722  
/* Lockfile format signature: version, features and field layout.
 * Mixes MDBX_CLOCK_SIGN (the lock type) with the size of MDBX_reader and
 * the offsets of several MDBX_reader / MDBX_lockinfo fields, each weighted
 * by its own multiplier, so a change in any of them changes the signature
 * (barring collisions). */
#define MDBX_LOCK_FORMAT                                                       \
  (MDBX_CLOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 +              \
   (unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 +             \
   (unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 +                 \
   (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 +                    \
   (unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29)

/* Datafile stamp: MDBX_MAGIC in the upper 56 bits, and in the low byte
 * the MDBX_PNL_ASCENDING option (weight 64) plus MDBX_DATA_VERSION. */
#define MDBX_DATA_MAGIC                                                        \
  ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION)

/* The same stamp, but with the legacy data version 2 */
#define MDBX_DATA_MAGIC_LEGACY_COMPAT                                          \
  ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2)

/* Legacy stamp with 255 in the low byte */
#define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255)

/* Lockfile stamp: MDBX_MAGIC in the upper 56 bits and MDBX_LOCK_VERSION
 * in the low byte. */
#define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION)
  2740  
/* The maximum size of a database page.
 *
 * It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper.
 *
 * MDBX will use database pages < OS pages if needed.
 * That causes more I/O in write transactions: The OS must
 * know (read) the whole page before writing a partial page.
 *
 * Note that we don't currently support Huge pages. On Linux,
 * regular data files cannot use Huge pages, and in general
 * Huge pages aren't actually pageable. We rely on the OS
 * demand-pager to read our data and page it out when memory
 * pressure from other processes is high. So until OSs have
 * actual paging support for Huge pages, they're not viable. */
#define MAX_PAGESIZE MDBX_MAX_PAGESIZE
#define MIN_PAGESIZE MDBX_MIN_PAGESIZE

/* The smallest map: MIN_PAGENO pages of the minimal page size */
#define MIN_MAPSIZE (MIN_PAGESIZE * MIN_PAGENO)
/* The 32-bit map size limit, which is lower on Windows */
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
/* The 64-bit map size limit: (MAX_PAGENO + 1) pages of the maximal page size,
 * computed in 64-bit arithmetic */
#define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MAX_PAGESIZE)

/* Pick the map size limit and the page-list length limit by word size */
#if MDBX_WORDBITS >= 64
#define MAX_MAPSIZE MAX_MAPSIZE64
#define MDBX_PGL_LIMIT ((size_t)MAX_PAGENO)
#else
#define MAX_MAPSIZE MAX_MAPSIZE32
#define MDBX_PGL_LIMIT (MAX_MAPSIZE32 / MIN_PAGESIZE)
#endif /* MDBX_WORDBITS */

#define MDBX_READERS_LIMIT 32767
#define MDBX_RADIXSORT_THRESHOLD 333
  2776  
  2777  /*----------------------------------------------------------------------------*/
  2778  
/* A PNL is a Page Number List, a sorted array of IDs.
 * The first element of the array is a counter for how many actual page-numbers
 * are in the list. By default PNLs are sorted in descending order, which allows
 * cutting off the page with the lowest pgno (at the tail) by just truncating
 * the list. The sort order of PNLs is controlled by the MDBX_PNL_ASCENDING
 * build option. */
typedef pgno_t *MDBX_PNL;

/* Strict ordering predicates for two neighbouring PNL items: ORDERED holds
 * when `first` may precede `last` in the configured sort order, DISORDERED is
 * its negation (equal values count as disordered). */
#if MDBX_PNL_ASCENDING
#define MDBX_PNL_ORDERED(first, last) ((first) < (last))
#define MDBX_PNL_DISORDERED(first, last) ((first) >= (last))
#else
#define MDBX_PNL_ORDERED(first, last) ((first) > (last))
#define MDBX_PNL_DISORDERED(first, last) ((first) <= (last))
#endif

/* List of txnid, only for MDBX_txn.tw.lifo_reclaimed */
typedef txnid_t *MDBX_TXL;
  2796  
/* A Dirty-Page list item is a pgno/pointer pair. */
typedef struct MDBX_dp {
  MDBX_page *ptr; /* pointer half of the pair */
  pgno_t pgno;    /* page-number half of the pair */
  /* The bit-fields below overlay `extra`, so both can be read or reset at
   * once through `extra`. */
  union {
    unsigned extra;
    __anonymous_struct_extension__ struct {
      unsigned multi : 1; /* NOTE(review): presumably marks a multi-page
                             (overflow) item — confirm */
      unsigned lru : 31;  /* NOTE(review): presumably an LRU stamp, see
                             MDBX_txn.tw.dirtylru — confirm */
    };
  };
} MDBX_dp;
  2809  
/* A DPL (dirty-page list) is a sorted array of MDBX_dp items. */
typedef struct MDBX_dpl {
  unsigned sorted; /* NOTE(review): presumably the length of the already
                      sorted prefix of items[] — confirm */
  unsigned length; /* NOTE(review): presumably the number of items in use —
                      confirm */
  unsigned pages_including_loose; /* number of pages, not number of entries. */
  unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
  /* The flexible array member is declared only for C99-or-later compilers and
   * for MSVC in C mode. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
    (!defined(__cplusplus) && defined(_MSC_VER))
  MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
#endif
} MDBX_dpl;
  2821  
/* PNL sizes.
 * The *_INITIAL values are the granule minus 2 items and minus the assumed
 * malloc overhead expressed in items. */
#define MDBX_PNL_GRANULATE 1024
#define MDBX_PNL_INITIAL                                                       \
  (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))

/* TXL sizes, computed the same way but in units of txnid_t. */
#define MDBX_TXL_GRANULATE 32
#define MDBX_TXL_INITIAL                                                       \
  (MDBX_TXL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
/* Upper bound for a TXL: 2^17 items minus the same two reductions. */
#define MDBX_TXL_MAX                                                           \
  ((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
  2832  
  2833  #define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
  2834  #define MDBX_PNL_SIZE(pl) ((pl)[0])
  2835  #define MDBX_PNL_FIRST(pl) ((pl)[1])
  2836  #define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
  2837  #define MDBX_PNL_BEGIN(pl) (&(pl)[1])
  2838  #define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
  2839  
  2840  #if MDBX_PNL_ASCENDING
  2841  #define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
  2842  #define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl)
  2843  #else
  2844  #define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl)
  2845  #define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
  2846  #endif
  2847  
  2848  #define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
  2849  #define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
  2850  
  2851  /*----------------------------------------------------------------------------*/
  2852  /* Internal structures */
  2853  
  2854  /* Auxiliary DB info.
  2855   * The information here is mostly static/read-only. There is
  2856   * only a single copy of this record in the environment. */
  2857  typedef struct MDBX_dbx {
  2858    MDBX_val md_name;                /* name of the database */
  2859    MDBX_cmp_func *md_cmp;           /* function for comparing keys */
  2860    MDBX_cmp_func *md_dcmp;          /* function for comparing data items */
  2861    size_t md_klen_min, md_klen_max; /* min/max key length for the database */
  2862    size_t md_vlen_min,
  2863        md_vlen_max; /* min/max value/data length for the database */
  2864  } MDBX_dbx;
  2865  
  2866  typedef struct troika {
  2867    uint8_t fsm, recent, prefer_steady, tail_and_flags;
  2868  #define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
  2869  #define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
  2870  #define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
  2871  #define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3)
  2872    txnid_t txnid[NUM_METAS];
  2873  } meta_troika_t;
  2874  
/* A database transaction.
 * Every operation requires a transaction handle.
 *
 * The common fields come first; the trailing anonymous union holds the
 * reader-only part (`to`) and the writer-only part (`tw`). */
struct MDBX_txn {
#define MDBX_MT_SIGNATURE UINT32_C(0x93D53A31)
  uint32_t mt_signature;

  /* Transaction Flags */
  /* mdbx_txn_begin() flags */
#define MDBX_TXN_RO_BEGIN_FLAGS (MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE)
#define MDBX_TXN_RW_BEGIN_FLAGS                                                \
  (MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY)
  /* Additional flag for sync_locked() */
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)

  /* Internal state flags kept in mt_flags next to the begin-flags above. */
#define TXN_FLAGS                                                              \
  (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS |     \
   MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)

  /* Compile-time check: state flags, begin-flags and MDBX_SHRINK_ALLOWED must
   * not share any bit. */
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) ||       \
    ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) &         \
     MDBX_SHRINK_ALLOWED)
#error "Oops, some txn flags overlapped or wrong"
#endif
  uint32_t mt_flags;

  MDBX_txn *mt_parent; /* parent of a nested txn */
  /* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */
  MDBX_txn *mt_child;
  MDBX_geo mt_geo;
  /* next unallocated page */
#define mt_next_pgno mt_geo.next
  /* corresponding to the current size of datafile */
#define mt_end_pgno mt_geo.now

  /* The ID of this transaction. IDs are integers incrementing from
   * INITIAL_TXNID. Only committed write transactions increment the ID. If a
   * transaction aborts, the ID may be re-used by the next writer. */
  txnid_t mt_txnid;
  txnid_t mt_front; /* NOTE(review): not described here — confirm meaning */

  MDBX_env *mt_env; /* the DB environment */
  /* Array of records for each DB known in the environment. */
  MDBX_dbx *mt_dbxs;
  /* Array of MDBX_db records for each known DB */
  MDBX_db *mt_dbs;
  /* Array of sequence numbers for each DB handle */
  MDBX_atomic_uint32_t *mt_dbiseqs;

  /* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
#define DBI_STALE MDBX_DBI_STALE /* Named-DB record is older than txnID */
#define DBI_FRESH MDBX_DBI_FRESH /* Named-DB handle opened in this txn */
#define DBI_CREAT MDBX_DBI_CREAT /* Named-DB handle created in this txn */
#define DBI_VALID 0x10           /* DB handle is valid, see also DB_VALID */
#define DBI_USRVALID 0x20        /* As DB_VALID, but not set for FREE_DBI */
#define DBI_AUDITED 0x40         /* Internal flag for accounting during audit */
  /* Array of flags for each DB */
  uint8_t *mt_dbistate;
  /* Number of DB records in use, or 0 when the txn is finished.
   * This number only ever increments until the txn finishes; we
   * don't decrement it when individual DB handles are closed. */
  MDBX_dbi mt_numdbs;
  size_t mt_owner; /* thread ID that owns this transaction */
  MDBX_canary mt_canary;
  void *mt_userctx; /* User-settable context */
  /* NOTE(review): presumably the per-DB array of cursors of this txn; the
   * comment that described it appears to have drifted into `tw` — confirm. */
  MDBX_cursor **mt_cursors;

  union {
    /* Read-only transaction part. */
    struct {
      /* For read txns: This thread/txn's reader table slot, or NULL. */
      MDBX_reader *reader;
    } to;
    /* Write transaction part. */
    struct {
      meta_troika_t troika;
      pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
      txnid_t last_reclaimed;   /* ID of last used record */
#if MDBX_ENABLE_REFUND
      pgno_t loose_refund_wl /* FIXME: describe */;
#endif /* MDBX_ENABLE_REFUND */
      /* dirtylist room: Dirty array size - dirty pages visible to this txn.
       * Includes ancestor txns' dirty pages not hidden by other txns'
       * dirty/spilled pages. Thus commit(nested txn) has room to merge
       * dirtylist into mt_parent after freeing hidden mt_parent pages. */
      unsigned dirtyroom;
      /* a sequence number used for spilling dirty pages with LRU policy */
      unsigned dirtylru;
      /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
      MDBX_dpl *dirtylist;
      /* The list of reclaimed txns from GC */
      MDBX_TXL lifo_reclaimed;
      /* The list of pages that became unused during this transaction. */
      MDBX_PNL retired_pages;
      /* The list of loose pages that became unused and may be reused
       * in this transaction, linked through `mp_next`. */
      MDBX_page *loose_pages;
      /* Number of loose pages (tw.loose_pages) */
      unsigned loose_count;
      unsigned spill_least_removed;
      /* The sorted list of dirty pages we temporarily wrote to disk
       * because the dirty list was full. Page numbers in here are
       * shifted left by 1, deleted slots have the LSB set. */
      MDBX_PNL spill_pages;
    } tw;
  };
};
  2981  
/* Number of entries in the per-cursor stacks (mc_pg/mc_ki of struct
 * MDBX_cursor): 32 when MDBX_WORDBITS >= 64, otherwise 24. */
#if MDBX_WORDBITS >= 64
#define CURSOR_STACK 32
#else
#define CURSOR_STACK 24
#endif

/* Forward declaration: struct MDBX_cursor refers to it by pointer. */
struct MDBX_xcursor;
  2989  
/* Cursors are used for all DB operations.
 * A cursor holds a path of (page pointer, key index) from the DB
 * root to a position in the DB, plus other state. MDBX_DUPSORT
 * cursors include an xcursor to the current data item. Write txns
 * track their cursors and keep them up to date when data moves.
 * Exception: An xcursor's pointer to a P_SUBP page can be stale.
 * (A node with F_DUPDATA but no F_SUBDATA contains a subpage). */
struct MDBX_cursor {
  /* Values of mc_signature. NOTE(review): the names suggest cursor lifecycle
   * states (live / ready for close / waiting for end of txn) — confirm. */
#define MDBX_MC_LIVE UINT32_C(0xFE05D5B1)
#define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047)
#define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7)
  uint32_t mc_signature;
  /* The database handle this cursor operates on */
  MDBX_dbi mc_dbi;
  /* Next cursor on this DB in this txn */
  MDBX_cursor *mc_next;
  /* Backup of the original cursor if this cursor is a shadow */
  MDBX_cursor *mc_backup;
  /* Context used for databases with MDBX_DUPSORT, otherwise NULL */
  struct MDBX_xcursor *mc_xcursor;
  /* The transaction that owns this cursor */
  MDBX_txn *mc_txn;
  /* The database record for this cursor */
  MDBX_db *mc_db;
  /* The database auxiliary record for this cursor */
  MDBX_dbx *mc_dbx;
  /* The mt_dbistate for this database */
  uint8_t *mc_dbistate;
  uint8_t mc_snum; /* number of pushed pages */
  uint8_t mc_top;  /* index of top page, normally mc_snum-1 */

  /* Cursor state flags. */
#define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */
#define C_EOF 0x02         /* No more data */
#define C_SUB 0x04         /* Cursor is a sub-cursor */
#define C_DEL 0x08         /* last op was a cursor_del */
#define C_UNTRACK 0x10     /* Un-track cursor when closing */
#define C_RECLAIMING 0x20  /* GC lookup is prohibited */
#define C_GCFREEZE 0x40    /* reclaimed_pglist must not be updated */
  uint8_t mc_flags;        /* combination of the C_* bits above */

  /* Cursor checking flags. */
#define CC_BRANCH 0x01    /* same as P_BRANCH for CHECK_LEAF_TYPE() */
#define CC_LEAF 0x02      /* same as P_LEAF for CHECK_LEAF_TYPE() */
#define CC_OVERFLOW 0x04  /* same as P_OVERFLOW for CHECK_LEAF_TYPE() */
#define CC_UPDATING 0x08  /* update/rebalance pending */
#define CC_SKIPORD 0x10   /* don't check keys ordering */
#define CC_LEAF2 0x20     /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
#define CC_RETIRING 0x40  /* refs to child pages may be invalid */
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
  uint8_t mc_checking;    /* page checking level, CC_* bits above */

  /* Two parallel stacks of CURSOR_STACK entries each. */
  MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
  indx_t mc_ki[CURSOR_STACK];     /* stack of page indices */
};
  3045  
  3046  #define CHECK_LEAF_TYPE(mc, mp)                                                \
  3047    (((PAGETYPE_WHOLE(mp) ^ (mc)->mc_checking) &                                 \
  3048      (CC_BRANCH | CC_LEAF | CC_OVERFLOW | CC_LEAF2)) == 0)
  3049  
/* Context for sorted-dup records.
 * We could have gone to a fully recursive design, with arbitrarily
 * deep nesting of sub-databases. But for now we only handle these
 * levels - main DB, optional sub-DB, sorted-duplicate DB.
 *
 * The sub-cursor is the first member, followed by the two records it works
 * with. */
typedef struct MDBX_xcursor {
  /* A sub-cursor for traversing the Dup DB */
  MDBX_cursor mx_cursor;
  /* The database record for this Dup DB */
  MDBX_db mx_db;
  /* The auxiliary DB record for this Dup DB */
  MDBX_dbx mx_dbx;
} MDBX_xcursor;
  3062  
/* A main cursor bundled with a user context pointer and the xcursor
 * (sub-cursor context) stored inline. */
typedef struct MDBX_cursor_couple {
  MDBX_cursor outer; /* the main cursor; first member of the couple */
  void *mc_userctx;  /* User-settable context */
  MDBX_xcursor inner; /* sorted-dup context, see MDBX_cursor.mc_xcursor */
} MDBX_cursor_couple;
  3068  
/* The database environment.
 * The fields are grouped into a mostly static part, a mostly volatile part,
 * debugging-only fields and a trailing stub used in lck-less mode. */
struct MDBX_env {
  /* ----------------------------------------------------- mostly static part */
#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
  MDBX_atomic_uint32_t me_signature;
  /* Failed to update the meta page. Probably an I/O error. */
#define MDBX_FATAL_ERROR UINT32_C(0x80000000)
  /* Some fields are initialized. */
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
  /* me_txkey is set */
#define MDBX_ENV_TXKEY UINT32_C(0x10000000)
  /* Legacy MDBX_MAPASYNC (prior v0.9) */
#define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000)
  /* Legacy MDBX_COALESCE (prior v0.12) */
#define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000)
  /* Bits of me_flags that are internal state (the three flags above). */
#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
  uint32_t me_flags;
  osal_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb
#define me_lazy_fd me_dxb_mmap.fd
  mdbx_filehandle_t me_dsync_fd;
  osal_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd
  struct MDBX_lockinfo *me_lck;

  unsigned me_psize;        /* DB page size, initialized from me_os_psize */
  unsigned me_leaf_nodemax; /* max size of a leaf-node */
  uint8_t me_psize2log;     /* log2 of DB page size */
  int8_t me_stuck_meta; /* recovery-only: target meta page or less than zero */
  uint16_t me_merge_threshold,
      me_merge_threshold_gc;  /* pages emptier than this are candidates for
                                 merging */
  unsigned me_os_psize;       /* OS page size, from osal_syspagesize() */
  unsigned me_maxreaders;     /* size of the reader table */
  MDBX_dbi me_maxdbs;         /* size of the DB table */
  uint32_t me_pid;            /* process ID of this env */
  osal_thread_key_t me_txkey; /* thread-key for readers */
  pathchar_t *me_pathname;    /* path to the DB files */
  void *me_pbuf;              /* scratch area for DUPSORT put() */
  MDBX_txn *me_txn0;          /* preallocated write transaction */

  MDBX_dbx *me_dbxs;                /* array of static DB info */
  uint16_t *me_dbflags;             /* array of flags from MDBX_db.md_flags */
  MDBX_atomic_uint32_t *me_dbiseqs; /* array of dbi sequence numbers */
  unsigned
      me_maxgc_ov1page;    /* Number of pgno_t fit in a single overflow page */
  uint32_t me_live_reader; /* have liveness lock in reader table */
  void *me_userctx;        /* User-settable context */
  MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */

  /* Tunable options. NOTE(review): the dp_ and rp_ prefixes presumably stand
   * for dirty-pages and retired-pages — confirm. */
  struct {
    unsigned dp_reserve_limit;
    unsigned rp_augment_limit;
    unsigned dp_limit;
    unsigned dp_initial;
    uint8_t dp_loose_limit;
    uint8_t spill_max_denominator;
    uint8_t spill_min_denominator;
    uint8_t spill_parent4child_denominator;
    unsigned merge_threshold_16dot16_percent;
    union {
      unsigned all; /* overlays every tracking bit below */
      /* tracks options with non-auto values but tuned by user */
      struct {
        unsigned dp_limit : 1;
      } non_auto;
    } flags;
  } me_options;

  /* struct me_dbgeo used for accepting db-geo params from user for the new
   * database creation, i.e. when mdbx_env_set_geometry() was called before
   * mdbx_env_open(). */
  struct {
    size_t lower;  /* minimal size of datafile */
    size_t upper;  /* maximal size of datafile */
    size_t now;    /* current size of datafile */
    size_t grow;   /* step to grow datafile */
    size_t shrink; /* threshold to shrink datafile */
  } me_dbgeo;

  /* Present only when the System V locking backend is selected. */
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
  union {
    key_t key;
    int semid;
  } me_sysv_ipc;
#endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */

  /* NOTE(review): presumably links environments into a list used by the lock
   * machinery — confirm. */
  MDBX_env *me_lcklist_next;

  /* --------------------------------------------------- mostly volatile part */

  MDBX_txn *me_txn; /* current write transaction */
  osal_fastmutex_t me_dbi_lock;
  MDBX_dbi me_numdbs; /* number of DBs opened */

  MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */
  unsigned me_dp_reserve_len;
  /* PNL of pages that became unused in a write txn */
  MDBX_PNL me_retired_pages;

  /* The remap guard is a slim reader/writer lock on Windows and a fast mutex
   * elsewhere. */
#if defined(_WIN32) || defined(_WIN64)
  osal_srwlock_t me_remap_guard;
  /* Workaround for LockFileEx and WriteFile multithread bug */
  CRITICAL_SECTION me_windowsbug_lock;
#else
  osal_fastmutex_t me_remap_guard;
#endif

  /* -------------------------------------------------------------- debugging */

#if MDBX_DEBUG
  MDBX_assert_func *me_assert_func; /*  Callback for assertion failures */
#endif
#ifdef MDBX_USE_VALGRIND
  int me_valgrind_handle;
#endif
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
  pgno_t me_poison_edge;
#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */

  /* Spilling diagnostics are off unless the build defines them otherwise. */
#ifndef xMDBX_DEBUG_SPILLING
#define xMDBX_DEBUG_SPILLING 0
#endif
#if xMDBX_DEBUG_SPILLING == 2
  unsigned debug_dirtied_est, debug_dirtied_act;
#endif /* xMDBX_DEBUG_SPILLING */

  /* ------------------------------------------------- stub for lck-less mode */
  /* Sized to hold an MDBX_lockinfo plus up to a cache line of slack, counted
   * in 64-bit atomic units. */
  MDBX_atomic_uint64_t
      x_lckless_stub[(sizeof(MDBX_lockinfo) + MDBX_CACHELINE_SIZE - 1) /
                     sizeof(MDBX_atomic_uint64_t)];
};
  3201  
  3202  #ifndef __cplusplus
  3203  /*----------------------------------------------------------------------------*/
  3204  /* Debug and Logging stuff */
  3205  
  3206  #define MDBX_RUNTIME_FLAGS_INIT                                                \
  3207    ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT
  3208  
/* Shared debug state; only declared here. */
extern uint8_t runtime_flags; /* MDBX_DBG_* bits, e.g. tested by
                                 jitter4testing() */
extern uint8_t loglevel;      /* threshold compared by LOG_ENABLED() */
/* NOTE(review): presumably the user-installed logging callback — confirm. */
extern MDBX_debug_func *debug_logger;
  3212  
  3213  MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny) {
  3214  #if MDBX_DEBUG
  3215    if (MDBX_DBG_JITTER & runtime_flags)
  3216      osal_jitter(tiny);
  3217  #else
  3218    (void)tiny;
  3219  #endif
  3220  }
  3221  
/* printf-style logging entry point: `fmt` is argument 4 and the variadic
 * values start at argument 5, as declared via MDBX_PRINTF_ARGS(4, 5).
 * NOTE(review): the attribute is spelled both before the name and after the
 * parameter list; one occurrence looks redundant — confirm before removing. */
MDBX_INTERNAL_FUNC void MDBX_PRINTF_ARGS(4, 5)
    debug_log(int level, const char *function, int line, const char *fmt, ...)
        MDBX_PRINTF_ARGS(4, 5);
/* The same entry point taking an already started va_list. */
MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
                                     const char *fmt, va_list args);
  3227  
  3228  #if MDBX_DEBUG
  3229  #define LOG_ENABLED(msg) unlikely(msg <= loglevel)
  3230  #define AUDIT_ENABLED() unlikely((runtime_flags & MDBX_DBG_AUDIT))
  3231  #else /* MDBX_DEBUG */
  3232  #define LOG_ENABLED(msg) (msg < MDBX_LOG_VERBOSE && msg <= loglevel)
  3233  #define AUDIT_ENABLED() (0)
  3234  #endif /* MDBX_DEBUG */
  3235  
  3236  #if MDBX_FORCE_ASSERTIONS
  3237  #define ASSERT_ENABLED() (1)
  3238  #elif MDBX_DEBUG
  3239  #define ASSERT_ENABLED() likely((runtime_flags & MDBX_DBG_ASSERT))
  3240  #else
  3241  #define ASSERT_ENABLED() (0)
  3242  #endif /* assertions */
  3243  
  3244  #define DEBUG_EXTRA(fmt, ...)                                                  \
  3245    do {                                                                         \
  3246      if (LOG_ENABLED(MDBX_LOG_EXTRA))                                           \
  3247        debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__);         \
  3248    } while (0)
  3249  
  3250  #define DEBUG_EXTRA_PRINT(fmt, ...)                                            \
  3251    do {                                                                         \
  3252      if (LOG_ENABLED(MDBX_LOG_EXTRA))                                           \
  3253        debug_log(MDBX_LOG_EXTRA, NULL, 0, fmt, __VA_ARGS__);                    \
  3254    } while (0)
  3255  
  3256  #define TRACE(fmt, ...)                                                        \
  3257    do {                                                                         \
  3258      if (LOG_ENABLED(MDBX_LOG_TRACE))                                           \
  3259        debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  3260    } while (0)
  3261  
  3262  #define DEBUG(fmt, ...)                                                        \
  3263    do {                                                                         \
  3264      if (LOG_ENABLED(MDBX_LOG_DEBUG))                                           \
  3265        debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  3266    } while (0)
  3267  
  3268  #define VERBOSE(fmt, ...)                                                      \
  3269    do {                                                                         \
  3270      if (LOG_ENABLED(MDBX_LOG_VERBOSE))                                         \
  3271        debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__);  \
  3272    } while (0)
  3273  
  3274  #define NOTICE(fmt, ...)                                                       \
  3275    do {                                                                         \
  3276      if (LOG_ENABLED(MDBX_LOG_NOTICE))                                          \
  3277        debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__);   \
  3278    } while (0)
  3279  
  3280  #define WARNING(fmt, ...)                                                      \
  3281    do {                                                                         \
  3282      if (LOG_ENABLED(MDBX_LOG_WARN))                                            \
  3283        debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__);     \
  3284    } while (0)
  3285  
  3286  #undef ERROR /* wingdi.h                                                       \
  3287    Yeah, morons from M$ put such definition to the public header. */
  3288  
  3289  #define ERROR(fmt, ...)                                                        \
  3290    do {                                                                         \
  3291      if (LOG_ENABLED(MDBX_LOG_ERROR))                                           \
  3292        debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  3293    } while (0)
  3294  
  3295  #define FATAL(fmt, ...)                                                        \
  3296    debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
  3297  
/* Always-on check: when `expr` is false, reports `msg` through
 * mdbx_assert_fail() together with the caller's function name and line. */
#define ENSURE_MSG(env, expr, msg)                                             \
  do {                                                                         \
    if (unlikely(!(expr)))                                                     \
      mdbx_assert_fail(env, msg, __func__, __LINE__);                          \
  } while (0)

/* ENSURE_MSG() with the stringified expression as the message. */
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)

/* assert(3) variant in environment context; the check is performed only when
 * ASSERT_ENABLED() is non-zero. */
#define eASSERT(env, expr)                                                     \
  do {                                                                         \
    if (ASSERT_ENABLED())                                                      \
      ENSURE(env, expr);                                                       \
  } while (0)

/* assert(3) variant in cursor context */
#define cASSERT(mc, expr) eASSERT((mc)->mc_txn->mt_env, expr)

/* assert(3) variant in transaction context */
#define tASSERT(txn, expr) eASSERT((txn)->mt_env, expr)

/* Outside of the tools the standard assert() is redirected to eASSERT() with
 * a NULL environment; tools keep the assert() they already have. */
#ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */
#undef assert
#define assert(expr) eASSERT(NULL, expr)
#endif
  3323  
  3324  /*----------------------------------------------------------------------------*/
  3325  /* Cache coherence and mmap invalidation */
  3326  
  3327  #if MDBX_CPU_WRITEBACK_INCOHERENT
  3328  #define osal_flush_incoherent_cpu_writeback() osal_memory_barrier()
  3329  #else
  3330  #define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier()
  3331  #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
  3332  
  3333  MDBX_MAYBE_UNUSED static __inline void
  3334  osal_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) {
  3335  #if MDBX_MMAP_INCOHERENT_FILE_WRITE
  3336    char *const begin = (char *)(-pagesize & (intptr_t)addr);
  3337    char *const end =
  3338        (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1));
  3339    int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0;
  3340    eASSERT(nullptr, err == 0);
  3341    (void)err;
  3342  #else
  3343    (void)pagesize;
  3344  #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  3345  
  3346  #if MDBX_MMAP_INCOHERENT_CPU_CACHE
  3347  #ifdef DCACHE
  3348    /* MIPS has cache coherency issues.
  3349     * Note: for any nbytes >= on-chip cache size, entire is flushed. */
  3350    cacheflush(addr, nbytes, DCACHE);
  3351  #else
  3352  #error "Oops, cacheflush() not available"
  3353  #endif /* DCACHE */
  3354  #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
  3355  
  3356  #if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE
  3357    (void)addr;
  3358    (void)nbytes;
  3359  #endif
  3360  }
  3361  
  3362  /*----------------------------------------------------------------------------*/
  3363  /* Internal prototypes */
  3364  
/* Reader-table maintenance; the implementation is not in this part of the
 * file. NOTE(review): `rlocked` presumably tells whether the caller already
 * holds the reader lock and `dead` receives a count — confirm. */
MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked,
                                            int *dead);
/* Registration and removal of a thread key for the reader range
 * [begin, end). NOTE(review): semantics inferred from the signatures —
 * confirm against the implementation. */
MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
                                  MDBX_reader *end);
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);

/* Library-wide constructor/destructor and the per-thread destructor hook. */
MDBX_INTERNAL_FUNC void global_ctor(void);
MDBX_INTERNAL_FUNC void global_dtor(void);
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
  3374  
  3375  #endif /* !__cplusplus */
  3376  
  3377  #define MDBX_IS_ERROR(rc)                                                      \
  3378    ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
  3379  
  3380  /* Internal error codes, not exposed outside libmdbx */
  3381  #define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
  3382  
  3383  /* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
  3384  #define DDBI(mc)                                                               \
  3385    (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi)
  3386  
/* Key size which fits in a DKBUF (debug key buffer). */
#define DKBUF_MAX 511
/* Declares the local scratch buffer `_kbuf`. DKEY() dumps into its first
 * DKBUF_MAX * 2 + 1 bytes and DVAL() into the following DKBUF_MAX * 2 + 1
 * bytes, so one key and one value can be printed in the same statement. */
#define DKBUF char _kbuf[DKBUF_MAX * 4 + 2]
#define DKEY(x) mdbx_dump_val(x, _kbuf, DKBUF_MAX * 2 + 1)
#define DVAL(x) mdbx_dump_val(x, _kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1)

/* Debug-only flavours: outside MDBX_DEBUG builds no buffer is declared and
 * both dumps collapse to the literal "-". */
#if MDBX_DEBUG
#define DKBUF_DEBUG DKBUF
#define DKEY_DEBUG(x) DKEY(x)
#define DVAL_DEBUG(x) DVAL(x)
#else
#define DKBUF_DEBUG ((void)(0))
#define DKEY_DEBUG(x) ("-")
#define DVAL_DEBUG(x) ("-")
#endif
  3402  
/* An invalid page number (all bits of pgno_t set).
 * Mainly used to denote an empty tree. */
#define P_INVALID (~(pgno_t)0)

/* Test if all of the flags f are set in a flag word w. */
#define F_ISSET(w, f) (((w) & (f)) == (f))

/* Round n up to an even number. */
#define EVEN(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */

/* Default size of memory map.
 * This is certainly too small for any actual applications. Apps should
 * always set the size explicitly using mdbx_env_set_geometry(). */
#define DEFAULT_MAPSIZE MEGABYTE

/* Number of slots in the reader table.
 * This value was chosen somewhat arbitrarily. 61 is a prime number,
 * and that many readers plus a couple of mutexes fit into a single 4KB page.
 * Applications should set the table size using mdbx_env_set_maxreaders(). */
#define DEFAULT_READERS 61
  3423  
  3424  /* Test if a page is a leaf page */
  3425  #define IS_LEAF(p) (((p)->mp_flags & P_LEAF) != 0)
  3426  /* Test if a page is a LEAF2 page */
  3427  #define IS_LEAF2(p) unlikely(((p)->mp_flags & P_LEAF2) != 0)
  3428  /* Test if a page is a branch page */
  3429  #define IS_BRANCH(p) (((p)->mp_flags & P_BRANCH) != 0)
  3430  /* Test if a page is an overflow page */
  3431  #define IS_OVERFLOW(p) unlikely(((p)->mp_flags & P_OVERFLOW) != 0)
  3432  /* Test if a page is a sub page */
  3433  #define IS_SUBP(p) (((p)->mp_flags & P_SUBP) != 0)
  3434  
  3435  /* Header for a single key/data pair within a page.
  3436   * Used in pages of type P_BRANCH and P_LEAF without P_LEAF2.
  3437   * We guarantee 2-byte alignment for 'MDBX_node's.
  3438   *
  3439   * Leaf node flags describe node contents.  F_BIGDATA says the node's
  3440   * data part is the page number of an overflow page with actual data.
  3441   * F_DUPDATA and F_SUBDATA can be combined giving duplicate data in
  3442   * a sub-page/sub-database, and named databases (just F_SUBDATA). */
typedef struct MDBX_node {
  /* The same four fields are declared in opposite order depending on the
   * byte order of the target, see the two branches below. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  union {
    uint32_t mn_dsize;  /* data size; shares storage with mn_pgno32 */
    uint32_t mn_pgno32; /* 32-bit page number; shares storage with mn_dsize */
  };
  uint8_t mn_flags; /* see mdbx_node flags */
  uint8_t mn_extra;
  uint16_t mn_ksize; /* key size */
#else
  uint16_t mn_ksize; /* key size */
  uint8_t mn_extra;
  uint8_t mn_flags; /* see mdbx_node flags */
  union {
    uint32_t mn_pgno32; /* 32-bit page number; shares storage with mn_dsize */
    uint32_t mn_dsize;  /* data size; shares storage with mn_pgno32 */
  };
#endif /* __BYTE_ORDER__ */

  /* mdbx_node Flags */
#define F_BIGDATA 0x01 /* data put on overflow page */
#define F_SUBDATA 0x02 /* data is a sub-database */
#define F_DUPDATA 0x04 /* data has duplicates */

  /* valid flags for mdbx_node_add() */
#define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND)

  /* The flexible array member is declared only for C99-or-later compilers
   * and for MSVC when compiling as C; otherwise the struct ends at the
   * fixed header above. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
    (!defined(__cplusplus) && defined(_MSC_VER))
  uint8_t mn_data[] /* key and data are appended here */;
#endif /* C99 */
} MDBX_node;
  3475  
  3476  #define DB_PERSISTENT_FLAGS                                                    \
  3477    (MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED |          \
  3478     MDBX_INTEGERDUP | MDBX_REVERSEDUP)
  3479  
  3480  /* mdbx_dbi_open() flags */
  3481  #define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE)
  3482  
  3483  #define DB_VALID 0x8000 /* DB handle is valid, for me_dbflags */
  3484  #define DB_INTERNAL_FLAGS DB_VALID
  3485  
  3486  #if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS
  3487  #error "Oops, some flags overlapped or wrong"
  3488  #endif
  3489  #if DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS
  3490  #error "Oops, some flags overlapped or wrong"
  3491  #endif
  3492  
  3493  /* max number of pages to commit in one writev() call */
  3494  #define MDBX_COMMIT_PAGES 64
  3495  #if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
  3496  #undef MDBX_COMMIT_PAGES
  3497  #define MDBX_COMMIT_PAGES IOV_MAX
  3498  #endif
  3499  
  3500  /*
  3501   *                /
  3502   *                | -1, a < b
  3503   * CMP2INT(a,b) = <  0, a == b
  3504   *                |  1, a > b
  3505   *                \
  3506   */
  3507  #ifndef __e2k__
  3508  /* LY: fast enough on most systems */
  3509  #define CMP2INT(a, b) (((b) > (a)) ? -1 : (a) > (b))
  3510  #else
/* LY: more parallelizable on VLIW Elbrus */
  3512  #define CMP2INT(a, b) (((a) > (b)) - ((b) > (a)))
  3513  #endif
  3514  
  3515  /* Do not spill pages to disk if txn is getting full, may fail instead */
  3516  #define MDBX_NOSPILL 0x8000
  3517  
  3518  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
  3519  int64pgno(int64_t i64) {
  3520    if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1))
  3521      return (pgno_t)i64;
  3522    return (i64 < (int64_t)MIN_PAGENO) ? MIN_PAGENO : MAX_PAGENO;
  3523  }
  3524  
  3525  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
  3526  pgno_add(size_t base, size_t augend) {
  3527    assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO);
  3528    return int64pgno(base + augend);
  3529  }
  3530  
  3531  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
  3532  pgno_sub(size_t base, size_t subtrahend) {
  3533    assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 &&
  3534           subtrahend < MAX_PAGENO);
  3535    return int64pgno(base - subtrahend);
  3536  }
  3537  
  3538  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline bool
  3539  is_powerof2(size_t x) {
  3540    return (x & (x - 1)) == 0;
  3541  }
  3542  
  3543  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t
  3544  floor_powerof2(size_t value, size_t granularity) {
  3545    assert(is_powerof2(granularity));
  3546    return value & ~(granularity - 1);
  3547  }
  3548  
  3549  MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t
  3550  ceil_powerof2(size_t value, size_t granularity) {
  3551    return floor_powerof2(value + granularity - 1, granularity);
  3552  }
  3553  
/* Returns the index of the single set bit of `value`, i.e. log2(value).
 * The argument is asserted to be non-zero, below INT32_MAX and to pass
 * is_powerof2(). One of three implementations is selected at compile time:
 * the GCC/Clang count-trailing-zeros builtin, MSVC's _BitScanForward(),
 * or a portable 32-bit de Bruijn multiply-and-lookup. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
log2n_powerof2(size_t value) {
  assert(value > 0 && value < INT32_MAX && is_powerof2(value));
  /* isolating the lowest set bit must give back the value itself */
  assert((value & -(int32_t)value) == value);
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
  return __builtin_ctzl(value);
#elif defined(_MSC_VER)
  unsigned long index;
  _BitScanForward(&index, (unsigned long)value);
  return index;
#else
  /* The product's top five bits (after truncation to 32 bits) select the
   * table entry holding the bit index. */
  static const uint8_t debruijn_ctz32[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
#endif
}
  3571  
  3572  /* Only a subset of the mdbx_env flags can be changed
  3573   * at runtime. Changing other flags requires closing the
  3574   * environment and re-opening it with the new flags. */
  3575  #define ENV_CHANGEABLE_FLAGS                                                   \
  3576    (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC |             \
  3577     MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE |           \
  3578     MDBX_VALIDATION)
  3579  #define ENV_CHANGELESS_FLAGS                                                   \
  3580    (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
  3581     MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)
  3582  #define ENV_USABLE_FLAGS (ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS)
  3583  
  3584  #if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS
/* Holder for compile-time consistency checks of the flag and limit macros.
 * The body consists solely of STATIC_ASSERT_MSG() invocations. */
MDBX_MAYBE_UNUSED static void static_checks(void) {
  /* the DBI limit together with the core DBs must equal INT16_MAX */
  STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI,
                    "Oops, MDBX_MAX_DBI or CORE_DBS?");
  /* the only bits shared between the DB flag set and the env flag set
   * must be exactly MDBX_DB_ACCEDE | MDBX_CREATE */
  STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) ==
                        ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) &
                         (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)),
                    "Oops, some flags overlapped or wrong");
  /* internal env flags must not collide with the usable ones */
  STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0,
                    "Oops, some flags overlapped or wrong");
}
  3595  #endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */
  3596  
  3597  #ifdef __cplusplus
  3598  }
  3599  #endif
  3600  
/* Wrapper around ASAN_POISON_MEMORY_REGION() that first emits a TRACE()
 * record with the address, the size and the source line of the call site.
 * Note that `addr` and `size` are each expanded twice. */
#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size)                             \
  do {                                                                         \
    TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr),               \
          (size_t)(size), __LINE__);                                           \
    ASAN_POISON_MEMORY_REGION(addr, size);                                     \
  } while (0)

/* Wrapper around ASAN_UNPOISON_MEMORY_REGION() that first emits a TRACE()
 * record with the address, the size and the source line of the call site.
 * Note that `addr` and `size` are each expanded twice. */
#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size)                           \
  do {                                                                         \
    TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr),             \
          (size_t)(size), __LINE__);                                           \
    ASAN_UNPOISON_MEMORY_REGION(addr, size);                                   \
  } while (0)
  3614  
/* Pairs a flag bit with its human-readable name. */
typedef struct flagbit {
  int bit;          /* flag value */
  const char *name; /* printable name of the flag */
} flagbit;

/* Names of the database flags; the table is terminated by a {0, nullptr}
 * sentinel entry. */
const flagbit dbflags[] = {{MDBX_DUPSORT, "dupsort"},
                           {MDBX_INTEGERKEY, "integerkey"},
                           {MDBX_REVERSEKEY, "reversekey"},
                           {MDBX_DUPFIXED, "dupfixed"},
                           {MDBX_REVERSEDUP, "reversedup"},
                           {MDBX_INTEGERDUP, "integerdup"},
                           {0, nullptr}};
  3627  
  3628  #if defined(_WIN32) || defined(_WIN64)
  3629  /*
  3630   * POSIX getopt for Windows
  3631   *
  3632   * AT&T Public License
  3633   *
  3634   * Code given out at the 1985 UNIFORUM conference in Dallas.
  3635   */
  3636  
  3637  /*----------------------------------------------------------------------------*/
  3638  /* Microsoft compiler generates a lot of warning for self includes... */
  3639  
  3640  #ifdef _MSC_VER
  3641  #pragma warning(push, 1)
  3642  #pragma warning(disable : 4548) /* expression before comma has no effect;      \
  3643                                     expected expression with side - effect */
  3644  #pragma warning(disable : 4530) /* C++ exception handler used, but unwind      \
  3645                                   * semantics are not enabled. Specify /EHsc */
  3646  #pragma warning(disable : 4577) /* 'noexcept' used with no exception handling  \
  3647                                   * mode specified; termination on exception is \
  3648                                   * not guaranteed. Specify /EHsc */
  3649  #if !defined(_CRT_SECURE_NO_WARNINGS)
  3650  #define _CRT_SECURE_NO_WARNINGS
  3651  #endif
  3652  #endif /* _MSC_VER (warnings) */
  3653  
  3654  #include <stdio.h>
  3655  #include <string.h>
  3656  
  3657  #ifdef _MSC_VER
  3658  #pragma warning(pop)
  3659  #endif
  3660  /*----------------------------------------------------------------------------*/
  3661  
  3662  #ifndef NULL
  3663  #define NULL 0
  3664  #endif
  3665  
  3666  #ifndef EOF
  3667  #define EOF (-1)
  3668  #endif
  3669  
  3670  int optind = 1;
  3671  int optopt;
  3672  char *optarg;
  3673  
  3674  int getopt(int argc, char *const argv[], const char *opts) {
  3675    static int sp = 1;
  3676    int c;
  3677    const char *cp;
  3678  
  3679    if (sp == 1) {
  3680      if (optind >= argc || argv[optind][0] != '-' || argv[optind][1] == '\0')
  3681        return EOF;
  3682      else if (strcmp(argv[optind], "--") == 0) {
  3683        optind++;
  3684        return EOF;
  3685      }
  3686    }
  3687    optopt = c = argv[optind][sp];
  3688    if (c == ':' || (cp = strchr(opts, c)) == NULL) {
  3689      fprintf(stderr, "%s: %s -- %c\n", argv[0], "illegal option", c);
  3690      if (argv[optind][++sp] == '\0') {
  3691        optind++;
  3692        sp = 1;
  3693      }
  3694      return '?';
  3695    }
  3696    if (*++cp == ':') {
  3697      if (argv[optind][sp + 1] != '\0')
  3698        optarg = &argv[optind++][sp + 1];
  3699      else if (++optind >= argc) {
  3700        fprintf(stderr, "%s: %s -- %c\n", argv[0], "option requires an argument",
  3701                c);
  3702        sp = 1;
  3703        return '?';
  3704      } else
  3705        optarg = argv[optind++];
  3706      sp = 1;
  3707    } else {
  3708      if (argv[optind][++sp] == '\0') {
  3709        sp = 1;
  3710        optind++;
  3711      }
  3712      optarg = NULL;
  3713    }
  3714    return c;
  3715  }
  3716  
  3717  static volatile BOOL user_break;
  3718  static BOOL WINAPI ConsoleBreakHandlerRoutine(DWORD dwCtrlType) {
  3719    (void)dwCtrlType;
  3720    user_break = 1;
  3721    return true;
  3722  }
  3723  
  3724  static uint64_t GetMilliseconds(void) {
  3725    LARGE_INTEGER Counter, Frequency;
  3726    return (QueryPerformanceFrequency(&Frequency) &&
  3727            QueryPerformanceCounter(&Counter))
  3728               ? Counter.QuadPart * 1000ul / Frequency.QuadPart
  3729               : 0;
  3730  }
  3731  
  3732  #else /* WINDOWS */
  3733  
  3734  static volatile sig_atomic_t user_break;
  3735  static void signal_handler(int sig) {
  3736    (void)sig;
  3737    user_break = 1;
  3738  }
  3739  
  3740  #endif /* !WINDOWS */
  3741  
/* Distinct process exit codes, defined as consecutive offsets from
 * EXIT_FAILURE. EXIT_FAILURE_MDBX is what va_log() passes to exit() after a
 * fatal message in non-debug builds.
 * NOTE(review): the other codes are presumably returned by main() for the
 * corresponding outcome — confirm against the rest of the file. */
#define EXIT_INTERRUPTED (EXIT_FAILURE + 4)
#define EXIT_FAILURE_SYS (EXIT_FAILURE + 3)
#define EXIT_FAILURE_MDBX (EXIT_FAILURE + 2)
#define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1)
#define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE
  3747  
/* Per-database statistics accumulated by pgvisitor() during the page walk. */
typedef struct {
  const char *name; /* database name; looked up via pagemap_lookup_dbi() */
  struct {
    /* `large_count` counts large spans while `large_volume` counts the pages
     * inside them; `branch` and `leaf` count pages. */
    uint64_t branch, large_count, large_volume, leaf;
    /* sub-pages are counted one per visit; `leaf_dupfixed` counts pages */
    uint64_t subleaf_dupsort, leaf_dupfixed, subleaf_dupfixed;
    /* `total` is the number of pages claimed in the page map, `empty` the
     * number of pages without payload, `other` covers meta, broken and
     * unknown page types. */
    uint64_t total, empty, other;
  } pages;
  uint64_t payload_bytes; /* payload + header bytes of well-formed pages */
  uint64_t lost_bytes;    /* bytes unaccounted for on "misused" pages */
} walk_dbi_t;
  3758  
/* Global state of the page walk. */
struct {
  /* One entry per page number: 0 means not seen yet, a positive value is
   * the index into dbi[] plus one of the database that claimed the page,
   * and -1 is stored by handle_freedb() for pages listed in the GC. */
  short *pagemap;
  uint64_t total_payload_bytes; /* sum of payload_bytes over all databases */
  uint64_t pgcount;             /* number of pages visited by pgvisitor() */
  walk_dbi_t
      dbi[MDBX_MAX_DBI + CORE_DBS + /* account pseudo-entry for meta */ 1];
} walk;

/* Shortcuts for the fixed slots; named databases follow after dbi_meta. */
#define dbi_free walk.dbi[FREE_DBI]
#define dbi_main walk.dbi[MAIN_DBI]
#define dbi_meta walk.dbi[CORE_DBS]
  3770  
/* Tool-wide state.
 * NOTE(review): most of these are presumably filled in by main() from the
 * command line and from the opened environment — confirm there. */
int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION;
MDBX_env *env;
MDBX_txn *txn;
MDBX_envinfo envinfo; /* mi_dxb_pagesize etc. are consulted by the checks */
size_t userdb_count, skipped_subdb;
/* gc_pages and reclaimable_pages are accumulated by handle_freedb();
 * alloc_pages and backed_pages bound the valid page numbers in the checks */
uint64_t total_unused_bytes, reclaimable_pages, gc_pages, alloc_pages,
    unused_pages, backed_pages;
unsigned verbose; /* verbosity level; higher values print more detail */
/* `quiet` suppresses print() and va_log() output */
bool ignore_wrong_order, quiet, dont_traversal;
const char *only_subdb; /* when set, detailed output is limited to this name */
int stuck_meta = -1;
  3782  
/* Node of a singly-linked list that aggregates problems by caption.
 * problem_add() matches entries by comparing `caption` pointers, not the
 * string contents. */
struct problem {
  struct problem *pr_next; /* next list node */
  size_t count;            /* number of occurrences of this caption */
  const char *caption;     /* caption pointer as passed to problem_add() */
};

/* Head of the current problem list; see problems_push()/problems_pop(). */
struct problem *problems_list;
/* total_problems is bumped by problem_add() and by va_log() for messages
 * at error level or more severe; the per-tree counters are bumped by
 * pgvisitor(). */
unsigned total_problems, data_tree_problems, gc_tree_problems;
  3791  
  3792  static void MDBX_PRINTF_ARGS(1, 2) print(const char *msg, ...) {
  3793    if (!quiet) {
  3794      va_list args;
  3795  
  3796      fflush(stderr);
  3797      va_start(args, msg);
  3798      vfprintf(stdout, msg, args);
  3799      va_end(args);
  3800    }
  3801  }
  3802  
  3803  static void va_log(MDBX_log_level_t level, const char *function, int line,
  3804                     const char *msg, va_list args) {
  3805    static const char *const prefixes[] = {
  3806        "!!!fatal: ",       " ! " /* error */,      " ~ " /* warning */,
  3807        "   " /* notice */, "   // " /* verbose */, "   //// " /* debug */,
  3808        "   ////// " /* trace */
  3809    };
  3810  
  3811    FILE *out = stdout;
  3812    if (level <= MDBX_LOG_ERROR) {
  3813      total_problems++;
  3814      out = stderr;
  3815    }
  3816  
  3817    if (!quiet && verbose + 1 >= (unsigned)level &&
  3818        (unsigned)level < ARRAY_LENGTH(prefixes)) {
  3819      fflush(nullptr);
  3820      fputs(prefixes[level], out);
  3821      vfprintf(out, msg, args);
  3822  
  3823      const bool have_lf = msg[strlen(msg) - 1] == '\n';
  3824      if (level == MDBX_LOG_FATAL && function && line)
  3825        fprintf(out, have_lf ? "          %s(), %u\n" : " (%s:%u)\n",
  3826                function + (strncmp(function, "mdbx_", 5) ? 5 : 0), line);
  3827      else if (!have_lf)
  3828        fputc('\n', out);
  3829      fflush(nullptr);
  3830    }
  3831  
  3832    if (level == MDBX_LOG_FATAL) {
  3833  #if !MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS
  3834      exit(EXIT_FAILURE_MDBX);
  3835  #endif
  3836      abort();
  3837    }
  3838  }
  3839  
  3840  static void MDBX_PRINTF_ARGS(1, 2) error(const char *msg, ...) {
  3841    va_list args;
  3842    va_start(args, msg);
  3843    va_log(MDBX_LOG_ERROR, nullptr, 0, msg, args);
  3844    va_end(args);
  3845  }
  3846  
  3847  static void logger(MDBX_log_level_t level, const char *function, int line,
  3848                     const char *msg, va_list args) {
  3849    (void)line;
  3850    (void)function;
  3851    if (level < MDBX_LOG_EXTRA)
  3852      va_log(level, function, line, msg, args);
  3853  }
  3854  
  3855  static int check_user_break(void) {
  3856    switch (user_break) {
  3857    case 0:
  3858      return MDBX_SUCCESS;
  3859    case 1:
  3860      print(" - interrupted by signal\n");
  3861      fflush(nullptr);
  3862      user_break = 2;
  3863    }
  3864    return MDBX_EINTR;
  3865  }
  3866  
  3867  static void pagemap_cleanup(void) {
  3868    for (size_t i = CORE_DBS + /* account pseudo-entry for meta */ 1;
  3869         i < ARRAY_LENGTH(walk.dbi); ++i) {
  3870      if (walk.dbi[i].name) {
  3871        osal_free((void *)walk.dbi[i].name);
  3872        walk.dbi[i].name = nullptr;
  3873      }
  3874    }
  3875  
  3876    osal_free(walk.pagemap);
  3877    walk.pagemap = nullptr;
  3878  }
  3879  
  3880  static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) {
  3881    static walk_dbi_t *last;
  3882  
  3883    if (dbi_name == MDBX_PGWALK_MAIN)
  3884      return &dbi_main;
  3885    if (dbi_name == MDBX_PGWALK_GC)
  3886      return &dbi_free;
  3887    if (dbi_name == MDBX_PGWALK_META)
  3888      return &dbi_meta;
  3889  
  3890    if (last && strcmp(last->name, dbi_name) == 0)
  3891      return last;
  3892  
  3893    walk_dbi_t *dbi = walk.dbi + CORE_DBS + /* account pseudo-entry for meta */ 1;
  3894    for (; dbi < ARRAY_END(walk.dbi) && dbi->name; ++dbi) {
  3895      if (strcmp(dbi->name, dbi_name) == 0)
  3896        return last = dbi;
  3897    }
  3898  
  3899    if (verbose > 0 && !silent) {
  3900      print(" - found '%s' area\n", dbi_name);
  3901      fflush(nullptr);
  3902    }
  3903  
  3904    if (dbi == ARRAY_END(walk.dbi))
  3905      return nullptr;
  3906  
  3907    dbi->name = osal_strdup(dbi_name);
  3908    return last = dbi;
  3909  }
  3910  
  3911  static void MDBX_PRINTF_ARGS(4, 5)
  3912      problem_add(const char *object, uint64_t entry_number, const char *msg,
  3913                  const char *extra, ...) {
  3914    total_problems++;
  3915  
  3916    if (!quiet) {
  3917      int need_fflush = 0;
  3918      struct problem *p;
  3919  
  3920      for (p = problems_list; p; p = p->pr_next)
  3921        if (p->caption == msg)
  3922          break;
  3923  
  3924      if (!p) {
  3925        p = osal_calloc(1, sizeof(*p));
  3926        if (unlikely(!p))
  3927          return;
  3928        p->caption = msg;
  3929        p->pr_next = problems_list;
  3930        problems_list = p;
  3931        need_fflush = 1;
  3932      }
  3933  
  3934      p->count++;
  3935      if (verbose > 1) {
  3936        print("     %s #%" PRIu64 ": %s", object, entry_number, msg);
  3937        if (extra) {
  3938          va_list args;
  3939          printf(" (");
  3940          va_start(args, extra);
  3941          vfprintf(stdout, extra, args);
  3942          va_end(args);
  3943          printf(")");
  3944        }
  3945        printf("\n");
  3946        if (need_fflush)
  3947          fflush(nullptr);
  3948      }
  3949    }
  3950  }
  3951  
  3952  static struct problem *problems_push(void) {
  3953    struct problem *p = problems_list;
  3954    problems_list = nullptr;
  3955    return p;
  3956  }
  3957  
  3958  static size_t problems_pop(struct problem *list) {
  3959    size_t count = 0;
  3960  
  3961    if (problems_list) {
  3962      int i;
  3963  
  3964      print(" - problems: ");
  3965      for (i = 0; problems_list; ++i) {
  3966        struct problem *p = problems_list->pr_next;
  3967        count += problems_list->count;
  3968        print("%s%s (%" PRIuPTR ")", i ? ", " : "", problems_list->caption,
  3969              problems_list->count);
  3970        osal_free(problems_list);
  3971        problems_list = p;
  3972      }
  3973      print("\n");
  3974      fflush(nullptr);
  3975    }
  3976  
  3977    problems_list = list;
  3978    return count;
  3979  }
  3980  
/* Page-walk callback: accounts one page (or a span of `pgnumber` pages
 * starting at `pgno`) to the database identified by `dbi_name_or_tag`,
 * marks the pages in walk.pagemap and reports inconsistencies through
 * problem_add().
 *
 * Returns:
 *   MDBX_CORRUPTED    when `deep` exceeds 42,
 *   MDBX_ENOMEM       when no statistics slot could be obtained,
 *   MDBX_RESULT_TRUE  when a branch page was already claimed,
 *   MDBX_SUCCESS      when a non-branch page was already claimed,
 *   otherwise the result of check_user_break().
 *
 * Every detected problem also bumps either gc_tree_problems or
 * data_tree_problems, depending on whether the tag is MDBX_PGWALK_GC.
 * NOTE(review): presumably registered with the library's page-walk API —
 * confirm at the call site. */
static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
                     void *const ctx, const int deep,
                     const char *const dbi_name_or_tag, const size_t page_size,
                     const MDBX_page_type_t pagetype, const MDBX_error_t err,
                     const size_t nentries, const size_t payload_bytes,
                     const size_t header_bytes, const size_t unused_bytes) {
  (void)ctx;
  const bool is_gc_tree = dbi_name_or_tag == MDBX_PGWALK_GC;
  if (deep > 42) {
    problem_add("deep", deep, "too large", nullptr);
    data_tree_problems += !is_gc_tree;
    gc_tree_problems += is_gc_tree;
    return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
  }

  walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name_or_tag, false);
  if (!dbi) {
    data_tree_problems += !is_gc_tree;
    gc_tree_problems += is_gc_tree;
    return MDBX_ENOMEM;
  }

  const size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
  walk.pgcount += pgnumber;

  /* Classify the page and update the per-type counters. */
  const char *pagetype_caption;
  bool branch = false;
  switch (pagetype) {
  default:
    problem_add("page", pgno, "unknown page-type", "type %u, deep %i",
                (unsigned)pagetype, deep);
    pagetype_caption = "unknown";
    dbi->pages.other += pgnumber;
    data_tree_problems += !is_gc_tree;
    gc_tree_problems += is_gc_tree;
    break;
  case MDBX_page_broken:
    pagetype_caption = "broken";
    dbi->pages.other += pgnumber;
    data_tree_problems += !is_gc_tree;
    gc_tree_problems += is_gc_tree;
    break;
  case MDBX_subpage_broken:
    pagetype_caption = "broken-subpage";
    data_tree_problems += !is_gc_tree;
    gc_tree_problems += is_gc_tree;
    break;
  case MDBX_page_meta:
    pagetype_caption = "meta";
    dbi->pages.other += pgnumber;
    break;
  case MDBX_page_large:
    pagetype_caption = "large";
    dbi->pages.large_volume += pgnumber;
    dbi->pages.large_count += 1;
    break;
  case MDBX_page_branch:
    pagetype_caption = "branch";
    dbi->pages.branch += pgnumber;
    branch = true;
    break;
  case MDBX_page_leaf:
    pagetype_caption = "leaf";
    dbi->pages.leaf += pgnumber;
    break;
  case MDBX_page_dupfixed_leaf:
    pagetype_caption = "leaf-dupfixed";
    dbi->pages.leaf_dupfixed += pgnumber;
    break;
  case MDBX_subpage_leaf:
    pagetype_caption = "subleaf-dupsort";
    dbi->pages.subleaf_dupsort += 1;
    break;
  case MDBX_subpage_dupfixed_leaf:
    pagetype_caption = "subleaf-dupfixed";
    dbi->pages.subleaf_dupfixed += 1;
    break;
  }

  /* A zero pgnumber means there are no pages of their own to mark. */
  if (pgnumber) {
    if (verbose > 3 && (!only_subdb || strcmp(only_subdb, dbi->name) == 0)) {
      if (pgnumber == 1)
        print("     %s-page %" PRIu64, pagetype_caption, pgno);
      else
        print("     %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber);
      print(" of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR
            ", unused %" PRIiPTR ", deep %i\n",
            dbi->name, header_bytes,
            (pagetype == MDBX_page_branch) ? "keys" : "entries", nentries,
            payload_bytes, unused_bytes, deep);
    }

    /* Claim every page of the span in the page map; an entry holds the
     * owner's index in walk.dbi plus one, zero means not claimed yet. */
    bool already_used = false;
    for (unsigned n = 0; n < pgnumber; ++n) {
      uint64_t spanpgno = pgno + n;
      if (spanpgno >= alloc_pages) {
        problem_add("page", spanpgno, "wrong page-no",
                    "%s-page: %" PRIu64 " > %" PRIu64 ", deep %i",
                    pagetype_caption, spanpgno, alloc_pages, deep);
        data_tree_problems += !is_gc_tree;
        gc_tree_problems += is_gc_tree;
      } else if (walk.pagemap[spanpgno]) {
        walk_dbi_t *coll_dbi = &walk.dbi[walk.pagemap[spanpgno] - 1];
        /* a branch page seen twice within the same database is a loop */
        problem_add("page", spanpgno,
                    (branch && coll_dbi == dbi) ? "loop" : "already used",
                    "%s-page: by %s, deep %i", pagetype_caption, coll_dbi->name,
                    deep);
        already_used = true;
        data_tree_problems += !is_gc_tree;
        gc_tree_problems += is_gc_tree;
      } else {
        walk.pagemap[spanpgno] = (short)(dbi - walk.dbi + 1);
        dbi->pages.total += 1;
      }
    }

    if (already_used)
      return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */
                    : MDBX_SUCCESS;
  }

  /* Validate the byte accounting reported for the page. */
  if (MDBX_IS_ERROR(err)) {
    problem_add("page", pgno, "invalid/corrupted", "%s-page", pagetype_caption);
    data_tree_problems += !is_gc_tree;
    gc_tree_problems += is_gc_tree;
  } else {
    if (unused_bytes > page_size) {
      problem_add("page", pgno, "illegal unused-bytes",
                  "%s-page: %u < %" PRIuPTR " < %u", pagetype_caption, 0,
                  unused_bytes, envinfo.mi_dxb_pagesize);
      data_tree_problems += !is_gc_tree;
      gc_tree_problems += is_gc_tree;
    }

    if (header_bytes < (int)sizeof(long) ||
        (size_t)header_bytes >= envinfo.mi_dxb_pagesize - sizeof(long)) {
      problem_add("page", pgno, "illegal header-length",
                  "%s-page: %" PRIuPTR " < %" PRIuPTR " < %" PRIuPTR,
                  pagetype_caption, sizeof(long), header_bytes,
                  envinfo.mi_dxb_pagesize - sizeof(long));
      data_tree_problems += !is_gc_tree;
      gc_tree_problems += is_gc_tree;
    }
    if (payload_bytes < 1) {
      if (nentries > 1) {
        problem_add("page", pgno, "zero size-of-entry",
                    "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries",
                    pagetype_caption, payload_bytes, nentries);
        /* if ((size_t)header_bytes + unused_bytes < page_size) {
          // LY: hush a misuse error
          page_bytes = page_size;
        } */
        data_tree_problems += !is_gc_tree;
        gc_tree_problems += is_gc_tree;
      } else {
        problem_add("page", pgno, "empty",
                    "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR
                    " entries, deep %i",
                    pagetype_caption, payload_bytes, nentries, deep);
        dbi->pages.empty += 1;
        data_tree_problems += !is_gc_tree;
        gc_tree_problems += is_gc_tree;
      }
    }

    if (pgnumber) {
      /* header + payload + unused must add up to the reported page size */
      if (page_bytes != page_size) {
        problem_add("page", pgno, "misused",
                    "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR
                    "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i",
                    pagetype_caption, page_size, page_bytes, header_bytes,
                    payload_bytes, unused_bytes, deep);
        if (page_size > page_bytes)
          dbi->lost_bytes += page_size - page_bytes;
        data_tree_problems += !is_gc_tree;
        gc_tree_problems += is_gc_tree;
      } else {
        dbi->payload_bytes += payload_bytes + header_bytes;
        walk.total_payload_bytes += payload_bytes + header_bytes;
      }
    }
  }

  return check_user_break();
}
  4166  
/* Signature of a per-record handler: receives the record number together
 * with the record's key and data, and returns an int status
 * (handle_userdb() returns the result of check_user_break()). */
typedef int(visitor)(const uint64_t record_number, const MDBX_val *key,
                     const MDBX_val *data);
/* Forward declaration; the definition appears later in the file.
 * NOTE(review): presumably iterates the given database and calls `handler`
 * for each record — confirm against the definition. */
static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
                      bool silent);
  4171  
  4172  static int handle_userdb(const uint64_t record_number, const MDBX_val *key,
  4173                           const MDBX_val *data) {
  4174    (void)record_number;
  4175    (void)key;
  4176    (void)data;
  4177    return check_user_break();
  4178  }
  4179  
  4180  static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
  4181                           const MDBX_val *data) {
  4182    char *bad = "";
  4183    pgno_t *iptr = data->iov_base;
  4184  
  4185    if (key->iov_len != sizeof(txnid_t))
  4186      problem_add("entry", record_number, "wrong txn-id size",
  4187                  "key-size %" PRIiPTR, key->iov_len);
  4188    else {
  4189      txnid_t txnid;
  4190      memcpy(&txnid, key->iov_base, sizeof(txnid));
  4191      if (txnid < 1 || txnid > envinfo.mi_recent_txnid)
  4192        problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid);
  4193      else {
  4194        if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t))
  4195          problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR,
  4196                      data->iov_len);
  4197        size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0;
  4198        if (number < 1 || number > MDBX_PGL_LIMIT)
  4199          problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number);
  4200        else if ((number + 1) * sizeof(pgno_t) > data->iov_len) {
  4201          problem_add("entry", txnid, "trimmed idl",
  4202                      "%" PRIuSIZE " > %" PRIuSIZE " (corruption)",
  4203                      (number + 1) * sizeof(pgno_t), data->iov_len);
  4204          number = data->iov_len / sizeof(pgno_t) - 1;
  4205        } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >=
  4206                   /* LY: allow gap up to one page. it is ok
  4207                    * and better than shink-and-retry inside update_gc() */
  4208                   envinfo.mi_dxb_pagesize)
  4209          problem_add("entry", txnid, "extra idl space",
  4210                      "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)",
  4211                      (number + 1) * sizeof(pgno_t), data->iov_len);
  4212  
  4213        gc_pages += number;
  4214        if (envinfo.mi_latter_reader_txnid > txnid)
  4215          reclaimable_pages += number;
  4216  
  4217        pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno;
  4218        pgno_t span = 1;
  4219        for (unsigned i = 0; i < number; ++i) {
  4220          if (check_user_break())
  4221            return MDBX_EINTR;
  4222          const pgno_t pgno = iptr[i];
  4223          if (pgno < NUM_METAS)
  4224            problem_add("entry", txnid, "wrong idl entry",
  4225                        "pgno %" PRIaPGNO " < meta-pages %u", pgno, NUM_METAS);
  4226          else if (pgno >= backed_pages)
  4227            problem_add("entry", txnid, "wrong idl entry",
  4228                        "pgno %" PRIaPGNO " > backed-pages %" PRIu64, pgno,
  4229                        backed_pages);
  4230          else if (pgno >= alloc_pages)
  4231            problem_add("entry", txnid, "wrong idl entry",
  4232                        "pgno %" PRIaPGNO " > alloc-pages %" PRIu64, pgno,
  4233                        alloc_pages - 1);
  4234          else {
  4235            if (MDBX_PNL_DISORDERED(prev, pgno)) {
  4236              bad = " [bad sequence]";
  4237              problem_add("entry", txnid, "bad sequence",
  4238                          "%" PRIaPGNO " %c [%u].%" PRIaPGNO, prev,
  4239                          (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'),
  4240                          i, pgno);
  4241            }
  4242            if (walk.pagemap) {
  4243              int idx = walk.pagemap[pgno];
  4244              if (idx == 0)
  4245                walk.pagemap[pgno] = -1;
  4246              else if (idx > 0)
  4247                problem_add("page", pgno, "already used", "by %s",
  4248                            walk.dbi[idx - 1].name);
  4249              else
  4250                problem_add("page", pgno, "already listed in GC", nullptr);
  4251            }
  4252          }
  4253          prev = pgno;
  4254          while (i + span < number &&
  4255                 iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
  4256                                                       : pgno_sub(pgno, span)))
  4257            ++span;
  4258        }
  4259        if (verbose > 3 && !only_subdb) {
  4260          print("     transaction %" PRIaTXN ", %" PRIuPTR
  4261                " pages, maxspan %" PRIaPGNO "%s\n",
  4262                txnid, number, span, bad);
  4263          if (verbose > 4) {
  4264            for (unsigned i = 0; i < number; i += span) {
  4265              const pgno_t pgno = iptr[i];
  4266              for (span = 1;
  4267                   i + span < number &&
  4268                   iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
  4269                                                         : pgno_sub(pgno, span));
  4270                   ++span)
  4271                ;
  4272              if (span > 1) {
  4273                print("    %9" PRIaPGNO "[%" PRIaPGNO "]\n", pgno, span);
  4274              } else
  4275                print("    %9" PRIaPGNO "\n", pgno);
  4276            }
  4277          }
  4278        }
  4279      }
  4280    }
  4281  
  4282    return check_user_break();
  4283  }
  4284  
  4285  static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) {
  4286    return (a->iov_len == b->iov_len &&
  4287            memcmp(a->iov_base, b->iov_base, a->iov_len) == 0)
  4288               ? 0
  4289               : 1;
  4290  }
  4291  
  4292  static int handle_maindb(const uint64_t record_number, const MDBX_val *key,
  4293                           const MDBX_val *data) {
  4294    char *name;
  4295    int rc;
  4296    size_t i;
  4297  
  4298    name = key->iov_base;
  4299    for (i = 0; i < key->iov_len; ++i) {
  4300      if (name[i] < ' ')
  4301        return handle_userdb(record_number, key, data);
  4302    }
  4303  
  4304    name = osal_malloc(key->iov_len + 1);
  4305    if (unlikely(!name))
  4306      return MDBX_ENOMEM;
  4307    memcpy(name, key->iov_base, key->iov_len);
  4308    name[key->iov_len] = '\0';
  4309    userdb_count++;
  4310  
  4311    rc = process_db(~0u, name, handle_userdb, false);
  4312    osal_free(name);
  4313    if (rc != MDBX_INCOMPATIBLE)
  4314      return rc;
  4315  
  4316    return handle_userdb(record_number, key, data);
  4317  }
  4318  
  4319  static const char *db_flags2keymode(unsigned flags) {
  4320    flags &= (MDBX_REVERSEKEY | MDBX_INTEGERKEY);
  4321    switch (flags) {
  4322    case 0:
  4323      return "usual";
  4324    case MDBX_REVERSEKEY:
  4325      return "reserve";
  4326    case MDBX_INTEGERKEY:
  4327      return "ordinal";
  4328    case MDBX_REVERSEKEY | MDBX_INTEGERKEY:
  4329      return "msgpack";
  4330    default:
  4331      assert(false);
  4332      __unreachable();
  4333    }
  4334  }
  4335  
  4336  static const char *db_flags2valuemode(unsigned flags) {
  4337    flags &= (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP);
  4338    switch (flags) {
  4339    case 0:
  4340      return "single";
  4341    case MDBX_DUPSORT:
  4342      return "multi";
  4343    case MDBX_REVERSEDUP:
  4344    case MDBX_DUPSORT | MDBX_REVERSEDUP:
  4345      return "multi-reverse";
  4346    case MDBX_DUPFIXED:
  4347    case MDBX_DUPSORT | MDBX_DUPFIXED:
  4348      return "multi-samelength";
  4349    case MDBX_DUPFIXED | MDBX_REVERSEDUP:
  4350    case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP:
  4351      return "multi-reverse-samelength";
  4352    case MDBX_INTEGERDUP:
  4353    case MDBX_DUPSORT | MDBX_INTEGERDUP:
  4354    case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP:
  4355    case MDBX_DUPFIXED | MDBX_INTEGERDUP:
  4356      return "multi-ordinal";
  4357    case MDBX_INTEGERDUP | MDBX_REVERSEDUP:
  4358    case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
  4359      return "multi-msgpack";
  4360    case MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
  4361    case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
  4362      return "reserved";
  4363    default:
  4364      assert(false);
  4365      __unreachable();
  4366    }
  4367  }
  4368  
  4369  static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
  4370                        bool silent) {
  4371    MDBX_cursor *mc;
  4372    MDBX_stat ms;
  4373    MDBX_val key, data;
  4374    MDBX_val prev_key, prev_data;
  4375    unsigned flags;
  4376    int rc, i;
  4377    struct problem *saved_list;
  4378    uint64_t problems_count;
  4379  
  4380    uint64_t record_count = 0, dups = 0;
  4381    uint64_t key_bytes = 0, data_bytes = 0;
  4382  
  4383    if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & mdbx_txn_flags(txn)) {
  4384      print(" ! abort processing '%s' due to a previous error\n",
  4385            dbi_name ? dbi_name : "@MAIN");
  4386      return MDBX_BAD_TXN;
  4387    }
  4388  
  4389    if (dbi_handle == ~0u) {
  4390      rc = mdbx_dbi_open_ex(
  4391          txn, dbi_name, MDBX_DB_ACCEDE, &dbi_handle,
  4392          (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr,
  4393          (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr);
  4394      if (rc) {
  4395        if (!dbi_name ||
  4396            rc !=
  4397                MDBX_INCOMPATIBLE) /* LY: mainDB's record is not a user's DB. */ {
  4398          error("mdbx_dbi_open('%s') failed, error %d %s\n",
  4399                dbi_name ? dbi_name : "main", rc, mdbx_strerror(rc));
  4400        }
  4401        return rc;
  4402      }
  4403    }
  4404  
  4405    if (dbi_handle >= CORE_DBS && dbi_name && only_subdb &&
  4406        strcmp(only_subdb, dbi_name) != 0) {
  4407      if (verbose) {
  4408        print("Skip processing '%s'...\n", dbi_name);
  4409        fflush(nullptr);
  4410      }
  4411      skipped_subdb++;
  4412      return MDBX_SUCCESS;
  4413    }
  4414  
  4415    if (!silent && verbose) {
  4416      print("Processing '%s'...\n", dbi_name ? dbi_name : "@MAIN");
  4417      fflush(nullptr);
  4418    }
  4419  
  4420    rc = mdbx_dbi_flags(txn, dbi_handle, &flags);
  4421    if (rc) {
  4422      error("mdbx_dbi_flags() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4423      return rc;
  4424    }
  4425  
  4426    rc = mdbx_dbi_stat(txn, dbi_handle, &ms, sizeof(ms));
  4427    if (rc) {
  4428      error("mdbx_dbi_stat() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4429      return rc;
  4430    }
  4431  
  4432    if (!silent && verbose) {
  4433      print(" - key-value kind: %s-key => %s-value", db_flags2keymode(flags),
  4434            db_flags2valuemode(flags));
  4435      if (verbose > 1) {
  4436        print(", flags:");
  4437        if (!flags)
  4438          print(" none");
  4439        else {
  4440          for (i = 0; dbflags[i].bit; i++)
  4441            if (flags & dbflags[i].bit)
  4442              print(" %s", dbflags[i].name);
  4443        }
  4444        if (verbose > 2)
  4445          print(" (0x%02X), dbi-id %d", flags, dbi_handle);
  4446      }
  4447      print("\n");
  4448      if (ms.ms_mod_txnid)
  4449        print(" - last modification txn#%" PRIu64 "\n", ms.ms_mod_txnid);
  4450      if (verbose > 1) {
  4451        print(" - page size %u, entries %" PRIu64 "\n", ms.ms_psize,
  4452              ms.ms_entries);
  4453        print(" - b-tree depth %u, pages: branch %" PRIu64 ", leaf %" PRIu64
  4454              ", overflow %" PRIu64 "\n",
  4455              ms.ms_depth, ms.ms_branch_pages, ms.ms_leaf_pages,
  4456              ms.ms_overflow_pages);
  4457      }
  4458    }
  4459  
  4460    walk_dbi_t *dbi = (dbi_handle < CORE_DBS)
  4461                          ? &walk.dbi[dbi_handle]
  4462                          : pagemap_lookup_dbi(dbi_name, true);
  4463    if (!dbi) {
  4464      error("too many DBIs or out of memory\n");
  4465      return MDBX_ENOMEM;
  4466    }
  4467    if (!dont_traversal) {
  4468      const uint64_t subtotal_pages =
  4469          ms.ms_branch_pages + ms.ms_leaf_pages + ms.ms_overflow_pages;
  4470      if (subtotal_pages != dbi->pages.total)
  4471        error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
  4472              "subtotal", subtotal_pages, dbi->pages.total);
  4473      if (ms.ms_branch_pages != dbi->pages.branch)
  4474        error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "branch",
  4475              ms.ms_branch_pages, dbi->pages.branch);
  4476      const uint64_t allleaf_pages = dbi->pages.leaf + dbi->pages.leaf_dupfixed;
  4477      if (ms.ms_leaf_pages != allleaf_pages)
  4478        error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
  4479              "all-leaf", ms.ms_leaf_pages, allleaf_pages);
  4480      if (ms.ms_overflow_pages != dbi->pages.large_volume)
  4481        error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
  4482              "large/overlow", ms.ms_overflow_pages, dbi->pages.large_volume);
  4483    }
  4484    rc = mdbx_cursor_open(txn, dbi_handle, &mc);
  4485    if (rc) {
  4486      error("mdbx_cursor_open() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4487      return rc;
  4488    }
  4489  
  4490    if (ignore_wrong_order) { /* for debugging with enabled assertions */
  4491      mc->mc_checking |= CC_SKIPORD;
  4492      if (mc->mc_xcursor)
  4493        mc->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD;
  4494    }
  4495  
  4496    const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, flags);
  4497    saved_list = problems_push();
  4498    prev_key.iov_base = nullptr;
  4499    prev_key.iov_len = 0;
  4500    prev_data.iov_base = nullptr;
  4501    prev_data.iov_len = 0;
  4502    rc = mdbx_cursor_get(mc, &key, &data, MDBX_FIRST);
  4503    while (rc == MDBX_SUCCESS) {
  4504      rc = check_user_break();
  4505      if (rc)
  4506        goto bailout;
  4507  
  4508      bool bad_key = false;
  4509      if (key.iov_len > maxkeysize) {
  4510        problem_add("entry", record_count, "key length exceeds max-key-size",
  4511                    "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize);
  4512        bad_key = true;
  4513      } else if ((flags & MDBX_INTEGERKEY) && key.iov_len != sizeof(uint64_t) &&
  4514                 key.iov_len != sizeof(uint32_t)) {
  4515        problem_add("entry", record_count, "wrong key length",
  4516                    "%" PRIuPTR " != 4or8", key.iov_len);
  4517        bad_key = true;
  4518      }
  4519  
  4520      bool bad_data = false;
  4521      if ((flags & MDBX_INTEGERDUP) && data.iov_len != sizeof(uint64_t) &&
  4522          data.iov_len != sizeof(uint32_t)) {
  4523        problem_add("entry", record_count, "wrong data length",
  4524                    "%" PRIuPTR " != 4or8", data.iov_len);
  4525        bad_data = true;
  4526      }
  4527  
  4528      if (prev_key.iov_base) {
  4529        if (prev_data.iov_base && !bad_data && (flags & MDBX_DUPFIXED) &&
  4530            prev_data.iov_len != data.iov_len) {
  4531          problem_add("entry", record_count, "different data length",
  4532                      "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len,
  4533                      data.iov_len);
  4534          bad_data = true;
  4535        }
  4536  
  4537        if (!bad_key) {
  4538          int cmp = mdbx_cmp(txn, dbi_handle, &key, &prev_key);
  4539          if (cmp == 0) {
  4540            ++dups;
  4541            if ((flags & MDBX_DUPSORT) == 0) {
  4542              problem_add("entry", record_count, "duplicated entries", nullptr);
  4543              if (prev_data.iov_base && data.iov_len == prev_data.iov_len &&
  4544                  memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) {
  4545                problem_add("entry", record_count, "complete duplicate", nullptr);
  4546              }
  4547            } else if (!bad_data && prev_data.iov_base) {
  4548              cmp = mdbx_dcmp(txn, dbi_handle, &data, &prev_data);
  4549              if (cmp == 0) {
  4550                problem_add("entry", record_count, "complete duplicate", nullptr);
  4551              } else if (cmp < 0 && !ignore_wrong_order) {
  4552                problem_add("entry", record_count, "wrong order of multi-values",
  4553                            nullptr);
  4554              }
  4555            }
  4556          } else if (cmp < 0 && !ignore_wrong_order) {
  4557            problem_add("entry", record_count, "wrong order of entries", nullptr);
  4558          }
  4559        }
  4560      }
  4561  
  4562      if (handler) {
  4563        rc = handler(record_count, &key, &data);
  4564        if (MDBX_IS_ERROR(rc))
  4565          goto bailout;
  4566      }
  4567  
  4568      record_count++;
  4569      key_bytes += key.iov_len;
  4570      data_bytes += data.iov_len;
  4571  
  4572      if (!bad_key) {
  4573        if (verbose && (flags & MDBX_INTEGERKEY) && !prev_key.iov_base)
  4574          print(" - fixed key-size %" PRIuPTR "\n", key.iov_len);
  4575        prev_key = key;
  4576      }
  4577      if (!bad_data) {
  4578        if (verbose && (flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) &&
  4579            !prev_data.iov_base)
  4580          print(" - fixed data-size %" PRIuPTR "\n", data.iov_len);
  4581        prev_data = data;
  4582      }
  4583      rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT);
  4584    }
  4585    if (rc != MDBX_NOTFOUND)
  4586      error("mdbx_cursor_get() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4587    else
  4588      rc = 0;
  4589  
  4590    if (record_count != ms.ms_entries)
  4591      problem_add("entry", record_count, "different number of entries",
  4592                  "%" PRIu64 " != %" PRIu64, record_count, ms.ms_entries);
  4593  bailout:
  4594    problems_count = problems_pop(saved_list);
  4595    if (!silent && verbose) {
  4596      print(" - summary: %" PRIu64 " records, %" PRIu64 " dups, %" PRIu64
  4597            " key's bytes, %" PRIu64 " data's "
  4598            "bytes, %" PRIu64 " problems\n",
  4599            record_count, dups, key_bytes, data_bytes, problems_count);
  4600      fflush(nullptr);
  4601    }
  4602  
  4603    mdbx_cursor_close(mc);
  4604    return (rc || problems_count) ? MDBX_RESULT_TRUE : MDBX_SUCCESS;
  4605  }
  4606  
  4607  static void usage(char *prog) {
  4608    fprintf(stderr,
  4609            "usage: %s [-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] "
  4610            "dbpath\n"
  4611            "  -V\t\tprint version and exit\n"
  4612            "  -v\t\tmore verbose, could be used multiple times\n"
  4613            "  -q\t\tbe quiet\n"
  4614            "  -c\t\tforce cooperative mode (don't try exclusive)\n"
  4615            "  -w\t\twrite-mode checking\n"
  4616            "  -d\t\tdisable page-by-page traversal of B-tree\n"
  4617            "  -i\t\tignore wrong order errors (for custom comparators case)\n"
  4618            "  -s subdb\tprocess a specific subdatabase only\n"
  4619            "  -0|1|2\tforce using specific meta-page 0, or 2 for checking\n"
  4620            "  -t\t\tturn to a specified meta-page on successful check\n"
  4621            "  -T\t\tturn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!\n",
  4622            prog);
  4623    exit(EXIT_INTERRUPTED);
  4624  }
  4625  
  4626  static bool meta_ot(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b,
  4627                      uint64_t sign_b, const bool wanna_steady) {
  4628    if (txn_a == txn_b)
  4629      return SIGN_IS_STEADY(sign_b);
  4630  
  4631    if (wanna_steady && SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b))
  4632      return SIGN_IS_STEADY(sign_b);
  4633  
  4634    return txn_a < txn_b;
  4635  }
  4636  
  4637  static bool meta_eq(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b,
  4638                      uint64_t sign_b) {
  4639    if (!txn_a || txn_a != txn_b)
  4640      return false;
  4641  
  4642    if (SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b))
  4643      return false;
  4644  
  4645    return true;
  4646  }
  4647  
  4648  static int meta_recent(const bool wanna_steady) {
  4649    if (meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
  4650                envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady))
  4651      return meta_ot(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign,
  4652                     envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady)
  4653                 ? 1
  4654                 : 2;
  4655    else
  4656      return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
  4657                     envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, wanna_steady)
  4658                 ? 2
  4659                 : 0;
  4660  }
  4661  
  4662  static int meta_tail(int head) {
  4663    switch (head) {
  4664    case 0:
  4665      return meta_ot(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign,
  4666                     envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true)
  4667                 ? 1
  4668                 : 2;
  4669    case 1:
  4670      return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
  4671                     envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true)
  4672                 ? 0
  4673                 : 2;
  4674    case 2:
  4675      return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
  4676                     envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, true)
  4677                 ? 0
  4678                 : 1;
  4679    default:
  4680      assert(false);
  4681      return -1;
  4682    }
  4683  }
  4684  
  4685  static int meta_head(void) { return meta_recent(false); }
  4686  
  4687  void verbose_meta(int num, txnid_t txnid, uint64_t sign, uint64_t bootid_x,
  4688                    uint64_t bootid_y) {
  4689    const bool have_bootid = (bootid_x | bootid_y) != 0;
  4690    const bool bootid_match = bootid_x == envinfo.mi_bootid.current.x &&
  4691                              bootid_y == envinfo.mi_bootid.current.y;
  4692  
  4693    print(" - meta-%d: ", num);
  4694    switch (sign) {
  4695    case MDBX_DATASIGN_NONE:
  4696      print("no-sync/legacy");
  4697      break;
  4698    case MDBX_DATASIGN_WEAK:
  4699      print("weak-%s", bootid_match ? (have_bootid ? "intact (same boot-id)"
  4700                                                   : "unknown (no boot-id")
  4701                                    : "dead");
  4702      break;
  4703    default:
  4704      print("steady");
  4705      break;
  4706    }
  4707    print(" txn#%" PRIu64, txnid);
  4708  
  4709    const int head = meta_head();
  4710    if (num == head)
  4711      print(", head");
  4712    else if (num == meta_tail(head))
  4713      print(", tail");
  4714    else
  4715      print(", stay");
  4716  
  4717    if (stuck_meta >= 0) {
  4718      if (num == stuck_meta)
  4719        print(", forced for checking");
  4720    } else if (txnid > envinfo.mi_recent_txnid &&
  4721               (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)
  4722      print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")",
  4723            txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid);
  4724    print("\n");
  4725  }
  4726  
  4727  static uint64_t get_meta_txnid(const unsigned meta_id) {
  4728    switch (meta_id) {
  4729    default:
  4730      assert(false);
  4731      error("unexpected meta_id %u\n", meta_id);
  4732      return 0;
  4733    case 0:
  4734      return envinfo.mi_meta0_txnid;
  4735    case 1:
  4736      return envinfo.mi_meta1_txnid;
  4737    case 2:
  4738      return envinfo.mi_meta2_txnid;
  4739    }
  4740  }
  4741  
  4742  static void print_size(const char *prefix, const uint64_t value,
  4743                         const char *suffix) {
  4744    const char sf[] =
  4745        "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */
  4746    double k = 1024.0;
  4747    size_t i;
  4748    for (i = 0; sf[i + 1] && value / k > 1000.0; ++i)
  4749      k *= 1024;
  4750    print("%s%" PRIu64 " (%.2f %cb)%s", prefix, value, value / k, sf[i], suffix);
  4751  }
  4752  
  4753  int mdbx_chk(int argc, char *argv[]) {
  4754    int rc;
  4755    char *prog = argv[0];
  4756    char *envname;
  4757    unsigned problems_maindb = 0, problems_freedb = 0, problems_meta = 0;
  4758    bool write_locked = false;
  4759    bool turn_meta = false;
  4760    bool force_turn_meta = false;
  4761  
  4762    double elapsed;
  4763  #if defined(_WIN32) || defined(_WIN64)
  4764    uint64_t timestamp_start, timestamp_finish;
  4765    timestamp_start = GetMilliseconds();
  4766  #else
  4767    struct timespec timestamp_start, timestamp_finish;
  4768    if (clock_gettime(CLOCK_MONOTONIC, &timestamp_start)) {
  4769      rc = errno;
  4770      error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4771      return EXIT_FAILURE_SYS;
  4772    }
  4773  #endif
  4774  
  4775    dbi_meta.name = "@META";
  4776    dbi_free.name = "@GC";
  4777    dbi_main.name = "@MAIN";
  4778    atexit(pagemap_cleanup);
  4779  
  4780    if (argc < 2)
  4781      usage(prog);
  4782  
  4783    for (int i; (i = getopt(argc, argv,
  4784                            "0"
  4785                            "1"
  4786                            "2"
  4787                            "T"
  4788                            "V"
  4789                            "v"
  4790                            "q"
  4791                            "n"
  4792                            "w"
  4793                            "c"
  4794                            "t"
  4795                            "d"
  4796                            "i"
  4797                            "s:")) != EOF;) {
  4798      switch (i) {
  4799      case 'V':
  4800        printf("mdbx_chk version %d.%d.%d.%d\n"
  4801               " - source: %s %s, commit %s, tree %s\n"
  4802               " - anchor: %s\n"
  4803               " - build: %s for %s by %s\n"
  4804               " - flags: %s\n"
  4805               " - options: %s\n",
  4806               mdbx_version.major, mdbx_version.minor, mdbx_version.release,
  4807               mdbx_version.revision, mdbx_version.git.describe,
  4808               mdbx_version.git.datetime, mdbx_version.git.commit,
  4809               mdbx_version.git.tree, mdbx_sourcery_anchor, mdbx_build.datetime,
  4810               mdbx_build.target, mdbx_build.compiler, mdbx_build.flags,
  4811               mdbx_build.options);
  4812        return EXIT_SUCCESS;
  4813      case 'v':
  4814        verbose++;
  4815        break;
  4816      case '0':
  4817        stuck_meta = 0;
  4818        break;
  4819      case '1':
  4820        stuck_meta = 1;
  4821        break;
  4822      case '2':
  4823        stuck_meta = 2;
  4824        break;
  4825      case 't':
  4826        turn_meta = true;
  4827        break;
  4828      case 'T':
  4829        turn_meta = force_turn_meta = true;
  4830        quiet = false;
  4831        if (verbose < 2)
  4832          verbose = 2;
  4833        break;
  4834      case 'q':
  4835        quiet = true;
  4836        break;
  4837      case 'n':
  4838        break;
  4839      case 'w':
  4840        envflags &= ~MDBX_RDONLY;
  4841  #if MDBX_MMAP_INCOHERENT_FILE_WRITE
  4842        /* Temporary `workaround` for OpenBSD kernel's flaw.
  4843         * See todo4recovery://erased_by_github/libmdbx/issues/67 */
  4844        envflags |= MDBX_WRITEMAP;
  4845  #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  4846        break;
  4847      case 'c':
  4848        envflags = (envflags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE;
  4849        break;
  4850      case 'd':
  4851        dont_traversal = true;
  4852        break;
  4853      case 's':
  4854        if (only_subdb && strcmp(only_subdb, optarg))
  4855          usage(prog);
  4856        only_subdb = optarg;
  4857        break;
  4858      case 'i':
  4859        ignore_wrong_order = true;
  4860        break;
  4861      default:
  4862        usage(prog);
  4863      }
  4864    }
  4865  
  4866    if (optind != argc - 1)
  4867      usage(prog);
  4868  
  4869    rc = MDBX_SUCCESS;
  4870    if (stuck_meta >= 0 && (envflags & MDBX_EXCLUSIVE) == 0) {
  4871      error("exclusive mode is required to using specific meta-page(%d) for "
  4872            "checking.\n",
  4873            stuck_meta);
  4874      rc = EXIT_INTERRUPTED;
  4875    }
  4876    if (turn_meta) {
  4877      if (stuck_meta < 0) {
  4878        error("meta-page must be specified (by -0, -1 or -2 options) to turn to "
  4879              "it.\n");
  4880        rc = EXIT_INTERRUPTED;
  4881      }
  4882      if (envflags & MDBX_RDONLY) {
  4883        error("write-mode must be enabled to turn to the specified meta-page.\n");
  4884        rc = EXIT_INTERRUPTED;
  4885      }
  4886      if (only_subdb || dont_traversal) {
  4887        error("whole database checking with tree-traversal are required to turn "
  4888              "to the specified meta-page.\n");
  4889        rc = EXIT_INTERRUPTED;
  4890      }
  4891    }
  4892    if (rc)
  4893      exit(rc);
  4894  
  4895  #if defined(_WIN32) || defined(_WIN64)
  4896    SetConsoleCtrlHandler(ConsoleBreakHandlerRoutine, true);
  4897  #else
  4898  #ifdef SIGPIPE
  4899    signal(SIGPIPE, signal_handler);
  4900  #endif
  4901  #ifdef SIGHUP
  4902    signal(SIGHUP, signal_handler);
  4903  #endif
  4904    signal(SIGINT, signal_handler);
  4905    signal(SIGTERM, signal_handler);
  4906  #endif /* !WINDOWS */
  4907  
  4908    envname = argv[optind];
  4909    print("mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...\n",
  4910          mdbx_version.git.describe, mdbx_version.git.datetime,
  4911          mdbx_version.git.tree, envname,
  4912          (envflags & MDBX_RDONLY) ? "only" : "write");
  4913    fflush(nullptr);
  4914    mdbx_setup_debug((verbose < MDBX_LOG_TRACE - 1)
  4915                         ? (MDBX_log_level_t)(verbose + 1)
  4916                         : MDBX_LOG_TRACE,
  4917                     MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT |
  4918                         MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE,
  4919                     logger);
  4920  
  4921    rc = mdbx_env_create(&env);
  4922    if (rc) {
  4923      error("mdbx_env_create() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4924      return rc < 0 ? EXIT_FAILURE_MDBX : EXIT_FAILURE_SYS;
  4925    }
  4926  
  4927    rc = mdbx_env_set_maxdbs(env, MDBX_MAX_DBI);
  4928    if (rc) {
  4929      error("mdbx_env_set_maxdbs() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4930      goto bailout;
  4931    }
  4932  
  4933    if (stuck_meta >= 0) {
  4934      rc = mdbx_env_open_for_recovery(env, envname, stuck_meta,
  4935                                      (envflags & MDBX_RDONLY) ? false : true);
  4936    } else {
  4937      rc = mdbx_env_open(env, envname, envflags, 0);
  4938      if ((envflags & MDBX_EXCLUSIVE) &&
  4939          (rc == MDBX_BUSY ||
  4940  #if defined(_WIN32) || defined(_WIN64)
  4941           rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION
  4942  #else
  4943           rc == EBUSY || rc == EAGAIN
  4944  #endif
  4945           )) {
  4946        envflags &= ~MDBX_EXCLUSIVE;
  4947        rc = mdbx_env_open(env, envname, envflags | MDBX_ACCEDE, 0);
  4948      }
  4949    }
  4950  
  4951    if (rc) {
  4952      error("mdbx_env_open() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4953      if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY))
  4954        print("Please run %s in the read-write mode (with '-w' option).\n", prog);
  4955      goto bailout;
  4956    }
  4957    if (verbose)
  4958      print(" - %s mode\n",
  4959            (envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative");
  4960  
  4961    if ((envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) {
  4962      rc = mdbx_txn_lock(env, false);
  4963      if (rc != MDBX_SUCCESS) {
  4964        error("mdbx_txn_lock() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4965        goto bailout;
  4966      }
  4967      write_locked = true;
  4968    }
  4969  
  4970    rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
  4971    if (rc) {
  4972      error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4973      goto bailout;
  4974    }
  4975  
  4976    rc = mdbx_env_info_ex(env, txn, &envinfo, sizeof(envinfo));
  4977    if (rc) {
  4978      error("mdbx_env_info_ex() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4979      goto bailout;
  4980    }
  4981    if (verbose) {
  4982      print(" - current boot-id ");
  4983      if (envinfo.mi_bootid.current.x | envinfo.mi_bootid.current.y)
  4984        print("%016" PRIx64 "-%016" PRIx64 "\n", envinfo.mi_bootid.current.x,
  4985              envinfo.mi_bootid.current.y);
  4986      else
  4987        print("unavailable\n");
  4988    }
  4989  
  4990    mdbx_filehandle_t dxb_fd;
  4991    rc = mdbx_env_get_fd(env, &dxb_fd);
  4992    if (rc) {
  4993      error("mdbx_env_get_fd() failed, error %d %s\n", rc, mdbx_strerror(rc));
  4994      goto bailout;
  4995    }
  4996  
  4997    uint64_t dxb_filesize = 0;
  4998  #if defined(_WIN32) || defined(_WIN64)
  4999    {
  5000      BY_HANDLE_FILE_INFORMATION info;
  5001      if (!GetFileInformationByHandle(dxb_fd, &info))
  5002        rc = GetLastError();
  5003      else
  5004        dxb_filesize = info.nFileSizeLow | (uint64_t)info.nFileSizeHigh << 32;
  5005    }
  5006  #else
  5007    {
  5008      struct stat st;
  5009      STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t),
  5010                        "libmdbx requires 64-bit file I/O on 64-bit systems");
  5011      if (fstat(dxb_fd, &st))
  5012        rc = errno;
  5013      else
  5014        dxb_filesize = st.st_size;
  5015    }
  5016  #endif
  5017    if (rc) {
  5018      error("osal_filesize() failed, error %d %s\n", rc, mdbx_strerror(rc));
  5019      goto bailout;
  5020    }
  5021  
  5022    errno = 0;
  5023    const uint64_t dxbfile_pages = dxb_filesize / envinfo.mi_dxb_pagesize;
  5024    alloc_pages = txn->mt_next_pgno;
  5025    backed_pages = envinfo.mi_geo.current / envinfo.mi_dxb_pagesize;
  5026    if (backed_pages > dxbfile_pages) {
  5027      print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n",
  5028            backed_pages, dxbfile_pages);
  5029      ++problems_meta;
  5030    }
  5031    if (dxbfile_pages < NUM_METAS)
  5032      print(" ! file-pages %" PRIu64 " < %u\n", dxbfile_pages, NUM_METAS);
  5033    if (backed_pages < NUM_METAS)
  5034      print(" ! backed-pages %" PRIu64 " < %u\n", backed_pages, NUM_METAS);
  5035    if (backed_pages < NUM_METAS || dxbfile_pages < NUM_METAS)
  5036      goto bailout;
  5037    if (backed_pages > MAX_PAGENO + 1) {
  5038      print(" ! backed-pages %" PRIu64 " > max-pages %" PRIaPGNO "\n",
  5039            backed_pages, MAX_PAGENO + 1);
  5040      ++problems_meta;
  5041      backed_pages = MAX_PAGENO + 1;
  5042    }
  5043  
  5044    if ((envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) {
  5045      if (backed_pages > dxbfile_pages) {
  5046        print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n",
  5047              backed_pages, dxbfile_pages);
  5048        ++problems_meta;
  5049        backed_pages = dxbfile_pages;
  5050      }
  5051      if (alloc_pages > backed_pages) {
  5052        print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n",
  5053              alloc_pages, backed_pages);
  5054        ++problems_meta;
  5055        alloc_pages = backed_pages;
  5056      }
  5057    } else {
  5058      /* LY: DB may be shrunk by the writer down to the allocated pages. */
  5059      if (alloc_pages > backed_pages) {
  5060        print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n",
  5061              alloc_pages, backed_pages);
  5062        ++problems_meta;
  5063        alloc_pages = backed_pages;
  5064      }
  5065      if (alloc_pages > dxbfile_pages) {
  5066        print(" ! alloc-pages %" PRIu64 " > file-pages %" PRIu64 "\n",
  5067              alloc_pages, dxbfile_pages);
  5068        ++problems_meta;
  5069        alloc_pages = dxbfile_pages;
  5070      }
  5071      if (backed_pages > dxbfile_pages)
  5072        backed_pages = dxbfile_pages;
  5073    }
  5074  
  5075    if (verbose) {
  5076      print(" - pagesize %u (%u system), max keysize %d..%d"
  5077            ", max readers %u\n",
  5078            envinfo.mi_dxb_pagesize, envinfo.mi_sys_pagesize,
  5079            mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT),
  5080            mdbx_env_get_maxkeysize_ex(env, 0), envinfo.mi_maxreaders);
  5081      print_size(" - mapsize ", envinfo.mi_mapsize, "\n");
  5082      if (envinfo.mi_geo.lower == envinfo.mi_geo.upper)
  5083        print_size(" - fixed datafile: ", envinfo.mi_geo.current, "");
  5084      else {
  5085        print_size(" - dynamic datafile: ", envinfo.mi_geo.lower, "");
  5086        print_size(" .. ", envinfo.mi_geo.upper, ", ");
  5087        print_size("+", envinfo.mi_geo.grow, ", ");
  5088        print_size("-", envinfo.mi_geo.shrink, "\n");
  5089        print_size(" - current datafile: ", envinfo.mi_geo.current, "");
  5090      }
  5091      printf(", %" PRIu64 " pages\n",
  5092             envinfo.mi_geo.current / envinfo.mi_dxb_pagesize);
  5093  #if defined(_WIN32) || defined(_WIN64)
  5094      if (envinfo.mi_geo.shrink && envinfo.mi_geo.current != envinfo.mi_geo.upper)
  5095        print(
  5096            "                     WARNING: Due Windows system limitations a "
  5097            "file couldn't\n                     be truncated while the database "
  5098            "is opened. So, the size\n                     database file "
  5099            "of may by large than the database itself,\n                     "
  5100            "until it will be closed or reopened in read-write mode.\n");
  5101  #endif
  5102      verbose_meta(0, envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
  5103                   envinfo.mi_bootid.meta0.x, envinfo.mi_bootid.meta0.y);
  5104      verbose_meta(1, envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign,
  5105                   envinfo.mi_bootid.meta1.x, envinfo.mi_bootid.meta1.y);
  5106      verbose_meta(2, envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign,
  5107                   envinfo.mi_bootid.meta2.x, envinfo.mi_bootid.meta2.y);
  5108    }
  5109  
  5110    if (stuck_meta >= 0) {
  5111      if (verbose) {
  5112        print(" - skip checking meta-pages since the %u"
  5113              " is selected for verification\n",
  5114              stuck_meta);
  5115        print(" - transactions: recent %" PRIu64
  5116              ", selected for verification %" PRIu64 ", lag %" PRIi64 "\n",
  5117              envinfo.mi_recent_txnid, get_meta_txnid(stuck_meta),
  5118              envinfo.mi_recent_txnid - get_meta_txnid(stuck_meta));
  5119      }
  5120    } else {
  5121      if (verbose > 1)
  5122        print(" - performs check for meta-pages clashes\n");
  5123      if (meta_eq(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
  5124                  envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign)) {
  5125        print(" ! meta-%d and meta-%d are clashed\n", 0, 1);
  5126        ++problems_meta;
  5127      }
  5128      if (meta_eq(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign,
  5129                  envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign)) {
  5130        print(" ! meta-%d and meta-%d are clashed\n", 1, 2);
  5131        ++problems_meta;
  5132      }
  5133      if (meta_eq(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign,
  5134                  envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign)) {
  5135        print(" ! meta-%d and meta-%d are clashed\n", 2, 0);
  5136        ++problems_meta;
  5137      }
  5138  
  5139      const unsigned steady_meta_id = meta_recent(true);
  5140      const uint64_t steady_meta_txnid = get_meta_txnid(steady_meta_id);
  5141      const unsigned weak_meta_id = meta_recent(false);
  5142      const uint64_t weak_meta_txnid = get_meta_txnid(weak_meta_id);
  5143      if (envflags & MDBX_EXCLUSIVE) {
  5144        if (verbose > 1)
  5145          print(" - performs full check recent-txn-id with meta-pages\n");
  5146        if (steady_meta_txnid != envinfo.mi_recent_txnid) {
  5147          print(" ! steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64
  5148                " != %" PRIi64 ")\n",
  5149                steady_meta_id, steady_meta_txnid, envinfo.mi_recent_txnid);
  5150          ++problems_meta;
  5151        }
  5152      } else if (write_locked) {
  5153        if (verbose > 1)
  5154          print(" - performs lite check recent-txn-id with meta-pages (not a "
  5155                "monopolistic mode)\n");
  5156        if (weak_meta_txnid != envinfo.mi_recent_txnid) {
  5157          print(" ! weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64
  5158                " != %" PRIi64 ")\n",
  5159                weak_meta_id, weak_meta_txnid, envinfo.mi_recent_txnid);
  5160          ++problems_meta;
  5161        }
  5162      } else if (verbose) {
  5163        print(" - skip check recent-txn-id with meta-pages (monopolistic or "
  5164              "read-write mode only)\n");
  5165      }
  5166      total_problems += problems_meta;
  5167  
  5168      if (verbose)
  5169        print(" - transactions: recent %" PRIu64 ", latter reader %" PRIu64
  5170              ", lag %" PRIi64 "\n",
  5171              envinfo.mi_recent_txnid, envinfo.mi_latter_reader_txnid,
  5172              envinfo.mi_recent_txnid - envinfo.mi_latter_reader_txnid);
  5173    }
  5174  
  5175    if (!dont_traversal) {
  5176      struct problem *saved_list;
  5177      size_t traversal_problems;
  5178      uint64_t empty_pages, lost_bytes;
  5179  
  5180      print("Traversal b-tree by txn#%" PRIaTXN "...\n", txn->mt_txnid);
  5181      fflush(nullptr);
  5182      walk.pagemap = osal_calloc((size_t)backed_pages, sizeof(*walk.pagemap));
  5183      if (!walk.pagemap) {
  5184        rc = errno ? errno : MDBX_ENOMEM;
  5185        error("calloc() failed, error %d %s\n", rc, mdbx_strerror(rc));
  5186        goto bailout;
  5187      }
  5188  
  5189      saved_list = problems_push();
  5190      rc = mdbx_env_pgwalk(txn, pgvisitor, nullptr,
  5191                           true /* always skip key ordering checking to avoid
  5192                                 MDBX_CORRUPTED when using custom comparators */);
  5193      traversal_problems = problems_pop(saved_list);
  5194  
  5195      if (rc) {
  5196        if (rc != MDBX_EINTR || !check_user_break())
  5197          error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc));
  5198        goto bailout;
  5199      }
  5200  
  5201      for (uint64_t n = 0; n < alloc_pages; ++n)
  5202        if (!walk.pagemap[n])
  5203          unused_pages += 1;
  5204  
  5205      empty_pages = lost_bytes = 0;
  5206      for (walk_dbi_t *dbi = &dbi_main; dbi < ARRAY_END(walk.dbi) && dbi->name;
  5207           ++dbi) {
  5208        empty_pages += dbi->pages.empty;
  5209        lost_bytes += dbi->lost_bytes;
  5210      }
  5211  
  5212      if (verbose) {
  5213        uint64_t total_page_bytes = walk.pgcount * envinfo.mi_dxb_pagesize;
  5214        print(" - pages: walked %" PRIu64 ", left/unused %" PRIu64 "\n",
  5215              walk.pgcount, unused_pages);
  5216        if (verbose > 1) {
  5217          for (walk_dbi_t *dbi = walk.dbi; dbi < ARRAY_END(walk.dbi) && dbi->name;
  5218               ++dbi) {
  5219            print("     %s: subtotal %" PRIu64, dbi->name, dbi->pages.total);
  5220            if (dbi->pages.other && dbi->pages.other != dbi->pages.total)
  5221              print(", other %" PRIu64, dbi->pages.other);
  5222            if (dbi->pages.branch)
  5223              print(", branch %" PRIu64, dbi->pages.branch);
  5224            if (dbi->pages.large_count)
  5225              print(", large %" PRIu64, dbi->pages.large_count);
  5226            uint64_t all_leaf = dbi->pages.leaf + dbi->pages.leaf_dupfixed;
  5227            if (all_leaf) {
  5228              print(", leaf %" PRIu64, all_leaf);
  5229              if (verbose > 2 &&
  5230                  (dbi->pages.subleaf_dupsort | dbi->pages.leaf_dupfixed |
  5231                   dbi->pages.subleaf_dupfixed))
  5232                print(" (usual %" PRIu64 ", sub-dupsort %" PRIu64
  5233                      ", dupfixed %" PRIu64 ", sub-dupfixed %" PRIu64 ")",
  5234                      dbi->pages.leaf, dbi->pages.subleaf_dupsort,
  5235                      dbi->pages.leaf_dupfixed, dbi->pages.subleaf_dupfixed);
  5236            }
  5237            print("\n");
  5238          }
  5239        }
  5240  
  5241        if (verbose > 1)
  5242          print(" - usage: total %" PRIu64 " bytes, payload %" PRIu64
  5243                " (%.1f%%), unused "
  5244                "%" PRIu64 " (%.1f%%)\n",
  5245                total_page_bytes, walk.total_payload_bytes,
  5246                walk.total_payload_bytes * 100.0 / total_page_bytes,
  5247                total_page_bytes - walk.total_payload_bytes,
  5248                (total_page_bytes - walk.total_payload_bytes) * 100.0 /
  5249                    total_page_bytes);
  5250        if (verbose > 2) {
  5251          for (walk_dbi_t *dbi = walk.dbi; dbi < ARRAY_END(walk.dbi) && dbi->name;
  5252               ++dbi)
  5253            if (dbi->pages.total) {
  5254              uint64_t dbi_bytes = dbi->pages.total * envinfo.mi_dxb_pagesize;
  5255              print("     %s: subtotal %" PRIu64 " bytes (%.1f%%),"
  5256                    " payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)",
  5257                    dbi->name, dbi_bytes, dbi_bytes * 100.0 / total_page_bytes,
  5258                    dbi->payload_bytes, dbi->payload_bytes * 100.0 / dbi_bytes,
  5259                    dbi_bytes - dbi->payload_bytes,
  5260                    (dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes);
  5261              if (dbi->pages.empty)
  5262                print(", %" PRIu64 " empty pages", dbi->pages.empty);
  5263              if (dbi->lost_bytes)
  5264                print(", %" PRIu64 " bytes lost", dbi->lost_bytes);
  5265              print("\n");
  5266            } else
  5267              print("     %s: empty\n", dbi->name);
  5268        }
  5269        print(" - summary: average fill %.1f%%",
  5270              walk.total_payload_bytes * 100.0 / total_page_bytes);
  5271        if (empty_pages)
  5272          print(", %" PRIu64 " empty pages", empty_pages);
  5273        if (lost_bytes)
  5274          print(", %" PRIu64 " bytes lost", lost_bytes);
  5275        print(", %" PRIuPTR " problems\n", traversal_problems);
  5276      }
  5277    } else if (verbose) {
  5278      print("Skipping b-tree walk...\n");
  5279      fflush(nullptr);
  5280    }
  5281  
  5282    if (!verbose)
  5283      print("Iterating DBIs...\n");
  5284    if (data_tree_problems) {
  5285      print("Skip processing %s since tree is corrupted (%u problems)\n", "@MAIN",
  5286            data_tree_problems);
  5287      problems_maindb = data_tree_problems;
  5288    } else
  5289      problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr, false);
  5290  
  5291    if (gc_tree_problems) {
  5292      print("Skip processing %s since tree is corrupted (%u problems)\n", "@GC",
  5293            gc_tree_problems);
  5294      problems_freedb = gc_tree_problems;
  5295    } else
  5296      problems_freedb = process_db(FREE_DBI, "@GC", handle_freedb, false);
  5297  
  5298    if (verbose) {
  5299      uint64_t value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize;
  5300      double percent = value / 100.0;
  5301      print(" - space: %" PRIu64 " total pages", value);
  5302      print(", backed %" PRIu64 " (%.1f%%)", backed_pages,
  5303            backed_pages / percent);
  5304      print(", allocated %" PRIu64 " (%.1f%%)", alloc_pages,
  5305            alloc_pages / percent);
  5306  
  5307      if (verbose > 1) {
  5308        value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages;
  5309        print(", remained %" PRIu64 " (%.1f%%)", value, value / percent);
  5310  
  5311        value = dont_traversal ? alloc_pages - gc_pages : walk.pgcount;
  5312        print(", used %" PRIu64 " (%.1f%%)", value, value / percent);
  5313  
  5314        print(", gc %" PRIu64 " (%.1f%%)", gc_pages, gc_pages / percent);
  5315  
  5316        value = gc_pages - reclaimable_pages;
  5317        print(", detained %" PRIu64 " (%.1f%%)", value, value / percent);
  5318  
  5319        print(", reclaimable %" PRIu64 " (%.1f%%)", reclaimable_pages,
  5320              reclaimable_pages / percent);
  5321      }
  5322  
  5323      value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages +
  5324              reclaimable_pages;
  5325      print(", available %" PRIu64 " (%.1f%%)\n", value, value / percent);
  5326    }
  5327  
  5328    if (problems_maindb == 0 && problems_freedb == 0) {
  5329      if (!dont_traversal &&
  5330          (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) {
  5331        if (walk.pgcount != alloc_pages - gc_pages) {
  5332          error("used pages mismatch (%" PRIu64 "(walked) != %" PRIu64
  5333                "(allocated - GC))\n",
  5334                walk.pgcount, alloc_pages - gc_pages);
  5335        }
  5336        if (unused_pages != gc_pages) {
  5337          error("gc pages mismatch (%" PRIu64 "(expected) != %" PRIu64 "(GC))\n",
  5338                unused_pages, gc_pages);
  5339        }
  5340      } else if (verbose) {
  5341        print(" - skip check used and gc pages (btree-traversal with "
  5342              "monopolistic or read-write mode only)\n");
  5343      }
  5344  
  5345      if (!process_db(MAIN_DBI, nullptr, handle_maindb, true)) {
  5346        if (!userdb_count && verbose)
  5347          print(" - does not contain multiple databases\n");
  5348      }
  5349    }
  5350  
  5351    if (rc == 0 && total_problems == 1 && problems_meta == 1 && !dont_traversal &&
  5352        (envflags & MDBX_RDONLY) == 0 && !only_subdb && stuck_meta < 0 &&
  5353        get_meta_txnid(meta_recent(true)) < envinfo.mi_recent_txnid) {
  5354      print("Perform sync-to-disk for make steady checkpoint at txn-id #%" PRIi64
  5355            "\n",
  5356            envinfo.mi_recent_txnid);
  5357      fflush(nullptr);
  5358      if (write_locked) {
  5359        mdbx_txn_unlock(env);
  5360        write_locked = false;
  5361      }
  5362      rc = mdbx_env_sync_ex(env, true, false);
  5363      if (rc != MDBX_SUCCESS)
  5364        error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc));
  5365      else {
  5366        total_problems -= 1;
  5367        problems_meta -= 1;
  5368      }
  5369    }
  5370  
  5371    if (turn_meta && stuck_meta >= 0 && !dont_traversal && !only_subdb &&
  5372        (envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) {
  5373      const bool successful_check = (rc | total_problems | problems_meta) == 0;
  5374      if (successful_check || force_turn_meta) {
  5375        fflush(nullptr);
  5376        print(" = Performing turn to the specified meta-page (%d) due to %s!\n",
  5377              stuck_meta,
  5378              successful_check ? "successful check" : "the -T option was given");
  5379        fflush(nullptr);
  5380        rc = mdbx_env_turn_for_recovery(env, stuck_meta);
  5381        if (rc != MDBX_SUCCESS)
  5382          error("mdbx_env_turn_for_recovery() failed, error %d %s\n", rc,
  5383                mdbx_strerror(rc));
  5384      } else {
  5385        print(" = Skipping turn to the specified meta-page (%d) due to "
  5386              "unsuccessful check!\n",
  5387              stuck_meta);
  5388      }
  5389    }
  5390  
  5391  bailout:
  5392    if (txn)
  5393      mdbx_txn_abort(txn);
  5394    if (write_locked) {
  5395      mdbx_txn_unlock(env);
  5396      write_locked = false;
  5397    }
  5398    if (env) {
  5399      const bool dont_sync = rc != 0 || total_problems;
  5400      mdbx_env_close_ex(env, dont_sync);
  5401    }
  5402    fflush(nullptr);
  5403    if (rc) {
  5404      if (rc < 0)
  5405        return user_break ? EXIT_INTERRUPTED : EXIT_FAILURE_SYS;
  5406      return EXIT_FAILURE_MDBX;
  5407    }
  5408  
  5409  #if defined(_WIN32) || defined(_WIN64)
  5410    timestamp_finish = GetMilliseconds();
  5411    elapsed = (timestamp_finish - timestamp_start) * 1e-3;
  5412  #else
  5413    if (clock_gettime(CLOCK_MONOTONIC, &timestamp_finish)) {
  5414      rc = errno;
  5415      error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc));
  5416      return EXIT_FAILURE_SYS;
  5417    }
  5418    elapsed = timestamp_finish.tv_sec - timestamp_start.tv_sec +
  5419              (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9;
  5420  #endif /* !WINDOWS */
  5421  
  5422    if (total_problems) {
  5423      print("Total %u error%s detected, elapsed %.3f seconds.\n", total_problems,
  5424            (total_problems > 1) ? "s are" : " is", elapsed);
  5425      if (problems_meta || problems_maindb || problems_freedb)
  5426        return EXIT_FAILURE_CHECK_MAJOR;
  5427      return EXIT_FAILURE_CHECK_MINOR;
  5428    }
  5429    print("No error is detected, elapsed %.3f seconds\n", elapsed);
  5430    return EXIT_SUCCESS;
  5431  }