github.com/moontrade/nogc@v0.1.7/collections/wormhole/wh.c (about)

/*
 * Copyright (c) 2016--2021  Wu, Xingbo <wuxb45@gmail.com>
 *
 * All rights reserved. No warranty, explicit or implicit, provided.
 */
#define _GNU_SOURCE

// headers {{{
#include <assert.h> // static_assert
#include "lib.h"
#include "ctypes.h"
#include "kv.h"
#include "wh.h"
// }}} headers

// def {{{
#define WH_HMAPINIT_SIZE ((1u << 12)) // 10: 16KB/64KB  12: 64KB/256KB  14: 256KB/1MB
#define WH_SLABMETA_SIZE ((1lu << 21)) // 2MB

#ifndef HEAPCHECKING
#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB is ok
#else
#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB for valgrind
#endif

#define WH_KPN ((128u)) // keys per node; power of 2
#define WH_HDIV (((1u << 16)) / WH_KPN)
#define WH_MID ((WH_KPN >> 1)) // ideal cut point for split; the closer the better
#define WH_BKT_NR ((8))
#define WH_KPN2 ((WH_KPN + WH_KPN))

#define WH_KPN_MRG (((WH_KPN + WH_MID) >> 1 )) // 3/4

// FO is fixed at 256. Don't change it.
#define WH_FO  ((256u)) // index fan-out
// number of bits in a bitmap
#define WH_BMNR ((WH_FO >> 6)) // number of u64
// }}} def

// struct {{{
struct wormmeta {
  struct entry13 k13; // kref+klen
  struct entry13 l13; // lmost+bitmin+bitmax
  struct entry13 r13; // rmost+hash32_lo
  struct entry13 p13; // lpath+hash32_hi
  u64 bitmap[0]; // 4 u64 words; present only when bitmin != bitmax
};
static_assert(sizeof(struct wormmeta) == 32, "sizeof(wormmeta) != 32");
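
/*
 * Layout note (editorial sketch): as the accessors below imply, entry13
 * (from lib.h) packs a 16-bit tag e1 and a 48-bit value e3 into one u64:
 * v64 == (e3 << 16) | e1. l13 is packed even tighter: bits 0-8 hold bitmin,
 * bits 9-17 hold bitmax, and the lmost pointer occupies bits 16-63. The
 * 2-bit overlap works because wormleaf objects are evidently 64-byte
 * aligned, so the pointer contributes zeros at v64 bits 16-21 and
 * wormmeta_lmost_load() masks those bits off:
 *
 *   v64    = (ptr_to_u64(lmost) << 16) | (bitmax << 9) | bitmin;
 *   bitmin = (u32)(v64 & 0x1fflu);         // bits 0-8
 *   bitmax = (u32)((v64 >> 9) & 0x1fflu);  // bits 9-17
 *   lmost  = u64_to_ptr((v64 >> 16) & ~0x3flu);
 */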

struct wormkv64 { u64 key; void * ptr; }; // u64 keys (whu64)

struct wormleaf {
  // first line
  rwlock leaflock;
  spinlock sortlock; // to protect the seemingly "read-only" iter_seek
  au64 lv; // version (don't use the first u64)
  struct wormleaf * prev; // prev leaf
  struct wormleaf * next; // next leaf
  struct kv * anchor;

  u32 nr_sorted;
  u32 nr_keys;
  u64 reserved[2];

  struct entry13 hs[WH_KPN]; // sorted by hashes
  u8 ss[WH_KPN]; // sorted by keys
};

struct wormslot { u16 t[WH_BKT_NR]; };
static_assert(sizeof(struct wormslot) == 16, "sizeof(wormslot) != 16");

struct wormmbkt { struct wormmeta * e[WH_BKT_NR]; };
static_assert(sizeof(struct wormmbkt) == 64, "sizeof(wormmbkt) != 64");

struct wormhmap {
  au64 hv;
  struct wormslot * wmap;
  struct wormmbkt * pmap;
  u32 mask;
  u32 maxplen;
  u64 msize;

  struct slab * slab1;
  struct slab * slab2;
  struct kv * pbuf;
};
static_assert(sizeof(struct wormhmap) == 64, "sizeof(wormhmap) != 64");

struct wormhole {
  // 1 line
  union {
    volatile au64 hmap_ptr; // safe
    struct wormhmap * hmap; // unsafe
  };
  u64 padding0[6];
  struct wormleaf * leaf0; // usually not used
  // 1 line
  struct kvmap_mm mm;
  struct qsbr * qsbr;
  struct slab * slab_leaf;
  struct kv * pbuf;
  u32 leaftype;
  u32 padding1;
  // 2 lines
  struct wormhmap hmap2[2];
  // fifth line
  rwlock metalock;
  u32 padding2[15];
};

struct wormhole_iter {
  struct wormref * ref; // safe-iter only
  struct wormhole * map;
  struct wormleaf * leaf;
  u32 is;
};

struct wormref {
  struct wormhole * map;
  struct qsbr_ref qref;
};
// }}} struct

// helpers {{{

// meta {{{
  static inline struct kv *
wormmeta_keyref_load(const struct wormmeta * const meta)
{
  return u64_to_ptr(meta->k13.e3);
}

  static inline u16
wormmeta_klen_load(const struct wormmeta * const meta)
{
  return meta->k13.e1;
}

  static inline struct wormleaf *
wormmeta_lmost_load(const struct wormmeta * const meta)
{
  return u64_to_ptr(meta->l13.e3 & (~0x3flu));
}

  static inline u32
wormmeta_bitmin_load(const struct wormmeta * const meta)
{
  return (u32)(meta->l13.v64 & 0x1fflu);
}

  static inline u32
wormmeta_bitmax_load(const struct wormmeta * const meta)
{
  return (u32)((meta->l13.v64 >> 9) & 0x1fflu);
}

  static inline u32
wormmeta_hash32_load(const struct wormmeta * const meta)
{
  return ((u32)meta->r13.e1) | (((u32)meta->p13.e1) << 16);
}

  static inline struct wormleaf *
wormmeta_rmost_load(const struct wormmeta * const meta)
{
  return u64_to_ptr(meta->r13.e3);
}

  static inline struct wormleaf *
wormmeta_lpath_load(const struct wormmeta * const meta)
{
  return u64_to_ptr(meta->p13.e3);
}

// internal
  static inline void
wormmeta_lpath_store(struct wormmeta * const meta, struct wormleaf * const leaf)
{
  entry13_update_e3(&meta->p13, ptr_to_u64(leaf));
}

// also recomputes leaf_klen_eq and updates lpath accordingly
  static inline void
wormmeta_lmost_store(struct wormmeta * const meta, struct wormleaf * const leaf)
{
  const u64 minmax = meta->l13.v64 & 0x3fffflu;
  meta->l13.v64 = (((u64)leaf) << 16) | minmax;

  const bool leaf_klen_eq = leaf->anchor->klen == wormmeta_klen_load(meta);
  wormmeta_lpath_store(meta, leaf_klen_eq ? leaf : leaf->prev);
}

  static inline void
wormmeta_bitmin_store(struct wormmeta * const meta, const u32 bitmin)
{
  meta->l13.v64 = (meta->l13.v64 & (~0x1fflu)) | bitmin;
}

  static inline void
wormmeta_bitmax_store(struct wormmeta * const meta, const u32 bitmax)
{
  meta->l13.v64 = (meta->l13.v64 & (~0x3fe00lu)) | (bitmax << 9);
}

  static inline void
wormmeta_rmost_store(struct wormmeta * const meta, struct wormleaf * const leaf)
{
  entry13_update_e3(&meta->r13, ptr_to_u64(leaf));
}

// for wormmeta_alloc
  static void
wormmeta_init(struct wormmeta * const meta, struct wormleaf * const lrmost,
    struct kv * const keyref, const u32 alen, const u32 bit)
{
  keyref->refcnt++; // shared

  const u32 plen = keyref->klen;
  debug_assert(plen <= UINT16_MAX);
  meta->k13 = entry13((u16)plen, ptr_to_u64(keyref));
  meta->l13.v64 = (ptr_to_u64(lrmost) << 16) | (bit << 9) | bit;

  const u32 hash32 = keyref->hashlo;
  meta->r13 = entry13((u16)hash32, ptr_to_u64(lrmost));

  const bool leaf_klen_eq = alen == plen;
  meta->p13 = entry13((u16)(hash32 >> 16), ptr_to_u64(leaf_klen_eq ? lrmost : lrmost->prev));
}
// }}} meta

// meta-bitmap {{{
  static inline bool
wormmeta_bm_test(const struct wormmeta * const meta, const u32 id)
{
  debug_assert(id < WH_FO);
  const u32 bitmin = wormmeta_bitmin_load(meta);
  const u32 bitmax = wormmeta_bitmax_load(meta);
  if (bitmin == bitmax) { // half node
    return bitmin == id;
  } else { // full node
    return (bool)((meta->bitmap[id >> 6u] >> (id & 0x3fu)) & 1lu);
  }
}

// meta must be a full node
  static void
wormmeta_bm_set(struct wormmeta * const meta, const u32 id)
{
  // need to replace meta
  u64 * const ptr = &(meta->bitmap[id >> 6u]);
  const u64 bit = 1lu << (id & 0x3fu);
  if ((*ptr) & bit)
    return;

  (*ptr) |= bit;

  // min
  if (id < wormmeta_bitmin_load(meta))
    wormmeta_bitmin_store(meta, id);

  // max
  const u32 oldmax = wormmeta_bitmax_load(meta);
  if (oldmax == WH_FO || id > oldmax)
    wormmeta_bitmax_store(meta, id);
}

// find the lowest set bit >= id0 (callers pass an id0 whose bit is clear)
// return WH_FO if not found
  static inline u32
wormmeta_bm_gt(const struct wormmeta * const meta, const u32 id0)
{
  u32 ix = id0 >> 6;
  u64 bits = meta->bitmap[ix] & ~((1lu << (id0 & 0x3fu)) - 1lu);
  if (bits)
    return (ix << 6) + (u32)__builtin_ctzl(bits);

  while (++ix < WH_BMNR) {
    bits = meta->bitmap[ix];
    if (bits)
      return (ix << 6) + (u32)__builtin_ctzl(bits);
  }

  return WH_FO;
}

// find the highest set bit < id0
// return WH_FO if not found
  static inline u32
wormmeta_bm_lt(const struct wormmeta * const meta, const u32 id0)
{
  u32 ix = id0 >> 6;
  u64 bits = meta->bitmap[ix] & ((1lu << (id0 & 0x3fu)) - 1lu);
  if (bits)
    return (ix << 6) + 63u - (u32)__builtin_clzl(bits);

  while (ix--) {
    bits = meta->bitmap[ix];
    if (bits)
      return (ix << 6) + 63u - (u32)__builtin_clzl(bits);
  }

  return WH_FO;
}

// meta must be a full node
  static inline void
wormmeta_bm_clear(struct wormmeta * const meta, const u32 id)
{
  debug_assert(wormmeta_bitmin_load(meta) < wormmeta_bitmax_load(meta));
  meta->bitmap[id >> 6u] &= (~(1lu << (id & 0x3fu)));

  // min
  if (id == wormmeta_bitmin_load(meta))
    wormmeta_bitmin_store(meta, wormmeta_bm_gt(meta, id));

  // max
  if (id == wormmeta_bitmax_load(meta))
    wormmeta_bitmax_store(meta, wormmeta_bm_lt(meta, id));
}
// }}} meta-bitmap
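
/*
 * Example (editorial sketch): a meta node whose children are 'a' (0x61) and
 * 'c' (0x63) stores bitmin == 0x61 and bitmax == 0x63 in l13, with bits
 * 0x61 and 0x63 set in bitmap[]. Then:
 *
 *   wormmeta_bm_test(meta, 0x61); // true
 *   wormmeta_bm_test(meta, 0x62); // false: 'b' is not a child
 *   wormmeta_bm_gt(meta, 0x62);   // 0x63: the lowest set bit >= 0x62
 *   wormmeta_bm_lt(meta, 0x62);   // 0x61: the highest set bit < 0x62
 *
 * A node with exactly one child ("half node") has bitmin == bitmax and no
 * bitmap[] storage at all; half nodes come from slab1, full nodes from
 * slab2 (see wormhole_create_internal()).
 */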

// key/prefix {{{
  static inline u16
wormhole_pkey(const u32 hash32)
{
  const u16 pkey0 = ((u16)hash32) ^ ((u16)(hash32 >> 16));
  return pkey0 ? pkey0 : 1;
}
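
/*
 * Note (editorial): pkey 0 is reserved as the "empty slot" marker, both in
 * wormslot.t[] of the hash table (wormhmap_slot_count() counts entries by
 * matching against zero) and in leaf->hs[].e1 (wormleaf_match_hs() stops at
 * e1 == 0). Mapping a zero pkey to 1 therefore trades a tiny bit of hash
 * quality for an unambiguous emptiness test.
 */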

  static inline u32
wormhole_bswap(const u32 hashlo)
{
  return __builtin_bswap32(hashlo);
}

  static inline bool
wormhole_key_meta_match(const struct kv * const key, const struct wormmeta * const meta)
{
  return (key->klen == wormmeta_klen_load(meta))
    && (!memcmp(key->kv, wormmeta_keyref_load(meta)->kv, key->klen));
}

// called by get_kref_slot
  static inline bool
wormhole_kref_meta_match(const struct kref * const kref,
    const struct wormmeta * const meta)
{
  return (kref->len == wormmeta_klen_load(meta))
    && (!memcmp(kref->ptr, wormmeta_keyref_load(meta)->kv, kref->len));
}

// called from meta_down ... get_kref1_slot
// will access rmost, prefetching is effective here
  static inline bool
wormhole_kref1_meta_match(const struct kref * const kref,
    const struct wormmeta * const meta, const u8 cid)
{
  const u8 * const keybuf = wormmeta_keyref_load(meta)->kv;
  const u32 plen = kref->len;
  return ((plen + 1) == wormmeta_klen_load(meta))
    && (!memcmp(kref->ptr, keybuf, plen))
    && (keybuf[plen] == cid);
}

// warning: be careful with buffer overflow
  static inline void
wormhole_prefix(struct kv * const pfx, const u32 klen)
{
  pfx->klen = klen;
  kv_update_hash(pfx);
}

// for split
  static inline void
wormhole_prefix_inc1(struct kv * const pfx)
{
  pfx->hashlo = crc32c_u8(pfx->hashlo, pfx->kv[pfx->klen]);
  pfx->klen++;
}

// meta_lcp only
  static inline void
wormhole_kref_inc(struct kref * const kref, const u32 len0,
    const u32 crc, const u32 inc)
{
  kref->hash32 = crc32c_inc(kref->ptr + len0, inc, crc);
  kref->len = len0 + inc;
}

// meta_lcp only
  static inline void
wormhole_kref_inc_123(struct kref * const kref, const u32 len0,
    const u32 crc, const u32 inc)
{
  kref->hash32 = crc32c_inc_123(kref->ptr + len0, inc, crc);
  kref->len = len0 + inc;
}
// }}} key/prefix
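
/*
 * Example (editorial sketch): prefix hashes are extended incrementally
 * rather than recomputed. Growing a kref from "ab" to "abc" costs one
 * crc32c step, and the result must equal hashing "abc" from scratch:
 *
 *   struct kref kref = { .ptr = (const u8 *)"abc", .len = 2 };
 *   kref.hash32 = kv_crc32c(kref.ptr, 2);            // hash of "ab"
 *   wormhole_kref_inc_123(&kref, 2, kref.hash32, 1); // extend by one byte
 *   // now kref.len == 3 and kref.hash32 == kv_crc32c(kref.ptr, 3)
 */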

// alloc {{{
  static inline struct kv *
wormhole_alloc_akey(const size_t klen)
{
#ifdef ALLOCFAIL
  if (alloc_fail())
    return NULL;
#endif
  return malloc(sizeof(struct kv) + klen);
}

  static inline void
wormhole_free_akey(struct kv * const akey)
{
  free(akey);
}

  static inline struct kv *
wormhole_alloc_mkey(const size_t klen)
{
#ifdef ALLOCFAIL
  if (alloc_fail())
    return NULL;
#endif
  return malloc(sizeof(struct kv) + klen);
}

  static inline void
wormhole_free_mkey(struct kv * const mkey)
{
  free(mkey);
}

  static struct wormleaf *
wormleaf_alloc(struct wormhole * const map, struct wormleaf * const prev,
    struct wormleaf * const next, struct kv * const anchor)
{
  struct wormleaf * const leaf = slab_alloc_safe(map->slab_leaf);
  if (leaf == NULL)
    return NULL;

  rwlock_init(&(leaf->leaflock));
  spinlock_init(&(leaf->sortlock));

  // keep the old version; new version will be assigned by split functions
  //leaf->lv = 0;

  leaf->prev = prev;
  leaf->next = next;
  leaf->anchor = anchor;

  leaf->nr_keys = 0;
  leaf->nr_sorted = 0;

  // hs requires zero init.
  memset(leaf->hs, 0, sizeof(leaf->hs[0]) * WH_KPN);
  return leaf;
}

  static void
wormleaf_free(struct slab * const slab, struct wormleaf * const leaf)
{
  debug_assert(leaf->leaflock.opaque == 0);
  wormhole_free_akey(leaf->anchor);
  slab_free_safe(slab, leaf);
}

  static struct wormmeta *
wormmeta_alloc(struct wormhmap * const hmap, struct wormleaf * const lrmost,
    struct kv * const keyref, const u32 alen, const u32 bit)
{
  debug_assert(alen <= UINT16_MAX);
  debug_assert(lrmost && keyref);

  struct wormmeta * const meta = slab_alloc_unsafe(hmap->slab1);
  if (meta == NULL)
    return NULL;

  wormmeta_init(meta, lrmost, keyref, alen, bit);
  return meta;
}

  static inline bool
wormhole_slab_reserve(struct wormhole * const map, const u32 nr)
{
#ifdef ALLOCFAIL
  if (alloc_fail())
    return false;
#endif
  for (u32 i = 0; i < 2; i++) {
    if (!(map->hmap2[i].slab1 && map->hmap2[i].slab2))
      continue;
    if (!slab_reserve_unsafe(map->hmap2[i].slab1, nr))
      return false;
    if (!slab_reserve_unsafe(map->hmap2[i].slab2, nr))
      return false;
  }
  return true;
}

  static void
wormmeta_keyref_release(struct wormmeta * const meta)
{
  struct kv * const keyref = wormmeta_keyref_load(meta);
  debug_assert(keyref->refcnt);
  keyref->refcnt--;
  if (keyref->refcnt == 0)
    wormhole_free_mkey(keyref);
}

  static void
wormmeta_free(struct wormhmap * const hmap, struct wormmeta * const meta)
{
  wormmeta_keyref_release(meta);
  slab_free_unsafe(hmap->slab1, meta);
}
// }}} alloc
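
/*
 * Lifecycle note (editorial sketch): anchor keys (akey) are owned by exactly
 * one leaf and are freed with it in wormleaf_free(). Meta keys (mkey) are
 * reference-counted, because one prefix string can back several wormmeta
 * nodes (e.g. one per hmap generation): wormmeta_init() does
 * keyref->refcnt++ and wormmeta_keyref_release() frees the mkey only when
 * the count drops to zero. Roughly:
 *
 *   struct kv * const mkey = wormhole_alloc_mkey(klen);
 *   mkey->refcnt = 0;               // as in wormhole_create_leaf0()
 *   // wormmeta_alloc() twice -> refcnt == 2
 *   // wormmeta_free() twice  -> refcnt == 0, mkey is freed
 */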

// lock {{{
  static void
wormleaf_lock_write(struct wormleaf * const leaf, struct wormref * const ref)
{
  if (!rwlock_trylock_write(&(leaf->leaflock))) {
    wormhole_park(ref);
    rwlock_lock_write(&(leaf->leaflock));
    wormhole_resume(ref);
  }
}

  static void
wormleaf_lock_read(struct wormleaf * const leaf, struct wormref * const ref)
{
  if (!rwlock_trylock_read(&(leaf->leaflock))) {
    wormhole_park(ref);
    rwlock_lock_read(&(leaf->leaflock));
    wormhole_resume(ref);
  }
}

  static void
wormleaf_unlock_write(struct wormleaf * const leaf)
{
  rwlock_unlock_write(&(leaf->leaflock));
}

  static void
wormleaf_unlock_read(struct wormleaf * const leaf)
{
  rwlock_unlock_read(&(leaf->leaflock));
}

  static void
wormhmap_lock(struct wormhole * const map, struct wormref * const ref)
{
  if (!rwlock_trylock_write(&(map->metalock))) {
    wormhole_park(ref);
    rwlock_lock_write(&(map->metalock));
    wormhole_resume(ref);
  }
}

  static inline void
wormhmap_unlock(struct wormhole * const map)
{
  rwlock_unlock_write(&(map->metalock));
}
// }}} lock
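
/*
 * Concurrency note (editorial sketch): a blocked locker must not stall QSBR
 * reclamation, so each blocking acquisition is wrapped in wormhole_park()/
 * wormhole_resume() (not defined in this section), which mark the thread as
 * quiescent while it waits:
 *
 *   if (!rwlock_trylock_write(&lock)) { // fast path failed
 *     wormhole_park(ref);               // leave the QSBR critical section
 *     rwlock_lock_write(&lock);         // may block for a long time
 *     wormhole_resume(ref);             // re-enter before touching nodes
 *   }
 */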

// hmap-version {{{
  static inline struct wormhmap *
wormhmap_switch(struct wormhole * const map, struct wormhmap * const hmap)
{
  return (hmap == map->hmap2) ? (hmap + 1) : (hmap - 1);
}

  static inline struct wormhmap *
wormhmap_load(struct wormhole * const map)
{
  return (struct wormhmap *)atomic_load_explicit(&(map->hmap_ptr), MO_ACQUIRE);
}

  static inline void
wormhmap_store(struct wormhole * const map, struct wormhmap * const hmap)
{
  atomic_store_explicit(&(map->hmap_ptr), (u64)hmap, MO_RELEASE);
}

  static inline u64
wormhmap_version_load(const struct wormhmap * const hmap)
{
  // no concurrent access
  return atomic_load_explicit(&(hmap->hv), MO_ACQUIRE);
}

  static inline void
wormhmap_version_store(struct wormhmap * const hmap, const u64 v)
{
  atomic_store_explicit(&(hmap->hv), v, MO_RELEASE);
}

  static inline u64
wormleaf_version_load(struct wormleaf * const leaf)
{
  return atomic_load_explicit(&(leaf->lv), MO_CONSUME);
}

  static inline void
wormleaf_version_store(struct wormleaf * const leaf, const u64 v)
{
  atomic_store_explicit(&(leaf->lv), v, MO_RELEASE);
}
// }}} hmap-version

// co {{{
  static inline void
wormhmap_prefetch_pmap(const struct wormhmap * const hmap, const u32 idx)
{
#if defined(CORR)
  (void)hmap;
  (void)idx;
#else
  cpu_prefetch0(&(hmap->pmap[idx]));
#endif
}

  static inline struct wormmeta *
wormhmap_get_meta(const struct wormhmap * const hmap, const u32 mid, const u32 i)
{
  struct wormmeta * const meta = hmap->pmap[mid].e[i];
#if defined(CORR)
  cpu_prefetch0(meta);
  corr_yield();
#endif
  return meta;
}

  static inline void
wormleaf_prefetch(struct wormleaf * const leaf, const u32 hashlo)
{
  const u32 i = wormhole_pkey(hashlo) / WH_HDIV;
#if defined(CORR)
  cpu_prefetch0(leaf);
  cpu_prefetch0(&(leaf->hs[i-4]));
  cpu_prefetch0(&(leaf->hs[i+4]));
  corr_yield();
#else
  cpu_prefetch0(&(leaf->hs[i]));
#endif
}

  static inline bool
wormhole_kref_kv_match(const struct kref * const key, const struct kv * const curr)
{
#if defined(CORR)
  const u8 * const ptr = (typeof(ptr))curr;
  cpu_prefetch0(ptr);
  cpu_prefetch0(ptr + 64);
  if (key->len > 56) {
    cpu_prefetch0(ptr + 128);
    cpu_prefetch0(ptr + 192);
  }
  corr_yield();
#endif
  return kref_kv_match(key, curr);
}

  static inline void
wormhole_qsbr_update_pause(struct wormref * const ref, const u64 v)
{
  qsbr_update(&ref->qref, v);
#if defined(CORR)
  corr_yield();
#endif
}
// }}} co

// }}} helpers

// hmap {{{
// hmap is the MetaTrieHT of Wormhole
  static bool
wormhmap_init(struct wormhmap * const hmap, struct kv * const pbuf)
{
  const u64 wsize = sizeof(hmap->wmap[0]) * WH_HMAPINIT_SIZE;
  const u64 psize = sizeof(hmap->pmap[0]) * WH_HMAPINIT_SIZE;
  u64 msize = wsize + psize;
  u8 * const mem = pages_alloc_best(msize, true, &msize);
  if (mem == NULL)
    return false;

  hmap->pmap = (typeof(hmap->pmap))mem;
  hmap->wmap = (typeof(hmap->wmap))(mem + psize);
  hmap->msize = msize;
  hmap->mask = WH_HMAPINIT_SIZE - 1;
  wormhmap_version_store(hmap, 0);
  hmap->maxplen = 0;
  hmap->pbuf = pbuf;
  return true;
}

  static inline void
wormhmap_deinit(struct wormhmap * const hmap)
{
  if (hmap->pmap) {
    pages_unmap(hmap->pmap, hmap->msize);
    hmap->pmap = NULL;
    hmap->wmap = NULL;
  }
}

  static inline m128
wormhmap_zero(void)
{
#if defined(__x86_64__)
  return _mm_setzero_si128();
#elif defined(__aarch64__)
  return vdupq_n_u8(0);
#endif
}

  static inline m128
wormhmap_m128_pkey(const u16 pkey)
{
#if defined(__x86_64__)
  return _mm_set1_epi16((short)pkey);
#elif defined(__aarch64__)
  return vreinterpretq_u8_u16(vdupq_n_u16(pkey));
#endif
}

  static inline u32
wormhmap_match_mask(const struct wormslot * const s, const m128 skey)
{
#if defined(__x86_64__)
  const m128 sv = _mm_load_si128((const void *)s);
  return (u32)_mm_movemask_epi8(_mm_cmpeq_epi16(skey, sv));
#elif defined(__aarch64__)
  const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s
  const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000
  static const uint16x8_t mbits = {0x3, 0xc, 0x30, 0xc0, 0x300, 0xc00, 0x3000, 0xc000};
  return (u32)vaddvq_u16(vandq_u16(cmp, mbits));
#endif
}
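
/*
 * Example (editorial sketch): aarch64 has no movemask instruction, so the
 * code above emulates _mm_movemask_epi8 over 16-bit lanes. After vceqq_u16
 * each lane is 0xffff (match) or 0x0000; ANDing with mbits leaves each
 * matching lane's two designated mask bits (0x3, 0xc, 0x30, ...), and the
 * horizontal add vaddvq_u16 combines them into one u32 (no carries, since
 * the bit positions are disjoint). If only lane 2 matches:
 *
 *   cmp  = {0, 0, 0xffff, 0, 0, 0, 0, 0}
 *   and  = {0, 0, 0x0030, 0, 0, 0, 0, 0}
 *   mask = 0x30 // what x86 movemask returns for a match in lane 2
 */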

  static inline bool
wormhmap_match_any(const struct wormslot * const s, const m128 skey)
{
#if defined(__x86_64__)
  return wormhmap_match_mask(s, skey) != 0;
#elif defined(__aarch64__)
  const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s
  const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000
  return vaddvq_u32(vreinterpretq_u32_u16(cmp)) != 0;
#endif
}

// meta_lcp only
  static inline bool
wormhmap_peek(const struct wormhmap * const hmap, const u32 hash32)
{
  const m128 sk = wormhmap_m128_pkey(wormhole_pkey(hash32));
  const u32 midx = hash32 & hmap->mask;
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  return wormhmap_match_any(&(hmap->wmap[midx]), sk)
    || wormhmap_match_any(&(hmap->wmap[midy]), sk);
}
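
/*
 * Note (editorial): each meta key has two candidate buckets, midx (low bits
 * of the hash) and midy (low bits of the byte-swapped hash), in the style
 * of cuckoo hashing. wormhmap_peek() is a cheap, possibly-false-positive
 * membership test: it only checks whether either bucket holds a matching
 * 16-bit pkey, without dereferencing any wormmeta. That is what makes the
 * first phase of wormhole_meta_lcp() fast, and why its result must later
 * be confirmed by wormhmap_get_kref().
 */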

  static inline struct wormmeta *
wormhmap_get_slot(const struct wormhmap * const hmap, const u32 mid,
    const m128 skey, const struct kv * const key)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1);
    if (likely(wormhole_key_meta_match(key, meta)))
      return meta;
    mask ^= (3u << i2);
  }
  return NULL;
}

  static struct wormmeta *
wormhmap_get(const struct wormhmap * const hmap, const struct kv * const key)
{
  const u32 hash32 = key->hashlo;
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));

  struct wormmeta * const r = wormhmap_get_slot(hmap, midx, skey, key);
  if (r)
    return r;
  return wormhmap_get_slot(hmap, midy, skey, key);
}

// for meta_lcp only
  static inline struct wormmeta *
wormhmap_get_kref_slot(const struct wormhmap * const hmap, const u32 mid,
    const m128 skey, const struct kref * const kref)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1);
    if (likely(wormhole_kref_meta_match(kref, meta)))
      return meta;

    mask ^= (3u << i2);
  }
  return NULL;
}

// for meta_lcp only
  static inline struct wormmeta *
wormhmap_get_kref(const struct wormhmap * const hmap, const struct kref * const kref)
{
  const u32 hash32 = kref->hash32;
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));

  struct wormmeta * const r = wormhmap_get_kref_slot(hmap, midx, skey, kref);
  if (r)
    return r;
  return wormhmap_get_kref_slot(hmap, midy, skey, kref);
}

// for meta_down only
  static inline struct wormmeta *
wormhmap_get_kref1_slot(const struct wormhmap * const hmap, const u32 mid,
    const m128 skey, const struct kref * const kref, const u8 cid)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1);
    //cpu_prefetch0(wormmeta_rmost_load(meta)); // will access
    if (likely(wormhole_kref1_meta_match(kref, meta, cid)))
      return meta;

    mask ^= (3u << i2);
  }
  return NULL;
}

// for meta_down only
  static inline struct wormmeta *
wormhmap_get_kref1(const struct wormhmap * const hmap,
    const struct kref * const kref, const u8 cid)
{
  const u32 hash32 = crc32c_u8(kref->hash32, cid);
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));

  struct wormmeta * const r = wormhmap_get_kref1_slot(hmap, midx, skey, kref, cid);
  if (r)
    return r;
  return wormhmap_get_kref1_slot(hmap, midy, skey, kref, cid);
}

  static inline u32
wormhmap_slot_count(const struct wormslot * const slot)
{
  const u32 mask = wormhmap_match_mask(slot, wormhmap_zero());
  return mask ? ((u32)__builtin_ctz(mask) >> 1) : 8;
}

  static inline void
wormhmap_squeeze(const struct wormhmap * const hmap)
{
  struct wormslot * const wmap = hmap->wmap;
  struct wormmbkt * const pmap = hmap->pmap;
  const u32 mask = hmap->mask;
  const u64 nrs64 = ((u64)(hmap->mask)) + 1; // must use u64; u32 can overflow
  for (u64 si64 = 0; si64 < nrs64; si64++) { // # of buckets
    const u32 si = (u32)si64;
    u32 ci = wormhmap_slot_count(&(wmap[si]));
    for (u32 ei = ci - 1; ei < WH_BKT_NR; ei--) { // unsigned: ei wraps past 0 and exits
      struct wormmeta * const meta = pmap[si].e[ei];
      const u32 sj = wormmeta_hash32_load(meta) & mask; // first hash
      if (sj == si)
        continue;

      // move
      const u32 ej = wormhmap_slot_count(&(wmap[sj]));
      if (ej < WH_BKT_NR) { // has space at home location
        wmap[sj].t[ej] = wmap[si].t[ei];
        pmap[sj].e[ej] = pmap[si].e[ei];
        const u32 ni = ci - 1;
        if (ei < ni) {
          wmap[si].t[ei] = wmap[si].t[ni];
          pmap[si].e[ei] = pmap[si].e[ni];
        }
        wmap[si].t[ni] = 0;
        pmap[si].e[ni] = NULL;
        ci--;
      }
    }
  }
}

  static void
wormhmap_expand(struct wormhmap * const hmap)
{
  // sync expand
  const u32 mask0 = hmap->mask;
  if (mask0 == UINT32_MAX)
    debug_die();
  const u32 nr0 = mask0 + 1;
  const u32 mask1 = mask0 + nr0;
  const u64 nr1 = ((u64)nr0) << 1; // must use u64; u32 can overflow
  const u64 wsize = nr1 * sizeof(hmap->wmap[0]);
  const u64 psize = nr1 * sizeof(hmap->pmap[0]);
  u64 msize = wsize + psize;
  u8 * mem = pages_alloc_best(msize, true, &msize);
  if (mem == NULL) {
    // We are at a very deep call stack from wormhole_put().
    // Gracefully handling the failure requires lots of changes.
    // Currently we simply wait for available memory.
    // TODO: gracefully return with insertion failure
    char ts[64];
    time_stamp(ts, 64);
    fprintf(stderr, "%s %s sleep-wait for memory allocation %lukB\n",
        __func__, ts, msize >> 10);
    do {
      sleep(1);
      mem = pages_alloc_best(msize, true, &msize);
    } while (mem == NULL);
    time_stamp(ts, 64);
    fprintf(stderr, "%s %s memory allocation done\n", __func__, ts);
  }

  struct wormhmap hmap1 = *hmap;
  hmap1.pmap = (typeof(hmap1.pmap))mem;
  hmap1.wmap = (typeof(hmap1.wmap))(mem + psize);
  hmap1.msize = msize;
  hmap1.mask = mask1;

  const struct wormslot * const wmap0 = hmap->wmap;
  const struct wormmbkt * const pmap0 = hmap->pmap;

  for (u32 s = 0; s < nr0; s++) {
    const struct wormmbkt * const bkt = &pmap0[s];
    for (u32 i = 0; (i < WH_BKT_NR) && bkt->e[i]; i++) {
      const struct wormmeta * const meta = bkt->e[i];
      const u32 hash32 = wormmeta_hash32_load(meta);
      const u32 idx0 = hash32 & mask0;
      const u32 idx1 = ((idx0 == s) ? hash32 : wormhole_bswap(hash32)) & mask1;

      const u32 n = wormhmap_slot_count(&(hmap1.wmap[idx1]));
      debug_assert(n < 8);
      hmap1.wmap[idx1].t[n] = wmap0[s].t[i];
      hmap1.pmap[idx1].e[n] = bkt->e[i];
    }
  }
  pages_unmap(hmap->pmap, hmap->msize);
  hmap->pmap = hmap1.pmap;
  hmap->wmap = hmap1.wmap;
  hmap->msize = hmap1.msize;
  hmap->mask = hmap1.mask;
  wormhmap_squeeze(hmap);
}
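
/*
 * Example (editorial sketch): expansion doubles the table, so mask1 has one
 * more low bit than mask0. An entry sitting in its first-choice bucket
 * (idx0 == s) keeps using its first hash; a displaced entry keeps using the
 * byte-swapped hash. With mask0 == 0xfff and mask1 == 0x1fff, an entry with
 * hash32 == 0x1a2b3c4d rehashes as:
 *
 *   idx0 = 0x1a2b3c4d & 0xfff;                  // 0xc4d (first choice)
 *   idx1 = 0x1a2b3c4d & 0x1fff;                 // if it was at 0xc4d
 *   idx1 = wormhole_bswap(0x1a2b3c4d) & 0x1fff; // otherwise
 *
 * The trailing wormhmap_squeeze() then tries to move displaced entries back
 * to their (possibly new) first-choice buckets.
 */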

  static bool
wormhmap_cuckoo(struct wormhmap * const hmap, const u32 mid0,
    struct wormmeta * const e0, const u16 s0, const u32 depth)
{
  const u32 ii = wormhmap_slot_count(&(hmap->wmap[mid0]));
  if (ii < WH_BKT_NR) {
    hmap->wmap[mid0].t[ii] = s0;
    hmap->pmap[mid0].e[ii] = e0;
    return true;
  } else if (depth == 0) {
    return false;
  }

  // depth > 0
  struct wormmbkt * const bkt = &(hmap->pmap[mid0]);
  u16 * const sv = &(hmap->wmap[mid0].t[0]);
  for (u32 i = 0; i < WH_BKT_NR; i++) {
    const struct wormmeta * const meta = bkt->e[i];
    debug_assert(meta);
    const u32 hash32 = wormmeta_hash32_load(meta);

    const u32 midx = hash32 & hmap->mask;
    const u32 midy = wormhole_bswap(hash32) & hmap->mask;
    const u32 midt = (midx != mid0) ? midx : midy;
    if (midt != mid0) { // possible
      // no penalty if moving someone back to its 1st hash location
      const u32 depth1 = (midt == midx) ? depth : (depth - 1);
      if (wormhmap_cuckoo(hmap, midt, bkt->e[i], sv[i], depth1)) {
        bkt->e[i] = e0;
        sv[i] = s0;
        return true;
      }
    }
  }
  return false;
}

  static void
wormhmap_set(struct wormhmap * const hmap, struct wormmeta * const meta)
{
  const u32 hash32 = wormmeta_hash32_load(meta);
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const u16 pkey = wormhole_pkey(hash32);
  // insert with cuckoo
  if (likely(wormhmap_cuckoo(hmap, midx, meta, pkey, 1)))
    return;
  if (wormhmap_cuckoo(hmap, midy, meta, pkey, 1))
    return;
  if (wormhmap_cuckoo(hmap, midx, meta, pkey, 2))
    return;

  // expand
  wormhmap_expand(hmap);

  wormhmap_set(hmap, meta);
}
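
/*
 * Note (editorial): insertion tries the two home buckets, then a depth-2
 * cuckoo pass that displaces one resident entry to its alternate bucket to
 * make room; if that also fails, the table is expanded and the insert is
 * retried (expansion guarantees progress). The in-place shuffling is safe
 * with respect to readers because metadata updates are applied to the
 * inactive wormhmap generation (see hmap2[2] and wormhmap_switch()); readers
 * only ever see the generation published through wormhmap_store().
 */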

  static bool
wormhmap_del_slot(struct wormhmap * const hmap, const u32 mid,
    const struct wormmeta * const meta, const m128 skey)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    const struct wormmeta * const meta1 = hmap->pmap[mid].e[i2>>1];
    if (likely(meta == meta1)) {
      const u32 i = i2 >> 1;
      const u32 j = wormhmap_slot_count(&(hmap->wmap[mid])) - 1;
      hmap->wmap[mid].t[i] = hmap->wmap[mid].t[j];
      hmap->pmap[mid].e[i] = hmap->pmap[mid].e[j];
      hmap->wmap[mid].t[j] = 0;
      hmap->pmap[mid].e[j] = NULL;
      return true;
    }
    mask -= (3u << i2);
  }
  return false;
}

  static bool
wormhmap_del(struct wormhmap * const hmap, const struct wormmeta * const meta)
{
  const u32 hash32 = wormmeta_hash32_load(meta);
  const u32 midx = hash32 & hmap->mask;
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));
  return wormhmap_del_slot(hmap, midx, meta, skey)
    || wormhmap_del_slot(hmap, midy, meta, skey);
}

  static bool
wormhmap_replace_slot(struct wormhmap * const hmap, const u32 mid,
    const struct wormmeta * const old, const m128 skey, struct wormmeta * const new)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta ** const pslot = &hmap->pmap[mid].e[i2>>1];
    if (likely(old == *pslot)) {
      *pslot = new;
      return true;
    }
    mask -= (3u << i2);
  }
  return false;
}

  static bool
wormhmap_replace(struct wormhmap * const hmap, const struct wormmeta * const old, struct wormmeta * const new)
{
  const u32 hash32 = wormmeta_hash32_load(old);
  const u32 midx = hash32 & hmap->mask;
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));
  return wormhmap_replace_slot(hmap, midx, old, skey, new)
    || wormhmap_replace_slot(hmap, midy, old, skey, new);
}
// }}} hmap

// create {{{
// it's unsafe
  static bool
wormhole_create_leaf0(struct wormhole * const map)
{
  const bool sr = wormhole_slab_reserve(map, 1);
  if (unlikely(!sr))
    return false;

  // create leaf of empty key
  struct kv * const anchor = wormhole_alloc_akey(0);
  if (anchor == NULL)
    return false;
  kv_dup2(kv_null(), anchor);

  struct wormleaf * const leaf0 = wormleaf_alloc(map, NULL, NULL, anchor);
  if (leaf0 == NULL) {
    wormhole_free_akey(anchor);
    return false;
  }

  struct kv * const mkey = wormhole_alloc_mkey(0);
  if (mkey == NULL) {
    wormleaf_free(map->slab_leaf, leaf0);
    return false;
  }

  wormhole_prefix(mkey, 0);
  mkey->refcnt = 0;
  // create meta of empty key
  for (u32 i = 0; i < 2; i++) {
    if (map->hmap2[i].slab1) {
      struct wormmeta * const m0 = wormmeta_alloc(&map->hmap2[i], leaf0, mkey, 0, WH_FO);
      debug_assert(m0); // already reserved enough
      wormhmap_set(&(map->hmap2[i]), m0);
    }
  }

  map->leaf0 = leaf0;
  return true;
}

  static struct wormhole *
wormhole_create_internal(const struct kvmap_mm * const mm, const u32 nh)
{
  struct wormhole * const map = yalloc(sizeof(*map));
  if (map == NULL)
    return NULL;
  memset(map, 0, sizeof(*map));
  // mm
  map->mm = mm ? (*mm) : kvmap_mm_dup;

  // pbuf for meta-merge
  map->pbuf = yalloc(1lu << 16); // 64kB
  if (map->pbuf == NULL)
    goto fail;

  // hmap
  for (u32 i = 0; i < nh; i++) {
    struct wormhmap * const hmap = &map->hmap2[i];
    if (!wormhmap_init(hmap, map->pbuf))
      goto fail;

    hmap->slab1 = slab_create(sizeof(struct wormmeta), WH_SLABMETA_SIZE);
    if (hmap->slab1 == NULL)
      goto fail;

    hmap->slab2 = slab_create(sizeof(struct wormmeta) + (sizeof(u64) * WH_BMNR), WH_SLABMETA_SIZE);
    if (hmap->slab2 == NULL)
      goto fail;
  }

  // leaf slab
  map->slab_leaf = slab_create(sizeof(struct wormleaf), WH_SLABLEAF_SIZE);
  if (map->slab_leaf == NULL)
    goto fail;

  // qsbr
  map->qsbr = qsbr_create();
  if (map->qsbr == NULL)
    goto fail;

  // leaf0
  if (!wormhole_create_leaf0(map))
    goto fail;

  rwlock_init(&(map->metalock));
  wormhmap_store(map, &map->hmap2[0]);
  return map;

fail:
  if (map->qsbr)
    qsbr_destroy(map->qsbr);

  if (map->slab_leaf)
    slab_destroy(map->slab_leaf);

  for (u32 i = 0; i < nh; i++) {
    struct wormhmap * const hmap = &map->hmap2[i];
    if (hmap->slab1)
      slab_destroy(hmap->slab1);
    if (hmap->slab2)
      slab_destroy(hmap->slab2);
    wormhmap_deinit(hmap);
  }

  if (map->pbuf)
    free(map->pbuf);

  free(map);
  return NULL;
}

  struct wormhole *
wormhole_create(const struct kvmap_mm * const mm)
{
  return wormhole_create_internal(mm, 2);
}

  struct wormhole *
whunsafe_create(const struct kvmap_mm * const mm)
{
  return wormhole_create_internal(mm, 1);
}
// }}} create
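
/*
 * Usage sketch (editorial; assumes the public API declared in wh.h, which
 * is outside this section): the safe variant keeps two wormhmap generations
 * so readers can run against one while writers prepare the other, whereas
 * whunsafe_create() builds only hmap2[0] for single-threaded use.
 *
 *   struct wormhole * const map = wormhole_create(NULL); // NULL: default mm
 *   // ... access through per-thread references or the whunsafe_* calls ...
 *   wormhole_destroy(map); // hypothetical here; declared in wh.h
 */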

// jump {{{

// lcp {{{
// search in the hash table for the Longest Prefix Match of the search key
// The corresponding wormmeta node is returned and the LPM is recorded in kref
  static struct wormmeta *
wormhole_meta_lcp(const struct wormhmap * const hmap, struct kref * const kref, const u32 klen)
{
  // invariant: lo <= lcp < (lo + gd)
  // ending condition: gd == 1
  u32 gd = (hmap->maxplen < klen ? hmap->maxplen : klen) + 1u;
  u32 lo = 0;
  u32 loh = KV_CRC32C_SEED;

#define META_LCP_GAP_1 ((7u))
  while (META_LCP_GAP_1 < gd) {
    const u32 inc = gd >> 3 << 2; // x4
    const u32 hash32 = crc32c_inc_x4(kref->ptr + lo, inc, loh);
    if (wormhmap_peek(hmap, hash32)) {
      loh = hash32;
      lo += inc;
      gd -= inc;
    } else {
      gd = inc;
    }
  }

  while (1 < gd) {
    const u32 inc = gd >> 1;
    const u32 hash32 = crc32c_inc_123(kref->ptr + lo, inc, loh);
    if (wormhmap_peek(hmap, hash32)) {
      loh = hash32;
      lo += inc;
      gd -= inc;
    } else {
      gd = inc;
    }
  }
#undef META_LCP_GAP_1

  kref->hash32 = loh;
  kref->len = lo;
  struct wormmeta * ret = wormhmap_get_kref(hmap, kref);
  if (likely(ret != NULL))
    return ret;

  gd = lo;
  lo = 0;
  loh = KV_CRC32C_SEED;

#define META_LCP_GAP_2 ((5u))
  while (META_LCP_GAP_2 < gd) {
    const u32 inc = (gd * 3) >> 2;
    wormhole_kref_inc(kref, lo, loh, inc);
    struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref);
    if (tmp) {
      loh = kref->hash32;
      lo += inc;
      gd -= inc;
      ret = tmp;
      if (wormmeta_bm_test(tmp, kref->ptr[lo])) {
        loh = crc32c_u8(loh, kref->ptr[lo]);
        lo++;
        gd--;
        ret = NULL;
      } else {
        gd = 1;
        break;
      }
    } else {
      gd = inc;
    }
  }

  while (1 < gd) {
    const u32 inc = (gd * 3) >> 2;
    wormhole_kref_inc_123(kref, lo, loh, inc);
    struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref);
    if (tmp) {
      loh = kref->hash32;
      lo += inc;
      gd -= inc;
      ret = tmp;
      if (wormmeta_bm_test(tmp, kref->ptr[lo])) {
        loh = crc32c_u8(loh, kref->ptr[lo]);
        lo++;
        gd--;
        ret = NULL;
      } else {
        break;
      }
    } else {
      gd = inc;
    }
  }
#undef META_LCP_GAP_2

  if (kref->len != lo) {
    kref->hash32 = loh;
    kref->len = lo;
  }
  if (ret == NULL)
    ret = wormhmap_get_kref(hmap, kref);
  debug_assert(ret);
  return ret;
}
// }}} lcp
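
/*
 * Example (editorial sketch): the first phase binary-searches on prefix
 * length using only the cheap wormhmap_peek(). For an 8-byte key "wormhole"
 * whose longest indexed prefix is "worm" (and maxplen >= 8, so gd starts at
 * 9), the probes might run:
 *
 *   peek(bytes [0..4))  -> hit   (lo = 4, gd = 5)
 *   peek(bytes [4..6))  -> miss  (gd = 2)
 *   peek(bytes [4..5))  -> miss  (gd = 1)
 *   -> candidate lcp = "worm"
 *
 * Because peek() can return false positives, the candidate is confirmed
 * with wormhmap_get_kref(), which compares full keys; only if that fails
 * does the slower second phase rerun the search from length 0 using real
 * lookups, with wormmeta_bm_test() skipping ahead one byte whenever the
 * next character is a known child of the current node.
 */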

// down {{{
  static struct wormleaf *
wormhole_meta_down(const struct wormhmap * const hmap, const struct kref * const lcp,
    const struct wormmeta * const meta, const u32 klen)
{
  if (likely(lcp->len < klen)) { // partial match
    const u32 id0 = lcp->ptr[lcp->len];
    if (wormmeta_bitmin_load(meta) > id0) { // no left sibling; don't care about the right
      return wormmeta_lpath_load(meta);
    } else if (wormmeta_bitmax_load(meta) < id0) { // has a left sibling but no right sibling
      return wormmeta_rmost_load(meta);
    } else { // has both (expensive)
      return wormmeta_rmost_load(wormhmap_get_kref1(hmap, lcp, (u8)wormmeta_bm_lt(meta, id0)));
    }
  } else { // lcp->len == klen
    return wormmeta_lpath_load(meta);
  }
}
// }}} down
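
/*
 * Example (editorial sketch): suppose the LCP node is "wor" and the search
 * key continues with 'm' (id0 == 'm'). If bitmin > id0, the node has no
 * child below 'm', so every key in its subtree sorts after the search key
 * and the answer is the boundary leaf on the left (lpath). If bitmax < id0,
 * all children sort before 'm' and the answer is the subtree's rightmost
 * leaf (rmost). Otherwise the highest child c < 'm' is found with
 * wormmeta_bm_lt(), and the code descends to node "wor"+c via
 * wormhmap_get_kref1() and takes that subtree's rmost leaf.
 */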

// jump-rw {{{
  static struct wormleaf *
wormhole_jump_leaf(const struct wormhmap * const hmap, const struct kref * const key)
{
  struct kref kref = {.ptr = key->ptr};
  debug_assert(kv_crc32c(key->ptr, key->len) == key->hash32);

  const struct wormmeta * const meta = wormhole_meta_lcp(hmap, &kref, key->len);
  return wormhole_meta_down(hmap, &kref, meta, key->len);
}

  static struct wormleaf *
wormhole_jump_leaf_read(struct wormref * const ref, const struct kref * const key)
{
  struct wormhole * const map = ref->map;
#pragma nounroll
  do {
    const struct wormhmap * const hmap = wormhmap_load(map);
    const u64 v = wormhmap_version_load(hmap);
    qsbr_update(&ref->qref, v);
    struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key);
    wormleaf_prefetch(leaf, key->hash32);
#pragma nounroll
    do {
      if (rwlock_trylock_read_nr(&(leaf->leaflock), 64)) {
        if (wormleaf_version_load(leaf) <= v)
          return leaf;
        wormleaf_unlock_read(leaf);
        break;
      }
      // v1 is loaded before lv; if lv <= v, we can advance to v1 without redoing the jump
      const u64 v1 = wormhmap_version_load(wormhmap_load(map));
      if (wormleaf_version_load(leaf) > v)
        break;
      wormhole_qsbr_update_pause(ref, v1);
    } while (true);
  } while (true);
}

  static struct wormleaf *
wormhole_jump_leaf_write(struct wormref * const ref, const struct kref * const key)
{
  struct wormhole * const map = ref->map;
#pragma nounroll
  do {
    const struct wormhmap * const hmap = wormhmap_load(map);
    const u64 v = wormhmap_version_load(hmap);
    qsbr_update(&ref->qref, v);
    struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key);
    wormleaf_prefetch(leaf, key->hash32);
#pragma nounroll
    do {
      if (rwlock_trylock_write_nr(&(leaf->leaflock), 64)) {
        if (wormleaf_version_load(leaf) <= v)
          return leaf;
        wormleaf_unlock_write(leaf);
        break;
      }
      // v1 is loaded before lv; if lv <= v, we can advance to v1 without redoing the jump
      const u64 v1 = wormhmap_version_load(wormhmap_load(map));
      if (wormleaf_version_load(leaf) > v)
        break;
      wormhole_qsbr_update_pause(ref, v1);
    } while (true);
  } while (true);
}
// }}} jump-rw

// }}} jump
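
/*
 * Note (editorial): the jump is optimistic. A thread records the hmap
 * version v, walks the meta trie without locks, and then takes the leaf
 * lock. If the leaf's version lv is still <= v, no split/merge has
 * invalidated the walk and the leaf is the right one; otherwise it unlocks
 * and re-jumps. While spinning on a contended leaf lock it refreshes its
 * QSBR position with the latest hmap version (wormhole_qsbr_update_pause),
 * so a waiting reader never blocks writers' memory reclamation.
 */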

// leaf-read {{{
  static inline struct kv *
wormleaf_kv_at_ih(const struct wormleaf * const leaf, const u32 ih)
{
  return u64_to_ptr(leaf->hs[ih].e3);
}

  static inline struct kv *
wormleaf_kv_at_is(const struct wormleaf * const leaf, const u32 is)
{
  return u64_to_ptr(leaf->hs[leaf->ss[is]].e3);
}

  static inline void
wormleaf_prefetch_ss(const struct wormleaf * const leaf)
{
  for (u32 i = 0; i < WH_KPN; i+=64)
    cpu_prefetch0(&leaf->ss[i]);
}

// leaf must have been sorted
// return the key at [i] as if k1 had been inserted into leaf; i <= leaf->nr_sorted
  static const struct kv *
wormleaf_kv_at_is1(const struct wormleaf * const leaf, const u32 i, const u32 is1, const struct kv * const k1)
{
  debug_assert(leaf->nr_keys == leaf->nr_sorted);
  debug_assert(is1 <= leaf->nr_sorted);
  if (i < is1)
    return wormleaf_kv_at_is(leaf, i);
  else if (i > is1)
    return wormleaf_kv_at_is(leaf, i-1);
  else // i == is1
    return k1;
}

// fast point-lookup
// returns WH_KPN if not found
  static u32
wormleaf_match_hs(const struct wormleaf * const leaf, const struct kref * const key)
{
  const u16 pkey = wormhole_pkey(key->hash32);
  const u32 i0 = pkey / WH_HDIV;
  const struct entry13 * const hs = leaf->hs;

  if (hs[i0].e1 == pkey) {
    struct kv * const curr = u64_to_ptr(hs[i0].e3);
    if (likely(wormhole_kref_kv_match(key, curr)))
      return i0;
  }
  if (hs[i0].e1 == 0)
    return WH_KPN;

  // search left (u32 i wraps around past 0 and ends the loop)
  u32 i = i0 - 1;
  while (i < WH_KPN) {
    if (hs[i].e1 == pkey) {
      struct kv * const curr = u64_to_ptr(hs[i].e3);
      if (likely(wormhole_kref_kv_match(key, curr)))
        return i;
    } else if (hs[i].e1 < pkey) {
      break;
    }
    i--;
  }

  // search right
  i = i0 + 1;
  while (i < WH_KPN) {
    if (hs[i].e1 == pkey) {
      struct kv * const curr = u64_to_ptr(hs[i].e3);
      if (likely(wormhole_kref_kv_match(key, curr)))
        return i;
    } else if ((hs[i].e1 > pkey) || (hs[i].e1 == 0)) {
      break;
    }
    i++;
  }

  // not found
  return WH_KPN;
}
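
/*
 * Example (editorial sketch): hs[] is kept ordered by 16-bit pkey, and a
 * key's "ideal" slot is pkey / WH_HDIV, a linear mapping of the 16-bit pkey
 * range onto the WH_KPN slots. With pkey == 0x8000 the probe starts at slot
 * 64 (0x8000 / 512); on a mismatch it scans left while hs[i].e1 >= pkey and
 * right while hs[i].e1 <= pkey (stopping early at an empty slot, e1 == 0),
 * so a lookup touches only a small neighborhood around the ideal slot
 * instead of the whole node.
 */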
  1498  
  1499  // search for an existing entry in hs
  1500    static u32
  1501  wormleaf_search_ih(const struct wormleaf * const leaf, const struct entry13 e)
  1502  {
  1503    const u16 pkey = e.e1;
  1504    const u32 i0 = pkey / WH_HDIV;
  1505    const struct entry13 * const hs = leaf->hs;
  1506    const struct entry13 e0 = hs[i0];
  1507  
  1508    if (e0.v64 == e.v64)
  1509      return i0;
  1510  
  1511    if (e0.e1 == 0)
  1512      return WH_KPN;
  1513  
  1514    // search left
  1515    u32 i = i0 - 1;
  1516    while (i < WH_KPN) {
  1517      const struct entry13 ei = hs[i];
  1518      if (ei.v64 == e.v64) {
  1519        return i;
  1520      } else if (ei.e1 < pkey) {
  1521        break;
  1522      }
  1523      i--;
  1524    }
  1525  
  1526    // search right
  1527    i = i0 + 1;
  1528    while (i < WH_KPN) {
  1529      const struct entry13 ei = hs[i];
  1530      if (ei.v64 == e.v64) {
  1531        return i;
  1532      } else if ((ei.e1 > pkey) || (ei.e1 == 0)) {
  1533        break;
  1534      }
  1535      i++;
  1536    }
  1537  
  1538    // not found
  1539    return WH_KPN;
  1540  }
  1541  
  1542  // search for an existing entry in ss
  1543    static u32
  1544  wormleaf_search_is(const struct wormleaf * const leaf, const u8 ih)
  1545  {
  1546  #if defined(__x86_64__)
  1547    // TODO: avx512
  1548  #if defined(__AVX2__)
  1549    const m256 i1 = _mm256_set1_epi8((char)ih);
  1550    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) {
  1551      const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i));
  1552      const u32 mask = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(sv, i1));
  1553      if (mask)
  1554        return i + (u32)__builtin_ctz(mask);
  1555    }
  1556  #else // SSE4.2
  1557    const m128 i1 = _mm_set1_epi8((char)ih);
  1558    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
  1559      const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i));
  1560      const u32 mask = (u32)_mm_movemask_epi8(_mm_cmpeq_epi8(sv, i1));
  1561      if (mask)
  1562        return i + (u32)__builtin_ctz(mask);
  1563    }
  1564  #endif // __AVX2__
  1565  #elif defined(__aarch64__)
  1566    static const m128 vtbl = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};
  1567    static const uint16x8_t mbits = {0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080};
  1568    const m128 i1 = vdupq_n_u8(ih);
  1569    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
  1570      const m128 cmp = vceqq_u8(vld1q_u8(leaf->ss+i), i1); // cmpeq => 0xff or 0x00
  1571      const m128 cmp1 = vqtbl1q_u8(cmp, vtbl); // reorder
  1572      const u32 mask = (u32)vaddvq_u16(vandq_u8(vreinterpretq_u16_u8(cmp1), mbits));
  1573      if (mask)
  1574        return i + (u32)__builtin_ctz(mask);
  1575    }
  1576  #endif // __x86_64__
  1577    debug_die();
  1578  }
  1579  
  1580  // assumes there in no duplicated keys
  1581  // search the first key that is >= the given key
  1582  // return 0 .. nr_sorted
  1583    static u32
  1584  wormleaf_search_ss(const struct wormleaf * const leaf, const struct kref * const key)
  1585  {
  1586    u32 lo = 0;
  1587    u32 hi = leaf->nr_sorted;
  1588    while ((lo + 2) < hi) {
  1589      const u32 i = (lo + hi) >> 1;
  1590      const struct kv * const curr = wormleaf_kv_at_is(leaf, i);
  1591      cpu_prefetch0(curr);
  1592      cpu_prefetch0(leaf->hs + leaf->ss[(lo + i) >> 1]);
  1593      cpu_prefetch0(leaf->hs + leaf->ss[(i + 1 + hi) >> 1]);
  1594      const int cmp = kref_kv_compare(key, curr);
  1595      debug_assert(cmp != 0);
  1596      if (cmp < 0)
  1597        hi = i;
  1598      else
  1599        lo = i + 1;
  1600    }
  1601  
  1602    while (lo < hi) {
  1603      const u32 i = (lo + hi) >> 1;
  1604      const struct kv * const curr = wormleaf_kv_at_is(leaf, i);
  1605      const int cmp = kref_kv_compare(key, curr);
  1606      debug_assert(cmp != 0);
  1607      if (cmp < 0)
  1608        hi = i;
  1609      else
  1610        lo = i + 1;
  1611    }
  1612    return lo;
  1613  }
  1614  
  1615    static u32
  1616  wormleaf_seek(const struct wormleaf * const leaf, const struct kref * const key)
  1617  {
  1618    debug_assert(leaf->nr_sorted == leaf->nr_keys);
  1619    wormleaf_prefetch_ss(leaf); // effective for both hit and miss
  1620    const u32 ih = wormleaf_match_hs(leaf, key);
  1621    if (ih < WH_KPN) { // hit
  1622      return wormleaf_search_is(leaf, (u8)ih);
  1623    } else { // miss, binary search for gt
  1624      return wormleaf_search_ss(leaf, key);
  1625    }
  1626  }
  1627  
  1628  // same to search_sorted but the target is very likely beyond the end
  1629    static u32
  1630  wormleaf_seek_end(const struct wormleaf * const leaf, const struct kref * const key)
  1631  {
  1632    debug_assert(leaf->nr_keys == leaf->nr_sorted);
  1633    if (leaf->nr_sorted) {
  1634      const int cmp = kref_kv_compare(key, wormleaf_kv_at_is(leaf, leaf->nr_sorted-1));
  1635      if (cmp > 0)
  1636        return leaf->nr_sorted;
  1637      else if (cmp == 0)
  1638        return leaf->nr_sorted - 1;
  1639      else
  1640        return wormleaf_seek(leaf, key);
  1641    } else {
  1642      return 0;
  1643    }
  1644  }
  1645  // }}} leaf-read
  1646  
  1647  // leaf-write {{{
  1648    static void
  1649  wormleaf_sort_m2(struct wormleaf * const leaf, const u32 n1, const u32 n2)
  1650  {
  1651    if (n1 == 0 || n2 == 0)
  1652      return; // no need to sort
  1653  
  1654    u8 * const ss = leaf->ss;
  1655    u8 et[WH_KPN/2]; // min(n1,n2) <= KPN/2
  1656    if (n1 <= n2) { // merge left
  1657      memcpy(et, &(ss[0]), sizeof(ss[0]) * n1);
  1658      u8 * eo = ss;
  1659      u8 * e1 = et; // size == n1
  1660      u8 * e2 = &(ss[n1]); // size == n2
  1661      const u8 * const z1 = e1 + n1;
  1662      const u8 * const z2 = e2 + n2;
  1663      while ((e1 < z1) && (e2 < z2)) {
  1664        const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2));
  1665        if (cmp < 0)
  1666          *(eo++) = *(e1++);
  1667        else if (cmp > 0)
  1668          *(eo++) = *(e2++);
  1669        else
  1670          debug_die();
  1671  
  1672        if (eo == e2)
  1673          break; // finish early
  1674      }
  1675      if (eo < e2)
  1676        memcpy(eo, e1, sizeof(*eo) * (size_t)(e2 - eo));
  1677    } else {
  1678      memcpy(et, &(ss[n1]), sizeof(ss[0]) * n2);
  1679      u8 * eo = &(ss[n1 + n2 - 1]); // merge backwards
  1680      u8 * e1 = &(ss[n1 - 1]); // size == n1
  1681      u8 * e2 = &(et[n2 - 1]); // size == n2
  1682      const u8 * const z1 = e1 - n1;
  1683      const u8 * const z2 = e2 - n2;
  1684      while ((e1 > z1) && (e2 > z2)) {
  1685        const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2));
  1686        if (cmp < 0)
  1687          *(eo--) = *(e2--);
  1688        else if (cmp > 0)
  1689          *(eo--) = *(e1--);
  1690        else
  1691          debug_die();
  1692  
  1693        if (eo == e1)
  1694          break;
  1695      }
  1696      if (eo > e1)
  1697        memcpy(e1 + 1, et, sizeof(*eo) * (size_t)(eo - e1));
  1698    }
  1699  }
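        // worked example (illustrative only): with n1=2, n2=3 and ss
        // referencing keys [B D | A C E], the merge-left branch copies [B D]
        // into et, then merges et with ss[2..4] back into ss[0..4], producing
        // [A B C D E]. The eo==e2 early exit fires exactly when the buffered
        // run has been fully emitted, i.e. the rest of the other run is
        // already in its final place; at most min(n1,n2) slots are buffered.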
  1700  
  1701  #if defined(__linux__)
  1702    static int
  1703  wormleaf_ss_cmp(const void * const p1, const void * const p2, void * priv)
  1704  {
  1705    const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1);
  1706    const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2);
  1707    return kv_compare(k1, k2);
  1708  }
  1709  #else // (FreeBSD and APPLE only)
  1710    static int
  1711  wormleaf_ss_cmp(void * priv, const void * const p1, const void * const p2)
  1712  {
  1713    const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1);
  1714    const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2);
  1715    return kv_compare(k1, k2);
  1716  }
  1717  #endif // __linux__
  1718  
  1719    static inline void
  1720  wormleaf_sort_range(struct wormleaf * const leaf, const u32 i0, const u32 nr)
  1721  {
  1722  #if defined(__linux__)
  1723    qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), wormleaf_ss_cmp, leaf);
  1724  #else // (FreeBSD and APPLE only)
  1725    qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), leaf, wormleaf_ss_cmp);
  1726  #endif // __linux__
  1727  }
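        // portability note: glibc's qsort_r passes the context pointer as the
        // last argument and calls the comparator as cmp(a, b, ctx), while the
        // BSD/macOS variant takes the context before the array and calls
        // cmp(ctx, a, b); the two wormleaf_ss_cmp definitions above exist
        // only to match these two signatures.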
  1728  
  1729  // make sure all keys are sorted in a leaf node
  1730    static void
  1731  wormleaf_sync_sorted(struct wormleaf * const leaf)
  1732  {
  1733    const u32 s = leaf->nr_sorted;
  1734    const u32 n = leaf->nr_keys;
  1735    if (s == n)
  1736      return;
  1737  
  1738    wormleaf_sort_range(leaf, s, n - s);
  1739    // merge-sort inplace
  1740    wormleaf_sort_m2(leaf, s, n - s);
  1741    leaf->nr_sorted = n;
  1742  }
  1743  
  1744  // shift a sequence of entries on hs and update the corresponding ss values
  1745    static void
  1746  wormleaf_shift_inc(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr)
  1747  {
  1748    debug_assert(to == (from+1));
  1749    struct entry13 * const hs = leaf->hs;
  1750    memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr);
  1751  
  1752  #if defined(__x86_64__)
  1753    // TODO: avx512
  1754  #if defined(__AVX2__)
  1755    const m256 ones = _mm256_set1_epi8(1);
  1756    const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr));
  1757    const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr));
  1758    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) {
  1759      const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i));
  1760      const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones);
  1761      _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_add_epi8(sv, add1));
  1762    }
  1763  #else // SSE4.2
  1764    const m128 ones = _mm_set1_epi8(1);
  1765    const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr));
  1766    const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr));
  1767    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
  1768      const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i));
  1769      const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones);
  1770      _mm_store_si128((m128 *)(leaf->ss+i), _mm_add_epi8(sv, add1));
  1771    }
  1772  #endif // __AVX2__
  1773  #elif defined(__aarch64__) // __x86_64__
  1774    // aarch64
  1775    const m128 subx = vdupq_n_u8((u8)from);
  1776    const m128 cmpx = vdupq_n_u8((u8)nr);
  1777    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
  1778      const m128 sv = vld1q_u8(leaf->ss+i);
  1779      const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7);
  1780      vst1q_u8(leaf->ss+i, vaddq_u8(sv, add1));
  1781    }
  1782  #endif // __x86_64__
  1783  }
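        // sketch (illustrative only): the vectorized update above is
        // equivalent to the scalar loop:
        //   for (u32 i = 0; i < leaf->nr_keys; i++)
        //     if ((u8)(leaf->ss[i] - from) < nr) // ss[i] in [from, from+nr)
        //       leaf->ss[i]++;
        // i.e. every ss slot that referenced a shifted hs entry is bumped by
        // one. The x86 path biases by INT8_MAX+1 and uses a signed compare
        // because SSE/AVX2 have no unsigned byte comparison.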
  1784  
  1785    static void
  1786  wormleaf_shift_dec(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr)
  1787  {
  1788    debug_assert(to == (from-1));
  1789    struct entry13 * const hs = leaf->hs;
  1790    memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr);
  1791  
  1792  #if defined(__x86_64__)
  1793    // TODO: avx512
  1794  #if defined(__AVX2__)
  1795    const m256 ones = _mm256_set1_epi8(1);
  1796    const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr));
  1797    const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr));
  1798    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) {
  1799      const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i));
  1800      const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones);
  1801      _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_sub_epi8(sv, add1));
  1802    }
  1803  #else // SSE4.2
  1804    const m128 ones = _mm_set1_epi8(1);
  1805    const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr));
  1806    const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr));
  1807    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
  1808      const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i));
  1809      const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones);
  1810      _mm_store_si128((m128 *)(leaf->ss+i), _mm_sub_epi8(sv, add1));
  1811    }
  1812  #endif // __AVX2__
  1813  #elif defined(__aarch64__) // __x86_64__
  1814    // aarch64
  1815    const m128 subx = vdupq_n_u8((u8)from);
  1816    const m128 cmpx = vdupq_n_u8((u8)nr);
  1817    for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
  1818      const m128 sv = vld1q_u8(leaf->ss+i);
  1819      const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7);
  1820      vst1q_u8(leaf->ss+i, vsubq_u8(sv, add1));
  1821    }
  1822  #endif // __x86_64__
  1823  }
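        // NOTE (illustrative): mirror of wormleaf_shift_inc; the same
        // predicate selects ss values in [from, from+nr) and decrements them
        // to follow the hs entries that moved one slot to the left.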
  1824  
  1825  // insert hs and also shift ss
  1826    static u32
  1827  wormleaf_insert_hs(struct wormleaf * const leaf, const struct entry13 e)
  1828  {
  1829    struct entry13 * const hs = leaf->hs;
  1830    const u16 pkey = e.e1;
  1831    const u32 i0 = pkey / WH_HDIV;
  1832    if (hs[i0].e1 == 0) { // insert
  1833      hs[i0] = e;
  1834      return i0;
  1835    }
  1836  
  1837    // find left-most insertion point
  1838    u32 i = i0;
  1839    while (i && hs[i-1].e1 && (hs[i-1].e1 >= pkey))
  1840      i--;
  1841    while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 < pkey)) // stop at >= or empty
  1842      i++;
  1843    const u32 il = --i; // i in [0, KPN]
  1844  
  1845    // find left empty slot
  1846    if (i > (i0 - 1))
  1847      i = i0 - 1;
  1848    while ((i < WH_KPN) && hs[i].e1)
  1849      i--;
  1850    const u32 el = i; // el < i0 or el is invalid (>= KPN)
  1851  
  1852    // find right-most insertion point.
  1853    i = il + 1;
  1854    while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 == pkey))
  1855      i++;
  1856    const u32 ir = i; // ir >= il, in [0, KPN]
  1857  
  1858    // find right empty slot
  1859    if (i < (i0 + 1))
  1860      i = i0 + 1;
  1861    while ((i < WH_KPN) && hs[i].e1)
  1862      i++;
  1863    const u32 er = i; // er > i0 or er is invalid (>= KPN)
  1864  
  1865    // el <= il < ir <= er    (if < WH_KPN)
  1866    const u32 dl = (el < WH_KPN) ? (il - el) : WH_KPN;
  1867    const u32 dr = (er < WH_KPN) ? (er - ir) : WH_KPN;
  1868    if (dl <= dr) { // push left
  1869      debug_assert(dl < WH_KPN);
  1870      if (dl)
  1871        wormleaf_shift_dec(leaf, el, el+1, dl);
  1872      hs[il] = e;
  1873      return il;
  1874    } else {
  1875      debug_assert(dr < WH_KPN);
  1876      if (dr)
  1877        wormleaf_shift_inc(leaf, ir+1, ir, dr);
  1878      hs[ir] = e;
  1879      return ir;
  1880    }
  1881  }
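        // worked example (illustrative only): with WH_KPN=128 the home slot
        // of pkey 0x1234 (4660) is i0 = 4660/WH_HDIV = 4660/512 = 9. If hs[9]
        // is taken, the code finds the leftmost/rightmost ordered insertion
        // points and the nearest empty slot on each side, then shifts
        // whichever side is cheaper, so hs[] stays ordered by pkey around the
        // home slots (the "magnet" layout exploited by pull_ih below).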
  1882  
  1883    static void
  1884  wormleaf_insert_e13(struct wormleaf * const leaf, const struct entry13 e)
  1885  {
  1886    // insert to hs and fix all existing is
  1887    const u32 ih = wormleaf_insert_hs(leaf, e);
  1888    debug_assert(ih < WH_KPN);
  1889    // append the new is
  1890    leaf->ss[leaf->nr_keys] = (u8)ih;
  1891    // fix nr
  1892    leaf->nr_keys++;
  1893  }
  1894  
  1895    static void
  1896  wormleaf_insert(struct wormleaf * const leaf, const struct kv * const new)
  1897  {
  1898    debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen)));
  1899    debug_assert(leaf->nr_keys < WH_KPN);
  1900  
  1901    // insert
  1902    const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new));
  1903    const u32 nr0 = leaf->nr_keys;
  1904    wormleaf_insert_e13(leaf, e);
  1905  
  1906    // optimize for seq insertion
  1907    if (nr0 == leaf->nr_sorted) {
  1908      if (nr0) {
  1909        const struct kv * const kvn = wormleaf_kv_at_is(leaf, nr0 - 1);
  1910        if (kv_compare(new, kvn) > 0)
  1911          leaf->nr_sorted = nr0 + 1;
  1912      } else {
  1913        leaf->nr_sorted = 1;
  1914      }
  1915    }
  1916  }
  1917  
  1918    static void
  1919  wormleaf_pull_ih(struct wormleaf * const leaf, const u32 ih)
  1920  {
  1921    struct entry13 * const hs = leaf->hs;
  1922    // try left
  1923    u32 i = ih - 1;
  1924    while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) > i))
  1925      i--;
  1926  
  1927    if ((++i) < ih) {
  1928      wormleaf_shift_inc(leaf, i+1, i, ih - i);
  1929      leaf->hs[i].v64 = 0;
  1930      return;
  1931    }
  1932  
  1933    // try right
  1934    i = ih + 1;
  1935    while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) < i))
  1936      i++;
  1937  
  1938    if ((--i) > ih) {
  1939      wormleaf_shift_dec(leaf, ih, ih+1, i - ih);
  1940      hs[i].v64 = 0;
  1941    }
  1942    // hs[ih] may still be 0
  1943  }
  1944  
  1945  // internal only
  1946    static struct kv *
  1947  wormleaf_remove(struct wormleaf * const leaf, const u32 ih, const u32 is)
  1948  {
  1949    // ss
  1950    leaf->ss[is] = leaf->ss[leaf->nr_keys - 1];
  1951    if (leaf->nr_sorted > is)
  1952      leaf->nr_sorted = is;
  1953  
  1954    // ret
  1955    struct kv * const victim = wormleaf_kv_at_ih(leaf, ih);
  1956    // hs
  1957    leaf->hs[ih].v64 = 0;
  1958    leaf->nr_keys--;
  1959    // use magnet
  1960    wormleaf_pull_ih(leaf, ih);
  1961    return victim;
  1962  }
  1963  
  1964  // remove key from leaf but do not call free
  1965    static struct kv *
  1966  wormleaf_remove_ih(struct wormleaf * const leaf, const u32 ih)
  1967  {
  1968    // remove from ss
  1969    const u32 is = wormleaf_search_is(leaf, (u8)ih);
  1970    debug_assert(is < leaf->nr_keys);
  1971    return wormleaf_remove(leaf, ih, is);
  1972  }
  1973  
  1974    static struct kv *
  1975  wormleaf_remove_is(struct wormleaf * const leaf, const u32 is)
  1976  {
  1977    return wormleaf_remove(leaf, leaf->ss[is], is);
  1978  }
  1979  
  1980  // for delr (delete-range)
  1981    static void
  1982  wormleaf_delete_range(struct wormhole * const map, struct wormleaf * const leaf,
  1983      const u32 i0, const u32 end)
  1984  {
  1985    debug_assert(leaf->nr_keys == leaf->nr_sorted);
  1986    for (u32 i = end; i > i0; i--) {
  1987      const u32 ir = i - 1;
  1988      struct kv * const victim = wormleaf_remove_is(leaf, ir);
  1989      map->mm.free(victim, map->mm.priv);
  1990    }
  1991  }
  1992  
  1993  // return the old kv; the caller should free the old kv
  1994    static struct kv *
  1995  wormleaf_update(struct wormleaf * const leaf, const u32 ih, const struct kv * const new)
  1996  {
  1997    debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen)));
  1998    // search entry in ss (is)
  1999    struct kv * const old = wormleaf_kv_at_ih(leaf, ih);
  2000    debug_assert(old);
  2001  
  2002    entry13_update_e3(&leaf->hs[ih], (u64)new);
  2003    return old;
  2004  }
  2005  // }}} leaf-write
  2006  
  2007  // leaf-split {{{
  2008  // helper for cut_search only; not a general-purpose prefix check
  2009  // quickly tell whether a cut between k1 and k2 can achieve a specific anchor-key length
  2010    static bool
  2011  wormhole_split_cut_alen_check(const u32 alen, const struct kv * const k1, const struct kv * const k2)
  2012  {
  2013    debug_assert(k2->klen >= alen);
  2014    return (k1->klen < alen) || (k1->kv[alen - 1] != k2->kv[alen - 1]);
  2015  }
  2016  
  2017  // return the number of keys that should go to leaf1
  2018  // assert(r > 0 && r <= nr_keys)
  2019  // (1) r < is1, anchor key is ss[r-1]:ss[r]
  2020  // (2) r == is1: anchor key is ss[r-1]:new
  2021  // (3) r == is1+1: anchor key is new:ss[r-1] (ss[r-1] is the ss[r] on the logically sorted array)
  2022  // (4) r > is1+1: anchor key is ss[r-2]:ss[r-1] (ss[r-2] is the [r-1] on the logically sorted array)
  2023  // edge cases:
  2024  //   (case 2) is1 == nr_keys: r = nr_keys; ss[r-1]:new
  2025  //   (case 3) is1 == 0, r == 1; new:ss[0]
  2026  // return 1..WH_KPN; returns WH_KPN2 when no valid cut exists (the anchor would be too long)
  2027    static u32
  2028  wormhole_split_cut_search1(struct wormleaf * const leaf, u32 l, u32 h, const u32 is1, const struct kv * const new)
  2029  {
  2030    debug_assert(leaf->nr_keys == leaf->nr_sorted);
  2031    debug_assert(leaf->nr_keys);
  2032    debug_assert(l < h && h <= leaf->nr_sorted);
  2033  
  2034    const struct kv * const kl0 = wormleaf_kv_at_is1(leaf, l, is1, new);
  2035    const struct kv * const kh0 = wormleaf_kv_at_is1(leaf, h, is1, new);
  2036    const u32 alen = kv_key_lcp(kl0, kh0) + 1;
  2037    if (unlikely(alen > UINT16_MAX))
  2038      return WH_KPN2;
  2039  
  2040    const u32 target = leaf->next ? WH_MID : WH_KPN_MRG;
  2041    while ((l + 1) < h) {
  2042      const u32 m = (l + h + 1) >> 1;
  2043      if (m <= target) { // try right
  2044        const struct kv * const k1 = wormleaf_kv_at_is1(leaf, m, is1, new);
  2045        const struct kv * const k2 = wormleaf_kv_at_is1(leaf, h, is1, new);
  2046        if (wormhole_split_cut_alen_check(alen, k1, k2))
  2047          l = m;
  2048        else
  2049          h = m;
  2050      } else { // try left
  2051        const struct kv * const k1 = wormleaf_kv_at_is1(leaf, l, is1, new);
  2052        const struct kv * const k2 = wormleaf_kv_at_is1(leaf, m, is1, new);
  2053        if (wormhole_split_cut_alen_check(alen, k1, k2))
  2054          h = m;
  2055        else
  2056          l = m;
  2057      }
  2058    }
  2059    return h;
  2060  }
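        // NOTE (illustrative): the loop maintains the invariant that the keys
        // at l and h differ within their first alen bytes, so some cut in
        // (l, h] yields an anchor no longer than alen bytes; it bisects
        // toward target (WH_MID, or WH_KPN_MRG for the rightmost leaf) to
        // keep the split as balanced as the anchor constraint allows.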
  2061  
  2062    static void
  2063  wormhole_split_leaf_move1(struct wormleaf * const leaf1, struct wormleaf * const leaf2,
  2064      const u32 cut, const u32 is1, const struct kv * const new)
  2065  {
  2066    const u32 nr_keys = leaf1->nr_keys;
  2067    const struct entry13 e1 = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new));
  2068    struct entry13 es[WH_KPN];
  2069  
  2070    if (cut <= is1) { // e1 goes to leaf2
  2071      // leaf2
  2072      for (u32 i = cut; i < is1; i++)
  2073        wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]);
  2074  
  2075      wormleaf_insert_e13(leaf2, e1);
  2076  
  2077      for (u32 i = is1; i < nr_keys; i++)
  2078        wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]);
  2079  
  2080      // leaf1
  2081      for (u32 i = 0; i < cut; i++)
  2082        es[i] = leaf1->hs[leaf1->ss[i]];
  2083  
  2084    } else { // e1 goes to leaf1
  2085      // leaf2
  2086      for (u32 i = cut - 1; i < nr_keys; i++)
  2087        wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]);
  2088  
  2089      // leaf1
  2090      for (u32 i = 0; i < is1; i++)
  2091        es[i] = leaf1->hs[leaf1->ss[i]];
  2092  
  2093      es[is1] = e1;
  2094  
  2095      for (u32 i = is1 + 1; i < cut; i++)
  2096        es[i] = leaf1->hs[leaf1->ss[i - 1]];
  2097    }
  2098  
  2099    leaf2->nr_sorted = leaf2->nr_keys;
  2100  
  2101    memset(leaf1->hs, 0, sizeof(leaf1->hs[0]) * WH_KPN);
  2102    leaf1->nr_keys = 0;
  2103    for (u32 i = 0; i < cut; i++)
  2104      wormleaf_insert_e13(leaf1, es[i]);
  2105    leaf1->nr_sorted = cut;
  2106    debug_assert((leaf1->nr_sorted + leaf2->nr_sorted) == (nr_keys + 1));
  2107  }
  2108  
  2109  // create an anchor for leaf-split
  2110    static struct kv *
  2111  wormhole_split_alloc_anchor(const struct kv * const key1, const struct kv * const key2)
  2112  {
  2113    const u32 alen = kv_key_lcp(key1, key2) + 1;
  2114    debug_assert(alen <= key2->klen);
  2115  
  2116    struct kv * const anchor = wormhole_alloc_akey(alen);
  2117    if (anchor)
  2118      kv_refill(anchor, key2->kv, alen, NULL, 0);
  2119    return anchor;
  2120  }
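        // worked example (illustrative): for key1="apple" and key2="apply",
        // kv_key_lcp() returns 4, so alen=5 and the anchor copies the 5-byte
        // prefix of key2 ("apply"): the shortest prefix of key2 that still
        // compares greater than key1, cleanly separating the two leaves.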
  2121  
  2122  // leaf1 is locked
  2123  // split leaf1 into leaf1+leaf2; insert new into leaf1 or leaf2, return leaf2
  2124    static struct wormleaf *
  2125  wormhole_split_leaf(struct wormhole * const map, struct wormleaf * const leaf1, struct kv * const new)
  2126  {
  2127    wormleaf_sync_sorted(leaf1);
  2128    struct kref kref_new;
  2129    kref_ref_kv(&kref_new, new);
  2130    const u32 is1 = wormleaf_search_ss(leaf1, &kref_new); // new should be inserted at [is1]
  2131    const u32 cut = wormhole_split_cut_search1(leaf1, 0, leaf1->nr_keys, is1, new);
  2132    if (unlikely(cut == WH_KPN2))
  2133      return NULL;
  2134  
  2135    // anchor of leaf2
  2136    debug_assert(cut && (cut <= leaf1->nr_keys));
  2137    const struct kv * const key1 = wormleaf_kv_at_is1(leaf1, cut - 1, is1, new);
  2138    const struct kv * const key2 = wormleaf_kv_at_is1(leaf1, cut, is1, new);
  2139    struct kv * const anchor2 = wormhole_split_alloc_anchor(key1, key2);
  2140    if (unlikely(anchor2 == NULL)) // anchor alloc failed
  2141      return NULL;
  2142  
  2143    // create leaf2 with anchor2
  2144    struct wormleaf * const leaf2 = wormleaf_alloc(map, leaf1, leaf1->next, anchor2);
  2145    if (unlikely(leaf2 == NULL)) {
  2146      wormhole_free_akey(anchor2);
  2147      return NULL;
  2148    }
  2149  
  2150    // split_hmap will unlock the leaf nodes; must move now
  2151    wormhole_split_leaf_move1(leaf1, leaf2, cut, is1, new);
  2152    // leaf1 and leaf2 should be sorted after split
  2153    debug_assert(leaf1->nr_keys == leaf1->nr_sorted);
  2154    debug_assert(leaf2->nr_keys == leaf2->nr_sorted);
  2155  
  2156    return leaf2;
  2157  }
  2158  // }}} leaf-split
  2159  
  2160  // leaf-merge {{{
  2161  // MERGE is the only operation that deletes a leaf node (leaf2).
  2162  // It ALWAYS merges the right node into the left node even if the left is empty.
  2163  // This requires both of their writer locks to be acquired.
  2164  // This allows iterators to safely probe the next node (but not backwards).
  2165  // In other words, if either the reader or the writer lock of node X has been acquired:
  2166  // X->next (the pointer) cannot be changed by any other thread.
  2167  // X->next cannot be deleted.
  2168  // But the content in X->next can still be changed.
  2169    static bool
  2170  wormleaf_merge(struct wormleaf * const leaf1, struct wormleaf * const leaf2)
  2171  {
  2172    debug_assert((leaf1->nr_keys + leaf2->nr_keys) <= WH_KPN);
  2173    const bool leaf1_sorted = leaf1->nr_keys == leaf1->nr_sorted;
  2174  
  2175    for (u32 i = 0; i < leaf2->nr_keys; i++)
  2176      wormleaf_insert_e13(leaf1, leaf2->hs[leaf2->ss[i]]);
  2177    if (leaf1_sorted)
  2178      leaf1->nr_sorted += leaf2->nr_sorted;
  2179    return true;
  2180  }
  2181  
  2182  // for undoing insertion under split_meta failure; leaf2 is still local
  2183  // remove the new key; merge keys in leaf2 into leaf1; free leaf2
  2184    static void
  2185  wormleaf_split_undo(struct wormhole * const map, struct wormleaf * const leaf1,
  2186      struct wormleaf * const leaf2, struct kv * const new)
  2187  {
  2188    if (new) {
  2189      const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new));
  2190      const u32 im1 = wormleaf_search_ih(leaf1, e);
  2191      if (im1 < WH_KPN) {
  2192        (void)wormleaf_remove_ih(leaf1, im1);
  2193      } else { // not found in leaf1; search leaf2
  2194        const u32 im2 = wormleaf_search_ih(leaf2, e);
  2195        debug_assert(im2 < WH_KPN);
  2196        (void)wormleaf_remove_ih(leaf2, im2);
  2197      }
  2198    }
  2199    // this merge must succeed
  2200    if (!wormleaf_merge(leaf1, leaf2))
  2201      debug_die();
  2202    // Keep this to avoid triggering false alarm in wormleaf_free
  2203    leaf2->leaflock.opaque = 0;
  2204    wormleaf_free(map->slab_leaf, leaf2);
  2205  }
  2206  // }}} leaf-merge
  2207  
  2208  // get/probe {{{
  2209    struct kv *
  2210  wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out)
  2211  {
  2212    struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key);
  2213    const u32 i = wormleaf_match_hs(leaf, key);
  2214    struct kv * const tmp = (i < WH_KPN) ? ref->map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL;
  2215    wormleaf_unlock_read(leaf);
  2216    return tmp;
  2217  }
  2218  
  2219    struct kv *
  2220  whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out)
  2221  {
  2222    wormhole_resume(ref);
  2223    struct kv * const ret = wormhole_get(ref, key, out);
  2224    wormhole_park(ref);
  2225    return ret;
  2226  }
  2227  
  2228    struct kv *
  2229  whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out)
  2230  {
  2231    struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key);
  2232    const u32 i = wormleaf_match_hs(leaf, key);
  2233    return (i < WH_KPN) ? map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL;
  2234  }
  2235  
  2236    bool
  2237  wormhole_probe(struct wormref * const ref, const struct kref * const key)
  2238  {
  2239    struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key);
  2240    const u32 i = wormleaf_match_hs(leaf, key);
  2241    wormleaf_unlock_read(leaf);
  2242    return i < WH_KPN;
  2243  }
  2244  
  2245    bool
  2246  whsafe_probe(struct wormref * const ref, const struct kref * const key)
  2247  {
  2248    wormhole_resume(ref);
  2249    const bool r = wormhole_probe(ref, key);
  2250    wormhole_park(ref);
  2251    return r;
  2252  }
  2253  
  2254    bool
  2255  whunsafe_probe(struct wormhole * const map, const struct kref * const key)
  2256  {
  2257    struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key);
  2258    return wormleaf_match_hs(leaf, key) < WH_KPN;
  2259  }
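        // usage sketch (hypothetical caller; wormhole_ref and kv_create are
        // assumed from wh.h/kv.h, kref_ref_kv is the helper used above):
        //   struct wormref * const ref = wormhole_ref(map);
        //   struct kv * const k = kv_create("hello", 5, "", 0);
        //   struct kref kref;
        //   kref_ref_kv(&kref, k); // kref borrows key bytes + hash from k
        //   struct kv * const v = wormhole_get(ref, &kref, NULL);
        //   const bool hit = wormhole_probe(ref, &kref);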
  2260  // }}} get/probe
  2261  
  2262  // meta-split {{{
  2263  // duplicate meta1 into a full-bitmap node; it holds a single bit now, but the caller will set one more
  2264    static struct wormmeta *
  2265  wormmeta_expand(struct wormhmap * const hmap, struct wormmeta * const meta1)
  2266  {
  2267    struct wormmeta * const meta2 = slab_alloc_unsafe(hmap->slab2);
  2268    if (meta2 == NULL)
  2269      return NULL;
  2270  
  2271    memcpy(meta2, meta1, sizeof(*meta1));
  2272    for (u32 i = 0; i < WH_BMNR; i++)
  2273      meta2->bitmap[i] = 0;
  2274    const u32 bitmin = wormmeta_bitmin_load(meta1);
  2275    debug_assert(bitmin == wormmeta_bitmax_load(meta1));
  2276    debug_assert(bitmin < WH_FO);
  2277    // set the only bit
  2278    meta2->bitmap[bitmin >> 6u] |= (1lu << (bitmin & 0x3fu));
  2279  
  2280    wormhmap_replace(hmap, meta1, meta2);
  2281    slab_free_unsafe(hmap->slab1, meta1);
  2282    return meta2;
  2283  }
  2284  
  2285    static struct wormmeta *
  2286  wormmeta_bm_set_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id)
  2287  {
  2288    debug_assert(id < WH_FO);
  2289    const u32 bitmin = wormmeta_bitmin_load(meta);
  2290    const u32 bitmax = wormmeta_bitmax_load(meta);
  2291    if (bitmin < bitmax) { // already in full size
  2292      wormmeta_bm_set(meta, id);
  2293      return meta;
  2294    } else if (id == bitmin) { // do nothing
  2295      return meta;
  2296    } else if (bitmin == WH_FO) { // add the first bit
  2297      wormmeta_bitmin_store(meta, id);
  2298      wormmeta_bitmax_store(meta, id);
  2299      return meta;
  2300    } else { // need to expand
  2301      struct wormmeta * const meta2 = wormmeta_expand(hmap, meta);
  2302      wormmeta_bm_set(meta2, id);
  2303      return meta2;
  2304    }
  2305  }
  2306  
  2307  // touch the meta node for mkey: update its bitmap/lmost/rmost if it exists, or create a new node
  2308    static void
  2309  wormmeta_split_touch(struct wormhmap * const hmap, struct kv * const mkey,
  2310      struct wormleaf * const leaf, const u32 alen)
  2311  {
  2312    struct wormmeta * meta = wormhmap_get(hmap, mkey);
  2313    if (meta) {
  2314      if (mkey->klen < alen)
  2315        meta = wormmeta_bm_set_helper(hmap, meta, mkey->kv[mkey->klen]);
  2316      if (wormmeta_lmost_load(meta) == leaf->next)
  2317        wormmeta_lmost_store(meta, leaf);
  2318      else if (wormmeta_rmost_load(meta) == leaf->prev)
  2319        wormmeta_rmost_store(meta, leaf);
  2320    } else { // create new node
  2321      const u32 bit = (mkey->klen < alen) ? mkey->kv[mkey->klen] : WH_FO;
  2322      meta = wormmeta_alloc(hmap, leaf, mkey, alen, bit);
  2323      debug_assert(meta);
  2324      wormhmap_set(hmap, meta);
  2325    }
  2326  }
  2327  
  2328    static void
  2329  wormmeta_lpath_update(struct wormhmap * const hmap, const struct kv * const a1, const struct kv * const a2,
  2330      struct wormleaf * const lpath)
  2331  {
  2332    struct kv * const pbuf = hmap->pbuf;
  2333    kv_dup2_key(a2, pbuf);
  2334  
  2335    // only need to update a2's own branch
  2336    u32 i = kv_key_lcp(a1, a2) + 1;
  2337    debug_assert(i <= pbuf->klen);
  2338    wormhole_prefix(pbuf, i);
  2339    while (i < a2->klen) {
  2340      debug_assert(i <= hmap->maxplen);
  2341      struct wormmeta * const meta = wormhmap_get(hmap, pbuf);
  2342      debug_assert(meta);
  2343      wormmeta_lpath_store(meta, lpath);
  2344  
  2345      i++;
  2346      wormhole_prefix_inc1(pbuf);
  2347    }
  2348  }
  2349  
  2350  // for a split: the new leaf2 has already been linked at leaf1's right side.
  2351  // this function updates the meta-map by touching/creating the meta nodes along leaf2's anchor prefixes and fixing the lpath pointers
  2352    static void
  2353  wormmeta_split(struct wormhmap * const hmap, struct wormleaf * const leaf,
  2354      struct kv * const mkey)
  2355  {
  2356    // left branches
  2357    struct wormleaf * const prev = leaf->prev;
  2358    struct wormleaf * const next = leaf->next;
  2359    u32 i = next ? kv_key_lcp(prev->anchor, next->anchor) : 0;
  2360    const u32 alen = leaf->anchor->klen;
  2361  
  2362    // save klen
  2363    const u32 mklen = mkey->klen;
  2364    wormhole_prefix(mkey, i);
  2365    do {
  2366      wormmeta_split_touch(hmap, mkey, leaf, alen);
  2367      if (i >= alen)
  2368        break;
  2369      i++;
  2370      wormhole_prefix_inc1(mkey);
  2371    } while (true);
  2372  
  2373    // adjust maxplen; i is the plen of the last _touch()
  2374    if (i > hmap->maxplen)
  2375      hmap->maxplen = i;
  2376    debug_assert(i <= UINT16_MAX);
  2377  
  2378    // restore klen
  2379    mkey->klen = mklen;
  2380  
  2381    if (next)
  2382      wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, leaf);
  2383  }
  2384  
  2385  // all locks will be released before returning
  2386    static bool
  2387  wormhole_split_meta(struct wormref * const ref, struct wormleaf * const leaf2)
  2388  {
  2389    struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen);
  2390    if (unlikely(mkey == NULL))
  2391      return false;
  2392    kv_dup2_key(leaf2->anchor, mkey);
  2393  
  2394    struct wormhole * const map = ref->map;
  2395    // metalock
  2396    wormhmap_lock(map, ref);
  2397  
  2398    // check slab reserve
  2399    const bool sr = wormhole_slab_reserve(map, mkey->klen);
  2400    if (unlikely(!sr)) {
  2401      wormhmap_unlock(map);
  2402      wormhole_free_mkey(mkey);
  2403      return false;
  2404    }
  2405  
  2406    struct wormhmap * const hmap0 = wormhmap_load(map);
  2407    struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0);
  2408  
  2409    // link
  2410    struct wormleaf * const leaf1 = leaf2->prev;
  2411    leaf1->next = leaf2;
  2412    if (leaf2->next)
  2413      leaf2->next->prev = leaf2;
  2414  
  2415    // update versions
  2416    const u64 v1 = wormhmap_version_load(hmap0) + 1;
  2417    wormleaf_version_store(leaf1, v1);
  2418    wormleaf_version_store(leaf2, v1);
  2419    wormhmap_version_store(hmap1, v1);
  2420  
  2421    wormmeta_split(hmap1, leaf2, mkey);
  2422  
  2423    qsbr_update(&ref->qref, v1);
  2424  
  2425    // switch hmap
  2426    wormhmap_store(map, hmap1);
  2427  
  2428    wormleaf_unlock_write(leaf1);
  2429    wormleaf_unlock_write(leaf2);
  2430  
  2431    qsbr_wait(map->qsbr, v1);
  2432  
  2433    wormmeta_split(hmap0, leaf2, mkey);
  2434  
  2435    wormhmap_unlock(map);
  2436  
  2437    if (mkey->refcnt == 0) // this is possible
  2438      wormhole_free_mkey(mkey);
  2439    return true;
  2440  }
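        // NOTE (illustrative summary): the split is first applied to the
        // standby hmap, the hmap pointer is switched, and qsbr_wait() blocks
        // until every reader has announced a version >= v1 (i.e. has left
        // the old hmap) before the same split is replayed on it. Readers
        // therefore always see one complete meta-map, never a half-updated one.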
  2441  
  2442  // all locks (metalock + leaflocks) will be released before returning
  2443  // leaf1->lock (write) is already taken
  2444    static bool
  2445  wormhole_split_insert(struct wormref * const ref, struct wormleaf * const leaf1,
  2446      struct kv * const new)
  2447  {
  2448    struct wormleaf * const leaf2 = wormhole_split_leaf(ref->map, leaf1, new);
  2449    if (unlikely(leaf2 == NULL)) {
  2450      wormleaf_unlock_write(leaf1);
  2451      return false;
  2452    }
  2453  
  2454    rwlock_lock_write(&(leaf2->leaflock));
  2455    const bool rsm = wormhole_split_meta(ref, leaf2);
  2456    if (unlikely(!rsm)) {
  2457      // undo insertion & merge; free leaf2
  2458      wormleaf_split_undo(ref->map, leaf1, leaf2, new);
  2459      wormleaf_unlock_write(leaf1);
  2460    }
  2461    return rsm;
  2462  }
  2463  
  2464    static bool
  2465  whunsafe_split_meta(struct wormhole * const map, struct wormleaf * const leaf2)
  2466  {
  2467    struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen);
  2468    if (unlikely(mkey == NULL))
  2469      return false;
  2470    kv_dup2_key(leaf2->anchor, mkey);
  2471  
  2472    const bool sr = wormhole_slab_reserve(map, mkey->klen);
  2473    if (unlikely(!sr)) {
  2474      wormhmap_unlock(map);
  2475      wormhole_free_mkey(mkey);
  2476      return false;
  2477    }
  2478  
  2479    // link
  2480    leaf2->prev->next = leaf2;
  2481    if (leaf2->next)
  2482      leaf2->next->prev = leaf2;
  2483  
  2484    for (u32 i = 0; i < 2; i++)
  2485      if (map->hmap2[i].pmap)
  2486        wormmeta_split(&(map->hmap2[i]), leaf2, mkey);
  2487    if (mkey->refcnt == 0) // this is possible
  2488      wormhole_free_mkey(mkey);
  2489    return true;
  2490  }
  2491  
  2492    static bool
  2493  whunsafe_split_insert(struct wormhole * const map, struct wormleaf * const leaf1,
  2494      struct kv * const new)
  2495  {
  2496    struct wormleaf * const leaf2 = wormhole_split_leaf(map, leaf1, new);
  2497    if (unlikely(leaf2 == NULL))
  2498      return false;
  2499  
  2500    const bool rsm = whunsafe_split_meta(map, leaf2);
  2501    if (unlikely(!rsm))  // undo insertion, merge, free leaf2
  2502      wormleaf_split_undo(map, leaf1, leaf2, new);
  2503  
  2504    return rsm;
  2505  }
  2506  // }}} meta-split
  2507  
  2508  // meta-merge {{{
  2509  // now it only contains one bit
  2510    static struct wormmeta *
  2511  wormmeta_shrink(struct wormhmap * const hmap, struct wormmeta * const meta2)
  2512  {
  2513    debug_assert(wormmeta_bitmin_load(meta2) == wormmeta_bitmax_load(meta2));
  2514    struct wormmeta * const meta1 = slab_alloc_unsafe(hmap->slab1);
  2515    if (meta1 == NULL)
  2516      return NULL;
  2517  
  2518    memcpy(meta1, meta2, sizeof(*meta1));
  2519  
  2520    wormhmap_replace(hmap, meta2, meta1);
  2521    slab_free_unsafe(hmap->slab2, meta2);
  2522    return meta1;
  2523  }
  2524  
  2525    static void
  2526  wormmeta_bm_clear_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id)
  2527  {
  2528    if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) {
  2529      debug_assert(wormmeta_bitmin_load(meta) < WH_FO);
  2530      wormmeta_bitmin_store(meta, WH_FO);
  2531      wormmeta_bitmax_store(meta, WH_FO);
  2532    } else { // has more than 1 bit
  2533      wormmeta_bm_clear(meta, id);
  2534      if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta))
  2535        wormmeta_shrink(hmap, meta);
  2536    }
  2537  }
  2538  
  2539  // all locks held
  2540    static void
  2541  wormmeta_merge(struct wormhmap * const hmap, struct wormleaf * const leaf)
  2542  {
  2543    // leaf->next is the new next after merge, which can be NULL
  2544    struct wormleaf * const prev = leaf->prev;
  2545    struct wormleaf * const next = leaf->next;
  2546    struct kv * const pbuf = hmap->pbuf;
  2547    kv_dup2_key(leaf->anchor, pbuf);
  2548    u32 i = (prev && next) ? kv_key_lcp(prev->anchor, next->anchor) : 0;
  2549    const u32 alen = leaf->anchor->klen;
  2550    wormhole_prefix(pbuf, i);
  2551    struct wormmeta * parent = NULL;
  2552    do {
  2553      debug_assert(i <= hmap->maxplen);
  2554      struct wormmeta * meta = wormhmap_get(hmap, pbuf);
  2555      if (wormmeta_lmost_load(meta) == wormmeta_rmost_load(meta)) { // delete single-child
  2556        debug_assert(wormmeta_lmost_load(meta) == leaf);
  2557        const u32 bitmin = wormmeta_bitmin_load(meta);
  2558        wormhmap_del(hmap, meta);
  2559        wormmeta_free(hmap, meta);
  2560        if (parent) {
  2561          wormmeta_bm_clear_helper(hmap, parent, pbuf->kv[i-1]);
  2562          parent = NULL;
  2563        }
  2564        if (bitmin == WH_FO) // no child
  2565          break;
  2566      } else { // adjust lmost rmost
  2567        if (wormmeta_lmost_load(meta) == leaf)
  2568          wormmeta_lmost_store(meta, next);
  2569        else if (wormmeta_rmost_load(meta) == leaf)
  2570          wormmeta_rmost_store(meta, prev);
  2571        parent = meta;
  2572      }
  2573  
  2574      if (i >= alen)
  2575        break;
  2576      i++;
  2577      wormhole_prefix_inc1(pbuf);
  2578    } while (true);
  2579  
  2580    if (next)
  2581      wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, prev);
  2582  }
  2583  
  2584  // all locks (metalock + two leaflock) will be released before returning
  2585  // merge leaf2 into leaf1, removing all metadata pointing to leaf2 and then leaf2 itself
  2586    static void
  2587  wormhole_meta_merge(struct wormref * const ref, struct wormleaf * const leaf1,
  2588      struct wormleaf * const leaf2, const bool unlock_leaf1)
  2589  {
  2590    debug_assert(leaf1->next == leaf2);
  2591    debug_assert(leaf2->prev == leaf1);
  2592    struct wormhole * const map = ref->map;
  2593  
  2594    wormhmap_lock(map, ref);
  2595  
  2596    struct wormhmap * const hmap0 = wormhmap_load(map);
  2597    struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0);
  2598    const u64 v1 = wormhmap_version_load(hmap0) + 1;
  2599  
  2600    leaf1->next = leaf2->next;
  2601    if (leaf2->next)
  2602      leaf2->next->prev = leaf1;
  2603  
  2604    wormleaf_version_store(leaf1, v1);
  2605    wormleaf_version_store(leaf2, v1);
  2606    wormhmap_version_store(hmap1, v1);
  2607  
  2608    wormmeta_merge(hmap1, leaf2);
  2609  
  2610    qsbr_update(&ref->qref, v1);
  2611  
  2612    // switch hmap
  2613    wormhmap_store(map, hmap1);
  2614  
  2615    if (unlock_leaf1)
  2616      wormleaf_unlock_write(leaf1);
  2617    wormleaf_unlock_write(leaf2);
  2618  
  2619    qsbr_wait(map->qsbr, v1);
  2620  
  2621    wormmeta_merge(hmap0, leaf2);
  2622    // leaf2 is now safe to be removed
  2623    wormleaf_free(map->slab_leaf, leaf2);
  2624    wormhmap_unlock(map);
  2625  }
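        // NOTE (illustrative): the merge uses the same two-phase hmap switch
        // as wormhole_split_meta above; leaf2 is freed only after qsbr_wait()
        // guarantees that no reader can still be walking through it.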
  2626  
  2627  // caller must acquire leaf->wlock and next->wlock
  2628  // all locks will be released when this function returns
  2629    static bool
  2630  wormhole_meta_leaf_merge(struct wormref * const ref, struct wormleaf * const leaf)
  2631  {
  2632    struct wormleaf * const next = leaf->next;
  2633    debug_assert(next);
  2634  
  2635    // double check
  2636    if ((leaf->nr_keys + next->nr_keys) <= WH_KPN) {
  2637      if (wormleaf_merge(leaf, next)) {
  2638        wormhole_meta_merge(ref, leaf, next, true);
  2639        return true;
  2640      }
  2641    }
  2642    // merge failed but it's fine
  2643    wormleaf_unlock_write(leaf);
  2644    wormleaf_unlock_write(next);
  2645    return false;
  2646  }
  2647  
  2648    static void
  2649  whunsafe_meta_leaf_merge(struct wormhole * const map, struct wormleaf * const leaf1,
  2650      struct wormleaf * const leaf2)
  2651  {
  2652    debug_assert(leaf1->next == leaf2);
  2653    debug_assert(leaf2->prev == leaf1);
  2654    if (!wormleaf_merge(leaf1, leaf2))
  2655      return;
  2656  
  2657    leaf1->next = leaf2->next;
  2658    if (leaf2->next)
  2659      leaf2->next->prev = leaf1;
  2660    for (u32 i = 0; i < 2; i++)
  2661      if (map->hmap2[i].pmap)
  2662        wormmeta_merge(&(map->hmap2[i]), leaf2);
  2663    wormleaf_free(map->slab_leaf, leaf2);
  2664  }
  2665  // }}} meta-merge
  2666  
  2667  // put {{{
  2668    bool
  2669  wormhole_put(struct wormref * const ref, struct kv * const kv)
  2670  {
  2671    // we always allocate a new item on SET
  2672    // future optimizations may perform in-place update
  2673    struct wormhole * const map = ref->map;
  2674    struct kv * const new = map->mm.in(kv, map->mm.priv);
  2675    if (unlikely(new == NULL))
  2676      return false;
  2677    const struct kref kref = kv_kref(new);
  2678  
  2679    struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, &kref);
  2680    // update
  2681    const u32 im = wormleaf_match_hs(leaf, &kref);
  2682    if (im < WH_KPN) {
  2683      struct kv * const old = wormleaf_update(leaf, im, new);
  2684      wormleaf_unlock_write(leaf);
  2685      map->mm.free(old, map->mm.priv);
  2686      return true;
  2687    }
  2688  
  2689    // insert
  2690    if (likely(leaf->nr_keys < WH_KPN)) { // just insert
  2691      wormleaf_insert(leaf, new);
  2692      wormleaf_unlock_write(leaf);
  2693      return true;
  2694    }
  2695  
  2696    // split_insert changes hmap
  2697    // all locks should be released in wormhole_split_insert()
  2698    const bool rsi = wormhole_split_insert(ref, leaf, new);
  2699    if (!rsi)
  2700      map->mm.free(new, map->mm.priv);
  2701    return rsi;
  2702  }
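        // usage sketch (hypothetical caller; kv_create is assumed from kv.h;
        // with a duplicating mm the caller keeps ownership of its own kv):
        //   struct kv * const kv = kv_create("key", 3, "value", 5);
        //   const bool ok = wormhole_put(ref, kv); // mm.in copies kv
        //   free(kv);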
  2703  
  2704    bool
  2705  whsafe_put(struct wormref * const ref, struct kv * const kv)
  2706  {
  2707    wormhole_resume(ref);
  2708    const bool r = wormhole_put(ref, kv);
  2709    wormhole_park(ref);
  2710    return r;
  2711  }
  2712  
  2713    bool
  2714  whunsafe_put(struct wormhole * const map, struct kv * const kv)
  2715  {
  2716    struct kv * const new = map->mm.in(kv, map->mm.priv);
  2717    if (unlikely(new == NULL))
  2718      return false;
  2719    const struct kref kref = kv_kref(new);
  2720  
  2721    struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, &kref);
  2722    // update
  2723    const u32 im = wormleaf_match_hs(leaf, &kref);
  2724    if (im < WH_KPN) { // overwrite
  2725      struct kv * const old = wormleaf_update(leaf, im, new);
  2726      map->mm.free(old, map->mm.priv);
  2727      return true;
  2728    }
  2729  
  2730    // insert
  2731    if (likely(leaf->nr_keys < WH_KPN)) { // just insert
  2732      wormleaf_insert(leaf, new);
  2733      return true;
  2734    }
  2735  
  2736    // split_insert changes hmap
  2737    const bool rsi = whunsafe_split_insert(map, leaf, new);
  2738    if (!rsi)
  2739      map->mm.free(new, map->mm.priv);
  2740    return rsi;
  2741  }
  2742  
  2743    bool
  2744  wormhole_merge(struct wormref * const ref, const struct kref * const kref,
  2745      kv_merge_func uf, void * const priv)
  2746  {
  2747    struct wormhole * const map = ref->map;
  2748    struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, kref);
  2749    // update
  2750    const u32 im = wormleaf_match_hs(leaf, kref);
  2751    if (im < WH_KPN) { // update
  2752      struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im);
  2753      struct kv * const kv = uf(kv0, priv);
  2754      if ((kv == kv0) || (kv == NULL)) { // no replacement
  2755        wormleaf_unlock_write(leaf);
  2756        return true;
  2757      }
  2758  
  2759      struct kv * const new = map->mm.in(kv, map->mm.priv);
  2760      if (unlikely(new == NULL)) { // mm error
  2761        wormleaf_unlock_write(leaf);
  2762        return false;
  2763      }
  2764  
  2765      struct kv * const old = wormleaf_update(leaf, im, new);
  2766      wormleaf_unlock_write(leaf);
  2767      map->mm.free(old, map->mm.priv);
  2768      return true;
  2769    }
  2770  
  2771    struct kv * const kv = uf(NULL, priv);
  2772    if (kv == NULL) { // nothing to be inserted
  2773      wormleaf_unlock_write(leaf);
  2774      return true;
  2775    }
  2776  
  2777    struct kv * const new = map->mm.in(kv, map->mm.priv);
  2778    if (unlikely(new == NULL)) { // mm error
  2779      wormleaf_unlock_write(leaf);
  2780      return false;
  2781    }
  2782  
  2783    // insert
  2784    if (likely(leaf->nr_keys < WH_KPN)) { // just insert
  2785      wormleaf_insert(leaf, new);
  2786      wormleaf_unlock_write(leaf);
  2787      return true;
  2788    }
  2789  
  2790    // split_insert changes hmap
  2791    // all locks should be released in wormhole_split_insert()
  2792    const bool rsi = wormhole_split_insert(ref, leaf, new);
  2793    if (!rsi)
  2794      map->mm.free(new, map->mm.priv);
  2795    return rsi;
  2796  }
  2797  
  2798    bool
  2799  whsafe_merge(struct wormref * const ref, const struct kref * const kref,
  2800      kv_merge_func uf, void * const priv)
  2801  {
  2802    wormhole_resume(ref);
  2803    const bool r = wormhole_merge(ref, kref, uf, priv);
  2804    wormhole_park(ref);
  2805    return r;
  2806  }
  2807  
  2808    bool
  2809  whunsafe_merge(struct wormhole * const map, const struct kref * const kref,
  2810      kv_merge_func uf, void * const priv)
  2811  {
  2812    struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, kref);
  2813    // update
  2814    const u32 im = wormleaf_match_hs(leaf, kref);
  2815    if (im < WH_KPN) { // update
  2816      struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im);
  2817      struct kv * const kv = uf(kv0, priv);
  2818      if ((kv == kv0) || (kv == NULL))
  2819        return true;
  2820  
  2821      struct kv * const new = map->mm.in(kv, map->mm.priv);
  2822      if (unlikely(new == NULL))
  2823        return false;
  2824  
  2825      struct kv * const old = wormleaf_update(leaf, im, new);
  2826      map->mm.free(old, map->mm.priv);
  2827      return true;
  2828    }
  2829  
  2830    struct kv * const kv = uf(NULL, priv);
  2831    if (kv == NULL) // nothing to be inserted
  2832      return true;
  2833  
  2834    struct kv * const new = map->mm.in(kv, map->mm.priv);
  2835    if (unlikely(new == NULL)) // mm error
  2836      return false;
  2837  
  2838    // insert
  2839    if (likely(leaf->nr_keys < WH_KPN)) { // just insert
  2840      wormleaf_insert(leaf, new);
  2841      return true;
  2842    }
  2843  
  2844    // split_insert changes hmap
  2845    const bool rsi = whunsafe_split_insert(map, leaf, new);
  2846    if (!rsi)
  2847      map->mm.free(new, map->mm.priv);
  2848    return rsi;
  2849  }
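        // usage sketch for read-modify-write (hypothetical): uf is called
        // with the current kv (or NULL on a miss) and returns the
        // replacement; returning kv0 itself or NULL means "no change":
        //   static struct kv * keep_or_init(struct kv * kv0, void * priv)
        //   { return kv0 ? kv0 : (struct kv *)priv; } // insert a default on miss
        //   wormhole_merge(ref, &kref, keep_or_init, default_kv);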
  2850  // }}} put
  2851  
  2852  // inplace {{{
  2853    bool
  2854  wormhole_inpr(struct wormref * const ref, const struct kref * const key,
  2855      kv_inp_func uf, void * const priv)
  2856  {
  2857    struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key);
  2858    const u32 im = wormleaf_match_hs(leaf, key);
  2859    if (im < WH_KPN) {
  2860      uf(wormleaf_kv_at_ih(leaf, im), priv);
  2861      wormleaf_unlock_read(leaf);
  2862      return true;
  2863    } else {
  2864      uf(NULL, priv);
  2865      wormleaf_unlock_read(leaf);
  2866      return false;
  2867    }
  2868  }
  2869  
  2870    bool
  2871  wormhole_inpw(struct wormref * const ref, const struct kref * const key,
  2872      kv_inp_func uf, void * const priv)
  2873  {
  2874    struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key);
  2875    const u32 im = wormleaf_match_hs(leaf, key);
  2876    if (im < WH_KPN) {
  2877      uf(wormleaf_kv_at_ih(leaf, im), priv);
  2878      wormleaf_unlock_write(leaf);
  2879      return true;
  2880    } else {
  2881      uf(NULL, priv);
  2882      wormleaf_unlock_write(leaf);
  2883      return false;
  2884    }
  2885  }
  2886  
  2887    bool
  2888  whsafe_inpr(struct wormref * const ref, const struct kref * const key,
  2889      kv_inp_func uf, void * const priv)
  2890  {
  2891    wormhole_resume(ref);
  2892    const bool r = wormhole_inpr(ref, key, uf, priv);
  2893    wormhole_park(ref);
  2894    return r;
  2895  }
  2896  
  2897    bool
  2898  whsafe_inpw(struct wormref * const ref, const struct kref * const key,
  2899      kv_inp_func uf, void * const priv)
  2900  {
  2901    wormhole_resume(ref);
  2902    const bool r = wormhole_inpw(ref, key, uf, priv);
  2903    wormhole_park(ref);
  2904    return r;
  2905  }
  2906  
  2907    bool
  2908  whunsafe_inp(struct wormhole * const map, const struct kref * const key,
  2909      kv_inp_func uf, void * const priv)
  2910  {
  2911    struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key);
  2912    const u32 im = wormleaf_match_hs(leaf, key);
  2913    if (im < WH_KPN) { // overwrite
  2914      uf(wormleaf_kv_at_ih(leaf, im), priv);
  2915      return true;
  2916    } else {
  2917      uf(NULL, priv);
  2918      return false;
  2919    }
  2920  }
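        // usage sketch (hypothetical): the inp functions run uf while the
        // leaf lock is held, so uf must be quick and must not retain the kv
        // pointer after returning:
        //   static void read_vlen(struct kv * kv, void * priv)
        //   { if (kv) *(u32 *)priv = kv->vlen; }
        //   u32 vlen = 0;
        //   wormhole_inpr(ref, &kref, read_vlen, &vlen); // read-lock version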
  2921  // }}} inplace
  2922  
  2923  // del {{{
  2924    static void
  2925  wormhole_del_try_merge(struct wormref * const ref, struct wormleaf * const leaf)
  2926  {
  2927    struct wormleaf * const next = leaf->next;
  2928    if (next && ((leaf->nr_keys == 0) || ((leaf->nr_keys + next->nr_keys) < WH_KPN_MRG))) {
  2929      // try merge, it may fail if size becomes larger after locking
  2930      wormleaf_lock_write(next, ref);
  2931      (void)wormhole_meta_leaf_merge(ref, leaf);
  2932      // locks are already released; immediately return
  2933    } else {
  2934      wormleaf_unlock_write(leaf);
  2935    }
  2936  }
  2937  
  2938    bool
  2939  wormhole_del(struct wormref * const ref, const struct kref * const key)
  2940  {
  2941    struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key);
  2942    const u32 im = wormleaf_match_hs(leaf, key);
  2943    if (im < WH_KPN) { // found
  2944      struct kv * const kv = wormleaf_remove_ih(leaf, im);
  2945      wormhole_del_try_merge(ref, leaf);
  2946      debug_assert(kv);
  2947      // free after releasing locks
  2948      struct wormhole * const map = ref->map;
  2949      map->mm.free(kv, map->mm.priv);
  2950      return true;
  2951    } else {
  2952      wormleaf_unlock_write(leaf);
  2953      return false;
  2954    }
  2955  }
  2956  
  2957    bool
  2958  whsafe_del(struct wormref * const ref, const struct kref * const key)
  2959  {
  2960    wormhole_resume(ref);
  2961    const bool r = wormhole_del(ref, key);
  2962    wormhole_park(ref);
  2963    return r;
  2964  }
  2965  
  2966    static void
  2967  whunsafe_del_try_merge(struct wormhole * const map, struct wormleaf * const leaf)
  2968  {
  2969    const u32 n0 = leaf->prev ? leaf->prev->nr_keys : WH_KPN;
  2970    const u32 n1 = leaf->nr_keys;
  2971    const u32 n2 = leaf->next ? leaf->next->nr_keys : WH_KPN;
  2972  
  2973    if ((leaf->prev && (n1 == 0)) || ((n0 + n1) < WH_KPN_MRG)) {
  2974      whunsafe_meta_leaf_merge(map, leaf->prev, leaf);
  2975    } else if ((leaf->next && (n1 == 0)) || ((n1 + n2) < WH_KPN_MRG)) {
  2976      whunsafe_meta_leaf_merge(map, leaf, leaf->next);
  2977    }
  2978  }
  2979  
  2980    bool
  2981  whunsafe_del(struct wormhole * const map, const struct kref * const key)
  2982  {
  2983    struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key);
  2984    const u32 im = wormleaf_match_hs(leaf, key);
  2985    if (im < WH_KPN) { // found
  2986      struct kv * const kv = wormleaf_remove_ih(leaf, im);
  2987      debug_assert(kv);
  2988  
  2989      whunsafe_del_try_merge(map, leaf);
  2990      map->mm.free(kv, map->mm.priv);
  2991      return true;
  2992    }
  2993    return false;
  2994  }
  2995  
  2996    u64
  2997  wormhole_delr(struct wormref * const ref, const struct kref * const start,
  2998      const struct kref * const end)
  2999  {
  3000    struct wormleaf * const leafa = wormhole_jump_leaf_write(ref, start);
  3001    wormleaf_sync_sorted(leafa);
  3002    const u32 ia = wormleaf_seek(leafa, start);
  3003    const u32 iaz = end ? wormleaf_seek_end(leafa, end) : leafa->nr_keys;
  3004    if (iaz < ia) { // do nothing if end < start
  3005      wormleaf_unlock_write(leafa);
  3006      return 0;
  3007    }
  3008    u64 ndel = iaz - ia;
  3009    struct wormhole * const map = ref->map;
  3010    wormleaf_delete_range(map, leafa, ia, iaz);
  3011    if (leafa->nr_keys > ia) { // end hit; done
  3012      wormhole_del_try_merge(ref, leafa);
  3013      return ndel;
  3014    }
  3015  
  3016    while (leafa->next) {
  3017      struct wormleaf * const leafx = leafa->next;
  3018      wormleaf_lock_write(leafx, ref);
  3019      // two leaf nodes locked
  3020      wormleaf_sync_sorted(leafx);
  3021      const u32 iz = end ? wormleaf_seek_end(leafx, end) : leafx->nr_keys;
  3022      ndel += iz;
  3023      wormleaf_delete_range(map, leafx, 0, iz);
  3024      if (leafx->nr_keys == 0) { // removed all
  3025        // must hold leaf1's lock for the next iteration
  3026        wormhole_meta_merge(ref, leafa, leafx, false);
  3027      } else { // partially removed; done
  3028        (void)wormhole_meta_leaf_merge(ref, leafa);
  3029        return ndel;
  3030      }
  3031    }
  3032    wormleaf_unlock_write(leafa);
  3033    return ndel;
  3034  }
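        // NOTE (illustrative): delr holds the current leaf's write lock while
        // locking leaf->next (lock coupling), so the rightward scan never
        // opens a window in which an in-range key could be missed; leaves
        // that become empty are merged away before their lock is released.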
  3035  
  3036    u64
  3037  whsafe_delr(struct wormref * const ref, const struct kref * const start,
  3038      const struct kref * const end)
  3039  {
  3040    wormhole_resume(ref);
  3041    const u64 ret = wormhole_delr(ref, start, end);
  3042    wormhole_park(ref);
  3043    return ret;
  3044  }
  3045  
  3046    u64
  3047  whunsafe_delr(struct wormhole * const map, const struct kref * const start,
  3048      const struct kref * const end)
  3049  {
  3050    // first leaf
  3051    struct wormhmap * const hmap = map->hmap;
  3052    struct wormleaf * const leafa = wormhole_jump_leaf(hmap, start);
  3053    wormleaf_sync_sorted(leafa);
  3054    // last leaf
  3055    struct wormleaf * const leafz = end ? wormhole_jump_leaf(hmap, end) : NULL;
  3056  
  3057    // select start/end on leafa
  3058    const u32 ia = wormleaf_seek(leafa, start);
  3059    const u32 iaz = end ? wormleaf_seek_end(leafa, end) : leafa->nr_keys;
  3060    if (iaz < ia)
  3061      return 0;
  3062  
  3063    wormleaf_delete_range(map, leafa, ia, iaz);
  3064    u64 ndel = iaz - ia;
  3065  
  3066    if (leafa == leafz) { // one node only
  3067      whunsafe_del_try_merge(map, leafa);
  3068      return ndel;
  3069    }
  3070  
  3071    // 0 or more nodes between leafa and leafz
  3072    while (leafa->next != leafz) {
  3073      struct wormleaf * const leafx = leafa->next;
  3074      ndel += leafx->nr_keys;
  3075      for (u32 i = 0; i < leafx->nr_keys; i++)
  3076        map->mm.free(wormleaf_kv_at_is(leafx, i), map->mm.priv);
  3077      leafx->nr_keys = 0;
  3078      leafx->nr_sorted = 0;
  3079      whunsafe_meta_leaf_merge(map, leafa, leafx);
  3080    }
  3081    // delete the smaller keys in leafz
  3082    if (leafz) {
  3083      wormleaf_sync_sorted(leafz);
  3084      const u32 iz = wormleaf_seek_end(leafz, end);
  3085      wormleaf_delete_range(map, leafz, 0, iz);
  3086      ndel += iz;
  3087      whunsafe_del_try_merge(map, leafa);
  3088    }
  3089    return ndel;
  3090  }
  3091  // }}} del
  3092  
  3093  // iter {{{
  3094  // safe iter: safe sort with read-lock acquired
  3095  // unsafe iter: allow concurrent seek/skip
  3096    static void
  3097  wormhole_iter_leaf_sync_sorted(struct wormleaf * const leaf)
  3098  {
  3099    if (unlikely(leaf->nr_keys != leaf->nr_sorted)) {
  3100      spinlock_lock(&(leaf->sortlock));
  3101      wormleaf_sync_sorted(leaf);
  3102      spinlock_unlock(&(leaf->sortlock));
  3103    }
  3104  }
  3105  
  3106    struct wormhole_iter *
  3107  wormhole_iter_create(struct wormref * const ref)
  3108  {
  3109    struct wormhole_iter * const iter = malloc(sizeof(*iter));
  3110    if (iter == NULL)
  3111      return NULL;
  3112    iter->ref = ref;
  3113    iter->map = ref->map;
  3114    iter->leaf = NULL;
  3115    iter->is = 0;
  3116    return iter;
  3117  }
  3118  
  3119    static void
  3120  wormhole_iter_fix(struct wormhole_iter * const iter)
  3121  {
  3122    if (!wormhole_iter_valid(iter))
  3123      return;
  3124  
  3125    while (unlikely(iter->is >= iter->leaf->nr_sorted)) {
  3126      struct wormleaf * const next = iter->leaf->next;
  3127      if (likely(next != NULL)) {
  3128        struct wormref * const ref = iter->ref;
  3129        wormleaf_lock_read(next, ref);
  3130        wormleaf_unlock_read(iter->leaf);
  3131  
  3132        wormhole_iter_leaf_sync_sorted(next);
  3133      } else {
  3134        wormleaf_unlock_read(iter->leaf);
  3135      }
  3136      iter->leaf = next;
  3137      iter->is = 0;
  3138      if (!wormhole_iter_valid(iter))
  3139        return;
  3140    }
  3141  }
  3142  
  3143    void
  3144  wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key)
  3145  {
  3146    debug_assert(key);
  3147    if (iter->leaf)
  3148      wormleaf_unlock_read(iter->leaf);
  3149  
  3150    struct wormleaf * const leaf = wormhole_jump_leaf_read(iter->ref, key);
  3151    wormhole_iter_leaf_sync_sorted(leaf);
  3152  
  3153    iter->leaf = leaf;
  3154    iter->is = wormleaf_seek(leaf, key);
  3155    wormhole_iter_fix(iter);
  3156  }
  3157  
  3158    void
  3159  whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key)
  3160  {
  3161    wormhole_resume(iter->ref);
  3162    wormhole_iter_seek(iter, key);
  3163  }
  3164  
  3165    bool
  3166  wormhole_iter_valid(struct wormhole_iter * const iter)
  3167  {
  3168    return iter->leaf != NULL;
  3169  }
  3170  
  3171    static struct kv *
  3172  wormhole_iter_current(struct wormhole_iter * const iter)
  3173  {
  3174    if (wormhole_iter_valid(iter)) {
  3175      debug_assert(iter->is < iter->leaf->nr_sorted);
  3176      struct kv * const kv = wormleaf_kv_at_is(iter->leaf, iter->is);
  3177      return kv;
  3178    }
  3179    return NULL;
  3180  }
  3181  
  3182    struct kv *
  3183  wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out)
  3184  {
  3185    struct kv * const kv = wormhole_iter_current(iter);
  3186    if (kv) {
  3187      struct kv * const ret = iter->map->mm.out(kv, out);
  3188      return ret;
  3189    }
  3190    return NULL;
  3191  }
  3192  
  3193    bool
  3194  wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref)
  3195  {
  3196    struct kv * const kv = wormhole_iter_current(iter);
  3197    if (kv) {
  3198      kref_ref_kv(kref, kv);
  3199      return true;
  3200    }
  3201    return false;
  3202  }
  3203  
  3204    bool
  3205  wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref)
  3206  {
  3207    struct kv * const kv = wormhole_iter_current(iter);
  3208    if (kv) {
  3209      kvref_ref_kv(kvref, kv);
  3210      return true;
  3211    }
  3212    return false;
  3213  }
  3214  
  3215    void
  3216  wormhole_iter_skip1(struct wormhole_iter * const iter)
  3217  {
  3218    if (wormhole_iter_valid(iter)) {
  3219      iter->is++;
  3220      wormhole_iter_fix(iter);
  3221    }
  3222  }
  3223  
  3224    void
  3225  wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr)
  3226  {
  3227    u32 todo = nr;
  3228    while (todo && wormhole_iter_valid(iter)) {
  3229      const u32 cap = iter->leaf->nr_sorted - iter->is;
  3230      const u32 nskip = (cap < todo) ? cap : todo;
  3231      iter->is += nskip;
  3232      wormhole_iter_fix(iter);
  3233      todo -= nskip;
  3234    }
  3235  }
  3236  
  3237    struct kv *
  3238  wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out)
  3239  {
  3240    struct kv * const ret = wormhole_iter_peek(iter, out);
  3241    wormhole_iter_skip1(iter);
  3242    return ret;
  3243  }
  3244  
  3245    bool
  3246  wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv)
  3247  {
  3248    struct kv * const kv = wormhole_iter_current(iter);
  3249    uf(kv, priv); // call uf even if (kv == NULL)
  3250    return kv != NULL;
  3251  }
  3252  
  3253    void
  3254  wormhole_iter_park(struct wormhole_iter * const iter)
  3255  {
  3256    if (iter->leaf) {
  3257      wormleaf_unlock_read(iter->leaf);
  3258      iter->leaf = NULL;
  3259    }
  3260  }
  3261  
  3262    void
  3263  whsafe_iter_park(struct wormhole_iter * const iter)
  3264  {
  3265    wormhole_iter_park(iter);
  3266    wormhole_park(iter->ref);
  3267  }
  3268  
  3269    void
  3270  wormhole_iter_destroy(struct wormhole_iter * const iter)
  3271  {
  3272    if (iter->leaf)
  3273      wormleaf_unlock_read(iter->leaf);
  3274    free(iter);
  3275  }
  3276  
  3277    void
  3278  whsafe_iter_destroy(struct wormhole_iter * const iter)
  3279  {
  3280    wormhole_park(iter->ref);
  3281    wormhole_iter_destroy(iter);
  3282  }
  3283  // }}} iter
  3284  
  3285  // unsafe iter {{{
  3286    struct wormhole_iter *
  3287  whunsafe_iter_create(struct wormhole * const map)
  3288  {
  3289    struct wormhole_iter * const iter = malloc(sizeof(*iter));
  3290    if (iter == NULL)
  3291      return NULL;
  3292    iter->ref = NULL;
  3293    iter->map = map;
  3294    iter->leaf = NULL;
  3295    iter->is = 0;
  3296    whunsafe_iter_seek(iter, kref_null());
  3297    return iter;
  3298  }
  3299  
  3300    static void
  3301  whunsafe_iter_fix(struct wormhole_iter * const iter)
  3302  {
  3303    if (!wormhole_iter_valid(iter))
  3304      return;
  3305  
  3306    while (unlikely(iter->is >= iter->leaf->nr_sorted)) {
  3307      struct wormleaf * const next = iter->leaf->next;
  3308      if (likely(next != NULL))
  3309        wormhole_iter_leaf_sync_sorted(next);
  3310      iter->leaf = next;
  3311      iter->is = 0;
  3312      if (!wormhole_iter_valid(iter))
  3313        return;
  3314    }
  3315  }
  3316  
  3317    void
  3318  whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key)
  3319  {
  3320    struct wormleaf * const leaf = wormhole_jump_leaf(iter->map->hmap, key);
  3321    wormhole_iter_leaf_sync_sorted(leaf);
  3322  
  3323    iter->leaf = leaf;
  3324    iter->is = wormleaf_seek(leaf, key);
  3325    whunsafe_iter_fix(iter);
  3326  }
  3327  
  3328    void
  3329  whunsafe_iter_skip1(struct wormhole_iter * const iter)
  3330  {
  3331    if (wormhole_iter_valid(iter)) {
  3332      iter->is++;
  3333      whunsafe_iter_fix(iter);
  3334    }
  3335  }
  3336  
  3337    void
  3338  whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr)
  3339  {
  3340    u32 todo = nr;
  3341    while (todo && wormhole_iter_valid(iter)) {
  3342      const u32 cap = iter->leaf->nr_sorted - iter->is;
  3343      const u32 nskip = (cap < todo) ? cap : todo;
  3344      iter->is += nskip;
  3345      whunsafe_iter_fix(iter);
  3346      todo -= nskip;
  3347    }
  3348  }
  3349  
  3350    struct kv *
  3351  whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out)
  3352  {
  3353    struct kv * const ret = wormhole_iter_peek(iter, out);
  3354    whunsafe_iter_skip1(iter);
  3355    return ret;
  3356  }
  3357  
  3358    void
  3359  whunsafe_iter_destroy(struct wormhole_iter * const iter)
  3360  {
  3361    free(iter);
  3362  }
  3363  // }}} unsafe iter
  3364  
  3365  // misc {{{
  3366    struct wormref *
  3367  wormhole_ref(struct wormhole * const map)
  3368  {
  3369    struct wormref * const ref = malloc(sizeof(*ref));
  3370    if (ref == NULL)
  3371      return NULL;
  3372    ref->map = map;
  3373    if (qsbr_register(map->qsbr, &(ref->qref)) == false) {
  3374      free(ref);
  3375      return NULL;
  3376    }
  3377    return ref;
  3378  }
  3379  
  3380    struct wormref *
  3381  whsafe_ref(struct wormhole * const map)
  3382  {
  3383    struct wormref * const ref = wormhole_ref(map);
  3384    if (ref)
  3385      wormhole_park(ref);
  3386    return ref;
  3387  }
  3388  
  3389    struct wormhole *
  3390  wormhole_unref(struct wormref * const ref)
  3391  {
  3392    struct wormhole * const map = ref->map;
  3393    qsbr_unregister(map->qsbr, &(ref->qref));
  3394    free(ref);
  3395    return map;
  3396  }
  3397  
  3398    inline void
  3399  wormhole_park(struct wormref * const ref)
  3400  {
  3401    qsbr_park(&(ref->qref));
  3402  }
  3403  
  3404    inline void
  3405  wormhole_resume(struct wormref * const ref)
  3406  {
  3407    qsbr_resume(&(ref->qref));
  3408  }
  3409  
  3410    inline void
  3411  wormhole_refresh_qstate(struct wormref * const ref)
  3412  {
  3413    qsbr_update(&(ref->qref), wormhmap_version_load(wormhmap_load(ref->map)));
  3414  }
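
// Usage sketch (illustrative; not part of the original source): a ref created
// by wormhole_ref() participates in QSBR, so a thread that goes idle should
// park its ref; otherwise writers waiting for all refs to advance their
// qstate can stall. The whsafe api wraps this parking automatically.
  static void __attribute__((unused))
wormhole_example_ref_cycle(struct wormhole * const map)
{
  struct wormref * const ref = wormhole_ref(map);
  if (ref == NULL)
    return;
  // ... search/insert/delete through ref here ...
  wormhole_park(ref);   // park before blocking on I/O or sleeping
  wormhole_resume(ref); // resume before touching the map again
  (void)wormhole_unref(ref);
}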
  3415  
  3416    static void
  3417  wormhole_clean_hmap(struct wormhole * const map)
  3418  {
  3419    for (u32 x = 0; x < 2; x++) {
  3420      if (map->hmap2[x].pmap == NULL)
  3421        continue;
  3422      struct wormhmap * const hmap = &(map->hmap2[x]);
  3423      const u64 nr_slots = ((u64)(hmap->mask)) + 1;
  3424      struct wormmbkt * const pmap = hmap->pmap;
  3425      for (u64 s = 0; s < nr_slots; s++) {
  3426        struct wormmbkt * const slot = &(pmap[s]);
  3427        for (u32 i = 0; i < WH_BKT_NR; i++)
  3428          if (slot->e[i])
  3429            wormmeta_keyref_release(slot->e[i]);
  3430      }
  3431  
  3432      slab_free_all(hmap->slab1);
  3433      slab_free_all(hmap->slab2);
  3434      memset(hmap->pmap, 0, hmap->msize);
  3435      hmap->maxplen = 0;
  3436    }
  3437  }
  3438  
  3439    static void
  3440  wormhole_free_leaf_keys(struct wormhole * const map, struct wormleaf * const leaf)
  3441  {
  3442    const u32 nr = leaf->nr_keys;
  3443    for (u32 i = 0; i < nr; i++) {
  3444      void * const curr = wormleaf_kv_at_is(leaf, i);
  3445      debug_assert(curr);
  3446      map->mm.free(curr, map->mm.priv);
  3447    }
  3448    wormhole_free_akey(leaf->anchor);
  3449  }
  3450  
  3451    static void
  3452  wormhole_clean_helper(struct wormhole * const map)
  3453  {
  3454    wormhole_clean_hmap(map);
  3455    for (struct wormleaf * leaf = map->leaf0; leaf; leaf = leaf->next)
  3456      wormhole_free_leaf_keys(map, leaf);
  3457    slab_free_all(map->slab_leaf);
  3458    map->leaf0 = NULL;
  3459  }
  3460  
  3461  // unsafe
  3462    void
  3463  wormhole_clean(struct wormhole * const map)
  3464  {
  3465    wormhole_clean_helper(map);
  3466    wormhole_create_leaf0(map);
  3467  }
  3468  
  3469    void
  3470  wormhole_destroy(struct wormhole * const map)
  3471  {
  3472    wormhole_clean_helper(map);
  3473    for (u32 i = 0; i < 2; i++) {
  3474      struct wormhmap * const hmap = &map->hmap2[i];
  3475      if (hmap->slab1)
  3476        slab_destroy(hmap->slab1);
  3477      if (hmap->slab2)
  3478        slab_destroy(hmap->slab2);
  3479      wormhmap_deinit(hmap);
  3480    }
  3481    qsbr_destroy(map->qsbr);
  3482    slab_destroy(map->slab_leaf);
  3483    free(map->pbuf);
  3484    free(map);
  3485  }
  3486  
  3487    void
  3488  wormhole_fprint(struct wormhole * const map, FILE * const out)
  3489  {
  3490    const u64 nr_slab_ul = slab_get_nalloc(map->slab_leaf);
  3491    const u64 nr_slab_um11 = slab_get_nalloc(map->hmap2[0].slab1);
  3492    const u64 nr_slab_um12 = slab_get_nalloc(map->hmap2[0].slab2);
  3493    const u64 nr_slab_um21 = map->hmap2[1].slab1 ? slab_get_nalloc(map->hmap2[1].slab1) : 0;
  3494    const u64 nr_slab_um22 = map->hmap2[1].slab2 ? slab_get_nalloc(map->hmap2[1].slab2) : 0;
  3495    fprintf(out, "%s L-SLAB %lu M-SLAB [0] %lu+%lu [1] %lu+%lu\n",
  3496        __func__, nr_slab_ul, nr_slab_um11, nr_slab_um12, nr_slab_um21, nr_slab_um22);
  3497  }
  3498  // }}} misc
  3499  
  3500  // api {{{
  3501  const struct kvmap_api kvmap_api_wormhole = {
  3502    .hashkey = true,
  3503    .ordered = true,
  3504    .threadsafe = true,
  3505    .unique = true,
  3506    .refpark = true,
  3507    .put = (void *)wormhole_put,
  3508    .get = (void *)wormhole_get,
  3509    .probe = (void *)wormhole_probe,
  3510    .del = (void *)wormhole_del,
  3511    .inpr = (void *)wormhole_inpr,
  3512    .inpw = (void *)wormhole_inpw,
  3513    .merge = (void *)wormhole_merge,
  3514    .delr = (void *)wormhole_delr,
  3515    .iter_create = (void *)wormhole_iter_create,
  3516    .iter_seek = (void *)wormhole_iter_seek,
  3517    .iter_valid = (void *)wormhole_iter_valid,
  3518    .iter_peek = (void *)wormhole_iter_peek,
  3519    .iter_kref = (void *)wormhole_iter_kref,
  3520    .iter_kvref = (void *)wormhole_iter_kvref,
  3521    .iter_skip1 = (void *)wormhole_iter_skip1,
  3522    .iter_skip = (void *)wormhole_iter_skip,
  3523    .iter_next = (void *)wormhole_iter_next,
  3524    .iter_inp = (void *)wormhole_iter_inp,
  3525    .iter_park = (void *)wormhole_iter_park,
  3526    .iter_destroy = (void *)wormhole_iter_destroy,
  3527    .ref = (void *)wormhole_ref,
  3528    .unref = (void *)wormhole_unref,
  3529    .park = (void *)wormhole_park,
  3530    .resume = (void *)wormhole_resume,
  3531    .clean = (void *)wormhole_clean,
  3532    .destroy = (void *)wormhole_destroy,
  3533    .fprint = (void *)wormhole_fprint,
  3534  };
  3535  
  3536  const struct kvmap_api kvmap_api_whsafe = {
  3537    .hashkey = true,
  3538    .ordered = true,
  3539    .threadsafe = true,
  3540    .unique = true,
  3541    .put = (void *)whsafe_put,
  3542    .get = (void *)whsafe_get,
  3543    .probe = (void *)whsafe_probe,
  3544    .del = (void *)whsafe_del,
  3545    .inpr = (void *)whsafe_inpr,
  3546    .inpw = (void *)whsafe_inpw,
  3547    .merge = (void *)whsafe_merge,
  3548    .delr = (void *)whsafe_delr,
  3549    .iter_create = (void *)wormhole_iter_create,
  3550    .iter_seek = (void *)whsafe_iter_seek,
  3551    .iter_valid = (void *)wormhole_iter_valid,
  3552    .iter_peek = (void *)wormhole_iter_peek,
  3553    .iter_kref = (void *)wormhole_iter_kref,
  3554    .iter_kvref = (void *)wormhole_iter_kvref,
  3555    .iter_skip1 = (void *)wormhole_iter_skip1,
  3556    .iter_skip = (void *)wormhole_iter_skip,
  3557    .iter_next = (void *)wormhole_iter_next,
  3558    .iter_inp = (void *)wormhole_iter_inp,
  3559    .iter_park = (void *)whsafe_iter_park,
  3560    .iter_destroy = (void *)whsafe_iter_destroy,
  3561    .ref = (void *)whsafe_ref,
  3562    .unref = (void *)wormhole_unref,
  3563    .clean = (void *)wormhole_clean,
  3564    .destroy = (void *)wormhole_destroy,
  3565    .fprint = (void *)wormhole_fprint,
  3566  };
  3567  
  3568  const struct kvmap_api kvmap_api_whunsafe = {
  3569    .hashkey = true,
  3570    .ordered = true,
  3571    .unique = true,
  3572    .put = (void *)whunsafe_put,
  3573    .get = (void *)whunsafe_get,
  3574    .probe = (void *)whunsafe_probe,
  3575    .del = (void *)whunsafe_del,
  3576    .inpr = (void *)whunsafe_inp,
  3577    .inpw = (void *)whunsafe_inp,
  3578    .merge = (void *)whunsafe_merge,
  3579    .delr = (void *)whunsafe_delr,
  3580    .iter_create = (void *)whunsafe_iter_create,
  3581    .iter_seek = (void *)whunsafe_iter_seek,
  3582    .iter_valid = (void *)wormhole_iter_valid,
  3583    .iter_peek = (void *)wormhole_iter_peek,
  3584    .iter_kref = (void *)wormhole_iter_kref,
  3585    .iter_kvref = (void *)wormhole_iter_kvref,
  3586    .iter_skip1 = (void *)whunsafe_iter_skip1,
  3587    .iter_skip = (void *)whunsafe_iter_skip,
  3588    .iter_next = (void *)whunsafe_iter_next,
  3589    .iter_inp = (void *)wormhole_iter_inp,
  3590    .iter_destroy = (void *)whunsafe_iter_destroy,
  3591    .clean = (void *)wormhole_clean,
  3592    .destroy = (void *)wormhole_destroy,
  3593    .fprint = (void *)wormhole_fprint,
  3594  };
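
// Sketch (illustrative; field signatures are defined by struct kvmap_api in
// kv.h): callers can stay map-agnostic by going through one of the tables:
//   const struct kvmap_api * const api = &kvmap_api_whsafe;
//   void * const ref = api->ref(map);
//   ... api->put(ref, newkv); api->get(ref, &kref, out); ...
//   api->unref(ref);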
  3595  
  3596    static void *
  3597  wormhole_kvmap_api_create(const char * const name, const struct kvmap_mm * const mm, char ** args)
  3598  {
  3599    (void)args;
  3600    if ((!strcmp(name, "wormhole")) || (!strcmp(name, "whsafe"))) {
  3601      return wormhole_create(mm);
  3602    } else if (!strcmp(name, "whunsafe")) {
  3603      return whunsafe_create(mm);
  3604    } else {
  3605      return NULL;
  3606    }
  3607  }
  3608  
  3609  __attribute__((constructor))
  3610    static void
  3611  wormhole_kvmap_api_init(void)
  3612  {
  3613    kvmap_api_register(0, "wormhole", "", wormhole_kvmap_api_create, &kvmap_api_wormhole);
  3614    kvmap_api_register(0, "whsafe", "", wormhole_kvmap_api_create, &kvmap_api_whsafe);
  3615    kvmap_api_register(0, "whunsafe", "", wormhole_kvmap_api_create, &kvmap_api_whunsafe);
  3616  }
  3617  // }}} api
  3618  
  3619  // wh {{{
  3620  // Users often don't enjoy dealing with struct kv/kref and just want to use plain buffers.
  3621  // No problem!
  3622  // This example library shows you how to use Wormhole efficiently in the most intuitive way.
  3623  
  3624  // Use the worry-free api
  3625  static const struct kvmap_api * const wh_api = &kvmap_api_whsafe;
  3626  
  3627  // You can change the wh_api to kvmap_api_wormhole with a one-line replacement
  3628  // The standard Wormhole api can give you ~5% boost; read README for thread-safety tips
  3629  //static const struct kvmap_api * const wh_api = &kvmap_api_wormhole;
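
// Quick-start sketch (illustrative; not part of the original source); the
// wh_* prototypes come from wh.h, which is included at the top of this file.
  static void __attribute__((unused))
wh_example_quickstart(void)
{
  struct wormhole * const wh = wh_create();
  if (wh == NULL)
    return;
  struct wormref * const ref = wh_ref(wh);
  if (ref == NULL) {
    wh_destroy(wh);
    return;
  }
  (void)wh_put(ref, "hello", 5, "world", 5);
  char vbuf[8];
  u32 vlen = 0;
  if (wh_get(ref, "hello", 5, vbuf, sizeof(vbuf), &vlen)) {
    // vlen == 5 and vbuf now holds "world" (no NUL terminator is appended)
  }
  (void)wh_del(ref, "hello", 5);
  wh_unref(ref);
  wh_destroy(wh);
}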
  3630  
  3631    struct wormhole *
  3632  wh_create(void)
  3633  {
  3634    // kvmap_mm_ndf (kv.h) will let the caller allocate the kv when inserting
  3635    // This can avoid a memcpy if the caller does not have the data in a struct kv
  3636    return wormhole_create(&kvmap_mm_ndf);
  3637  }
  3638  
  3639    struct wormref *
  3640  wh_ref(struct wormhole * const wh)
  3641  {
  3642    return wh_api->ref(wh);
  3643  }
  3644  
  3645    void
  3646  wh_unref(struct wormref * const ref)
  3647  {
  3648    (void)wh_api->unref(ref);
  3649  }
  3650  
  3651    void
  3652  wh_park(struct wormref * const ref)
  3653  {
  3654    if (wh_api->park)
  3655      wh_api->park(ref);
  3656  }
  3657  
  3658    void
  3659  wh_resume(struct wormref * const ref)
  3660  {
  3661    if (wh_api->resume)
  3662      wh_api->resume(ref);
  3663  }
  3664  
  3665    void
  3666  wh_clean(struct wormhole * const map)
  3667  {
  3668    wh_api->clean(map);
  3669  }
  3670  
  3671    void
  3672  wh_destroy(struct wormhole * const map)
  3673  {
  3674    wh_api->destroy(map);
  3675  }
  3676  
  3677  // Do set/put with explicit kv buffers
  3678    bool
  3679  wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen,
  3680      const void * const vbuf, const u32 vlen)
  3681  {
  3682    struct kv * const newkv = kv_create(kbuf, klen, vbuf, vlen);
  3683    if (newkv == NULL)
  3684      return false;
  3685    // must be used with kvmap_mm_ndf (see wh_create above)
  3686    // the newkv is stored directly in the Wormhole and freed by the Wormhole upon deletion
  3687    return wh_api->put(ref, newkv);
  3688  }
  3689  
  3690  // delete a key
  3691    bool
  3692  wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen)
  3693  {
  3694    struct kref kref;
  3695    kref_ref_hash32(&kref, kbuf, klen);
  3696    return wh_api->del(ref, &kref);
  3697  }
  3698  
  3699  // test if the key exists in Wormhole
  3700    bool
  3701  wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen)
  3702  {
  3703    struct kref kref;
  3704    kref_ref_hash32(&kref, kbuf, klen);
  3705    return wh_api->probe(ref, &kref);
  3706  }
  3707  
  3708  // for wh_get()
  3709  struct wh_inp_info { void * vbuf_out; u32 * vlen_out; u32 vbuf_size; };
  3710  
  3711  // a kv_inp_func; use this to retrieve the KV's data without unnecessary memory copying
  3712    static void
  3713  wh_inp_copy_value(struct kv * const curr, void * const priv)
  3714  {
  3715    if (curr) { // found
  3716      struct wh_inp_info * const info = (typeof(info))priv;
  3717      // copy the value data out
  3718      const u32 copy_size = info->vbuf_size < curr->vlen ? info->vbuf_size : curr->vlen;
  3719      memcpy(info->vbuf_out, kv_vptr_c(curr), copy_size);
  3720      // copy the vlen out
  3721      *info->vlen_out = curr->vlen;
  3722    }
  3723  }
  3724  
  3725  // returns a boolean value indicating whether the key is found.
  3726  // if the key is found, the value's length is written to *vlen_out and its data to vbuf_out
  3727  // if vbuf_size < vlen, then only the first vbuf_size bytes are copied to the buffer
  3728  // a small vbuf_size can be used to reduce memcpy cost when only the first few bytes are needed
  3729    bool
  3730  wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen,
  3731      void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out)
  3732  {
  3733    struct kref kref;
  3734    kref_ref_hash32(&kref, kbuf, klen);
  3735    struct wh_inp_info info = {vbuf_out, vlen_out, vbuf_size};
  3736    // use the inplace read function to get the value if it exists
  3737    return wh_api->inpr(ref, &kref, wh_inp_copy_value, &info);
  3738  }
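
// Example (illustrative sketch): read only the first 4 bytes of a possibly
// larger value; *vlen_out still receives the full value length.
  static bool __attribute__((unused))
wh_example_get_head(struct wormref * const ref, const void * const kbuf, const u32 klen,
    u32 * const head_out)
{
  u32 vlen = 0;
  return wh_get(ref, kbuf, klen, head_out, sizeof(*head_out), &vlen)
      && (vlen >= sizeof(*head_out));
}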
  3739  
  3740    bool
  3741  wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen,
  3742      kv_inp_func uf, void * const priv)
  3743  {
  3744    struct kref kref;
  3745    kref_ref_hash32(&kref, kbuf, klen);
  3746    return wh_api->inpr(ref, &kref, uf, priv);
  3747  }
  3748  
  3749  // in-place update of a KV's value with a user-defined hook function
  3750  // the update should only modify the data in the value; it must not change the value size
  3751    bool
  3752  wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen,
  3753      kv_inp_func uf, void * const priv)
  3754  {
  3755    struct kref kref;
  3756    kref_ref_hash32(&kref, kbuf, klen);
  3757    return wh_api->inpw(ref, &kref, uf, priv);
  3758  }
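
// Example (illustrative sketch): a caller-defined kv_inp_func for wh_inpr()
// that reads an 8-byte counter stored at the head of the value, in place.
  static void __attribute__((unused))
wh_example_inp_load_counter(struct kv * const curr, void * const priv)
{
  u64 * const out = (typeof(out))priv;
  if (curr && (curr->vlen >= sizeof(u64)))
    memcpy(out, kv_vptr_c(curr), sizeof(u64)); // no kv allocation or full-value copy
  else
    *out = 0; // the inp function is called even when the key is not found
}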
  3759  
  3760  // merge an existing KV with updates using a user-defined hook function
  3761    bool
  3762  wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen,
  3763      kv_merge_func uf, void * const priv)
  3764  {
  3765    struct kref kref;
  3766    kref_ref_hash32(&kref, kbuf, klen);
  3767    return wh_api->merge(ref, &kref, uf, priv);
  3768  }
  3769  
  3770  // remove a range of KVs from start (inclusive) to end (exclusive); [start, end)
  3771    u64
  3772  wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start,
  3773      const void * const kbuf_end, const u32 klen_end)
  3774  {
  3775    struct kref kref_start, kref_end;
  3776    kref_ref_hash32(&kref_start, kbuf_start, klen_start);
  3777    kref_ref_hash32(&kref_end, kbuf_end, klen_end);
  3778    return wh_api->delr(ref, &kref_start, &kref_end);
  3779  }
  3780  
  3781    struct wormhole_iter *
  3782  wh_iter_create(struct wormref * const ref)
  3783  {
  3784    return wh_api->iter_create(ref);
  3785  }
  3786  
  3787    void
  3788  wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen)
  3789  {
  3790    struct kref kref;
  3791    kref_ref_hash32(&kref, kbuf, klen);
  3792    wh_api->iter_seek(iter, &kref);
  3793  }
  3794  
  3795    bool
  3796  wh_iter_valid(struct wormhole_iter * const iter)
  3797  {
  3798    return wh_api->iter_valid(iter);
  3799  }
  3800  
  3801  // for wh_iter_peek()
  3802  // the out ptrs must be provided in pairs; use a pair of NULLs to ignore the key or value
  3803  struct wh_iter_inp_info { void * kbuf_out; void * vbuf_out; u32 kbuf_size; u32 vbuf_size; u32 * klen_out; u32 * vlen_out; };
  3804  
  3805  // a kv_inp_func; use this to retrieve the KV's data without unnecessary memory copying
  3806    static void
  3807  inp_copy_kv_cb(struct kv * const curr, void * const priv)
  3808  {
  3809    if (curr) { // found
  3810      struct wh_iter_inp_info * const info = (typeof(info))priv;
  3811  
  3812      // copy the key
  3813      if (info->kbuf_out) { // it assumes klen_out is also not NULL
  3814        // copy the key data out
  3815        const u32 clen = curr->klen < info->kbuf_size ? curr->klen : info->kbuf_size;
  3816        memcpy(info->kbuf_out, kv_kptr_c(curr), clen);
  3817        // copy the klen out
  3818        *info->klen_out = curr->klen;
  3819      }
  3820  
  3821      // copy the value
  3822      if (info->vbuf_out) { // it assumes vlen_out is also not NULL
  3823        // copy the value data out
  3824        const u32 clen = curr->vlen < info->vbuf_size ? curr->vlen : info->vbuf_size;
  3825        memcpy(info->vbuf_out, kv_vptr_c(curr), clen);
  3826        // copy the vlen out
  3827        *info->vlen_out = curr->vlen;
  3828      }
  3829    }
  3830  }
  3831  
  3832  // peek is similar to get: it copies out the current KV without advancing the iterator
  3833    bool
  3834  wh_iter_peek(struct wormhole_iter * const iter,
  3835      void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out,
  3836      void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out)
  3837  {
  3838    struct wh_iter_inp_info info = {kbuf_out, vbuf_out, kbuf_size, vbuf_size, klen_out, vlen_out};
  3839    return wh_api->iter_inp(iter, inp_copy_kv_cb, &info);
  3840  }
  3841  
  3842    void
  3843  wh_iter_skip1(struct wormhole_iter * const iter)
  3844  {
  3845    wh_api->iter_skip1(iter);
  3846  }
  3847  
  3848    void
  3849  wh_iter_skip(struct wormhole_iter * const iter, const u32 nr)
  3850  {
  3851    wh_api->iter_skip(iter, nr);
  3852  }
  3853  
  3854    bool
  3855  wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv)
  3856  {
  3857    return wh_api->iter_inp(iter, uf, priv);
  3858  }
  3859  
  3860    void
  3861  wh_iter_park(struct wormhole_iter * const iter)
  3862  {
  3863    wh_api->iter_park(iter);
  3864  }
  3865  
  3866    void
  3867  wh_iter_destroy(struct wormhole_iter * const iter)
  3868  {
  3869    wh_api->iter_destroy(iter);
  3870  }
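
// Example (illustrative sketch): an ordered scan of every key using the
// wh iterator wrappers above.
  static void __attribute__((unused))
wh_example_scan(struct wormref * const ref)
{
  struct wormhole_iter * const iter = wh_iter_create(ref);
  if (iter == NULL)
    return;
  wh_iter_seek(iter, "", 0); // the zero-length key sorts before all other keys
  char kbuf[256];
  u32 klen = 0;
  while (wh_iter_valid(iter)) {
    // the (NULL, 0, NULL) triple ignores the value, as documented above
    if (wh_iter_peek(iter, kbuf, sizeof(kbuf), &klen, NULL, 0, NULL)) {
      // use the first min(klen, sizeof(kbuf)) bytes of kbuf here
    }
    wh_iter_skip1(iter);
  }
  wh_iter_destroy(iter);
}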
  3871  // }}} wh
  3872  
  3873  // vim:fdm=marker