github.com/pidato/unsafe@v0.1.4/memory/hash/wyhash.h (about)

     1  // This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/)
     2  // main repo: https://github.com/wangyi-fudan/wyhash
     3  // author: 王一 Wang Yi <godspeed_china@yeah.net>
     4  // contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314 (Devin), TheOneric
     5  
     6  /* quick example:
     7     string s="fjsakfdsjkf";
     8     uint64_t hash=wyhash(s.c_str(), s.size(), 0, _wyp);
     9  */
    10  
    11  #ifndef wyhash_final_version_3
    12  #define wyhash_final_version_3
    13  
    14  #ifndef WYHASH_CONDOM
    15  //protections that produce different results:
    16  //1: normal valid behavior
    17  //2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
    18  #define WYHASH_CONDOM 1
    19  #endif
    20  
    21  #ifndef WYHASH_32BIT_MUM
    22  //0: normal version, slow on 32 bit systems
    23  //1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function
    24  #define WYHASH_32BIT_MUM 0
    25  #endif
    26  
    27  //includes
    28  #include <stdint.h>
    29  #include <string.h>
    30  #if defined(_MSC_VER) && defined(_M_X64)
    31    #include <intrin.h>
    32    #pragma intrinsic(_umul128)
    33  #endif
    34  
    35  //likely and unlikely macros
    36  #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
    37    #define _likely_(x)  __builtin_expect(x,1)
    38    #define _unlikely_(x)  __builtin_expect(x,0)
    39  #else
    40    #define _likely_(x) (x)
    41    #define _unlikely_(x) (x)
    42  #endif
    43  
    44  //128bit multiply function
    45  static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
    46  static inline void _wymum(uint64_t *A, uint64_t *B){
    47  #if(WYHASH_32BIT_MUM)
    48    uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B;
    49    #if(WYHASH_CONDOM>1)
    50    *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
    51    #else
    52    *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
    53    #endif
    54  #elif defined(__SIZEOF_INT128__)
    55    __uint128_t r=*A; r*=*B;
    56    #if(WYHASH_CONDOM>1)
    57    *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
    58    #else
    59    *A=(uint64_t)r; *B=(uint64_t)(r>>64);
    60    #endif
    61  #elif defined(_MSC_VER) && defined(_M_X64)
    62    #if(WYHASH_CONDOM>1)
    63    uint64_t  a,  b;
    64    a=_umul128(*A,*B,&b);
    65    *A^=a;  *B^=b;
    66    #else
    67    *A=_umul128(*A,*B,B);
    68    #endif
    69  #else
    70    uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
    71    uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
    72    lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
    73    #if(WYHASH_CONDOM>1)
    74    *A^=lo;  *B^=hi;
    75    #else
    76    *A=lo;  *B=hi;
    77    #endif
    78  #endif
    79  }
    80  
    81  //multiply and xor mix function, aka MUM
    82  static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
    83  
    84  //endian macros
    85  #ifndef WYHASH_LITTLE_ENDIAN
    86    #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    87      #define WYHASH_LITTLE_ENDIAN 1
    88    #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    89      #define WYHASH_LITTLE_ENDIAN 0
    90    #else
    91      #warning could not determine endianness! Falling back to little endian.
    92      #define WYHASH_LITTLE_ENDIAN 1
    93    #endif
    94  #endif
    95  
    96  //read functions
    97  #if (WYHASH_LITTLE_ENDIAN)
    98  static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
    99  static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v;}
   100  #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
   101  static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
   102  static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
   103  #elif defined(_MSC_VER)
   104  static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
   105  static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
   106  #else
   107  static inline uint64_t _wyr8(const uint8_t *p) {
   108    uint64_t v; memcpy(&v, p, 8);
   109    return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >>  8) & 0xff000000)| ((v <<  8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
   110  }
   111  static inline uint64_t _wyr4(const uint8_t *p) {
   112    uint32_t v; memcpy(&v, p, 4);
   113    return (((v >> 24) & 0xff)| ((v >>  8) & 0xff00)| ((v <<  8) & 0xff0000)| ((v << 24) & 0xff000000));
   114  }
   115  #endif
   116  static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
   117  //wyhash main function
   118  static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){
   119    const uint8_t *p=(const uint8_t *)key; seed^=*secret;	uint64_t	a,	b;
   120    if(_likely_(len<=16)){
   121      if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); }
   122      else if(_likely_(len>0)){ a=_wyr3(p,len); b=0;}
   123      else a=b=0;
   124    }
   125    else{
   126      size_t i=len;
   127      if(_unlikely_(i>48)){
   128        uint64_t see1=seed, see2=seed;
   129        do{
   130          seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);
   131          see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1);
   132          see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2);
   133          p+=48; i-=48;
   134        }while(_likely_(i>48));
   135        seed^=see1^see2;
   136      }
   137      while(_unlikely_(i>16)){  seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);  i-=16; p+=16;  }
   138      a=_wyr8(p+i-16);  b=_wyr8(p+i-8);
   139    }
   140    return _wymix(secret[1]^len,_wymix(a^secret[1],b^seed));
   141  }
   142  
   143  //the default secret parameters
   144  static const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull};
   145  
   146  //a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand
   147  static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=0xa0761d6478bd642full; B^=0xe7037ed1a0b428dbull; _wymum(&A,&B); return _wymix(A^0xa0761d6478bd642full,B^0xe7037ed1a0b428dbull);}
   148  
   149  //The wyrand PRNG that pass BigCrush and PractRand
   150  static inline uint64_t wyrand(uint64_t *seed){ *seed+=0xa0761d6478bd642full; return _wymix(*seed,*seed^0xe7037ed1a0b428dbull);}
   151  
   152  //convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash.
   153  static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}
   154  
   155  //convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash.
   156  static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;}
   157  
   158  #if(!WYHASH_32BIT_MUM)
   159  //fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash.
   160  static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; }
   161  #endif
   162  
   163  //make your own secret
   164  static inline void make_secret(uint64_t seed, uint64_t *secret){
   165    uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
   166    for(size_t i=0;i<4;i++){
   167      uint8_t ok;
   168      do{
   169        ok=1; secret[i]=0;
   170        for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
   171        if(secret[i]%2==0){ ok=0; continue; }
   172        for(size_t j=0;j<i;j++) {
   173  #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
   174          if(__builtin_popcountll(secret[j]^secret[i])!=32){ ok=0; break; }
   175  #elif defined(_MSC_VER) && defined(_M_X64)
   176          if(_mm_popcnt_u64(secret[j]^secret[i])!=32){ ok=0; break; }
   177  #else
   178          //manual popcount
   179          uint64_t x = secret[j]^secret[i];
   180          x -= (x >> 1) & 0x5555555555555555;
   181          x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
   182          x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
   183          x = (x * 0x0101010101010101) >> 56;
   184          if(x!=32){ ok=0; break; }
   185  #endif
   186        }
   187      }while(!ok);
   188    }
   189  }
   190  
   191  /*  This is world's fastest hash map: 2x faster than bytell_hash_map.
   192      It does not store the keys, but only the hash/signature of keys.
   193      First we use pos=hash1(key) to approximately locate the bucket.
   194      Then we search signature=hash2(key) from pos linearly.
   195      If we find a bucket with matched signature we report the bucket
   196      Or if we meet a bucket whose signature=0, we report a new position to insert
   197      The signature collision probability is very low as we usually searched N~10 buckets.
   198      By combining hash1 and hash2, we acturally have 128 bit anti-collision strength.
   199      hash1 and hash2 can be the same function, resulting lower collision resistance but faster.
   200      The signature is 64 bit, but can be modified to 32 bit if necessary for save space.
   201      The above two can be activated by define WYHASHMAP_WEAK_SMALL_FAST
   202      simple examples:
   203      const	size_t	size=213432;
   204      vector<wyhashmap_t>	idx(size);	//	allocate the index of fixed size. idx MUST be zeroed.
   205      vector<value_class>	value(size);	//	we only care about the index, user should maintain his own value vectors.
   206      string  key="dhskfhdsj"	//	the object to be inserted into idx
   207      size_t	pos=wyhashmap(idx.data(), idx.size(), key.c_str(), key.size(), 1);	//	get the position and insert
   208      if(pos<size)	value[pos]++;	//	we process the vallue
   209      else	cerr<<"map is full\n";
   210      pos=wyhashmap(idx.data(), idx.size(), key.c_str(), key.size(), 0);	// just lookup by setting insert=0
   211      if(pos<size)	value[pos]++;	//	we process the vallue
   212      else	cerr<<"the key does not exist\n";
   213  */
   214  /*
   215  #ifdef	WYHASHMAP_WEAK_SMALL_FAST	// for small hashmaps whose size < 2^24 and acceptable collision
   216  typedef	uint32_t	wyhashmap_t;
   217  #else
   218  typedef	uint64_t	wyhashmap_t;
   219  #endif
   220  static	inline	size_t	wyhashmap(wyhashmap_t	*idx,	size_t	idx_size,	const	void *key, size_t	key_size,	uint8_t	insert, uint64_t *secret){
   221  	size_t	i=1;	uint64_t	h2;	wyhashmap_t	sig;
   222  	do{	sig=h2=wyhash(key,key_size,i,secret);	i++;	}while(_unlikely_(!sig));
   223  #ifdef	WYHASHMAP_WEAK_SMALL_FAST
   224  	size_t	i0=wy2u0k(h2,idx_size);
   225  #else
   226  	size_t	i0=wy2u0k(wyhash(key,key_size,0,secret),idx_size);
   227  #endif
   228  	for(i=i0;	i<idx_size&&idx[i]&&idx[i]!=sig;	i++);
   229  	if(_unlikely_(i==idx_size)){
   230  		for(i=0;	i<i0&&idx[i]&&idx[i]!=sig;  i++);
   231  		if(i==i0)	return	idx_size;
   232  	}
   233  	if(!idx[i]){
   234  		if(insert)	idx[i]=sig;
   235  		else	return	idx_size;
   236  	}
   237  	return	i;
   238  }
   239  */
   240  #endif
   241  
   242  /* The Unlicense
   243  This is free and unencumbered software released into the public domain.
   244  Anyone is free to copy, modify, publish, use, compile, sell, or
   245  distribute this software, either in source code form or as a compiled
   246  binary, for any purpose, commercial or non-commercial, and by any
   247  means.
   248  In jurisdictions that recognize copyright laws, the author or authors
   249  of this software dedicate any and all copyright interest in the
   250  software to the public domain. We make this dedication for the benefit
   251  of the public at large and to the detriment of our heirs and
   252  successors. We intend this dedication to be an overt act of
   253  relinquishment in perpetuity of all present and future rights to this
   254  software under copyright law.
   255  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   256  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   257  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   258  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   259  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   260  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   261  OTHER DEALINGS IN THE SOFTWARE.
   262  For more information, please refer to <http://unlicense.org/>
   263  */