github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/core/meta/hrw.go (about) 1 // Package meta: cluster-level metadata 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package meta 6 7 import ( 8 "fmt" 9 "sync/atomic" 10 11 "github.com/NVIDIA/aistore/api/apc" 12 "github.com/NVIDIA/aistore/cmn" 13 "github.com/NVIDIA/aistore/cmn/cos" 14 "github.com/NVIDIA/aistore/cmn/xoshiro256" 15 "github.com/OneOfOne/xxhash" 16 ) 17 18 // A variant of consistent hash based on rendezvous algorithm by Thaler and Ravishankar, 19 // aka highest random weight (HRW) 20 // See also: fs/hrw.go 21 22 var robin atomic.Uint64 // round 23 24 func (smap *Smap) HrwName2T(uname string) (*Snode, error) { 25 digest := xxhash.Checksum64S(cos.UnsafeB(uname), cos.MLCG32) 26 return smap.HrwHash2T(digest) 27 } 28 29 func (smap *Smap) HrwMultiHome(uname string) (si *Snode, netName string, err error) { 30 digest := xxhash.Checksum64S(cos.UnsafeB(uname), cos.MLCG32) 31 si, err = smap.HrwHash2T(digest) 32 if err != nil { 33 return nil, cmn.NetPublic, err 34 } 35 l := len(si.PubExtra) 36 if l == 0 { 37 return si, cmn.NetPublic, nil 38 } 39 i := robin.Add(1) % uint64(l+1) 40 if i == 0 { 41 return si, cmn.NetPublic, nil 42 } 43 return si, si.PubExtra[i-1].URL, nil 44 } 45 46 func (smap *Smap) HrwHash2T(digest uint64) (si *Snode, err error) { 47 var maxH uint64 48 for _, tsi := range smap.Tmap { 49 if tsi.InMaintOrDecomm() { // always skipping targets 'in maintenance mode' 50 continue 51 } 52 cs := xoshiro256.Hash(tsi.Digest() ^ digest) 53 if cs >= maxH { 54 maxH = cs 55 si = tsi 56 } 57 } 58 if si == nil { 59 err = cmn.NewErrNoNodes(apc.Target, len(smap.Tmap)) 60 } 61 return si, err 62 } 63 64 // NOTE: including targets 'in maintenance mode', if any 65 func (smap *Smap) HrwHash2Tall(digest uint64) (si *Snode, err error) { 66 var maxH uint64 67 for _, tsi := range smap.Tmap { 68 cs := xoshiro256.Hash(tsi.Digest() ^ digest) 69 if cs >= maxH { 70 maxH = cs 71 si = tsi 72 } 73 } 74 if si == nil { 75 err = cmn.NewErrNoNodes(apc.Target, len(smap.Tmap)) 76 } 77 return si, err 78 } 79 80 func (smap *Smap) HrwProxy(idToSkip string) (pi *Snode, err error) { 81 var maxH uint64 82 for pid, psi := range smap.Pmap { 83 if pid == idToSkip { 84 continue 85 } 86 if psi.Flags.IsSet(SnodeNonElectable) { 87 continue 88 } 89 if psi.InMaintOrDecomm() { 90 continue 91 } 92 if d := psi.Digest(); d >= maxH { 93 maxH = d 94 pi = psi 95 } 96 } 97 if pi == nil { 98 err = cmn.NewErrNoNodes(apc.Proxy, len(smap.Pmap)) 99 } 100 return pi, err 101 } 102 103 func (smap *Smap) HrwIC(uuid string) (pi *Snode, err error) { 104 var ( 105 maxH uint64 106 digest = xxhash.Checksum64S(cos.UnsafeB(uuid), cos.MLCG32) 107 ) 108 for _, psi := range smap.Pmap { 109 if psi.InMaintOrDecomm() || !psi.IsIC() { 110 continue 111 } 112 cs := xoshiro256.Hash(psi.Digest() ^ digest) 113 if cs >= maxH { 114 maxH = cs 115 pi = psi 116 } 117 } 118 if pi == nil { 119 err = fmt.Errorf("IC is empty %s: %s", smap, smap.StrIC(nil)) 120 } 121 return pi, err 122 } 123 124 // Returns a target for a given task. E.g. usage: list objects in a cloud bucket 125 // (we want only one target to do it). 126 func (smap *Smap) HrwTargetTask(uuid string) (si *Snode, err error) { 127 var ( 128 maxH uint64 129 digest = xxhash.Checksum64S(cos.UnsafeB(uuid), cos.MLCG32) 130 ) 131 for _, tsi := range smap.Tmap { 132 if tsi.InMaintOrDecomm() { 133 continue 134 } 135 // Assumes that sinfo.idDigest is initialized 136 cs := xoshiro256.Hash(tsi.Digest() ^ digest) 137 if cs >= maxH { 138 maxH = cs 139 si = tsi 140 } 141 } 142 if si == nil { 143 err = cmn.NewErrNoNodes(apc.Target, len(smap.Tmap)) 144 } 145 return si, err 146 } 147 148 ///////////// 149 // hrwList // 150 ///////////// 151 152 type hrwList struct { 153 hs []uint64 154 sis Nodes 155 n int 156 } 157 158 // Sorts all targets in a cluster by their respective HRW (weights) in a descending order; 159 // returns resulting subset (aka slice) that has the requested length = count. 160 // Returns error if the cluster does not have enough targets. 161 // If count == length of Smap.Tmap, the function returns as many targets as possible. 162 163 func (smap *Smap) HrwTargetList(uname string, count int) (sis Nodes, err error) { 164 const fmterr = "%v: required %d, available %d, %s" 165 cnt := smap.CountTargets() 166 if cnt < count { 167 err = fmt.Errorf(fmterr, cmn.ErrNotEnoughTargets, count, cnt, smap) 168 return 169 } 170 digest := xxhash.Checksum64S(cos.UnsafeB(uname), cos.MLCG32) 171 hlist := newHrwList(count) 172 173 for _, tsi := range smap.Tmap { 174 cs := xoshiro256.Hash(tsi.Digest() ^ digest) 175 if tsi.InMaintOrDecomm() { 176 continue 177 } 178 hlist.add(cs, tsi) 179 } 180 sis = hlist.get() 181 if count != cnt && len(sis) < count { 182 err = fmt.Errorf(fmterr, cmn.ErrNotEnoughTargets, count, len(sis), smap) 183 return nil, err 184 } 185 return sis, nil 186 } 187 188 func newHrwList(count int) *hrwList { 189 return &hrwList{hs: make([]uint64, 0, count), sis: make(Nodes, 0, count), n: count} 190 } 191 192 func (hl *hrwList) get() Nodes { return hl.sis } 193 194 // Adds Snode with `weight`. The result is sorted on the fly with insertion sort 195 // and it makes sure that the length of resulting list never exceeds `count` 196 func (hl *hrwList) add(weight uint64, sinfo *Snode) { 197 l := len(hl.sis) 198 if l == hl.n && weight <= hl.hs[l-1] { 199 return 200 } 201 if l == hl.n { 202 hl.hs[l-1] = weight 203 hl.sis[l-1] = sinfo 204 } else { 205 hl.hs = append(hl.hs, weight) 206 hl.sis = append(hl.sis, sinfo) 207 l++ 208 } 209 idx := l - 1 210 for idx > 0 && hl.hs[idx-1] < hl.hs[idx] { 211 hl.hs[idx], hl.hs[idx-1] = hl.hs[idx-1], hl.hs[idx] 212 hl.sis[idx], hl.sis[idx-1] = hl.sis[idx-1], hl.sis[idx] 213 idx-- 214 } 215 }