github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/storage/wal/series.go (about) 1 // This directory was copied and adapted from https://github.com/grafana/agent/tree/main/pkg/metrics. 2 // We cannot vendor the agent in since the agent vendors loki in, which would cause a cyclic dependency. 3 // NOTE: many changes have been made to the original code for our use-case. 4 package wal 5 6 import ( 7 "sync" 8 9 "github.com/prometheus/prometheus/model/labels" 10 "github.com/prometheus/prometheus/tsdb/chunks" 11 ) 12 13 // TODO(dannyk): add label set interning 14 15 type memSeries struct { 16 sync.Mutex 17 18 ref chunks.HeadSeriesRef 19 lset labels.Labels 20 lastTs int64 21 22 // TODO(rfratto): this solution below isn't perfect, and there's still 23 // the possibility for a series to be deleted before it's 24 // completely gone from the WAL. Rather, we should have gc return 25 // a "should delete" map and be given a "deleted" map. 26 // If a series that is going to be marked for deletion is in the 27 // "deleted" map, then it should be deleted instead. 28 // 29 // The "deleted" map will be populated by the Truncate function. 30 // It will be cleared with every call to gc. 31 32 // willDelete marks a series as to be deleted on the next garbage 33 // collection. If it receives a write, willDelete is disabled. 34 willDelete bool 35 36 // Whether this series has samples waiting to be committed to the WAL 37 pendingCommit bool 38 } 39 40 func (s *memSeries) updateTs(ts int64) { 41 s.lastTs = ts 42 s.willDelete = false 43 s.pendingCommit = true 44 } 45 46 // seriesHashmap is a simple hashmap for memSeries by their label set. It is 47 // built on top of a regular hashmap and holds a slice of series to resolve 48 // hash collisions. Its methods require the hash to be submitted with it to 49 // avoid re-computations throughout the code. 50 // 51 // This code is copied from the Prometheus TSDB. 52 type seriesHashmap map[uint64][]*memSeries 53 54 func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { 55 for _, s := range m[hash] { 56 if labels.Equal(s.lset, lset) { 57 return s 58 } 59 } 60 return nil 61 } 62 63 func (m seriesHashmap) set(hash uint64, s *memSeries) { 64 //intern.InternLabels(intern.Global, s.lset) 65 66 l := m[hash] 67 for i, prev := range l { 68 if labels.Equal(prev.lset, s.lset) { 69 l[i] = s 70 return 71 } 72 } 73 m[hash] = append(l, s) 74 } 75 76 func (m seriesHashmap) del(hash uint64, ref chunks.HeadSeriesRef) { 77 var rem []*memSeries 78 for _, s := range m[hash] { 79 if s.ref != ref { 80 rem = append(rem, s) 81 } 82 } 83 if len(rem) == 0 { 84 delete(m, hash) 85 } else { 86 m[hash] = rem 87 } 88 } 89 90 const ( 91 // defaultStripeSize is the default number of entries to allocate in the 92 // stripeSeries hash map. 93 defaultStripeSize = 1 << 14 94 ) 95 96 // stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention. 97 // The locks are padded to not be on the same cache line. Filling the padded space 98 // with the maps was profiled to be slower – likely due to the additional pointer 99 // dereferences. 100 // 101 // This code is copied from the Prometheus TSDB. 102 type stripeSeries struct { 103 size int 104 series []map[chunks.HeadSeriesRef]*memSeries 105 hashes []seriesHashmap 106 locks []stripeLock 107 } 108 109 type stripeLock struct { 110 sync.RWMutex 111 // Padding to avoid multiple locks being on the same cache line. 112 _ [40]byte 113 } 114 115 func newStripeSeries() *stripeSeries { 116 stripeSize := defaultStripeSize 117 s := &stripeSeries{ 118 size: stripeSize, 119 series: make([]map[chunks.HeadSeriesRef]*memSeries, stripeSize), 120 hashes: make([]seriesHashmap, stripeSize), 121 locks: make([]stripeLock, stripeSize), 122 } 123 124 for i := range s.series { 125 s.series[i] = map[chunks.HeadSeriesRef]*memSeries{} 126 } 127 for i := range s.hashes { 128 s.hashes[i] = seriesHashmap{} 129 } 130 return s 131 } 132 133 // gc garbage collects old chunks that are strictly before mint and removes 134 // series entirely that have no chunks left. 135 func (s *stripeSeries) gc(mint int64) map[chunks.HeadSeriesRef]struct{} { 136 var ( 137 deleted = map[chunks.HeadSeriesRef]struct{}{} 138 ) 139 140 // Run through all series and find series that haven't been written to 141 // since mint. Mark those series as deleted and store their ID. 142 for i := 0; i < s.size; i++ { 143 s.locks[i].Lock() 144 145 for _, series := range s.series[i] { 146 series.Lock() 147 seriesHash := series.lset.Hash() 148 149 // If the series has received a write after mint, there's still 150 // data and it's not completely gone yet. 151 if series.lastTs >= mint || series.pendingCommit { 152 series.willDelete = false 153 series.Unlock() 154 continue 155 } 156 157 // The series hasn't received any data and *might* be gone, but 158 // we want to give it an opportunity to come back before marking 159 // it as deleted, so we wait one more GC cycle. 160 if !series.willDelete { 161 series.willDelete = true 162 series.Unlock() 163 continue 164 } 165 166 // The series is gone entirely. We'll need to delete the label 167 // hash (if one exists) so we'll obtain a lock for that too. 168 j := int(seriesHash) & (s.size - 1) 169 if i != j { 170 s.locks[j].Lock() 171 } 172 173 deleted[series.ref] = struct{}{} 174 delete(s.series[i], series.ref) 175 s.hashes[j].del(seriesHash, series.ref) 176 177 if i != j { 178 s.locks[j].Unlock() 179 } 180 181 series.Unlock() 182 } 183 184 s.locks[i].Unlock() 185 } 186 187 return deleted 188 } 189 190 func (s *stripeSeries) getByID(id chunks.HeadSeriesRef) *memSeries { 191 i := uint64(id) & uint64(s.size-1) 192 193 s.locks[i].RLock() 194 series := s.series[i][id] 195 s.locks[i].RUnlock() 196 197 return series 198 } 199 200 func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { 201 i := hash & uint64(s.size-1) 202 203 s.locks[i].RLock() 204 series := s.hashes[i].get(hash, lset) 205 s.locks[i].RUnlock() 206 207 return series 208 } 209 210 func (s *stripeSeries) set(hash uint64, series *memSeries) { 211 i := hash & uint64(s.size-1) 212 s.locks[i].Lock() 213 s.hashes[i].set(hash, series) 214 s.locks[i].Unlock() 215 216 i = uint64(series.ref) & uint64(s.size-1) 217 s.locks[i].Lock() 218 s.series[i][series.ref] = series 219 s.locks[i].Unlock() 220 } 221 222 func (s *stripeSeries) iterator() *stripeSeriesIterator { 223 return &stripeSeriesIterator{s} 224 } 225 226 // stripeSeriesIterator allows to iterate over series through a channel. 227 // The channel should always be completely consumed to not leak. 228 type stripeSeriesIterator struct { 229 s *stripeSeries 230 } 231 232 func (it *stripeSeriesIterator) Channel() <-chan *memSeries { 233 ret := make(chan *memSeries) 234 235 go func() { 236 for i := 0; i < it.s.size; i++ { 237 it.s.locks[i].RLock() 238 239 for _, series := range it.s.series[i] { 240 series.Lock() 241 242 j := int(series.lset.Hash()) & (it.s.size - 1) 243 if i != j { 244 it.s.locks[j].RLock() 245 } 246 247 ret <- series 248 249 if i != j { 250 it.s.locks[j].RUnlock() 251 } 252 series.Unlock() 253 } 254 255 it.s.locks[i].RUnlock() 256 } 257 258 close(ret) 259 }() 260 261 return ret 262 }