github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/head_wal.go (about) 1 package tsdb 2 3 import ( 4 "time" 5 6 "github.com/go-kit/log" 7 "github.com/pkg/errors" 8 "github.com/prometheus/prometheus/tsdb/record" 9 "github.com/prometheus/prometheus/tsdb/wal" 10 11 "github.com/grafana/loki/pkg/storage/stores/tsdb/index" 12 "github.com/grafana/loki/pkg/util/encoding" 13 ) 14 15 type WAL interface { 16 Start(time.Time) error 17 Log(*WALRecord) error 18 Stop() error 19 } 20 21 // TODO(owen-d): There are probably some performance gains to be had by utilizing 22 // pools here, but in the interest of implementation time and given chunks aren't 23 // flushed often (generally ~5/s), this seems fine. 24 // This may also be applicable to varint encoding. 25 26 // 128KB 27 // The segment sizes are kept small for the TSDB Head here because 28 // we only store chunk references 29 const walSegmentSize = 128 << 10 30 31 type RecordType byte 32 33 // By prefixing records with versions, we can easily update our wal schema 34 const ( 35 // FirstWrite is a special record type written once 36 // at the beginning of every WAL. It records the system time 37 // when the WAL was created. This is used to determine when to rotate 38 // WALs and persists across restarts. 39 WalRecordSeries RecordType = iota 40 WalRecordChunks 41 ) 42 43 type WALRecord struct { 44 UserID string 45 Series record.RefSeries 46 Chks ChunkMetasRecord 47 } 48 49 type ChunkMetasRecord struct { 50 Chks index.ChunkMetas 51 Ref uint64 52 } 53 54 func (r *WALRecord) encodeSeries(b []byte) []byte { 55 buf := encoding.EncWith(b) 56 buf.PutByte(byte(WalRecordSeries)) 57 buf.PutUvarintStr(r.UserID) 58 59 var enc record.Encoder 60 // The 'encoded' already has the type header and userID here, hence re-using 61 // the remaining part of the slice (i.e. encoded[len(encoded):])) to encode the series. 62 encoded := buf.Get() 63 encoded = append(encoded, enc.Series([]record.RefSeries{r.Series}, encoded[len(encoded):])...) 64 65 return encoded 66 } 67 68 func (r *WALRecord) encodeChunks(b []byte) []byte { 69 buf := encoding.EncWith(b) 70 buf.PutByte(byte(WalRecordChunks)) 71 buf.PutUvarintStr(r.UserID) 72 buf.PutBE64(r.Chks.Ref) 73 buf.PutUvarint(len(r.Chks.Chks)) 74 75 for _, chk := range r.Chks.Chks { 76 buf.PutBE64(uint64(chk.MinTime)) 77 buf.PutBE64(uint64(chk.MaxTime)) 78 buf.PutBE32(chk.Checksum) 79 buf.PutBE32(chk.KB) 80 buf.PutBE32(chk.Entries) 81 } 82 83 return buf.Get() 84 } 85 86 func decodeChunks(b []byte, rec *WALRecord) error { 87 if len(b) == 0 { 88 return nil 89 } 90 91 dec := encoding.DecWith(b) 92 93 rec.Chks.Ref = dec.Be64() 94 if err := dec.Err(); err != nil { 95 return errors.Wrap(err, "decoding series ref") 96 } 97 98 ln := dec.Uvarint() 99 if err := dec.Err(); err != nil { 100 return errors.Wrap(err, "decoding number of chunks") 101 } 102 // allocate space for the required number of chunks 103 rec.Chks.Chks = make(index.ChunkMetas, 0, ln) 104 105 for len(dec.B) > 0 && dec.Err() == nil { 106 rec.Chks.Chks = append(rec.Chks.Chks, index.ChunkMeta{ 107 MinTime: dec.Be64int64(), 108 MaxTime: dec.Be64int64(), 109 Checksum: dec.Be32(), 110 KB: dec.Be32(), 111 Entries: dec.Be32(), 112 }) 113 } 114 115 if err := dec.Err(); err != nil { 116 return errors.Wrap(err, "decoding chunk metas") 117 } 118 119 return nil 120 } 121 122 func decodeWALRecord(b []byte, walRec *WALRecord) error { 123 var ( 124 userID string 125 dec record.Decoder 126 127 decbuf = encoding.DecWith(b) 128 t = RecordType(decbuf.Byte()) 129 ) 130 131 switch t { 132 case WalRecordSeries: 133 userID = decbuf.UvarintStr() 134 rSeries, err := dec.Series(decbuf.B, nil) 135 if err != nil { 136 return errors.Wrap(err, "decoding head series") 137 } 138 // unlike tsdb, we only add one series per record. 139 if len(rSeries) > 1 { 140 return errors.New("more than one series detected in tsdb head wal record") 141 } 142 if len(rSeries) == 1 { 143 walRec.Series = rSeries[0] 144 } 145 case WalRecordChunks: 146 userID = decbuf.UvarintStr() 147 if err := decodeChunks(decbuf.B, walRec); err != nil { 148 return err 149 } 150 default: 151 return errors.New("unknown record type") 152 } 153 154 if decbuf.Err() != nil { 155 return decbuf.Err() 156 } 157 158 walRec.UserID = userID 159 return nil 160 } 161 162 // the headWAL, unlike Head, is multi-tenant. This is just to avoid the need to maintain 163 // an open segment per tenant (potentially thousands of them) 164 type headWAL struct { 165 initialized time.Time 166 log log.Logger 167 wal *wal.WAL 168 } 169 170 func newHeadWAL(log log.Logger, dir string, t time.Time) (*headWAL, error) { 171 // NB: if we use a non-nil Prometheus Registerer, ensure 172 // that the underlying metrics won't conflict with existing WAL metrics in the ingester. 173 // Likely, this can be done by adding extra label(s) 174 wal, err := wal.NewSize(log, nil, dir, walSegmentSize, false) 175 if err != nil { 176 return nil, err 177 } 178 179 return &headWAL{ 180 initialized: t, 181 log: log, 182 wal: wal, 183 }, nil 184 } 185 186 func (w *headWAL) Stop() error { 187 return w.wal.Close() 188 } 189 190 func (w *headWAL) Log(record *WALRecord) error { 191 if record == nil { 192 return nil 193 } 194 195 var buf []byte 196 197 // Always write series before chunks 198 if len(record.Series.Labels) > 0 { 199 buf = record.encodeSeries(buf[:0]) 200 if err := w.wal.Log(buf); err != nil { 201 return err 202 } 203 } 204 205 if len(record.Chks.Chks) > 0 { 206 buf = record.encodeChunks(buf[:0]) 207 if err := w.wal.Log(buf); err != nil { 208 return err 209 } 210 } 211 212 return nil 213 }