github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/head_wal.go (about)

     1  package tsdb
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/go-kit/log"
     7  	"github.com/pkg/errors"
     8  	"github.com/prometheus/prometheus/tsdb/record"
     9  	"github.com/prometheus/prometheus/tsdb/wal"
    10  
    11  	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
    12  	"github.com/grafana/loki/pkg/util/encoding"
    13  )
    14  
// WAL is the write-ahead-log abstraction used by the TSDB head.
// Implementations accept WALRecords through Log and are shut down with Stop.
type WAL interface {
	// Start prepares the WAL for use.
	// NOTE(review): the meaning of the time argument is not visible in this
	// file; presumably it is the WAL's initialization time — confirm against
	// the implementation.
	Start(time.Time) error
	// Log appends the given record to the WAL.
	Log(*WALRecord) error
	// Stop shuts the WAL down.
	Stop() error
}
    20  
    21  // TODO(owen-d): There are probably some performance gains to be had by utilizing
    22  // pools here, but in the interest of implementation time and given chunks aren't
    23  // flushed often (generally ~5/s), this seems fine.
    24  // This may also be applicable to varint encoding.
    25  
// walSegmentSize is the segment size, in bytes, passed to the underlying WAL
// when it is created: 128 << 10 = 131072 bytes (128 KiB).
// The segment sizes are kept small for the TSDB Head here because
// we only store chunk references
const walSegmentSize = 128 << 10
    30  
// RecordType is the one-byte tag written as the first byte of every encoded
// WAL record. decodeWALRecord switches on it to choose the decoder.
type RecordType byte

// By prefixing records with versions, we can easily update our wal schema.
// The numeric values are written into WAL records, so existing entries must
// not be reordered or renumbered.
const (
	// WalRecordSeries tags a record that holds a tenant ID followed by a
	// single encoded series (see encodeSeries).
	WalRecordSeries RecordType = iota
	// WalRecordChunks tags a record that holds a tenant ID, a series ref and
	// the chunk metas attached to that ref (see encodeChunks).
	WalRecordChunks
)
    42  
// WALRecord is the unit handed to WAL.Log and filled in by decodeWALRecord.
// A single WALRecord may carry a series, chunk metas, or both; headWAL.Log
// writes them as separate on-disk records (series first), and skips whichever
// part is empty.
type WALRecord struct {
	// UserID is the tenant the record belongs to; it is encoded into every
	// on-disk record.
	UserID string
	// Series is the series definition; it is only logged when it has labels.
	Series record.RefSeries
	// Chks holds the chunk metas to log; it is only logged when non-empty.
	Chks ChunkMetasRecord
}
    48  
// ChunkMetasRecord groups a set of chunk metas with the reference they are
// recorded under.
type ChunkMetasRecord struct {
	// Chks is the list of chunk metas encoded one after another in the record.
	Chks index.ChunkMetas
	// Ref is written as a big-endian uint64 ahead of the chunk metas.
	// decodeChunks refers to it as the "series ref".
	Ref uint64
}
    53  
    54  func (r *WALRecord) encodeSeries(b []byte) []byte {
    55  	buf := encoding.EncWith(b)
    56  	buf.PutByte(byte(WalRecordSeries))
    57  	buf.PutUvarintStr(r.UserID)
    58  
    59  	var enc record.Encoder
    60  	// The 'encoded' already has the type header and userID here, hence re-using
    61  	// the remaining part of the slice (i.e. encoded[len(encoded):])) to encode the series.
    62  	encoded := buf.Get()
    63  	encoded = append(encoded, enc.Series([]record.RefSeries{r.Series}, encoded[len(encoded):])...)
    64  
    65  	return encoded
    66  }
    67  
    68  func (r *WALRecord) encodeChunks(b []byte) []byte {
    69  	buf := encoding.EncWith(b)
    70  	buf.PutByte(byte(WalRecordChunks))
    71  	buf.PutUvarintStr(r.UserID)
    72  	buf.PutBE64(r.Chks.Ref)
    73  	buf.PutUvarint(len(r.Chks.Chks))
    74  
    75  	for _, chk := range r.Chks.Chks {
    76  		buf.PutBE64(uint64(chk.MinTime))
    77  		buf.PutBE64(uint64(chk.MaxTime))
    78  		buf.PutBE32(chk.Checksum)
    79  		buf.PutBE32(chk.KB)
    80  		buf.PutBE32(chk.Entries)
    81  	}
    82  
    83  	return buf.Get()
    84  }
    85  
    86  func decodeChunks(b []byte, rec *WALRecord) error {
    87  	if len(b) == 0 {
    88  		return nil
    89  	}
    90  
    91  	dec := encoding.DecWith(b)
    92  
    93  	rec.Chks.Ref = dec.Be64()
    94  	if err := dec.Err(); err != nil {
    95  		return errors.Wrap(err, "decoding series ref")
    96  	}
    97  
    98  	ln := dec.Uvarint()
    99  	if err := dec.Err(); err != nil {
   100  		return errors.Wrap(err, "decoding number of chunks")
   101  	}
   102  	// allocate space for the required number of chunks
   103  	rec.Chks.Chks = make(index.ChunkMetas, 0, ln)
   104  
   105  	for len(dec.B) > 0 && dec.Err() == nil {
   106  		rec.Chks.Chks = append(rec.Chks.Chks, index.ChunkMeta{
   107  			MinTime:  dec.Be64int64(),
   108  			MaxTime:  dec.Be64int64(),
   109  			Checksum: dec.Be32(),
   110  			KB:       dec.Be32(),
   111  			Entries:  dec.Be32(),
   112  		})
   113  	}
   114  
   115  	if err := dec.Err(); err != nil {
   116  		return errors.Wrap(err, "decoding chunk metas")
   117  	}
   118  
   119  	return nil
   120  }
   121  
   122  func decodeWALRecord(b []byte, walRec *WALRecord) error {
   123  	var (
   124  		userID string
   125  		dec    record.Decoder
   126  
   127  		decbuf = encoding.DecWith(b)
   128  		t      = RecordType(decbuf.Byte())
   129  	)
   130  
   131  	switch t {
   132  	case WalRecordSeries:
   133  		userID = decbuf.UvarintStr()
   134  		rSeries, err := dec.Series(decbuf.B, nil)
   135  		if err != nil {
   136  			return errors.Wrap(err, "decoding head series")
   137  		}
   138  		// unlike tsdb, we only add one series per record.
   139  		if len(rSeries) > 1 {
   140  			return errors.New("more than one series detected in tsdb head wal record")
   141  		}
   142  		if len(rSeries) == 1 {
   143  			walRec.Series = rSeries[0]
   144  		}
   145  	case WalRecordChunks:
   146  		userID = decbuf.UvarintStr()
   147  		if err := decodeChunks(decbuf.B, walRec); err != nil {
   148  			return err
   149  		}
   150  	default:
   151  		return errors.New("unknown record type")
   152  	}
   153  
   154  	if decbuf.Err() != nil {
   155  		return decbuf.Err()
   156  	}
   157  
   158  	walRec.UserID = userID
   159  	return nil
   160  }
   161  
// the headWAL, unlike Head, is multi-tenant. This is just to avoid the need to maintain
// an open segment per tenant (potentially thousands of them)
type headWAL struct {
	// initialized is the timestamp supplied to newHeadWAL when this WAL was built.
	initialized time.Time
	// log is the logger supplied to newHeadWAL.
	log log.Logger
	// wal is the underlying Prometheus WAL that encoded records are written to.
	wal *wal.WAL
}
   169  
   170  func newHeadWAL(log log.Logger, dir string, t time.Time) (*headWAL, error) {
   171  	// NB: if we use a non-nil Prometheus Registerer, ensure
   172  	// that the underlying metrics won't conflict with existing WAL metrics in the ingester.
   173  	// Likely, this can be done by adding extra label(s)
   174  	wal, err := wal.NewSize(log, nil, dir, walSegmentSize, false)
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  
   179  	return &headWAL{
   180  		initialized: t,
   181  		log:         log,
   182  		wal:         wal,
   183  	}, nil
   184  }
   185  
// Stop closes the underlying WAL and returns whatever error Close reports.
func (w *headWAL) Stop() error {
	return w.wal.Close()
}
   189  
   190  func (w *headWAL) Log(record *WALRecord) error {
   191  	if record == nil {
   192  		return nil
   193  	}
   194  
   195  	var buf []byte
   196  
   197  	// Always write series before chunks
   198  	if len(record.Series.Labels) > 0 {
   199  		buf = record.encodeSeries(buf[:0])
   200  		if err := w.wal.Log(buf); err != nil {
   201  			return err
   202  		}
   203  	}
   204  
   205  	if len(record.Chks.Chks) > 0 {
   206  		buf = record.encodeChunks(buf[:0])
   207  		if err := w.wal.Log(buf); err != nil {
   208  			return err
   209  		}
   210  	}
   211  
   212  	return nil
   213  }