github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/commitlogger.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bufio"
    16  	"bytes"
    17  	"encoding/binary"
    18  	"fmt"
    19  	"os"
    20  	"sync/atomic"
    21  
    22  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/rwhasher"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    24  )
    25  
// commitLogger writes commit log (WAL) entries to a single file through a
// buffered writer and keeps a running count of the bytes it has written.
type commitLogger struct {
	// file is the WAL file opened by newCommitLogger.
	file   *os.File
	// writer buffers writes to file; flushBuffers and close flush it.
	writer *bufio.Writer
	// n is the number of bytes accounted for by writeEntry so far.
	n      atomic.Int64
	// path is the location of the WAL file, including the ".wal" suffix.
	path   string

	// checksumWriter wraps writer; writeEntry sends every non-checksum field
	// through it and appends the bytes returned by its Hash method.
	checksumWriter rwhasher.WriterHasher

	// bufNode is a scratch buffer that put, append and add reset and reuse to
	// serialize a node before handing it to writeEntry.
	bufNode *bytes.Buffer

	// e.g. when recovering from an existing log, we do not want to write into a
	// new log again
	paused bool
}
    40  
    41  // commit log entry data format
    42  // ---------------------------
    43  // | version == 0 (1byte)    |
    44  // | record (dynamic length) |
    45  // ---------------------------
    46  
// ------------------------------------------------------
// | type (1byte)                                       |
// | version == 1 (1byte)                               |
// | node length (4bytes, little endian)                |
// | node (dynamic length)                              |
// | checksum (crc32 4bytes non-checksum fields so far) |
// ------------------------------------------------------
    54  
// CurrentVersion is the version byte that writeEntry stamps on every commit
// log entry it writes.
const CurrentVersion uint8 = 1
    56  
    57  type CommitType uint8
    58  
    59  const (
    60  	CommitTypeReplace CommitType = iota // replace strategy
    61  
    62  	// collection strategy - this can handle all cases as updates and deletes are
    63  	// only appends in a collection strategy
    64  	CommitTypeCollection
    65  	CommitTypeRoaringSet
    66  )
    67  
    68  func (ct CommitType) String() string {
    69  	switch ct {
    70  	case CommitTypeReplace:
    71  		return "replace"
    72  	case CommitTypeCollection:
    73  		return "collection"
    74  	case CommitTypeRoaringSet:
    75  		return "roaringset"
    76  	default:
    77  		return "unknown"
    78  	}
    79  }
    80  
    81  func (ct CommitType) Is(checkedCommitType CommitType) bool {
    82  	return ct == checkedCommitType
    83  }
    84  
    85  func newCommitLogger(path string) (*commitLogger, error) {
    86  	out := &commitLogger{
    87  		path: path + ".wal",
    88  	}
    89  
    90  	f, err := os.OpenFile(out.path, os.O_CREATE|os.O_RDWR, 0o666)
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  
    95  	out.file = f
    96  
    97  	out.writer = bufio.NewWriter(f)
    98  	out.checksumWriter = rwhasher.NewCRC32Writer(out.writer)
    99  
   100  	out.bufNode = bytes.NewBuffer(nil)
   101  
   102  	return out, nil
   103  }
   104  
   105  func (cl *commitLogger) writeEntry(commitType CommitType, nodeBytes []byte) error {
   106  	// TODO: do we need a timestamp? if so, does it need to be a vector clock?
   107  
   108  	err := binary.Write(cl.checksumWriter, binary.LittleEndian, commitType)
   109  	if err != nil {
   110  		return err
   111  	}
   112  
   113  	err = binary.Write(cl.checksumWriter, binary.LittleEndian, CurrentVersion)
   114  	if err != nil {
   115  		return err
   116  	}
   117  
   118  	err = binary.Write(cl.checksumWriter, binary.LittleEndian, uint32(len(nodeBytes)))
   119  	if err != nil {
   120  		return err
   121  	}
   122  
   123  	// write node
   124  	_, err = cl.checksumWriter.Write(nodeBytes)
   125  	if err != nil {
   126  		return err
   127  	}
   128  
   129  	// write record checksum directly on the writer
   130  	checksumSize, err := cl.writer.Write(cl.checksumWriter.Hash())
   131  	if err != nil {
   132  		return err
   133  	}
   134  
   135  	cl.n.Add(int64(1 + 1 + 4 + len(nodeBytes) + checksumSize))
   136  
   137  	return nil
   138  }
   139  
   140  func (cl *commitLogger) put(node segmentReplaceNode) error {
   141  	if cl.paused {
   142  		return nil
   143  	}
   144  
   145  	cl.bufNode.Reset()
   146  
   147  	ki, err := node.KeyIndexAndWriteTo(cl.bufNode)
   148  	if err != nil {
   149  		return err
   150  	}
   151  	if len(cl.bufNode.Bytes()) != ki.ValueEnd-ki.ValueStart {
   152  		return fmt.Errorf("unexpected error, node size mismatch")
   153  	}
   154  
   155  	return cl.writeEntry(CommitTypeReplace, cl.bufNode.Bytes())
   156  }
   157  
   158  func (cl *commitLogger) append(node segmentCollectionNode) error {
   159  	if cl.paused {
   160  		return nil
   161  	}
   162  
   163  	cl.bufNode.Reset()
   164  
   165  	ki, err := node.KeyIndexAndWriteTo(cl.bufNode)
   166  	if err != nil {
   167  		return err
   168  	}
   169  	if len(cl.bufNode.Bytes()) != ki.ValueEnd-ki.ValueStart {
   170  		return fmt.Errorf("unexpected error, node size mismatch")
   171  	}
   172  
   173  	return cl.writeEntry(CommitTypeCollection, cl.bufNode.Bytes())
   174  }
   175  
   176  func (cl *commitLogger) add(node *roaringset.SegmentNode) error {
   177  	if cl.paused {
   178  		return nil
   179  	}
   180  
   181  	cl.bufNode.Reset()
   182  
   183  	ki, err := node.KeyIndexAndWriteTo(cl.bufNode, 0)
   184  	if err != nil {
   185  		return err
   186  	}
   187  	if len(cl.bufNode.Bytes()) != ki.ValueEnd-ki.ValueStart {
   188  		return fmt.Errorf("unexpected error, node size mismatch")
   189  	}
   190  
   191  	return cl.writeEntry(CommitTypeRoaringSet, cl.bufNode.Bytes())
   192  }
   193  
// Size returns the amount of data, in bytes, that has been written since the
// commit logger was initialized. After a flush a new logger is initialized
// which automatically resets the logger.
//
// The value is the sum of the entry sizes recorded by writeEntry; it counts
// bytes handed to the buffered writer, whether or not they have been flushed
// to the file yet.
func (cl *commitLogger) Size() int64 {
	return cl.n.Load()
}
   200  
   201  func (cl *commitLogger) close() error {
   202  	if !cl.paused {
   203  		if err := cl.writer.Flush(); err != nil {
   204  			return err
   205  		}
   206  
   207  		if err := cl.file.Sync(); err != nil {
   208  			return err
   209  		}
   210  	}
   211  
   212  	return cl.file.Close()
   213  }
   214  
// pause marks the logger as paused. While paused, put, append and add return
// nil without writing anything, and close skips flushing and syncing.
func (cl *commitLogger) pause() {
	cl.paused = true
}
   218  
// unpause clears the paused flag so that put, append and add write entries
// again and close flushes and syncs before closing the file.
func (cl *commitLogger) unpause() {
	cl.paused = false
}
   222  
// delete removes the WAL file at cl.path and returns the error from
// os.Remove, if any. It does not flush buffered data or close the file
// handle.
func (cl *commitLogger) delete() error {
	return os.Remove(cl.path)
}
   226  
   227  func (cl *commitLogger) flushBuffers() error {
   228  	err := cl.writer.Flush()
   229  	if err != nil {
   230  		return fmt.Errorf("flushing WAL %q: %w", cl.path, err)
   231  	}
   232  
   233  	return nil
   234  }