github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/commitlogger.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bufio" 16 "bytes" 17 "encoding/binary" 18 "fmt" 19 "os" 20 "sync/atomic" 21 22 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv/rwhasher" 23 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 24 ) 25 26 type commitLogger struct { 27 file *os.File 28 writer *bufio.Writer 29 n atomic.Int64 30 path string 31 32 checksumWriter rwhasher.WriterHasher 33 34 bufNode *bytes.Buffer 35 36 // e.g. when recovering from an existing log, we do not want to write into a 37 // new log again 38 paused bool 39 } 40 41 // commit log entry data format 42 // --------------------------- 43 // | version == 0 (1byte) | 44 // | record (dynamic length) | 45 // --------------------------- 46 47 // ------------------------------------------------------ 48 // | version == 1 (1byte) | 49 // | type (1byte) | 50 // | node length (4bytes) | 51 // | node (dynamic length) | 52 // | checksum (crc32 4bytes non-checksum fields so far) | 53 // ------------------------------------------------------ 54 55 const CurrentVersion uint8 = 1 56 57 type CommitType uint8 58 59 const ( 60 CommitTypeReplace CommitType = iota // replace strategy 61 62 // collection strategy - this can handle all cases as updates and deletes are 63 // only appends in a collection strategy 64 CommitTypeCollection 65 CommitTypeRoaringSet 66 ) 67 68 func (ct CommitType) String() string { 69 switch ct { 70 case CommitTypeReplace: 71 return "replace" 72 case CommitTypeCollection: 73 return "collection" 74 case CommitTypeRoaringSet: 75 return "roaringset" 76 default: 77 return "unknown" 78 } 79 } 80 81 func (ct CommitType) Is(checkedCommitType CommitType) bool { 82 return ct == checkedCommitType 83 } 84 85 func newCommitLogger(path string) (*commitLogger, error) { 86 out := &commitLogger{ 87 path: path + ".wal", 88 } 89 90 f, err := os.OpenFile(out.path, os.O_CREATE|os.O_RDWR, 0o666) 91 if err != nil { 92 return nil, err 93 } 94 95 out.file = f 96 97 out.writer = bufio.NewWriter(f) 98 out.checksumWriter = rwhasher.NewCRC32Writer(out.writer) 99 100 out.bufNode = bytes.NewBuffer(nil) 101 102 return out, nil 103 } 104 105 func (cl *commitLogger) writeEntry(commitType CommitType, nodeBytes []byte) error { 106 // TODO: do we need a timestamp? if so, does it need to be a vector clock? 107 108 err := binary.Write(cl.checksumWriter, binary.LittleEndian, commitType) 109 if err != nil { 110 return err 111 } 112 113 err = binary.Write(cl.checksumWriter, binary.LittleEndian, CurrentVersion) 114 if err != nil { 115 return err 116 } 117 118 err = binary.Write(cl.checksumWriter, binary.LittleEndian, uint32(len(nodeBytes))) 119 if err != nil { 120 return err 121 } 122 123 // write node 124 _, err = cl.checksumWriter.Write(nodeBytes) 125 if err != nil { 126 return err 127 } 128 129 // write record checksum directly on the writer 130 checksumSize, err := cl.writer.Write(cl.checksumWriter.Hash()) 131 if err != nil { 132 return err 133 } 134 135 cl.n.Add(int64(1 + 1 + 4 + len(nodeBytes) + checksumSize)) 136 137 return nil 138 } 139 140 func (cl *commitLogger) put(node segmentReplaceNode) error { 141 if cl.paused { 142 return nil 143 } 144 145 cl.bufNode.Reset() 146 147 ki, err := node.KeyIndexAndWriteTo(cl.bufNode) 148 if err != nil { 149 return err 150 } 151 if len(cl.bufNode.Bytes()) != ki.ValueEnd-ki.ValueStart { 152 return fmt.Errorf("unexpected error, node size mismatch") 153 } 154 155 return cl.writeEntry(CommitTypeReplace, cl.bufNode.Bytes()) 156 } 157 158 func (cl *commitLogger) append(node segmentCollectionNode) error { 159 if cl.paused { 160 return nil 161 } 162 163 cl.bufNode.Reset() 164 165 ki, err := node.KeyIndexAndWriteTo(cl.bufNode) 166 if err != nil { 167 return err 168 } 169 if len(cl.bufNode.Bytes()) != ki.ValueEnd-ki.ValueStart { 170 return fmt.Errorf("unexpected error, node size mismatch") 171 } 172 173 return cl.writeEntry(CommitTypeCollection, cl.bufNode.Bytes()) 174 } 175 176 func (cl *commitLogger) add(node *roaringset.SegmentNode) error { 177 if cl.paused { 178 return nil 179 } 180 181 cl.bufNode.Reset() 182 183 ki, err := node.KeyIndexAndWriteTo(cl.bufNode, 0) 184 if err != nil { 185 return err 186 } 187 if len(cl.bufNode.Bytes()) != ki.ValueEnd-ki.ValueStart { 188 return fmt.Errorf("unexpected error, node size mismatch") 189 } 190 191 return cl.writeEntry(CommitTypeRoaringSet, cl.bufNode.Bytes()) 192 } 193 194 // Size returns the amount of data that has been written since the commit 195 // logger was initialized. After a flush a new logger is initialized which 196 // automatically resets the logger. 197 func (cl *commitLogger) Size() int64 { 198 return cl.n.Load() 199 } 200 201 func (cl *commitLogger) close() error { 202 if !cl.paused { 203 if err := cl.writer.Flush(); err != nil { 204 return err 205 } 206 207 if err := cl.file.Sync(); err != nil { 208 return err 209 } 210 } 211 212 return cl.file.Close() 213 } 214 215 func (cl *commitLogger) pause() { 216 cl.paused = true 217 } 218 219 func (cl *commitLogger) unpause() { 220 cl.paused = false 221 } 222 223 func (cl *commitLogger) delete() error { 224 return os.Remove(cl.path) 225 } 226 227 func (cl *commitLogger) flushBuffers() error { 228 err := cl.writer.Flush() 229 if err != nil { 230 return fmt.Errorf("flushing WAL %q: %w", cl.path, err) 231 } 232 233 return nil 234 }