github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/commitlog/writer.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package commitlog

import (
    "bufio"
    "encoding/binary"
    "errors"
    "io"
    "os"

    "github.com/m3db/bitset"
    "github.com/m3db/m3/src/dbnode/digest"
    "github.com/m3db/m3/src/dbnode/persist"
    "github.com/m3db/m3/src/dbnode/persist/fs"
    "github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
    "github.com/m3db/m3/src/dbnode/persist/schema"
    "github.com/m3db/m3/src/dbnode/ts"
    "github.com/m3db/m3/src/x/clock"
    xos "github.com/m3db/m3/src/x/os"
    xtime "github.com/m3db/m3/src/x/time"
)

const (
    // The lengths to reserve for a chunk header:
    // - size uint32
    // - checksumSize uint32
    // - checksumData uint32
    chunkHeaderSizeLen         = 4
    chunkHeaderChecksumSizeLen = 4
    chunkHeaderChecksumDataLen = 4
    chunkHeaderLen             = chunkHeaderSizeLen +
        chunkHeaderChecksumSizeLen +
        chunkHeaderChecksumDataLen

    defaultBitSetLength = 65536

    defaultEncoderBuffSize = 16384
)

var (
    errCommitLogWriterAlreadyOpen = errors.New("commit log writer already open")
    errTagEncoderDataNotAvailable = errors.New("tag iterator data not available")

    endianness = binary.LittleEndian
)

type commitLogWriter interface {
    // Open opens the commit log for writing data.
    Open() (persist.CommitLogFile, error)

    // Write writes an entry in the commit log for a given series.
    Write(
        series ts.Series,
        datapoint ts.Datapoint,
        unit xtime.Unit,
        annotation ts.Annotation,
    ) error

    // Flush flushes any data in the writer's buffer to the chunkWriter, essentially
    // forcing a new chunk to be created. Optionally forces the data to be fsync'd to disk.
    Flush(sync bool) error

    // setOnFlush provides/overrides a callback that is called after successful flush calls.
    // Implementors MAY choose not to implement such a callback mechanism; however, if
    // such a mechanism is implemented they SHOULD properly implement this method.
    setOnFlush(func(err error))

    // Close closes the writer.
    Close() error
}
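
// Illustrative only, not part of the original source: a minimal sketch of how a
// caller might drive commitLogWriter, assuming an Options value `opts` and the
// `series`/`datapoint` values are constructed elsewhere.
//
//    w := newCommitLogWriter(func(err error) {}, opts) // flush callback, e.g. to signal waiters
//    file, err := w.Open()                             // file.FilePath/file.Index identify the new log
//    if err != nil {
//        return err
//    }
//    err = w.Write(series, datapoint, xtime.Nanosecond, nil) // annotation may be nil
//    err = w.Flush(true) // force a chunk out of the buffer and fsync it
//    err = w.Close()     // Close flushes with sync=true before closing the fd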

type chunkWriter interface {
    io.Writer

    reset(f xos.File)
    setOnFlush(func(err error))
    close() error
    isOpen() bool
    sync() error
}

type flushFn func(err error)

type writer struct {
    filePathPrefix      string
    newFileMode         os.FileMode
    newDirectoryMode    os.FileMode
    nowFn               clock.NowFn
    chunkWriter         chunkWriter
    chunkReserveHeader  []byte
    buffer              *bufio.Writer
    sizeBuffer          []byte
    seen                *bitset.BitSet
    logEncoder          *msgpack.Encoder
    logEncoderBuff      []byte
    metadataEncoderBuff []byte
    opts                Options
}

func newCommitLogWriter(
    flushFn flushFn,
    opts Options,
) commitLogWriter {
    shouldFsync := opts.Strategy() == StrategyWriteWait

    return &writer{
        filePathPrefix:      opts.FilesystemOptions().FilePathPrefix(),
        newFileMode:         opts.FilesystemOptions().NewFileMode(),
        newDirectoryMode:    opts.FilesystemOptions().NewDirectoryMode(),
        nowFn:               opts.ClockOptions().NowFn(),
        chunkWriter:         newChunkWriter(flushFn, shouldFsync),
        chunkReserveHeader:  make([]byte, chunkHeaderLen),
        buffer:              bufio.NewWriterSize(nil, opts.FlushSize()),
        sizeBuffer:          make([]byte, binary.MaxVarintLen64),
        seen:                bitset.NewBitSet(defaultBitSetLength),
        logEncoder:          msgpack.NewEncoder(),
        logEncoderBuff:      make([]byte, 0, defaultEncoderBuffSize),
        metadataEncoderBuff: make([]byte, 0, defaultEncoderBuffSize),
        opts:                opts,
    }
}
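
// Illustrative only, not part of the original source: how the configured strategy
// maps onto fsync behavior, assuming the Strategy values and the Options.SetStrategy
// setter defined elsewhere in this package.
//
//    opts = opts.SetStrategy(StrategyWriteWait)   // every chunk write is followed by fd.Sync()
//    opts = opts.SetStrategy(StrategyWriteBehind) // chunks are written without a per-write fsync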

func (w *writer) Open() (persist.CommitLogFile, error) {
    if w.isOpen() {
        return persist.CommitLogFile{}, errCommitLogWriterAlreadyOpen
    }

    // Reset buffers since they will grow 2x on demand, so we want to make sure that
    // one exceptionally large write does not cause them to remain oversized forever.
    if cap(w.logEncoderBuff) != defaultEncoderBuffSize {
        w.logEncoderBuff = make([]byte, 0, defaultEncoderBuffSize)
    }
    if cap(w.metadataEncoderBuff) != defaultEncoderBuffSize {
        w.metadataEncoderBuff = make([]byte, 0, defaultEncoderBuffSize)
    }

    commitLogsDir := fs.CommitLogsDirPath(w.filePathPrefix)
    if err := os.MkdirAll(commitLogsDir, w.newDirectoryMode); err != nil {
        return persist.CommitLogFile{}, err
    }

    filePath, index, err := NextFile(w.opts)
    if err != nil {
        return persist.CommitLogFile{}, err
    }
    logInfo := schema.LogInfo{
        Index: int64(index),
    }
    w.logEncoder.Reset()
    if err := w.logEncoder.EncodeLogInfo(logInfo); err != nil {
        return persist.CommitLogFile{}, err
    }
    fd, err := fs.OpenWritable(filePath, w.newFileMode)
    if err != nil {
        return persist.CommitLogFile{}, err
    }

    w.chunkWriter.reset(fd)
    w.buffer.Reset(w.chunkWriter)
    if err := w.write(w.logEncoder.Bytes()); err != nil {
        w.Close()
        return persist.CommitLogFile{}, err
    }

    return persist.CommitLogFile{
        FilePath: filePath,
        Index:    int64(index),
    }, nil
}

func (w *writer) isOpen() bool {
    return w.chunkWriter.isOpen()
}

func (w *writer) Write(
    series ts.Series,
    datapoint ts.Datapoint,
    unit xtime.Unit,
    annotation ts.Annotation,
) error {
    var logEntry schema.LogEntry
    logEntry.Create = w.nowFn().UnixNano()
    logEntry.Index = series.UniqueIndex

    seen := w.seen.Test(uint(series.UniqueIndex))
    if !seen {
        // If this series' unique index likely hasn't been written to this commit
        // log yet, we need to include the series metadata.
        var metadata schema.LogMetadata
        metadata.ID = series.ID.Bytes()
        metadata.Namespace = series.Namespace.Bytes()
        metadata.Shard = series.Shard
        metadata.EncodedTags = series.EncodedTags

        var err error
        w.metadataEncoderBuff, err = msgpack.EncodeLogMetadataFast(w.metadataEncoderBuff[:0], metadata)
        if err != nil {
            return err
        }
        logEntry.Metadata = w.metadataEncoderBuff
    }

    logEntry.Timestamp = int64(datapoint.TimestampNanos)
    logEntry.Value = datapoint.Value
    logEntry.Unit = uint32(unit)
    logEntry.Annotation = annotation

    var err error
    w.logEncoderBuff, err = msgpack.EncodeLogEntryFast(w.logEncoderBuff[:0], logEntry)
    if err != nil {
        return err
    }

    if err := w.write(w.logEncoderBuff); err != nil {
        return err
    }

    if !seen {
        // Record that we have written this series and its metadata to this commit log.
        w.seen.Set(uint(series.UniqueIndex))
    }
    return nil
}

func (w *writer) Flush(sync bool) error {
    err := w.buffer.Flush()
    if err != nil {
        return err
    }

    if !sync {
        return nil
    }

    return w.sync()
}

func (w *writer) setOnFlush(f func(err error)) {
    w.chunkWriter.setOnFlush(f)
}

func (w *writer) sync() error {
    return w.chunkWriter.sync()
}

func (w *writer) Close() error {
    if !w.isOpen() {
        return nil
    }

    if err := w.Flush(true); err != nil {
        return err
    }
    if err := w.chunkWriter.close(); err != nil {
        return err
    }

    w.seen.ClearAll()
    return nil
}
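
// Illustrative only, not part of the original source: the framing produced by
// write below. Each record is a uvarint length prefix followed by its msgpack
// payload, and a bufio flush boundary is what delimits a chunk on disk.
//
//    payload := encoded                                       // e.g. a 300-byte msgpack LogEntry
//    var sizeBuf [binary.MaxVarintLen64]byte
//    n := binary.PutUvarint(sizeBuf[:], uint64(len(payload))) // n == 2 for length 300
//    record := append(sizeBuf[:n], payload...)                // bytes handed to the bufio.Writer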

func (w *writer) write(data []byte) error {
    dataLen := len(data)
    sizeLen := binary.PutUvarint(w.sizeBuffer, uint64(dataLen))
    totalLen := sizeLen + dataLen

    // Avoid writing across a chunk (checksum) boundary if possible.
    if w.buffer.Buffered() > 0 && totalLen > w.buffer.Available() {
        if err := w.buffer.Flush(); err != nil {
            return err
        }
        return w.write(data)
    }

    // Write the size and then the data.
    if _, err := w.buffer.Write(w.sizeBuffer[:sizeLen]); err != nil {
        return err
    }
    _, err := w.buffer.Write(data)
    return err
}

type fsChunkWriter struct {
    fd      xos.File
    flushFn flushFn
    buff    []byte
    fsync   bool
}

func newChunkWriter(flushFn flushFn, fsync bool) chunkWriter {
    return &fsChunkWriter{
        flushFn: flushFn,
        buff:    make([]byte, chunkHeaderLen),
        fsync:   fsync,
    }
}

func (w *fsChunkWriter) reset(f xos.File) {
    w.fd = f
}

func (w *fsChunkWriter) setOnFlush(f func(err error)) {
    w.flushFn = f
}

func (w *fsChunkWriter) close() error {
    err := w.fd.Close()
    w.fd = nil
    return err
}

func (w *fsChunkWriter) isOpen() bool {
    return w.fd != nil
}

func (w *fsChunkWriter) sync() error {
    return w.fd.Sync()
}

// Write writes a chunk header in front of p to the file and returns the number of
// bytes of p successfully written. If the header or p is not fully written, it
// returns the number of bytes of p actually written along with an error explaining
// why the write failed.
func (w *fsChunkWriter) Write(p []byte) (int, error) {
    size := len(p)

    sizeStart, sizeEnd :=
        0, chunkHeaderSizeLen
    checksumSizeStart, checksumSizeEnd :=
        sizeEnd, sizeEnd+chunkHeaderSizeLen
    checksumDataStart, checksumDataEnd :=
        checksumSizeEnd, checksumSizeEnd+chunkHeaderChecksumDataLen

    // Write size
    endianness.PutUint32(w.buff[sizeStart:sizeEnd], uint32(size))

    // Calculate checksums
    checksumSize := digest.Checksum(w.buff[sizeStart:sizeEnd])
    checksumData := digest.Checksum(p)

    // Write checksums
    digest.
        Buffer(w.buff[checksumSizeStart:checksumSizeEnd]).
        WriteDigest(checksumSize)
    digest.
        Buffer(w.buff[checksumDataStart:checksumDataEnd]).
        WriteDigest(checksumData)

    // Combine buffers to reduce to a single syscall
    w.buff = append(w.buff[:chunkHeaderLen], p...)

    // Write contents to file descriptor
    n, err := w.fd.Write(w.buff)
    // Count bytes successfully written from slice p
    pBytesWritten := n - chunkHeaderLen
    if pBytesWritten < 0 {
        pBytesWritten = 0
    }

    if err != nil {
        w.flushFn(err)
        return pBytesWritten, err
    }

    // Fsync if required
    if w.fsync {
        err = w.sync()
    }

    // Fire the flush callback
    w.flushFn(err)
    return pBytesWritten, err
}
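
// Illustrative only, not part of the original source: the on-disk chunk layout
// produced by fsChunkWriter.Write, plus a minimal decode sketch. The assumptions
// here are that `buf` holds one whole chunk and that digest.ToBuffer/ReadDigest
// round-trips what Buffer.WriteDigest wrote above.
//
//    +-----------+-----------------------+-----------------------+---------+
//    | size (4B) | checksum of size (4B) | checksum of data (4B) | payload |
//    +-----------+-----------------------+-----------------------+---------+
//
//    size := endianness.Uint32(buf[0:4])
//    wantSizeDigest := digest.ToBuffer(buf[4:8]).ReadDigest()
//    wantDataDigest := digest.ToBuffer(buf[8:12]).ReadDigest()
//    payload := buf[chunkHeaderLen : chunkHeaderLen+int(size)]
//    ok := digest.Checksum(buf[0:4]) == wantSizeDigest &&
//        digest.Checksum(payload) == wantDataDigest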