github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/puller/sorter/file_backend.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package sorter 15 16 import ( 17 "bufio" 18 "encoding/binary" 19 "io" 20 "os" 21 "sync/atomic" 22 23 "github.com/pingcap/errors" 24 "github.com/pingcap/failpoint" 25 "github.com/pingcap/log" 26 "github.com/pingcap/ticdc/cdc/model" 27 cerrors "github.com/pingcap/ticdc/pkg/errors" 28 "go.uber.org/zap" 29 ) 30 31 const ( 32 fileBufferSize = 4 * 1024 // 4KB 33 fileMagic = 0x12345678 34 numFileEntriesOffset = 4 35 blockMagic = 0xbeefbeef 36 ) 37 38 var openFDCount int64 39 40 type fileBackEnd struct { 41 fileName string 42 serde serializerDeserializer 43 borrowed int32 44 size int64 45 } 46 47 func newFileBackEnd(fileName string, serde serializerDeserializer) (*fileBackEnd, error) { 48 f, err := os.Create(fileName) 49 if err != nil { 50 return nil, errors.Trace(wrapIOError(err)) 51 } 52 53 err = f.Close() 54 if err != nil { 55 return nil, errors.Trace(wrapIOError(err)) 56 } 57 58 log.Debug("new FileSorterBackEnd created", zap.String("filename", fileName)) 59 return &fileBackEnd{ 60 fileName: fileName, 61 serde: serde, 62 borrowed: 0, 63 }, nil 64 } 65 66 func (f *fileBackEnd) reader() (backEndReader, error) { 67 fd, err := os.OpenFile(f.fileName, os.O_RDWR, 0o644) 68 if err != nil { 69 return nil, errors.Trace(wrapIOError(err)) 70 } 71 72 atomic.AddInt64(&openFDCount, 1) 73 74 var totalSize int64 75 failpoint.Inject("sorterDebug", func() { 76 info, err := fd.Stat() 77 if err != nil { 78 failpoint.Return(nil, errors.Trace(wrapIOError(err))) 79 } 80 totalSize = info.Size() 81 }) 82 83 failpoint.Inject("sorterDebug", func() { 84 if atomic.SwapInt32(&f.borrowed, 1) != 0 { 85 log.Panic("fileBackEnd: already borrowed", zap.String("fileName", f.fileName)) 86 } 87 }) 88 89 ret := &fileBackEndReader{ 90 backEnd: f, 91 f: fd, 92 reader: bufio.NewReaderSize(fd, fileBufferSize), 93 totalSize: totalSize, 94 } 95 96 err = ret.readHeader() 97 if err != nil { 98 return nil, errors.Trace(wrapIOError(err)) 99 } 100 101 return ret, nil 102 } 103 104 func (f *fileBackEnd) writer() (backEndWriter, error) { 105 fd, err := os.OpenFile(f.fileName, os.O_TRUNC|os.O_RDWR, 0o644) 106 if err != nil { 107 return nil, errors.Trace(wrapIOError(err)) 108 } 109 110 atomic.AddInt64(&openFDCount, 1) 111 112 failpoint.Inject("sorterDebug", func() { 113 if atomic.SwapInt32(&f.borrowed, 1) != 0 { 114 log.Panic("fileBackEnd: already borrowed", zap.String("fileName", f.fileName)) 115 } 116 }) 117 118 ret := &fileBackEndWriter{ 119 backEnd: f, 120 f: fd, 121 writer: bufio.NewWriterSize(fd, fileBufferSize), 122 } 123 124 err = ret.writeFileHeader() 125 if err != nil { 126 return nil, errors.Trace(wrapIOError(err)) 127 } 128 129 return ret, nil 130 } 131 132 func (f *fileBackEnd) free() error { 133 failpoint.Inject("sorterDebug", func() { 134 if atomic.LoadInt32(&f.borrowed) != 0 { 135 log.Panic("fileBackEnd: trying to free borrowed file", zap.String("fileName", f.fileName)) 136 } 137 }) 138 139 log.Debug("Removing file", zap.String("file", f.fileName)) 140 141 f.cleanStats() 142 143 err := os.Remove(f.fileName) 144 if err != nil { 145 failpoint.Inject("sorterDebug", func() { 146 failpoint.Return(errors.Trace(wrapIOError(err))) 147 }) 148 // ignore this error in production to provide some resilience 149 log.Warn("fileBackEnd: failed to remove file", zap.Error(wrapIOError(err))) 150 } 151 152 return nil 153 } 154 155 func (f *fileBackEnd) cleanStats() { 156 if pool != nil { 157 atomic.AddInt64(&pool.onDiskDataSize, -f.size) 158 } 159 f.size = 0 160 } 161 162 type fileBackEndReader struct { 163 backEnd *fileBackEnd 164 f *os.File 165 reader *bufio.Reader 166 isEOF bool 167 168 // to prevent truncation-like corruption 169 totalEvents uint64 170 readEvents uint64 171 172 // debug only fields 173 readBytes int64 174 totalSize int64 175 } 176 177 func (r *fileBackEndReader) readHeader() error { 178 failpoint.Inject("sorterDebug", func() { 179 pos, err := r.f.Seek(0, 1 /* relative to the current position */) 180 if err != nil { 181 failpoint.Return(errors.Trace(err)) 182 } 183 // verify that we are reading from the beginning of the file 184 if pos != 0 { 185 log.Panic("unexpected file descriptor cursor position", zap.Int64("pos", pos)) 186 } 187 }) 188 189 var m uint32 190 err := binary.Read(r.reader, binary.LittleEndian, &m) 191 if err != nil { 192 return errors.Trace(err) 193 } 194 if m != fileMagic { 195 log.Panic("fileSorterBackEnd: wrong fileMagic. Damaged file or bug?", zap.Uint32("actual", m)) 196 } 197 198 err = binary.Read(r.reader, binary.LittleEndian, &r.totalEvents) 199 if err != nil { 200 return errors.Trace(err) 201 } 202 203 return nil 204 } 205 206 func (r *fileBackEndReader) readNext() (*model.PolymorphicEvent, error) { 207 if r.isEOF { 208 // guaranteed EOF idempotency 209 return nil, nil 210 } 211 212 var m uint32 213 err := binary.Read(r.reader, binary.LittleEndian, &m) 214 if err != nil { 215 if err == io.EOF { 216 r.isEOF = true 217 // verifies that the file has not been truncated unexpectedly. 218 if r.totalEvents != r.readEvents { 219 log.Panic("unexpected EOF", 220 zap.String("file", r.backEnd.fileName), 221 zap.Uint64("expected-num-events", r.totalEvents), 222 zap.Uint64("actual-num-events", r.readEvents)) 223 } 224 return nil, nil 225 } 226 return nil, errors.Trace(wrapIOError(err)) 227 } 228 229 if m != blockMagic { 230 log.Panic("fileSorterBackEnd: wrong blockMagic. Damaged file or bug?", zap.Uint32("actual", m)) 231 } 232 233 var size uint32 234 err = binary.Read(r.reader, binary.LittleEndian, &size) 235 if err != nil { 236 return nil, errors.Trace(wrapIOError(err)) 237 } 238 239 // Note, do not hold the buffer in reader to avoid hogging memory. 240 rawBytesBuf := make([]byte, size) 241 242 // short reads are possible with bufio, hence the need for io.ReadFull 243 n, err := io.ReadFull(r.reader, rawBytesBuf) 244 if err != nil { 245 return nil, errors.Trace(wrapIOError(err)) 246 } 247 248 if n != int(size) { 249 return nil, errors.Errorf("fileSorterBackEnd: expected %d bytes, actually read %d bytes", size, n) 250 } 251 252 event := new(model.PolymorphicEvent) 253 _, err = r.backEnd.serde.unmarshal(event, rawBytesBuf) 254 if err != nil { 255 return nil, errors.Trace(err) 256 } 257 258 r.readEvents++ 259 260 failpoint.Inject("sorterDebug", func() { 261 r.readBytes += int64(4 + 4 + int(size)) 262 if r.readBytes > r.totalSize { 263 log.Panic("fileSorterBackEnd: read more bytes than expected, check concurrent use of file", 264 zap.String("fileName", r.backEnd.fileName)) 265 } 266 }) 267 268 return event, nil 269 } 270 271 func (r *fileBackEndReader) resetAndClose() error { 272 defer func() { 273 // fail-fast for double-close 274 r.f = nil 275 276 r.backEnd.cleanStats() 277 278 failpoint.Inject("sorterDebug", func() { 279 atomic.StoreInt32(&r.backEnd.borrowed, 0) 280 }) 281 }() 282 283 if r.f == nil { 284 failpoint.Inject("sorterDebug", func() { 285 log.Panic("Double closing of file", zap.String("filename", r.backEnd.fileName)) 286 }) 287 log.Warn("Double closing of file", zap.String("filename", r.backEnd.fileName)) 288 return nil 289 } 290 291 err := r.f.Truncate(0) 292 if err != nil { 293 failpoint.Inject("sorterDebug", func() { 294 info, err1 := r.f.Stat() 295 if err1 != nil { 296 failpoint.Return(errors.Trace(wrapIOError(err))) 297 } 298 299 log.Info("file debug info", zap.String("filename", info.Name()), 300 zap.Int64("size", info.Size())) 301 302 failpoint.Return(nil) 303 }) 304 log.Warn("fileBackEndReader: could not truncate file", zap.Error(err)) 305 } 306 307 err = r.f.Close() 308 if err != nil { 309 failpoint.Inject("sorterDebug", func() { 310 failpoint.Return(errors.Trace(err)) 311 }) 312 log.Warn("fileBackEndReader: could not close file", zap.Error(err)) 313 return nil 314 } 315 316 atomic.AddInt64(&openFDCount, -1) 317 318 return nil 319 } 320 321 type fileBackEndWriter struct { 322 backEnd *fileBackEnd 323 f *os.File 324 writer *bufio.Writer 325 326 bytesWritten int64 327 eventsWritten int64 328 } 329 330 func (w *fileBackEndWriter) writeFileHeader() error { 331 err := binary.Write(w.writer, binary.LittleEndian, uint32(fileMagic)) 332 if err != nil { 333 return errors.Trace(err) 334 } 335 336 // reserves the space for writing the total number of entries in this file 337 err = binary.Write(w.writer, binary.LittleEndian, uint64(0)) 338 if err != nil { 339 return errors.Trace(err) 340 } 341 342 return nil 343 } 344 345 func (w *fileBackEndWriter) writeNext(event *model.PolymorphicEvent) error { 346 var err error 347 // Note, do not hold the buffer in writer to avoid hogging memory. 348 var rawBytesBuf []byte 349 rawBytesBuf, err = w.backEnd.serde.marshal(event, rawBytesBuf) 350 if err != nil { 351 return errors.Trace(wrapIOError(err)) 352 } 353 354 size := len(rawBytesBuf) 355 if size == 0 { 356 log.Panic("fileSorterBackEnd: serialized to empty byte array. Bug?") 357 } 358 359 err = binary.Write(w.writer, binary.LittleEndian, uint32(blockMagic)) 360 if err != nil { 361 return errors.Trace(wrapIOError(err)) 362 } 363 364 err = binary.Write(w.writer, binary.LittleEndian, uint32(size)) 365 if err != nil { 366 return errors.Trace(wrapIOError(err)) 367 } 368 369 // short writes are possible with bufio 370 offset := 0 371 for offset < size { 372 n, err := w.writer.Write(rawBytesBuf[offset:]) 373 if err != nil { 374 return errors.Trace(wrapIOError(err)) 375 } 376 offset += n 377 } 378 if offset != size { 379 return errors.Errorf("fileSorterBackEnd: expected to write %d bytes, actually wrote %d bytes", size, offset) 380 } 381 382 w.eventsWritten++ 383 w.bytesWritten += int64(size) 384 return nil 385 } 386 387 func (w *fileBackEndWriter) writtenCount() int { 388 return int(w.bytesWritten) 389 } 390 391 func (w *fileBackEndWriter) dataSize() uint64 { 392 return uint64(w.eventsWritten) 393 } 394 395 func (w *fileBackEndWriter) flushAndClose() error { 396 defer func() { 397 // fail-fast for double-close 398 w.f = nil 399 }() 400 401 err := w.writer.Flush() 402 if err != nil { 403 return errors.Trace(wrapIOError(err)) 404 } 405 406 _, err = w.f.Seek(numFileEntriesOffset, 0 /* relative to the beginning of the file */) 407 if err != nil { 408 return errors.Trace(wrapIOError(err)) 409 } 410 411 // write the total number of entries in the file to the header 412 err = binary.Write(w.f, binary.LittleEndian, uint64(w.eventsWritten)) 413 if err != nil { 414 return errors.Trace(wrapIOError(err)) 415 } 416 417 err = w.f.Close() 418 if err != nil { 419 failpoint.Inject("sorterDebug", func() { 420 failpoint.Return(errors.Trace(wrapIOError(err))) 421 }) 422 log.Warn("fileBackEndReader: could not close file", zap.Error(err)) 423 return nil 424 } 425 426 atomic.AddInt64(&openFDCount, -1) 427 w.backEnd.size = w.bytesWritten 428 atomic.AddInt64(&pool.onDiskDataSize, w.bytesWritten) 429 430 failpoint.Inject("sorterDebug", func() { 431 atomic.StoreInt32(&w.backEnd.borrowed, 0) 432 }) 433 434 return nil 435 } 436 437 // wrapIOError should be called when the error is to be returned to an caller outside this file and 438 // if the error could be caused by a filesystem-related error. 439 func wrapIOError(err error) error { 440 cause := errors.Cause(err) 441 switch cause.(type) { 442 case *os.PathError: 443 // We don't generate stack in this helper function to avoid confusion. 444 return cerrors.ErrUnifiedSorterIOError.FastGenByArgs(err.Error()) 445 default: 446 return err 447 } 448 }