github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/puller/sorter/file_backend.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package sorter
    15  
    16  import (
    17  	"bufio"
    18  	"encoding/binary"
    19  	"io"
    20  	"os"
    21  	"sync/atomic"
    22  
    23  	"github.com/pingcap/errors"
    24  	"github.com/pingcap/failpoint"
    25  	"github.com/pingcap/log"
    26  	"github.com/pingcap/ticdc/cdc/model"
    27  	cerrors "github.com/pingcap/ticdc/pkg/errors"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  const (
    32  	fileBufferSize       = 4 * 1024 // 4KB
    33  	fileMagic            = 0x12345678
    34  	numFileEntriesOffset = 4
    35  	blockMagic           = 0xbeefbeef
    36  )
    37  
    38  var openFDCount int64
    39  
    40  type fileBackEnd struct {
    41  	fileName string
    42  	serde    serializerDeserializer
    43  	borrowed int32
    44  	size     int64
    45  }
    46  
    47  func newFileBackEnd(fileName string, serde serializerDeserializer) (*fileBackEnd, error) {
    48  	f, err := os.Create(fileName)
    49  	if err != nil {
    50  		return nil, errors.Trace(wrapIOError(err))
    51  	}
    52  
    53  	err = f.Close()
    54  	if err != nil {
    55  		return nil, errors.Trace(wrapIOError(err))
    56  	}
    57  
    58  	log.Debug("new FileSorterBackEnd created", zap.String("filename", fileName))
    59  	return &fileBackEnd{
    60  		fileName: fileName,
    61  		serde:    serde,
    62  		borrowed: 0,
    63  	}, nil
    64  }
    65  
    66  func (f *fileBackEnd) reader() (backEndReader, error) {
    67  	fd, err := os.OpenFile(f.fileName, os.O_RDWR, 0o644)
    68  	if err != nil {
    69  		return nil, errors.Trace(wrapIOError(err))
    70  	}
    71  
    72  	atomic.AddInt64(&openFDCount, 1)
    73  
    74  	var totalSize int64
    75  	failpoint.Inject("sorterDebug", func() {
    76  		info, err := fd.Stat()
    77  		if err != nil {
    78  			failpoint.Return(nil, errors.Trace(wrapIOError(err)))
    79  		}
    80  		totalSize = info.Size()
    81  	})
    82  
    83  	failpoint.Inject("sorterDebug", func() {
    84  		if atomic.SwapInt32(&f.borrowed, 1) != 0 {
    85  			log.Panic("fileBackEnd: already borrowed", zap.String("fileName", f.fileName))
    86  		}
    87  	})
    88  
    89  	ret := &fileBackEndReader{
    90  		backEnd:   f,
    91  		f:         fd,
    92  		reader:    bufio.NewReaderSize(fd, fileBufferSize),
    93  		totalSize: totalSize,
    94  	}
    95  
    96  	err = ret.readHeader()
    97  	if err != nil {
    98  		return nil, errors.Trace(wrapIOError(err))
    99  	}
   100  
   101  	return ret, nil
   102  }
   103  
   104  func (f *fileBackEnd) writer() (backEndWriter, error) {
   105  	fd, err := os.OpenFile(f.fileName, os.O_TRUNC|os.O_RDWR, 0o644)
   106  	if err != nil {
   107  		return nil, errors.Trace(wrapIOError(err))
   108  	}
   109  
   110  	atomic.AddInt64(&openFDCount, 1)
   111  
   112  	failpoint.Inject("sorterDebug", func() {
   113  		if atomic.SwapInt32(&f.borrowed, 1) != 0 {
   114  			log.Panic("fileBackEnd: already borrowed", zap.String("fileName", f.fileName))
   115  		}
   116  	})
   117  
   118  	ret := &fileBackEndWriter{
   119  		backEnd: f,
   120  		f:       fd,
   121  		writer:  bufio.NewWriterSize(fd, fileBufferSize),
   122  	}
   123  
   124  	err = ret.writeFileHeader()
   125  	if err != nil {
   126  		return nil, errors.Trace(wrapIOError(err))
   127  	}
   128  
   129  	return ret, nil
   130  }
   131  
   132  func (f *fileBackEnd) free() error {
   133  	failpoint.Inject("sorterDebug", func() {
   134  		if atomic.LoadInt32(&f.borrowed) != 0 {
   135  			log.Panic("fileBackEnd: trying to free borrowed file", zap.String("fileName", f.fileName))
   136  		}
   137  	})
   138  
   139  	log.Debug("Removing file", zap.String("file", f.fileName))
   140  
   141  	f.cleanStats()
   142  
   143  	err := os.Remove(f.fileName)
   144  	if err != nil {
   145  		failpoint.Inject("sorterDebug", func() {
   146  			failpoint.Return(errors.Trace(wrapIOError(err)))
   147  		})
   148  		// ignore this error in production to provide some resilience
   149  		log.Warn("fileBackEnd: failed to remove file", zap.Error(wrapIOError(err)))
   150  	}
   151  
   152  	return nil
   153  }
   154  
   155  func (f *fileBackEnd) cleanStats() {
   156  	if pool != nil {
   157  		atomic.AddInt64(&pool.onDiskDataSize, -f.size)
   158  	}
   159  	f.size = 0
   160  }
   161  
   162  type fileBackEndReader struct {
   163  	backEnd *fileBackEnd
   164  	f       *os.File
   165  	reader  *bufio.Reader
   166  	isEOF   bool
   167  
   168  	// to prevent truncation-like corruption
   169  	totalEvents uint64
   170  	readEvents  uint64
   171  
   172  	// debug only fields
   173  	readBytes int64
   174  	totalSize int64
   175  }
   176  
   177  func (r *fileBackEndReader) readHeader() error {
   178  	failpoint.Inject("sorterDebug", func() {
   179  		pos, err := r.f.Seek(0, 1 /* relative to the current position */)
   180  		if err != nil {
   181  			failpoint.Return(errors.Trace(err))
   182  		}
   183  		// verify that we are reading from the beginning of the file
   184  		if pos != 0 {
   185  			log.Panic("unexpected file descriptor cursor position", zap.Int64("pos", pos))
   186  		}
   187  	})
   188  
   189  	var m uint32
   190  	err := binary.Read(r.reader, binary.LittleEndian, &m)
   191  	if err != nil {
   192  		return errors.Trace(err)
   193  	}
   194  	if m != fileMagic {
   195  		log.Panic("fileSorterBackEnd: wrong fileMagic. Damaged file or bug?", zap.Uint32("actual", m))
   196  	}
   197  
   198  	err = binary.Read(r.reader, binary.LittleEndian, &r.totalEvents)
   199  	if err != nil {
   200  		return errors.Trace(err)
   201  	}
   202  
   203  	return nil
   204  }
   205  
   206  func (r *fileBackEndReader) readNext() (*model.PolymorphicEvent, error) {
   207  	if r.isEOF {
   208  		// guaranteed EOF idempotency
   209  		return nil, nil
   210  	}
   211  
   212  	var m uint32
   213  	err := binary.Read(r.reader, binary.LittleEndian, &m)
   214  	if err != nil {
   215  		if err == io.EOF {
   216  			r.isEOF = true
   217  			// verifies that the file has not been truncated unexpectedly.
   218  			if r.totalEvents != r.readEvents {
   219  				log.Panic("unexpected EOF",
   220  					zap.String("file", r.backEnd.fileName),
   221  					zap.Uint64("expected-num-events", r.totalEvents),
   222  					zap.Uint64("actual-num-events", r.readEvents))
   223  			}
   224  			return nil, nil
   225  		}
   226  		return nil, errors.Trace(wrapIOError(err))
   227  	}
   228  
   229  	if m != blockMagic {
   230  		log.Panic("fileSorterBackEnd: wrong blockMagic. Damaged file or bug?", zap.Uint32("actual", m))
   231  	}
   232  
   233  	var size uint32
   234  	err = binary.Read(r.reader, binary.LittleEndian, &size)
   235  	if err != nil {
   236  		return nil, errors.Trace(wrapIOError(err))
   237  	}
   238  
   239  	// Note, do not hold the buffer in reader to avoid hogging memory.
   240  	rawBytesBuf := make([]byte, size)
   241  
   242  	// short reads are possible with bufio, hence the need for io.ReadFull
   243  	n, err := io.ReadFull(r.reader, rawBytesBuf)
   244  	if err != nil {
   245  		return nil, errors.Trace(wrapIOError(err))
   246  	}
   247  
   248  	if n != int(size) {
   249  		return nil, errors.Errorf("fileSorterBackEnd: expected %d bytes, actually read %d bytes", size, n)
   250  	}
   251  
   252  	event := new(model.PolymorphicEvent)
   253  	_, err = r.backEnd.serde.unmarshal(event, rawBytesBuf)
   254  	if err != nil {
   255  		return nil, errors.Trace(err)
   256  	}
   257  
   258  	r.readEvents++
   259  
   260  	failpoint.Inject("sorterDebug", func() {
   261  		r.readBytes += int64(4 + 4 + int(size))
   262  		if r.readBytes > r.totalSize {
   263  			log.Panic("fileSorterBackEnd: read more bytes than expected, check concurrent use of file",
   264  				zap.String("fileName", r.backEnd.fileName))
   265  		}
   266  	})
   267  
   268  	return event, nil
   269  }
   270  
   271  func (r *fileBackEndReader) resetAndClose() error {
   272  	defer func() {
   273  		// fail-fast for double-close
   274  		r.f = nil
   275  
   276  		r.backEnd.cleanStats()
   277  
   278  		failpoint.Inject("sorterDebug", func() {
   279  			atomic.StoreInt32(&r.backEnd.borrowed, 0)
   280  		})
   281  	}()
   282  
   283  	if r.f == nil {
   284  		failpoint.Inject("sorterDebug", func() {
   285  			log.Panic("Double closing of file", zap.String("filename", r.backEnd.fileName))
   286  		})
   287  		log.Warn("Double closing of file", zap.String("filename", r.backEnd.fileName))
   288  		return nil
   289  	}
   290  
   291  	err := r.f.Truncate(0)
   292  	if err != nil {
   293  		failpoint.Inject("sorterDebug", func() {
   294  			info, err1 := r.f.Stat()
   295  			if err1 != nil {
   296  				failpoint.Return(errors.Trace(wrapIOError(err)))
   297  			}
   298  
   299  			log.Info("file debug info", zap.String("filename", info.Name()),
   300  				zap.Int64("size", info.Size()))
   301  
   302  			failpoint.Return(nil)
   303  		})
   304  		log.Warn("fileBackEndReader: could not truncate file", zap.Error(err))
   305  	}
   306  
   307  	err = r.f.Close()
   308  	if err != nil {
   309  		failpoint.Inject("sorterDebug", func() {
   310  			failpoint.Return(errors.Trace(err))
   311  		})
   312  		log.Warn("fileBackEndReader: could not close file", zap.Error(err))
   313  		return nil
   314  	}
   315  
   316  	atomic.AddInt64(&openFDCount, -1)
   317  
   318  	return nil
   319  }
   320  
   321  type fileBackEndWriter struct {
   322  	backEnd *fileBackEnd
   323  	f       *os.File
   324  	writer  *bufio.Writer
   325  
   326  	bytesWritten  int64
   327  	eventsWritten int64
   328  }
   329  
   330  func (w *fileBackEndWriter) writeFileHeader() error {
   331  	err := binary.Write(w.writer, binary.LittleEndian, uint32(fileMagic))
   332  	if err != nil {
   333  		return errors.Trace(err)
   334  	}
   335  
   336  	// reserves the space for writing the total number of entries in this file
   337  	err = binary.Write(w.writer, binary.LittleEndian, uint64(0))
   338  	if err != nil {
   339  		return errors.Trace(err)
   340  	}
   341  
   342  	return nil
   343  }
   344  
   345  func (w *fileBackEndWriter) writeNext(event *model.PolymorphicEvent) error {
   346  	var err error
   347  	// Note, do not hold the buffer in writer to avoid hogging memory.
   348  	var rawBytesBuf []byte
   349  	rawBytesBuf, err = w.backEnd.serde.marshal(event, rawBytesBuf)
   350  	if err != nil {
   351  		return errors.Trace(wrapIOError(err))
   352  	}
   353  
   354  	size := len(rawBytesBuf)
   355  	if size == 0 {
   356  		log.Panic("fileSorterBackEnd: serialized to empty byte array. Bug?")
   357  	}
   358  
   359  	err = binary.Write(w.writer, binary.LittleEndian, uint32(blockMagic))
   360  	if err != nil {
   361  		return errors.Trace(wrapIOError(err))
   362  	}
   363  
   364  	err = binary.Write(w.writer, binary.LittleEndian, uint32(size))
   365  	if err != nil {
   366  		return errors.Trace(wrapIOError(err))
   367  	}
   368  
   369  	// short writes are possible with bufio
   370  	offset := 0
   371  	for offset < size {
   372  		n, err := w.writer.Write(rawBytesBuf[offset:])
   373  		if err != nil {
   374  			return errors.Trace(wrapIOError(err))
   375  		}
   376  		offset += n
   377  	}
   378  	if offset != size {
   379  		return errors.Errorf("fileSorterBackEnd: expected to write %d bytes, actually wrote %d bytes", size, offset)
   380  	}
   381  
   382  	w.eventsWritten++
   383  	w.bytesWritten += int64(size)
   384  	return nil
   385  }
   386  
   387  func (w *fileBackEndWriter) writtenCount() int {
   388  	return int(w.bytesWritten)
   389  }
   390  
   391  func (w *fileBackEndWriter) dataSize() uint64 {
   392  	return uint64(w.eventsWritten)
   393  }
   394  
   395  func (w *fileBackEndWriter) flushAndClose() error {
   396  	defer func() {
   397  		// fail-fast for double-close
   398  		w.f = nil
   399  	}()
   400  
   401  	err := w.writer.Flush()
   402  	if err != nil {
   403  		return errors.Trace(wrapIOError(err))
   404  	}
   405  
   406  	_, err = w.f.Seek(numFileEntriesOffset, 0 /* relative to the beginning of the file */)
   407  	if err != nil {
   408  		return errors.Trace(wrapIOError(err))
   409  	}
   410  
   411  	// write the total number of entries in the file to the header
   412  	err = binary.Write(w.f, binary.LittleEndian, uint64(w.eventsWritten))
   413  	if err != nil {
   414  		return errors.Trace(wrapIOError(err))
   415  	}
   416  
   417  	err = w.f.Close()
   418  	if err != nil {
   419  		failpoint.Inject("sorterDebug", func() {
   420  			failpoint.Return(errors.Trace(wrapIOError(err)))
   421  		})
   422  		log.Warn("fileBackEndReader: could not close file", zap.Error(err))
   423  		return nil
   424  	}
   425  
   426  	atomic.AddInt64(&openFDCount, -1)
   427  	w.backEnd.size = w.bytesWritten
   428  	atomic.AddInt64(&pool.onDiskDataSize, w.bytesWritten)
   429  
   430  	failpoint.Inject("sorterDebug", func() {
   431  		atomic.StoreInt32(&w.backEnd.borrowed, 0)
   432  	})
   433  
   434  	return nil
   435  }
   436  
   437  // wrapIOError should be called when the error is to be returned to an caller outside this file and
   438  // if the error could be caused by a filesystem-related error.
   439  func wrapIOError(err error) error {
   440  	cause := errors.Cause(err)
   441  	switch cause.(type) {
   442  	case *os.PathError:
   443  		// We don't generate stack in this helper function to avoid confusion.
   444  		return cerrors.ErrUnifiedSorterIOError.FastGenByArgs(err.Error())
   445  	default:
   446  		return err
   447  	}
   448  }