github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/record/log_writer.go

// Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package record

import (
	"context"
	"encoding/binary"
	"io"
	"runtime/pprof"
	"sync"
	"sync/atomic"
	"time"

	"github.com/zuoyebang/bitalosdb/internal/base"
	"github.com/zuoyebang/bitalosdb/internal/crc"

	"github.com/cockroachdb/errors"
)

var walSyncLabels = pprof.Labels("bitalosdb", "wal-sync")

type block struct {
	// buf[:written] has already been filled with fragments. Updated atomically.
	written int32
	// buf[:flushed] has already been flushed to w.
	flushed int32
	buf     [blockSize]byte
}
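
// An illustrative sketch of the invariants (not part of the original file):
// 0 <= flushed <= written <= blockSize always holds. The record writer
// advances written atomically as fragments are appended, while the flush
// loop writes buf[flushed:written] to the underlying writer and then
// advances flushed, so the two indexes chase each other through the buffer:
//
//	buf: [ already flushed | written but unflushed | free ]
//	                       ^flushed                ^written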

type flusher interface {
	Flush() error
}

type syncer interface {
	Sync() error
}

const (
	syncConcurrencyBits = 9

	// SyncConcurrency is the maximum number of concurrent sync operations that
	// can be performed. Note that a sync operation is initiated either by a call
	// to SyncRecord or by a call to Close. Exported as this value also limits
	// the commit concurrency in commitPipeline.
	SyncConcurrency = 1 << syncConcurrencyBits
)

type syncSlot struct {
	wg  *sync.WaitGroup
	err *error
}

// syncQueue is a lock-free fixed-size single-producer, single-consumer
// queue. The single producer can push to the head, and the single consumer can
// pop multiple values from the tail. Popping calls Done() on each of the
// available *sync.WaitGroup elements.
type syncQueue struct {
	// headTail packs together a 32-bit head index and a 32-bit tail index. Both
	// are indexes into slots, masked with len(slots)-1 (len(slots) is a power
	// of two, so this is equivalent to modulo len(slots)).
	//
	// tail = index of oldest data in queue
	// head = index of next slot to fill
	//
	// Slots in the range [tail, head) are owned by consumers.  A consumer
	// continues to own a slot outside this range until it nils the slot, at
	// which point ownership passes to the producer.
	//
	// The head index is stored in the most-significant bits so that we can
	// atomically add to it and the overflow is harmless.
	headTail uint64

	// slots is a ring buffer of values stored in this queue. The size must be a
	// power of 2. A slot is in use until the tail index has moved beyond it.
	slots [SyncConcurrency]syncSlot

	// blocked is an atomic boolean which indicates whether syncing is currently
	// blocked or can proceed. It is used by the implementation of
	// min-sync-interval to block syncing until the min interval has passed.
	blocked uint32
}

const dequeueBits = 32

func (q *syncQueue) unpack(ptrs uint64) (head, tail uint32) {
	const mask = 1<<dequeueBits - 1
	head = uint32((ptrs >> dequeueBits) & mask)
	tail = uint32(ptrs & mask)
	return
}
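
// packHeadTail is the inverse of unpack. It is not used by the writer and is
// included only as an illustrative sketch of the encoding: the head index
// occupies the high 32 bits and the tail index the low 32 bits, so e.g.
// head=5, tail=3 packs to 5<<32 | 3. Pushing adds 1<<dequeueBits (advancing
// head) and popping adds 1 (advancing tail); both wrap harmlessly because
// slot indexes are always masked with len(slots)-1.
func packHeadTail(head, tail uint32) uint64 {
	return uint64(head)<<dequeueBits | uint64(tail)
}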

func (q *syncQueue) push(wg *sync.WaitGroup, err *error) {
	ptrs := atomic.LoadUint64(&q.headTail)
	head, tail := q.unpack(ptrs)
	if (tail+uint32(len(q.slots)))&(1<<dequeueBits-1) == head {
		panic("bitalosdb: queue is full")
	}

	slot := &q.slots[head&uint32(len(q.slots)-1)]
	slot.wg = wg
	slot.err = err

	// Increment head. This passes ownership of slot to dequeue and acts as a
	// store barrier for writing the slot.
	atomic.AddUint64(&q.headTail, 1<<dequeueBits)
}

func (q *syncQueue) setBlocked() {
	atomic.StoreUint32(&q.blocked, 1)
}

func (q *syncQueue) clearBlocked() {
	atomic.StoreUint32(&q.blocked, 0)
}

func (q *syncQueue) empty() bool {
	head, tail := q.load()
	return head == tail
}

func (q *syncQueue) load() (head, tail uint32) {
	if atomic.LoadUint32(&q.blocked) == 1 {
		return 0, 0
	}

	ptrs := atomic.LoadUint64(&q.headTail)
	head, tail = q.unpack(ptrs)
	return head, tail
}

func (q *syncQueue) pop(head, tail uint32, err error) error {
	if tail == head {
		// Queue is empty.
		return nil
	}

	for ; tail != head; tail++ {
		slot := &q.slots[tail&uint32(len(q.slots)-1)]
		wg := slot.wg
		if wg == nil {
			return errors.Errorf("nil waiter at %d", errors.Safe(tail&uint32(len(q.slots)-1)))
		}
		*slot.err = err
		slot.wg = nil
		slot.err = nil
		// We need to bump the tail count before signalling the wait group, as
		// signalling the wait group can release a blocked goroutine which will
		// try to enqueue before we've "freed" space in the queue.
		atomic.AddUint64(&q.headTail, 1)
		wg.Done()
	}

	return nil
}
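
// A sketch of the intended single-producer/single-consumer protocol
// (illustrative only; in this file the producer side lives in
// LogWriter.SyncRecord and the consumer side in flushLoop/flushPending):
//
//	var wg sync.WaitGroup
//	var syncErr error
//	wg.Add(1)
//	q.push(&wg, &syncErr)      // producer: enqueue a sync waiter
//
//	head, tail := q.load()     // consumer: snapshot the waiter window
//	_ = q.pop(head, tail, nil) // calls wg.Done() on each popped waiter
//
//	wg.Wait()                  // producer: returns once its waiter is popped
//	_ = syncErr                // receives the sync error, if any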

// flusherCond is a specialized condition variable that allows its condition to
// change and readiness to be signalled without holding its associated mutex.
// In particular, when a waiter is added to syncQueue atomically, this condition
// variable can be signalled without holding flusher.Mutex.
type flusherCond struct {
	mu   *sync.Mutex
	q    *syncQueue
	cond sync.Cond
}

func (c *flusherCond) init(mu *sync.Mutex, q *syncQueue) {
	c.mu = mu
	c.q = q
	// Yes, this is a bit circular, but that is intentional. flusherCond.cond.L
	// points to flusherCond so that when cond.L.Unlock is called,
	// flusherCond.Unlock will be called and we can check the !syncQueue.empty()
	// condition.
	c.cond.L = c
}

func (c *flusherCond) Signal() {
	// Pass-through to the cond var.
	c.cond.Signal()
}

func (c *flusherCond) Wait() {
	// Pass-through to the cond var. Note that internally the cond var implements
	// Wait as:
	//
	//   t := notifyListAdd()
	//   L.Unlock()
	//   notifyListWait(t)
	//   L.Lock()
	//
	// We've configured the cond var to call flusherCond.Unlock() which allows
	// us to check the !syncQueue.empty() condition without a danger of missing a
	// notification. Any call to flusherCond.Signal() after notifyListAdd() is
	// called will cause the subsequent notifyListWait() to return immediately.
	c.cond.Wait()
}

func (c *flusherCond) Lock() {
	c.mu.Lock()
}

func (c *flusherCond) Unlock() {
	c.mu.Unlock()
	if !c.q.empty() {
		// If the current goroutine is about to block on sync.Cond.Wait, this call
		// to Signal will prevent that. The comment in Wait above explains a bit
		// about what is going on here, but it is worth reiterating:
		//
		//   flusherCond.Wait()
		//     sync.Cond.Wait()
		//       t := notifyListAdd()
		//       flusherCond.Unlock()    <-- we are here
		//       notifyListWait(t)
		//       flusherCond.Lock()
		//
		// The call to Signal here results in:
		//
		//     sync.Cond.Signal()
		//       notifyListNotifyOne()
		//
		// The call to notifyListNotifyOne() will prevent the call to
		// notifyListWait(t) from blocking.
		c.cond.Signal()
	}
}
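
// The consumer side uses the standard condition-variable pattern; the twist
// is that a producer can make the condition true (by pushing onto syncQ)
// without holding the mutex, relying on the Unlock hook above to deliver the
// wakeup. A sketch (workAvailable is a placeholder, not a function in this
// file):
//
//	c.Lock()
//	for !workAvailable() {
//		c.Wait()
//	}
//	// ... consume the work ...
//	c.Unlock()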

type durationFunc func() time.Duration

// syncTimer is an interface for timers, modeled on the closure callback mode
// of time.Timer. See time.AfterFunc and LogWriter.afterFunc. syncTimer is used
// by tests to mock out the timer functionality used to implement
// min-sync-interval.
type syncTimer interface {
	Reset(time.Duration) bool
	Stop() bool
}

// LogWriter writes records to an underlying io.Writer. In order to support WAL
// file reuse, a LogWriter's records are tagged with the WAL's file
// number. When reading a log file, a record from a previous incarnation of the
// file will return the error ErrInvalidLogNum.
type LogWriter struct {
	// w is the underlying writer.
	w io.Writer
	// c is w as a closer.
	c io.Closer
	// s is w as a syncer.
	s syncer
	// logNum is the low 32-bits of the log's file number.
	logNum uint32
	// blockNum is the zero-based block number for the current block.
	blockNum int64
	// err is any accumulated error.
	err error
	// block is the current block being written. Protected by flusher.Mutex.
	block *block
	free  struct {
		sync.Mutex
		// Condition variable used to signal that a block has been freed.
		cond      sync.Cond
		blocks    []*block
		allocated int
	}

	flusher struct {
		sync.Mutex
		// ready is a condition variable that is signalled when there are
		// blocks to flush, syncing has been requested, or the LogWriter has been
		// closed. For signalling of a sync, it is safe to call without holding
		// flusher.Mutex.
		ready flusherCond
		// Set to true when the flush loop should be closed.
		close bool
		// Closed when the flush loop has terminated.
		closed chan struct{}
		// Accumulated flush error.
		err error
		// minSyncInterval is the minimum duration between syncs.
		minSyncInterval durationFunc
		pending         []*block
		syncQ           syncQueue
	}

	// afterFunc is a hook to allow tests to mock out the timer functionality
	// used for min-sync-interval. In normal operation this points to
	// time.AfterFunc.
	afterFunc func(d time.Duration, f func()) syncTimer
}

// NewLogWriter returns a new LogWriter.
func NewLogWriter(w io.Writer, logNum base.FileNum) *LogWriter {
	c, _ := w.(io.Closer)
	s, _ := w.(syncer)
	r := &LogWriter{
		w: w,
		c: c,
		s: s,
		// NB: we truncate the 64-bit log number to 32-bits. This is ok because a)
		// we are very unlikely to reach a file number of 4 billion and b) the log
		// number is used as a validation check and using only the low 32-bits is
		// sufficient for that purpose.
		logNum: uint32(logNum),
		afterFunc: func(d time.Duration, f func()) syncTimer {
			return time.AfterFunc(d, f)
		},
	}
	r.free.cond.L = &r.free.Mutex
	r.free.blocks = make([]*block, 0, 16)
	r.free.allocated = 1
	r.block = &block{}
	r.flusher.ready.init(&r.flusher.Mutex, &r.flusher.syncQ)
	r.flusher.closed = make(chan struct{})
	r.flusher.pending = make([]*block, 0, cap(r.free.blocks))
	go func() {
		pprof.Do(context.Background(), walSyncLabels, r.flushLoop)
	}()
	return r
}
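
// A minimal usage sketch (illustrative; the file name and elided error
// handling are assumptions, not taken from this package):
//
//	f, _ := os.Create("000001.log")
//	w := NewLogWriter(f, base.FileNum(1))
//	offset, _ := w.WriteRecord([]byte("hello"))
//	_ = offset    // offset just past the end of the record
//	_ = w.Close() // flushes, syncs, and closes f
//
// Since *os.File implements io.Closer and has a Sync method, both the c and
// s fields of the returned LogWriter are populated in this example.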

// SetMinSyncInterval sets the closure to invoke for retrieving the minimum
// sync duration between syncs.
func (w *LogWriter) SetMinSyncInterval(minSyncInterval durationFunc) {
	f := &w.flusher
	f.Lock()
	f.minSyncInterval = minSyncInterval
	f.Unlock()
}
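
// For example, to space syncs at least one millisecond apart (an
// illustrative value, not a recommendation from this package):
//
//	w.SetMinSyncInterval(func() time.Duration {
//		return time.Millisecond
//	})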

func (w *LogWriter) flushLoop(context.Context) {
	f := &w.flusher
	f.Lock()

	var syncTimer syncTimer
	defer func() {
		if syncTimer != nil {
			syncTimer.Stop()
		}
		close(f.closed)
		f.Unlock()
	}()

	// The flush loop performs flushing of full and partial data blocks to the
	// underlying writer (LogWriter.w), syncing of the writer, and notification
	// to sync requests that they have completed.
	//
	// - flusher.ready is a condition variable that is signalled when there is
	//   work to do. Full blocks are contained in flusher.pending. The current
	//   partial block is in LogWriter.block. And sync operations are held in
	//   flusher.syncQ.
	//
	// - The decision to sync is determined by whether there are any sync
	//   requests present in flusher.syncQ and whether enough time has elapsed
	//   since the last sync. If not enough time has elapsed since the last sync,
	//   flusher.syncQ.blocked will be set to 1. If syncing is blocked,
	//   syncQueue.empty() will return true and syncQueue.load() will return 0,0
	//   (i.e. an empty list).
	//
	// - flusher.syncQ.blocked is cleared by a timer that is initialized when
	//   blocked is set to 1. When blocked is 1, no syncing will take place, but
	//   flushing will continue to be performed. The on/off toggle for syncing
	//   does not need to be carefully synchronized with the rest of processing
	//   -- all we need to ensure is that after any transition to blocked=1 there
	//   is eventually a transition to blocked=0. syncTimer performs this
	//   transition. Note that any change to min-sync-interval will not take
	//   effect until the previous timer elapses.
	//
	// - Picking up the syncing work to perform requires coordination with
	//   picking up the flushing work. Specifically, flushing work is queued
	//   before syncing work. The guarantee of this code is that when a sync is
	//   requested, any previously queued flush work will be synced. This
	//   motivates reading the syncing work (f.syncQ.load()) before picking up
	//   the flush work (atomic.LoadInt32(&w.block.written)).

	// The list of full blocks that need to be written. This is copied from
	// f.pending on every loop iteration, though the number of elements is small
	// (usually 1, max 16).
	pending := make([]*block, 0, cap(f.pending))

	for {
		for {
			// Grab the portion of the current block that requires flushing. Note that
			// the current block can be added to the pending blocks list after we release
			// the flusher lock, but it won't be part of pending.
			written := atomic.LoadInt32(&w.block.written)
			if len(f.pending) > 0 || written > w.block.flushed || !f.syncQ.empty() {
				break
			}
			if f.close {
				// If the writer is closed, pretend the sync timer fired immediately so
				// that we can process any queued sync requests.
				f.syncQ.clearBlocked()
				if !f.syncQ.empty() {
					break
				}
				return
			}
			f.ready.Wait()
			continue
		}

		pending = pending[:len(f.pending)]
		copy(pending, f.pending)
		f.pending = f.pending[:0]

		// Grab the list of sync waiters. Note that syncQueue.load() will return
		// 0,0 while we're waiting for the min-sync-interval to expire. This
		// allows flushing to proceed even if we're not ready to sync.
		head, tail := f.syncQ.load()

		// Grab the portion of the current block that requires flushing. Note that
		// the current block can be added to the pending blocks list after we
		// release the flusher lock, but it won't be part of pending. This has to
		// be ordered after we get the list of sync waiters from syncQ in order to
		// prevent a race where a waiter adds itself to syncQ, but this thread
		// picks up the entry in syncQ and not the buffered data.
		written := atomic.LoadInt32(&w.block.written)
		data := w.block.buf[w.block.flushed:written]
		w.block.flushed = written

		// If flusher has an error, we propagate it to waiters. Note in spite of
		// error we consume the pending list above to free blocks for writers.
		if f.err != nil {
			f.syncQ.pop(head, tail, f.err)
			continue
		}
		f.Unlock()
		synced, err := w.flushPending(data, pending, head, tail)
		f.Lock()
		f.err = err
		if f.err != nil {
			f.syncQ.clearBlocked()
			continue
		}

		if synced && f.minSyncInterval != nil {
			// A sync was performed. Make sure we've waited for the min sync
			// interval before syncing again.
			if min := f.minSyncInterval(); min > 0 {
				f.syncQ.setBlocked()
				if syncTimer == nil {
					syncTimer = w.afterFunc(min, func() {
						f.syncQ.clearBlocked()
						f.ready.Signal()
					})
				} else {
					syncTimer.Reset(min)
				}
			}
		}
	}
}

func (w *LogWriter) flushPending(
	data []byte, pending []*block, head, tail uint32,
) (synced bool, err error) {
	defer func() {
		// Translate panics into errors. The errors will cause flushLoop to shut
		// down, but allows us to do so in a controlled way and avoid swallowing
		// the stack that created the panic if panic'ing itself hits a panic
		// (e.g. unlock of unlocked mutex).
		if r := recover(); r != nil {
			err = errors.Newf("%v", r)
		}
	}()

	for _, b := range pending {
		if err = w.flushBlock(b); err != nil {
			break
		}
	}
	if err == nil && len(data) > 0 {
		_, err = w.w.Write(data)
	}

	synced = head != tail
	if synced {
		if err == nil && w.s != nil {
			err = w.s.Sync()
		}
		f := &w.flusher
		if popErr := f.syncQ.pop(head, tail, err); popErr != nil {
			return synced, popErr
		}
	}

	return synced, err
}

func (w *LogWriter) flushBlock(b *block) error {
	if _, err := w.w.Write(b.buf[b.flushed:]); err != nil {
		return err
	}
	b.written = 0
	b.flushed = 0
	w.free.Lock()
	w.free.blocks = append(w.free.blocks, b)
	w.free.cond.Signal()
	w.free.Unlock()
	return nil
}

// queueBlock queues the current block for writing to the underlying writer,
// allocates a new block and reserves space for the next header.
func (w *LogWriter) queueBlock() {
	// Allocate a new block, blocking until one is available. We do this first
	// because w.block is protected by w.flusher.Mutex.
	w.free.Lock()
	if len(w.free.blocks) == 0 {
		if w.free.allocated < cap(w.free.blocks) {
			w.free.allocated++
			w.free.blocks = append(w.free.blocks, &block{})
		} else {
			for len(w.free.blocks) == 0 {
				w.free.cond.Wait()
			}
		}
	}
	nextBlock := w.free.blocks[len(w.free.blocks)-1]
	w.free.blocks = w.free.blocks[:len(w.free.blocks)-1]
	w.free.Unlock()

	f := &w.flusher
	f.Lock()
	f.pending = append(f.pending, w.block)
	w.block = nextBlock
	f.ready.Signal()
	w.err = w.flusher.err
	f.Unlock()

	w.blockNum++
}

// Close flushes and syncs any unwritten data and closes the writer.
// Where required, external synchronisation is provided by commitPipeline.mu.
func (w *LogWriter) Close() error {
	f := &w.flusher

	// Emit an EOF trailer signifying the end of this log. This helps readers
	// differentiate a corrupted entry in the middle of a log from garbage at
	// the tail of a recycled log file.
	w.emitEOFTrailer()

	// Signal the flush loop to close.
	f.Lock()
	f.close = true
	f.ready.Signal()
	f.Unlock()

	// Wait for the flush loop to close. The flush loop will not close until all
	// pending data has been written or an error occurs.
	<-f.closed

	// Sync any flushed data to disk. NB: flushLoop will sync after flushing the
	// last buffered data only if it was requested via syncQ, so we need to sync
	// here to ensure that all the data is synced.
	err := w.flusher.err
	if err == nil && w.s != nil {
		err = w.s.Sync()
	}

	if w.c != nil {
		cerr := w.c.Close()
		w.c = nil
		if cerr != nil {
			return cerr
		}
	}
	w.err = errors.New("bitalosdb/record: closed LogWriter")
	return err
}

// WriteRecord writes a complete record. Returns the offset just past the end
// of the record.
// External synchronisation provided by commitPipeline.mu.
func (w *LogWriter) WriteRecord(p []byte) (int64, error) {
	return w.SyncRecord(p, nil, nil)
}

// SyncRecord writes a complete record. If wg != nil, the record will be
// asynchronously persisted to the underlying writer and Done will be called
// on the wait group upon completion. Returns the offset just past the end of
// the record.
// External synchronisation provided by commitPipeline.mu.
func (w *LogWriter) SyncRecord(p []byte, wg *sync.WaitGroup, err *error) (int64, error) {
	if w.err != nil {
		return -1, w.err
	}

	// The `i == 0` condition ensures we handle empty records. Such records can
	// possibly be generated for VersionEdits stored in the MANIFEST. While the
	// MANIFEST is currently written using Writer, it is good to support the same
	// semantics with LogWriter.
	for i := 0; i == 0 || len(p) > 0; i++ {
		p = w.emitFragment(i, p)
	}

	if wg != nil {
		// If we've been asked to persist the record, add the WaitGroup to the sync
		// queue and signal the flushLoop. Note that flushLoop will write partial
		// blocks to the file if syncing has been requested. The contract is that
		// any record written to the LogWriter to this point will be flushed to the
		// OS and synced to disk.
		f := &w.flusher
		f.syncQ.push(wg, err)
		f.ready.Signal()
	}

	offset := w.blockNum*blockSize + int64(w.block.written)
	// Note that we don't return w.err here as a concurrent call to Close would
	// race with our read. That's ok because the only error we could be seeing
	// is a sync error, which the caller can be notified of by passing in a
	// non-nil err argument.
	return offset, nil
}
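
// A sketch of a caller that blocks until its record is durable
// (illustrative; in practice commitPipeline drives this asynchronously):
//
//	var wg sync.WaitGroup
//	var syncErr error
//	wg.Add(1)
//	if _, err := w.SyncRecord(payload, &wg, &syncErr); err != nil {
//		return err
//	}
//	wg.Wait()      // released by flushLoop once the data has been synced
//	return syncErr // any sync error is published here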

// Size returns the current size of the file.
// External synchronisation provided by commitPipeline.mu.
func (w *LogWriter) Size() int64 {
	return w.blockNum*blockSize + int64(w.block.written)
}

func (w *LogWriter) emitEOFTrailer() {
	// Write a recyclable chunk header with a different log number. Readers
	// will treat the header as EOF when the log number does not match.
	b := w.block
	i := b.written
	binary.LittleEndian.PutUint32(b.buf[i+0:i+4], 0) // CRC
	binary.LittleEndian.PutUint16(b.buf[i+4:i+6], 0) // Size
	b.buf[i+6] = recyclableFullChunkType
	binary.LittleEndian.PutUint32(b.buf[i+7:i+11], w.logNum+1) // Log number
	atomic.StoreInt32(&b.written, i+int32(recyclableHeaderSize))
}
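
// For reference, the recyclable chunk header written by emitEOFTrailer and
// emitFragment occupies 11 bytes (recyclableHeaderSize, as implied by the
// offsets used above), laid out little-endian as:
//
//	+---------+----------+----------+----------------+
//	| CRC 4 B | size 2 B | type 1 B | log number 4 B |
//	+---------+----------+----------+----------------+
//
// The CRC covers the chunk type, the log number and the payload, i.e.
// buf[i+6:j] in emitFragment below.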

func (w *LogWriter) emitFragment(n int, p []byte) []byte {
	b := w.block
	i := b.written
	first := n == 0
	last := blockSize-i-recyclableHeaderSize >= int32(len(p))

	if last {
		if first {
			b.buf[i+6] = recyclableFullChunkType
		} else {
			b.buf[i+6] = recyclableLastChunkType
		}
	} else {
		if first {
			b.buf[i+6] = recyclableFirstChunkType
		} else {
			b.buf[i+6] = recyclableMiddleChunkType
		}
	}

	binary.LittleEndian.PutUint32(b.buf[i+7:i+11], w.logNum)

	r := copy(b.buf[i+recyclableHeaderSize:], p)
	j := i + int32(recyclableHeaderSize+r)
	binary.LittleEndian.PutUint32(b.buf[i+0:i+4], crc.New(b.buf[i+6:j]).Value())
	binary.LittleEndian.PutUint16(b.buf[i+4:i+6], uint16(r))
	atomic.StoreInt32(&b.written, j)

	if blockSize-b.written < recyclableHeaderSize {
		// There is no room for another fragment in the block, so fill the
		// remaining bytes with zeros and queue the block for flushing.
		for i := b.written; i < blockSize; i++ {
			b.buf[i] = 0
		}
		w.queueBlock()
	}
	return p[r:]
}
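
// A worked fragmentation example (values derived from the code above,
// assuming the 11-byte recyclable header): a record of 2*blockSize bytes
// arriving at the start of an empty block is emitted as three fragments: a
// recyclableFirstChunkType fragment carrying blockSize-11 payload bytes that
// fills block 0, a recyclableMiddleChunkType fragment carrying another
// blockSize-11 bytes that fills block 1, and a recyclableLastChunkType
// fragment carrying the remaining 22 bytes at the start of block 2. A record
// that fits entirely in the current block is emitted as a single
// recyclableFullChunkType fragment.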