github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/soliton/chunk/disk.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package chunk
    15  
    16  import (
    17  	"errors"
    18  	"io"
    19  	"io/ioutil"
    20  	"os"
    21  	"strconv"
    22  	"sync"
    23  
    24  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    25  	"github.com/whtcorpsinc/milevadb/config"
    26  	"github.com/whtcorpsinc/milevadb/types"
    27  	"github.com/whtcorpsinc/milevadb/soliton/checksum"
    28  	"github.com/whtcorpsinc/milevadb/soliton/disk"
    29  	"github.com/whtcorpsinc/milevadb/soliton/encrypt"
    30  	"github.com/whtcorpsinc/milevadb/soliton/memory"
    31  )
    32  
// ListInDisk represents a slice of chunks storing in temporary disk.
type ListInDisk struct {
	// fieldTypes are the field types of every chunk added; used to
	// deserialize rows back from disk in GetRow.
	fieldTypes []*types.FieldType
	// offsets stores the offsets in disk of all RowPtr,
	// the offset of one RowPtr is offsets[RowPtr.ChkIdx][RowPtr.RowIdx].
	offsets [][]int64
	// offWrite is the current offset for writing.
	offWrite int64

	// disk is the backing temporary file; nil until the first Add.
	disk *os.File
	// w is the write pipeline over disk (checksum layer, optionally over an
	// encryption layer); set to nil by flush once writing is finished.
	w io.WriteCloser
	// bufFlushMutex guards the flush of w (see flush for the locking scheme).
	bufFlushMutex sync.RWMutex
	diskTracker   *disk.Tracker // track disk usage.
	// numRowsInDisk is the total number of rows across all added chunks.
	numRowsInDisk int

	// ctrCipher stores the key and nonce using by aes encrypt io layer
	ctrCipher *encrypt.CtrCipher
}
    51  
// defaultChunkListInDiskPath is the name prefix of the temporary spill file;
// the disk tracker's label is appended to it in initDiskFile.
var defaultChunkListInDiskPath = "chunk.ListInDisk"
    53  
    54  // NewListInDisk creates a new ListInDisk with field types.
    55  func NewListInDisk(fieldTypes []*types.FieldType) *ListInDisk {
    56  	l := &ListInDisk{
    57  		fieldTypes: fieldTypes,
    58  		// TODO(fengliyuan): set the quota of disk usage.
    59  		diskTracker: disk.NewTracker(memory.LabelForChunkListInDisk, -1),
    60  	}
    61  	return l
    62  }
    63  
// initDiskFile lazily creates the temporary spill file and the write pipeline
// on top of it (optional AES-CTR encryption layer wrapped by a checksum
// layer). It is invoked by Add the first time a chunk is stored.
func (l *ListInDisk) initDiskFile() (err error) {
	err = disk.CheckAndInitTemFIDelir()
	if err != nil {
		return
	}
	// The tracker label is appended to the file name prefix so spill files
	// of different trackers can be told apart on disk.
	l.disk, err = ioutil.TempFile(config.GetGlobalConfig().TempStoragePath, defaultChunkListInDiskPath+strconv.Itoa(l.diskTracker.Label()))
	if err != nil {
		return
	}
	var underlying io.WriteCloser = l.disk
	if config.GetGlobalConfig().Security.SpilledFileEncryptionMethod != config.SpilledFileEncryptionMethodPlaintext {
		// The possible values of SpilledFileEncryptionMethod are "plaintext", "aes128-ctr"
		l.ctrCipher, err = encrypt.NewCtrCipher()
		if err != nil {
			return
		}
		underlying = encrypt.NewWriter(l.disk, l.ctrCipher)
	}
	// The checksum writer is always the outermost layer of the pipeline.
	l.w = checksum.NewWriter(underlying)
	l.bufFlushMutex = sync.RWMutex{}
	return
}
    86  
    87  // Len returns the number of rows in ListInDisk
    88  func (l *ListInDisk) Len() int {
    89  	return l.numRowsInDisk
    90  }
    91  
    92  // GetDiskTracker returns the memory tracker of this List.
    93  func (l *ListInDisk) GetDiskTracker() *disk.Tracker {
    94  	return l.diskTracker
    95  }
    96  
// flush empties the write buffer, please call flush before read!
// It closes the write pipeline (which flushes buffered bytes to the file)
// and reopens the file for reading. Safe for concurrent callers.
func (l *ListInDisk) flush() (err error) {
	// buffered is not zero only after Add and before GetRow, after the first flush, buffered will always be zero,
	// hence we use a RWLock to allow quicker quit.
	l.bufFlushMutex.RLock()
	checksumWriter := l.w
	l.bufFlushMutex.RUnlock()
	// Fast path: l.w is nil once a previous flush completed.
	if checksumWriter == nil {
		return nil
	}
	// Slow path: take the write lock and re-check l.w, since another
	// goroutine may have flushed between RUnlock and Lock.
	l.bufFlushMutex.Lock()
	defer l.bufFlushMutex.Unlock()
	if l.w != nil {
		err = l.w.Close()
		if err != nil {
			return
		}
		l.w = nil
		// the l.disk is the underlying object of the l.w, it will be closed
		// after calling l.w.Close, we need to reopen it before reading rows.
		l.disk, err = os.Open(l.disk.Name())
		if err != nil {
			return
		}
	}
	return
}
   124  
   125  // Add adds a chunk to the ListInDisk. Caller must make sure the input chk
   126  // is not empty and not used any more and has the same field types.
   127  // Warning: do not mix Add and GetRow (always use GetRow after you have added all the chunks), and do not use Add concurrently.
   128  func (l *ListInDisk) Add(chk *Chunk) (err error) {
   129  	if chk.NumRows() == 0 {
   130  		return errors.New("chunk appended to List should have at least 1 event")
   131  	}
   132  	if l.disk == nil {
   133  		err = l.initDiskFile()
   134  		if err != nil {
   135  			return
   136  		}
   137  	}
   138  	chk2 := chunHoTTisk{Chunk: chk, offWrite: l.offWrite}
   139  	n, err := chk2.WriteTo(l.w)
   140  	l.offWrite += n
   141  	if err != nil {
   142  		return
   143  	}
   144  	l.offsets = append(l.offsets, chk2.getOffsetsOfRows())
   145  	l.diskTracker.Consume(n)
   146  	l.numRowsInDisk += chk.NumRows()
   147  	return
   148  }
   149  
   150  // GetChunk gets a Chunk from the ListInDisk by chkIdx.
   151  func (l *ListInDisk) GetChunk(chkIdx int) (*Chunk, error) {
   152  	chk := NewChunkWithCapacity(l.fieldTypes, l.NumRowsOfChunk(chkIdx))
   153  	offsets := l.offsets[chkIdx]
   154  	for rowIdx := range offsets {
   155  		event, err := l.GetRow(RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)})
   156  		if err != nil {
   157  			return chk, err
   158  		}
   159  		chk.AppendRow(event)
   160  	}
   161  	return chk, nil
   162  }
   163  
   164  // GetRow gets a Row from the ListInDisk by RowPtr.
   165  func (l *ListInDisk) GetRow(ptr RowPtr) (event Row, err error) {
   166  	err = l.flush()
   167  	if err != nil {
   168  		return
   169  	}
   170  	off := l.offsets[ptr.ChkIdx][ptr.RowIdx]
   171  	var underlying io.ReaderAt = l.disk
   172  	if l.ctrCipher != nil {
   173  		underlying = encrypt.NewReader(l.disk, l.ctrCipher)
   174  	}
   175  	r := io.NewSectionReader(checksum.NewReader(underlying), off, l.offWrite-off)
   176  	format := rowInDisk{numDefCaus: len(l.fieldTypes)}
   177  	_, err = format.ReadFrom(r)
   178  	if err != nil {
   179  		return event, err
   180  	}
   181  	event = format.toMutRow(l.fieldTypes).ToRow()
   182  	return event, err
   183  }
   184  
   185  // NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk.
   186  func (l *ListInDisk) NumRowsOfChunk(chkID int) int {
   187  	return len(l.offsets[chkID])
   188  }
   189  
   190  // NumChunks returns the number of chunks in the ListInDisk.
   191  func (l *ListInDisk) NumChunks() int {
   192  	return len(l.offsets)
   193  }
   194  
   195  // Close releases the disk resource.
   196  func (l *ListInDisk) Close() error {
   197  	if l.disk != nil {
   198  		l.diskTracker.Consume(-l.diskTracker.BytesConsumed())
   199  		terror.Call(l.disk.Close)
   200  		terror.Log(os.Remove(l.disk.Name()))
   201  	}
   202  	return nil
   203  }
   204  
// chunHoTTisk represents a chunk in disk format. Each event of the chunk
// is serialized and in sequence ordered. The format of each event is like
// the struct diskFormatRow, put size of each defCausumn first, then the
// data of each defCausumn.
//
// For example, a chunk has 2 rows and 3 defCausumns, the disk format of the
// chunk is as follow:
//
// [size of row0 defCausumn0], [size of row0 defCausumn1], [size of row0 defCausumn2]
// [data of row0 defCausumn0], [data of row0 defCausumn1], [data of row0 defCausumn2]
// [size of row1 defCausumn0], [size of row1 defCausumn1], [size of row1 defCausumn2]
// [data of row1 defCausumn0], [data of row1 defCausumn1], [data of row1 defCausumn2]
//
// If a defCausumn of a event is null, the size of it is -1 and the data is empty.
type chunHoTTisk struct {
	*Chunk
	// offWrite is the current offset for writing.
	// It is the absolute file offset where this chunk starts, so that
	// offsetsOfRows can be recorded relative to the whole file.
	offWrite int64
	// offsetsOfRows stores the offset of each event.
	// It is populated by WriteTo and read back via getOffsetsOfRows.
	offsetsOfRows []int64
}
   226  
// WriteTo serializes the chunk into the format of chunHoTTisk, and
// writes to w. It also records the absolute starting offset of every event
// in chk.offsetsOfRows (chk.offWrite + bytes written so far).
func (chk *chunHoTTisk) WriteTo(w io.Writer) (written int64, err error) {
	var n int64
	numRows := chk.NumRows()
	chk.offsetsOfRows = make([]int64, 0, numRows)
	// format is reused across iterations to avoid reallocating its buffers;
	// convertFromRow resets and refills it for each event.
	var format *diskFormatRow
	for rowIdx := 0; rowIdx < numRows; rowIdx++ {
		format = convertFromRow(chk.GetRow(rowIdx), format)
		// Record the event's offset before its bytes are written.
		chk.offsetsOfRows = append(chk.offsetsOfRows, chk.offWrite+written)

		n, err = rowInDisk{diskFormatRow: *format}.WriteTo(w)
		written += n
		if err != nil {
			return
		}
	}
	return
}
   246  
   247  // getOffsetsOfRows gets the offset of each event.
   248  func (chk *chunHoTTisk) getOffsetsOfRows() []int64 { return chk.offsetsOfRows }
   249  
// rowInDisk represents a Row in format of diskFormatRow.
type rowInDisk struct {
	// numDefCaus is the number of defCausumns expected when deserializing
	// a event in ReadFrom; it sizes the fixed-width size header.
	numDefCaus int
	diskFormatRow
}
   255  
   256  // WriteTo serializes a event of the chunk into the format of
   257  // diskFormatRow, and writes to w.
   258  func (event rowInDisk) WriteTo(w io.Writer) (written int64, err error) {
   259  	n, err := w.Write(i64SliceToBytes(event.sizesOfDeferredCausets))
   260  	written += int64(n)
   261  	if err != nil {
   262  		return
   263  	}
   264  	for _, data := range event.cells {
   265  		n, err = w.Write(data)
   266  		written += int64(n)
   267  		if err != nil {
   268  			return
   269  		}
   270  	}
   271  	return
   272  }
   273  
   274  // ReadFrom reads data of r, deserializes it from the format of diskFormatRow
   275  // into Row.
   276  func (event *rowInDisk) ReadFrom(r io.Reader) (n int64, err error) {
   277  	b := make([]byte, 8*event.numDefCaus)
   278  	var n1 int
   279  	n1, err = io.ReadFull(r, b)
   280  	n += int64(n1)
   281  	if err != nil {
   282  		return
   283  	}
   284  	event.sizesOfDeferredCausets = bytesToI64Slice(b)
   285  	event.cells = make([][]byte, 0, event.numDefCaus)
   286  	for _, size := range event.sizesOfDeferredCausets {
   287  		if size == -1 {
   288  			continue
   289  		}
   290  		cell := make([]byte, size)
   291  		event.cells = append(event.cells, cell)
   292  		n1, err = io.ReadFull(r, cell)
   293  		n += int64(n1)
   294  		if err != nil {
   295  			return
   296  		}
   297  	}
   298  	return
   299  }
   300  
// diskFormatRow represents a event in a chunk in disk format. The disk format
// of a event is described in the doc of chunHoTTisk.
type diskFormatRow struct {
	// sizesOfDeferredCausets stores the size of each defCausumn in a event.
	// -1 means the value of this defCausumn is null.
	sizesOfDeferredCausets []int64 // -1 means null
	// cells represents raw data of not-null defCausumns in one event.
	// Null defCausumns have no entry here, so len(cells) may be less than
	// len(sizesOfDeferredCausets).
	// In convertFromRow, data from Row is shallow copied to cells.
	// In toMutRow, data in cells is shallow copied to MutRow.
	cells [][]byte
}
   312  
   313  // convertFromRow serializes one event of chunk to diskFormatRow, then
   314  // we can use diskFormatRow to write to disk.
   315  func convertFromRow(event Row, reuse *diskFormatRow) (format *diskFormatRow) {
   316  	numDefCauss := event.Chunk().NumDefCauss()
   317  	if reuse != nil {
   318  		format = reuse
   319  		format.sizesOfDeferredCausets = format.sizesOfDeferredCausets[:0]
   320  		format.cells = format.cells[:0]
   321  	} else {
   322  		format = &diskFormatRow{
   323  			sizesOfDeferredCausets: make([]int64, 0, numDefCauss),
   324  			cells:          make([][]byte, 0, numDefCauss),
   325  		}
   326  	}
   327  	for defCausIdx := 0; defCausIdx < numDefCauss; defCausIdx++ {
   328  		if event.IsNull(defCausIdx) {
   329  			format.sizesOfDeferredCausets = append(format.sizesOfDeferredCausets, -1)
   330  		} else {
   331  			cell := event.GetRaw(defCausIdx)
   332  			format.sizesOfDeferredCausets = append(format.sizesOfDeferredCausets, int64(len(cell)))
   333  			format.cells = append(format.cells, cell)
   334  		}
   335  	}
   336  	return
   337  }
   338  
// toMutRow deserializes diskFormatRow to MutRow. Cell data is shallow
// copied into the new defCausumns, so the result aliases format.cells.
func (format *diskFormatRow) toMutRow(fields []*types.FieldType) MutRow {
	// Build a one-event chunk whose defCausumns each hold a single value.
	chk := &Chunk{defCausumns: make([]*DeferredCauset, 0, len(format.sizesOfDeferredCausets))}
	// cellOff indexes the next unread entry of format.cells; null
	// defCausumns have no entry there, so it only advances for non-null ones.
	var cellOff int
	for defCausIdx, size := range format.sizesOfDeferredCausets {
		defCaus := &DeferredCauset{length: 1}
		elemSize := getFixedLen(fields[defCausIdx])
		if size == -1 { // isNull
			defCaus.nullBitmap = []byte{0}
			if elemSize == varElemLen {
				// Variable-length defCausumn: equal offsets denote empty data.
				defCaus.offsets = []int64{0, 0}
			} else {
				// Fixed-length defCausumn: zero-filled placeholder buffer.
				buf := make([]byte, elemSize)
				defCaus.data = buf
				defCaus.elemBuf = buf
			}
		} else {
			defCaus.nullBitmap = []byte{1}
			defCaus.data = format.cells[cellOff]
			cellOff++
			if elemSize == varElemLen {
				defCaus.offsets = []int64{0, int64(len(defCaus.data))}
			} else {
				defCaus.elemBuf = defCaus.data
			}
		}
		chk.defCausumns = append(chk.defCausumns, defCaus)
	}
	return MutRow{c: chk}
}