github.com/matrixorigin/matrixone@v0.7.0/pkg/objectio/writer.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package objectio
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/binary"
    21  	"github.com/matrixorigin/matrixone/pkg/compress"
    22  	"github.com/pierrec/lz4"
    23  	"sync"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    27  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    28  )
    29  
// ObjectWriter collects the contents of one object in an in-memory buffer
// and writes them out through the object's file service.
//
// Fields:
//   - the embedded RWMutex is taken by AddBlock, GetBlock and WriteEnd;
//   - object is built from name and the file service in NewObjectWriter,
//     and Sync writes through object.fs;
//   - blocks holds the blocks registered via AddBlock, in registration order;
//   - buffer receives the header, column data and metadata; WriteEnd sets it
//     to nil once the data has been written out;
//   - name is the object name passed to NewObjectWriter;
//   - lastId is the id AddBlock assigns to the next registered block.
type ObjectWriter struct {
	sync.RWMutex
	object *Object
	blocks []BlockObject
	buffer *ObjectBuffer
	name   string
	lastId uint32
}
    38  
    39  func NewObjectWriter(name string, fs fileservice.FileService) (Writer, error) {
    40  	object := NewObject(name, fs)
    41  	writer := &ObjectWriter{
    42  		name:   name,
    43  		object: object,
    44  		buffer: NewObjectBuffer(name),
    45  		blocks: make([]BlockObject, 0),
    46  		lastId: 0,
    47  	}
    48  	err := writer.WriteHeader()
    49  	return writer, err
    50  }
    51  
    52  func (w *ObjectWriter) WriteHeader() error {
    53  	var (
    54  		err    error
    55  		header bytes.Buffer
    56  	)
    57  	h := Header{magic: Magic, version: Version}
    58  	if err = binary.Write(&header, endian, h.magic); err != nil {
    59  		return err
    60  	}
    61  	if err = binary.Write(&header, endian, h.version); err != nil {
    62  		return err
    63  	}
    64  	if err = binary.Write(&header, endian, h.dummy); err != nil {
    65  		return err
    66  	}
    67  	_, _, err = w.buffer.Write(header.Bytes())
    68  	return err
    69  }
    70  
    71  func (w *ObjectWriter) Write(batch *batch.Batch) (BlockObject, error) {
    72  	block := NewBlock(uint16(len(batch.Vecs)), w.object, w.name)
    73  	w.AddBlock(block.(*Block))
    74  	for i, vec := range batch.Vecs {
    75  		buf, err := vec.Show()
    76  		if err != nil {
    77  			return nil, err
    78  		}
    79  		originSize := len(buf)
    80  		// TODO:Now by default, lz4 compression must be used for Write,
    81  		// and parameters need to be passed in later to determine the compression type
    82  		data := make([]byte, lz4.CompressBlockBound(originSize))
    83  		if buf, err = compress.Compress(buf, data, compress.Lz4); err != nil {
    84  			return nil, err
    85  		}
    86  		offset, length, err := w.buffer.Write(buf)
    87  		if err != nil {
    88  			return nil, err
    89  		}
    90  		block.(*Block).columns[i].(*ColumnBlock).meta.location = Extent{
    91  			id:         uint32(block.GetMeta().header.blockId),
    92  			offset:     uint32(offset),
    93  			length:     uint32(length),
    94  			originSize: uint32(originSize),
    95  		}
    96  		block.(*Block).columns[i].(*ColumnBlock).meta.alg = compress.Lz4
    97  	}
    98  	return block, nil
    99  }
   100  
   101  func (w *ObjectWriter) WriteIndex(fd BlockObject, index IndexData) error {
   102  	var err error
   103  
   104  	block := w.GetBlock(fd.GetID())
   105  	if block == nil || block.columns[index.GetIdx()] == nil {
   106  		return moerr.NewInternalErrorNoCtx("object io: not found")
   107  	}
   108  	err = index.Write(w, block)
   109  	return err
   110  }
   111  
   112  func (w *ObjectWriter) WriteEnd(ctx context.Context, items ...WriteOptions) ([]BlockObject, error) {
   113  	var err error
   114  	w.RLock()
   115  	defer w.RUnlock()
   116  	var buf bytes.Buffer
   117  	metaLen := 0
   118  	start := 0
   119  	for _, block := range w.blocks {
   120  		meta, err := block.(*Block).MarshalMeta()
   121  		if err != nil {
   122  			return nil, err
   123  		}
   124  		offset, length, err := w.buffer.Write(meta)
   125  		if err != nil {
   126  			return nil, err
   127  		}
   128  		if start == 0 {
   129  			start = offset
   130  		}
   131  		metaLen += length
   132  		if err = binary.Write(&buf, endian, uint32(offset)); err != nil {
   133  			return nil, err
   134  		}
   135  		if err = binary.Write(&buf, endian, uint32(length)); err != nil {
   136  			return nil, err
   137  		}
   138  		if err = binary.Write(&buf, endian, uint32(length)); err != nil {
   139  			return nil, err
   140  		}
   141  	}
   142  	if err = binary.Write(&buf, endian, uint32(len(w.blocks))); err != nil {
   143  		return nil, err
   144  	}
   145  	if err = binary.Write(&buf, endian, uint64(Magic)); err != nil {
   146  		return nil, err
   147  	}
   148  	_, _, err = w.buffer.Write(buf.Bytes())
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	err = w.Sync(ctx, items...)
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  	for i := range w.blocks {
   157  		w.blocks[i].(*Block).extent = Extent{
   158  			id:         uint32(i),
   159  			offset:     uint32(start),
   160  			length:     uint32(metaLen),
   161  			originSize: uint32(metaLen),
   162  		}
   163  	}
   164  
   165  	// The buffer needs to be released at the end of WriteEnd
   166  	// Because the outside may hold this writer
   167  	// After WriteEnd is called, no more data can be written
   168  	w.buffer = nil
   169  	return w.blocks, err
   170  }
   171  
   172  // Sync is for testing
   173  func (w *ObjectWriter) Sync(ctx context.Context, items ...WriteOptions) error {
   174  	w.buffer.SetDataOptions(items...)
   175  	err := w.object.fs.Write(ctx, w.buffer.GetData())
   176  	if err != nil {
   177  		return err
   178  	}
   179  	return err
   180  }
   181  
   182  func (w *ObjectWriter) AddBlock(block *Block) {
   183  	w.Lock()
   184  	defer w.Unlock()
   185  	block.id = w.lastId
   186  	w.blocks = append(w.blocks, block)
   187  	//w.blocks[block.id] = block
   188  	w.lastId++
   189  }
   190  
   191  func (w *ObjectWriter) GetBlock(id uint32) *Block {
   192  	w.Lock()
   193  	defer w.Unlock()
   194  	return w.blocks[id].(*Block)
   195  }