github.com/matrixorigin/matrixone@v1.2.0/pkg/objectio/funcs.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package objectio

import (
	"bytes"
	"context"
	"fmt"

	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/vector"

	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
)

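// ReleaseIOEntry releases the cached data held by a single IO entry so the
// underlying cache buffer can be reused.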
func ReleaseIOEntry(entry *fileservice.IOEntry) {
	entry.CachedData.Release()
}

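// ReleaseIOVector releases an IO vector obtained from the file service.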
func ReleaseIOVector(vector *fileservice.IOVector) {
	vector.Release()
}

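// ReadExtent reads one extent of the named object file and returns a private
// copy of its decoded bytes. The entry's ToCacheData constructor comes from
// factory, and the cached entry is released before returning, so the caller
// owns buf.
//
// Hypothetical usage sketch (objName, ext, policy, and fs are assumed to be
// in scope):
//
//	// buf, err := ReadExtent(ctx, objName, ext, policy, fs, constructorFactory)
//	// if err != nil { /* handle error */ }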
func ReadExtent(
	ctx context.Context,
	name string,
	extent *Extent,
	policy fileservice.Policy,
	fs fileservice.FileService,
	factory CacheConstructorFactory,
) (buf []byte, err error) {
	ioVec := &fileservice.IOVector{
		FilePath: name,
		Entries:  make([]fileservice.IOEntry, 1),
		Policy:   policy,
	}

	ioVec.Entries[0] = fileservice.IOEntry{
		Offset:      int64(extent.Offset()),
		Size:        int64(extent.Length()),
		ToCacheData: factory(int64(extent.OriginSize()), extent.Alg()),
	}
	if err = fs.Read(ctx, ioVec); err != nil {
		return
	}
	// TODO: when to call ioVec.Release?
	v := ioVec.Entries[0].CachedData.Bytes()
	buf = make([]byte, len(v))
	copy(buf, v)
	ReleaseIOEntry(&ioVec.Entries[0])
	return
}

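// ReadBloomFilter reads the bloom filter extent of an object and decodes it
// into a BloomFilter.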
func ReadBloomFilter(
	ctx context.Context,
	name string,
	extent *Extent,
	policy fileservice.Policy,
	fs fileservice.FileService,
) (filters BloomFilter, err error) {
	var v []byte
	if v, err = ReadExtent(
		ctx,
		name,
		extent,
		policy,
		fs,
		constructorFactory); err != nil {
		return
	}

	var obj any
	obj, err = Decode(v)
	if err != nil {
		return
	}

	filters = obj.([]byte)
	return
}

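// ReadObjectMeta reads the object meta extent and decodes it into an
// ObjectMeta.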
func ReadObjectMeta(
	ctx context.Context,
	name string,
	extent *Extent,
	policy fileservice.Policy,
	fs fileservice.FileService,
) (meta ObjectMeta, err error) {
	var v []byte
	if v, err = ReadExtent(ctx, name, extent, policy, fs, constructorFactory); err != nil {
		return
	}

	var obj any
	obj, err = Decode(v)
	if err != nil {
		return
	}

	meta = obj.(ObjectMeta)
	return
}

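// ReadOneBlock reads the requested columns of one block using the default
// constructorFactory. It is a thin wrapper around ReadOneBlockWithMeta.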
func ReadOneBlock(
	ctx context.Context,
	meta *ObjectDataMeta,
	name string,
	blk uint16,
	seqnums []uint16,
	typs []types.Type,
	m *mpool.MPool,
	fs fileservice.FileService,
	policy fileservice.Policy,
) (ioVec *fileservice.IOVector, err error) {
	return ReadOneBlockWithMeta(ctx, meta, name, blk, seqnums, typs, m, fs, constructorFactory, policy)
}

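// ReadOneBlockWithMeta reads the columns identified by seqnums from one block
// of an object. Three kinds of columns are handled:
//   - special columns (seqnum >= SEQNUM_UPPER) are remapped to the trailing
//     committs/abort columns of an appendable block;
//   - columns newer than the block's max seqnum (added by a schema change)
//     are not on disk and are filled with const-null vectors built from typs;
//   - ordinary columns are read from their extents via the file service.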
func ReadOneBlockWithMeta(
	ctx context.Context,
	meta *ObjectDataMeta,
	name string,
	blk uint16,
	seqnums []uint16,
	typs []types.Type,
	m *mpool.MPool,
	fs fileservice.FileService,
	factory CacheConstructorFactory,
	policy fileservice.Policy,
) (ioVec *fileservice.IOVector, err error) {
	ioVec = &fileservice.IOVector{
		FilePath: name,
		Entries:  make([]fileservice.IOEntry, 0),
		Policy:   policy,
	}

	var filledEntries []fileservice.IOEntry
	blkmeta := meta.GetBlockMeta(uint32(blk))
	maxSeqnum := blkmeta.GetMaxSeqnum()
	for i, seqnum := range seqnums {
		// special columns
		if seqnum >= SEQNUM_UPPER {
			metaColCnt := blkmeta.GetMetaColumnCount()
			// reading an appendable block file: the last two columns are committs and abort
			if seqnum == SEQNUM_COMMITTS {
				seqnum = metaColCnt - 2
			} else if seqnum == SEQNUM_ABORT {
				seqnum = metaColCnt - 1
			} else {
				panic(fmt.Sprintf("bad path to read special column %d", seqnum))
			}
			col := blkmeta.ColumnMeta(seqnum)
			ext := col.Location()
			ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{
				Offset:      int64(ext.Offset()),
				Size:        int64(ext.Length()),
				ToCacheData: factory(int64(ext.OriginSize()), ext.Alg()),
			})
			continue
		}

		// the column is not persisted in this block; fill a vector for it later
		if seqnum > maxSeqnum || blkmeta.ColumnMeta(seqnum).DataType() == 0 {
			if filledEntries == nil {
				filledEntries = make([]fileservice.IOEntry, len(seqnums))
			}
			filledEntries[i] = fileservice.IOEntry{
				Size: int64(seqnum), // a marker; it cannot be zero
			}
			continue
		}

		// read a normal column that was written to the object
		col := blkmeta.ColumnMeta(seqnum)
		ext := col.Location()
		ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{
			Offset:      int64(ext.Offset()),
			Size:        int64(ext.Length()),
			ToCacheData: factory(int64(ext.OriginSize()), ext.Alg()),
		})
	}
	if len(ioVec.Entries) > 0 {
		err = fs.Read(ctx, ioVec)
		if err != nil {
			return
		}
		// TODO: when to call ioVec.Release?
	}

	// generate const-null vectors for the columns that were not persisted
	if filledEntries != nil {
		if len(typs) == 0 {
			panic(fmt.Sprintf("block %s generate need typs", meta.BlockHeader().BlockID().String()))
		}
		length := int(blkmeta.GetRows())
		readed := ioVec.Entries
		for i := range filledEntries {
			if filledEntries[i].Size == 0 { // zero size marks a placeholder for a column that was actually read
				filledEntries[i] = readed[0]
				readed = readed[1:]
			} else {
				logutil.Infof("block %s generate seqnum %d %v",
					meta.BlockHeader().BlockID().String(), filledEntries[i].Size, typs[i])
				buf := &bytes.Buffer{}
				buf.Write(EncodeIOEntryHeader(&IOEntryHeader{Type: IOET_ColData, Version: IOET_ColumnData_CurrVer}))
				if err = vector.NewConstNull(typs[i], length, m).MarshalBinaryWithBuffer(buf); err != nil {
					return
				}
				cacheData := fileservice.DefaultCacheDataAllocator.Alloc(buf.Len())
				copy(cacheData.Bytes(), buf.Bytes())
				filledEntries[i].CachedData = cacheData
			}
		}
		ioVec.Entries = filledEntries
	}

	return
}

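// ReadMultiBlocksWithMeta builds one IO vector covering the requested columns
// of multiple blocks (data, tombstone, or checkpoint sub-meta, depending on
// each option's DataType) and reads them in a single file-service call.
// Columns that are missing from a block's meta are skipped rather than
// generated.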
func ReadMultiBlocksWithMeta(
	ctx context.Context,
	name string,
	meta ObjectMeta,
	options map[uint16]*ReadBlockOptions,
	fs fileservice.FileService,
	factory CacheConstructorFactory,
) (ioVec *fileservice.IOVector, err error) {
	ioVec = &fileservice.IOVector{
		FilePath: name,
		Entries:  make([]fileservice.IOEntry, 0),
	}
	var dataMeta ObjectDataMeta
	for _, opt := range options {
		for seqnum := range opt.Idxes {
			if DataMetaType(opt.DataType) == SchemaData {
				dataMeta = meta.MustDataMeta()
			} else if DataMetaType(opt.DataType) == SchemaTombstone {
				dataMeta = meta.MustTombstoneMeta()
			} else {
				dataMeta, _ = meta.SubMeta(ConvertToCkpIdx(opt.DataType))
			}
			blkmeta := dataMeta.GetBlockMeta(uint32(opt.Id))
			if seqnum > blkmeta.GetMaxSeqnum() || blkmeta.ColumnMeta(seqnum).DataType() == 0 {
				// used for prefetch; do not generate the missing column
				continue
			}
			col := blkmeta.ColumnMeta(seqnum)
			ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{
				Offset: int64(col.Location().Offset()),
				Size:   int64(col.Location().Length()),

				ToCacheData: factory(int64(col.Location().OriginSize()), col.Location().Alg()),
			})
		}
	}

	err = fs.Read(ctx, ioVec)
	// TODO: when to call ioVec.Release?
	return
}

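// ReadAllBlocksWithMeta reads the given columns of every block in the object
// with one file-service call. It expects no schema changes: every requested
// column must exist in every block, otherwise it panics.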
func ReadAllBlocksWithMeta(
	ctx context.Context,
	meta *ObjectDataMeta,
	name string,
	cols []uint16,
	policy fileservice.Policy,
	m *mpool.MPool,
	fs fileservice.FileService,
	factory CacheConstructorFactory,
) (ioVec *fileservice.IOVector, err error) {
	ioVec = &fileservice.IOVector{
		FilePath: name,
		Entries:  make([]fileservice.IOEntry, 0, len(cols)*int(meta.BlockCount())),
		Policy:   policy,
	}
	for blk := uint32(0); blk < meta.BlockCount(); blk++ {
		for _, seqnum := range cols {
			blkmeta := meta.GetBlockMeta(blk)
			if seqnum > blkmeta.GetMaxSeqnum() || blkmeta.ColumnMeta(seqnum).DataType() == 0 {
				// unlike the prefetch path, a missing column is unexpected here
				panic("ReadAllBlocksWithMeta expect no schema changes")
			}
			col := blkmeta.ColumnMeta(seqnum)
			ext := col.Location()
			ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{
				Offset: int64(ext.Offset()),
				Size:   int64(ext.Length()),

				ToCacheData: factory(int64(ext.OriginSize()), ext.Alg()),
			})
		}
	}

	err = fs.Read(ctx, ioVec)
	// TODO: when to call ioVec.Release?
	return
}

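// ReadOneBlockAllColumns reads the given columns of a single block and decodes
// them into a batch whose row count is taken from the decoded vectors.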
func ReadOneBlockAllColumns(
	ctx context.Context,
	meta *ObjectDataMeta,
	name string,
	id uint32,
	cols []uint16,
	cachePolicy fileservice.Policy,
	fs fileservice.FileService,
) (bat *batch.Batch, err error) {
	ioVec := &fileservice.IOVector{
		FilePath: name,
		Entries:  make([]fileservice.IOEntry, 0),
		Policy:   cachePolicy,
	}
	for _, seqnum := range cols {
		blkmeta := meta.GetBlockMeta(id)
		col := blkmeta.ColumnMeta(seqnum)
		ext := col.Location()
		ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{
			Offset: int64(ext.Offset()),
			Size:   int64(ext.Length()),

			ToCacheData: constructorFactory(int64(ext.OriginSize()), ext.Alg()),
		})
	}

	// check the read error before touching the cached entries
	if err = fs.Read(ctx, ioVec); err != nil {
		return nil, err
	}
	// TODO: when to call ioVec.Release?
	bat = batch.NewWithSize(len(cols))
	var obj any
	for i := range cols {
		obj, err = Decode(ioVec.Entries[i].CachedData.Bytes())
		if err != nil {
			return nil, err
		}
		bat.Vecs[i] = obj.(*vector.Vector)
		bat.SetRowCount(bat.Vecs[i].Length())
	}
	return
}