github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/disttae/partition_reader.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package disttae
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/catalog"
    23  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    24  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    25  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    26  	"github.com/matrixorigin/matrixone/pkg/container/types"
    27  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    28  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    29  	"github.com/matrixorigin/matrixone/pkg/objectio"
    30  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    31  	"github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memtable"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/engine"
    33  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/dataio/blockio"
    34  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    35  )
    36  
// PartitionReader is an engine.Reader over one partition. Read drains the
// pending insert batches first (including blocks referenced by meta location),
// then either performs an index lookup or scans the in-memory table.
type PartitionReader struct {
	// end makes Read return (nil, nil) immediately once set; Read sets it
	// after the index lookup path has run.
	end         bool
	// typsMap maps a column name to the type used to build output vectors.
	typsMap     map[string]types.Type
	// firstCalled records whether iter.First has already been invoked, so
	// that later Read calls continue with iter.Next.
	firstCalled bool
	// readTime is not referenced by the methods in this file.
	// NOTE(review): presumably the snapshot time of the read — confirm.
	readTime    memtable.Time
	// tx is the transaction passed to data.NewIndexIter and data.Get.
	tx          *memtable.Transaction
	// index, when non-empty, switches Read to an index lookup; it is passed
	// as both bound arguments of data.NewIndexIter.
	index       memtable.Tuple
	// inserts are batches consumed front to back by Read. A batch whose first
	// attribute is catalog.BlockMeta_MetaLoc is treated as a list of block
	// meta locations instead of row data.
	inserts     []*batch.Batch
	// deletes holds row ids that Read skips.
	deletes     map[types.Rowid]uint8
	// skipBlocks holds block ids whose rows Read skips; nil disables the check.
	skipBlocks  map[uint64]uint8
	// iter is the iterator used by the scan path of Read and closed by Close.
	iter        *memtable.TableIter[RowID, DataValue]
	// data is the table used for index lookups.
	data        *memtable.Table[RowID, DataValue, *DataRow]
	// proc supplies the memory pool used when reading blocks.
	proc        *process.Process

	// the following attributes are used to support cn2s3
	// s3FileService is handed to objectio.NewObjectReader when a new object
	// file has to be opened.
	s3FileService   fileservice.FileService
	// s3BlockReader is the reader opened for currentFileName.
	s3BlockReader   objectio.Reader
	// extendId2s3File gets an entry (value 0) for every object file name that
	// Read opens.
	extendId2s3File map[string]int
	// used to get idx of specified col
	colIdxMp        map[string]int
	// blockBatch holds the block meta locations that are still to be read.
	blockBatch      *BlockBatch
	// currentFileName is the object file name s3BlockReader was opened for.
	currentFileName string
}
    60  
// BlockBatch is a cursor over a list of block meta locations.
type BlockBatch struct {
	// metas are the meta location strings loaded by setBat.
	metas  []string
	// idx is the position of the next entry that read returns.
	idx    int
	// length is len(metas) as recorded by setBat.
	length int
}
    66  
    67  func (blockBatch *BlockBatch) read() (res string) {
    68  	if blockBatch.idx == blockBatch.length {
    69  		return
    70  	}
    71  	res = blockBatch.metas[blockBatch.idx]
    72  	blockBatch.idx++
    73  	return
    74  }
    75  
    76  func (blockBatch *BlockBatch) hasRows() bool {
    77  	return blockBatch.idx < blockBatch.length
    78  }
    79  
    80  func (blockBatch *BlockBatch) setBat(bat *batch.Batch) {
    81  	blockBatch.metas = vector.MustStrCols(bat.Vecs[0])
    82  	blockBatch.idx = 0
    83  	blockBatch.length = len(blockBatch.metas)
    84  }
    85  
    86  var _ engine.Reader = new(PartitionReader)
    87  
    88  func (p *PartitionReader) Close() error {
    89  	p.iter.Close()
    90  	return nil
    91  }
    92  
    93  func (p *PartitionReader) getIdxs(colNames []string) (res []uint16) {
    94  	for _, str := range colNames {
    95  		v, ok := p.colIdxMp[str]
    96  		if !ok {
    97  			panic("not existed col in partitionReader")
    98  		}
    99  		res = append(res, uint16(v))
   100  	}
   101  	return
   102  }
   103  
   104  func (p *PartitionReader) Read(ctx context.Context, colNames []string, expr *plan.Expr, mp *mpool.MPool) (*batch.Batch, error) {
   105  	if p == nil {
   106  		return nil, nil
   107  	}
   108  	if p.end {
   109  		return nil, nil
   110  	}
   111  	if p.blockBatch == nil {
   112  		p.blockBatch = &BlockBatch{}
   113  	}
   114  
   115  	if len(p.inserts) > 0 || p.blockBatch.hasRows() {
   116  		var bat *batch.Batch
   117  		if p.blockBatch.hasRows() || p.inserts[0].Attrs[0] == catalog.BlockMeta_MetaLoc {
   118  			var err error
   119  			var ivec *fileservice.IOVector
   120  			// read block
   121  			// These blocks may have been written to s3 before the transaction was committed if the transaction is huge, but note that these blocks are only invisible to other transactions
   122  			if !p.blockBatch.hasRows() {
   123  				p.blockBatch.setBat(p.inserts[0])
   124  				p.inserts = p.inserts[1:]
   125  			}
   126  			metaLoc := p.blockBatch.read()
   127  			name := strings.Split(metaLoc, ":")[0]
   128  			if name != p.currentFileName {
   129  				p.s3BlockReader, err = objectio.NewObjectReader(name, p.s3FileService)
   130  				p.extendId2s3File[name] = 0
   131  				p.currentFileName = name
   132  				if err != nil {
   133  					return nil, err
   134  				}
   135  			}
   136  			_, extent, _ := blockio.DecodeMetaLoc(metaLoc)
   137  			for _, name := range colNames {
   138  				if name == catalog.Row_ID {
   139  					return nil, moerr.NewInternalError(ctx, "The current version does not support modifying the data read from s3 within a transaction")
   140  				}
   141  			}
   142  			ivec, err = p.s3BlockReader.Read(context.Background(), extent, p.getIdxs(colNames), p.proc.GetMPool())
   143  			if err != nil {
   144  				return nil, err
   145  			}
   146  			rbat := batch.NewWithSize(len(colNames))
   147  			rbat.SetAttributes(colNames)
   148  			rbat.Cnt = 1
   149  			for i, e := range ivec.Entries {
   150  				rbat.Vecs[i] = vector.New(p.typsMap[colNames[i]])
   151  				if err = rbat.Vecs[i].Read(e.Object.([]byte)); err != nil {
   152  					return nil, err
   153  				}
   154  			}
   155  			rbat.SetZs(rbat.Vecs[0].Length(), p.proc.GetMPool())
   156  			return rbat, nil
   157  		} else {
   158  			bat = p.inserts[0].GetSubBatch(colNames)
   159  			p.inserts = p.inserts[1:]
   160  			b := batch.NewWithSize(len(colNames))
   161  			b.SetAttributes(colNames)
   162  			for i, name := range colNames {
   163  				b.Vecs[i] = vector.New(p.typsMap[name])
   164  			}
   165  			if _, err := b.Append(ctx, mp, bat); err != nil {
   166  				return nil, err
   167  			}
   168  			return b, nil
   169  		}
   170  	}
   171  	b := batch.NewWithSize(len(colNames))
   172  	b.SetAttributes(colNames)
   173  	for i, name := range colNames {
   174  		b.Vecs[i] = vector.New(p.typsMap[name])
   175  	}
   176  	rows := 0
   177  	if len(p.index) > 0 {
   178  		p.iter.Close()
   179  		itr := p.data.NewIndexIter(p.tx, p.index, p.index)
   180  		for ok := itr.First(); ok; ok = itr.Next() {
   181  			entry := itr.Item()
   182  			if _, ok := p.deletes[types.Rowid(entry.Key)]; ok {
   183  				continue
   184  			}
   185  			if p.skipBlocks != nil {
   186  				if _, ok := p.skipBlocks[rowIDToBlockID(entry.Key)]; ok {
   187  					continue
   188  				}
   189  			}
   190  			dataValue, err := p.data.Get(p.tx, entry.Key)
   191  			if err != nil {
   192  				itr.Close()
   193  				p.end = true
   194  				return nil, err
   195  			}
   196  			if dataValue.op == opDelete {
   197  				continue
   198  			}
   199  			for i, name := range b.Attrs {
   200  				if name == catalog.Row_ID {
   201  					if err := b.Vecs[i].Append(types.Rowid(entry.Key), false, mp); err != nil {
   202  						return nil, err
   203  					}
   204  					continue
   205  				}
   206  				value, ok := dataValue.value[name]
   207  				if !ok {
   208  					panic(fmt.Sprintf("invalid column name: %v", name))
   209  				}
   210  				if err := value.AppendVector(b.Vecs[i], mp); err != nil {
   211  					return nil, err
   212  				}
   213  			}
   214  			rows++
   215  		}
   216  		if rows > 0 {
   217  			b.SetZs(rows, mp)
   218  		}
   219  		itr.Close()
   220  		p.end = true
   221  		if rows == 0 {
   222  			return nil, nil
   223  		}
   224  		return b, nil
   225  	}
   226  
   227  	fn := p.iter.Next
   228  	if !p.firstCalled {
   229  		fn = p.iter.First
   230  		p.firstCalled = true
   231  	}
   232  
   233  	maxRows := 8192 // i think 8192 is better than 4096
   234  	for ok := fn(); ok; ok = p.iter.Next() {
   235  		dataKey, dataValue, err := p.iter.Read()
   236  		if err != nil {
   237  			return nil, err
   238  		}
   239  
   240  		if _, ok := p.deletes[types.Rowid(dataKey)]; ok {
   241  			continue
   242  		}
   243  
   244  		if dataValue.op == opDelete {
   245  			continue
   246  		}
   247  
   248  		if p.skipBlocks != nil {
   249  			if _, ok := p.skipBlocks[rowIDToBlockID(dataKey)]; ok {
   250  				continue
   251  			}
   252  		}
   253  
   254  		for i, name := range b.Attrs {
   255  			if name == catalog.Row_ID {
   256  				if err := b.Vecs[i].Append(types.Rowid(dataKey), false, mp); err != nil {
   257  					return nil, err
   258  				}
   259  				continue
   260  			}
   261  			value, ok := dataValue.value[name]
   262  			if !ok {
   263  				panic(fmt.Sprintf("invalid column name: %v", name))
   264  			}
   265  			if err := value.AppendVector(b.Vecs[i], mp); err != nil {
   266  				return nil, err
   267  			}
   268  		}
   269  
   270  		rows++
   271  		if rows == maxRows {
   272  			break
   273  		}
   274  	}
   275  
   276  	if rows > 0 {
   277  		b.SetZs(rows, mp)
   278  	}
   279  	if rows == 0 {
   280  		return nil, nil
   281  	}
   282  
   283  	return b, nil
   284  }