github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/disttae/partition_reader.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package disttae

import (
	"context"
	"fmt"
	"strings"

	"github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/plan"
	"github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memtable"
	"github.com/matrixorigin/matrixone/pkg/vm/engine"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/dataio/blockio"
	"github.com/matrixorigin/matrixone/pkg/vm/process"
)

// PartitionReader reads a partition's uncommitted workspace data: in-memory
// insert/delete entries tracked by the memtable, plus blocks that a large
// transaction has already flushed to S3 (the cn2s3 path).
type PartitionReader struct {
	end         bool
	typsMap     map[string]types.Type
	firstCalled bool
	readTime    memtable.Time
	tx          *memtable.Transaction
	index       memtable.Tuple
	inserts     []*batch.Batch
	deletes     map[types.Rowid]uint8
	skipBlocks  map[uint64]uint8
	iter        *memtable.TableIter[RowID, DataValue]
	data        *memtable.Table[RowID, DataValue, *DataRow]
	proc        *process.Process

	// the following fields support cn2s3
	s3FileService   fileservice.FileService
	s3BlockReader   objectio.Reader
	extendId2s3File map[string]int
	// colIdxMp maps a column name to the index of the specified column
	colIdxMp        map[string]int
	blockBatch      *BlockBatch
	currentFileName string
}

// BlockBatch iterates over the metaLoc strings carried by a block-meta batch;
// each metaLoc locates one block that was written to S3.
type BlockBatch struct {
	metas  []string
	idx    int
	length int
}

func (blockBatch *BlockBatch) read() (res string) {
	if blockBatch.idx == blockBatch.length {
		return
	}
	res = blockBatch.metas[blockBatch.idx]
	blockBatch.idx++
	return
}

func (blockBatch *BlockBatch) hasRows() bool {
	return blockBatch.idx < blockBatch.length
}

func (blockBatch *BlockBatch) setBat(bat *batch.Batch) {
	blockBatch.metas = vector.MustStrCols(bat.Vecs[0])
	blockBatch.idx = 0
	blockBatch.length = len(blockBatch.metas)
}

var _ engine.Reader = new(PartitionReader)

func (p *PartitionReader) Close() error {
	p.iter.Close()
	return nil
}

// getIdxs resolves column names to their physical column indexes; the schema
// is fixed for the reader, so a missing name is a programming error.
func (p *PartitionReader) getIdxs(colNames []string) (res []uint16) {
	for _, str := range colNames {
		v, ok := p.colIdxMp[str]
		if !ok {
			panic("column does not exist in partitionReader")
		}
		res = append(res, uint16(v))
	}
	return
}

func (p *PartitionReader) Read(ctx context.Context, colNames []string, expr *plan.Expr, mp *mpool.MPool) (*batch.Batch, error) {
	if p == nil {
		return nil, nil
	}
	if p.end {
		return nil, nil
	}
	if p.blockBatch == nil {
		p.blockBatch = &BlockBatch{}
	}

	if len(p.inserts) > 0 || p.blockBatch.hasRows() {
		var bat *batch.Batch
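		// Two kinds of uncommitted batches can arrive here: block-meta
		// batches whose first attribute is catalog.BlockMeta_MetaLoc (they
		// point at blocks already written to S3) and plain in-memory insert
		// batches. The branch below dispatches on that first attribute.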
		if p.blockBatch.hasRows() || p.inserts[0].Attrs[0] == catalog.BlockMeta_MetaLoc {
			var err error
			var ivec *fileservice.IOVector
			// Read a block. If the transaction is huge, its blocks may already
			// have been written to S3 before the transaction commits; such
			// blocks remain invisible to other transactions until commit.
			if !p.blockBatch.hasRows() {
				p.blockBatch.setBat(p.inserts[0])
				p.inserts = p.inserts[1:]
			}
			metaLoc := p.blockBatch.read()
			name := strings.Split(metaLoc, ":")[0]
			if name != p.currentFileName {
				p.s3BlockReader, err = objectio.NewObjectReader(name, p.s3FileService)
				if err != nil {
					return nil, err
				}
				p.extendId2s3File[name] = 0
				p.currentFileName = name
			}
			_, extent, _ := blockio.DecodeMetaLoc(metaLoc)
			for _, name := range colNames {
				if name == catalog.Row_ID {
					return nil, moerr.NewInternalError(ctx, "the current version does not support modifying data read from s3 within a transaction")
				}
			}
			ivec, err = p.s3BlockReader.Read(ctx, extent, p.getIdxs(colNames), p.proc.GetMPool())
			if err != nil {
				return nil, err
			}
			rbat := batch.NewWithSize(len(colNames))
			rbat.SetAttributes(colNames)
			rbat.Cnt = 1
			for i, e := range ivec.Entries {
				rbat.Vecs[i] = vector.New(p.typsMap[colNames[i]])
				if err = rbat.Vecs[i].Read(e.Object.([]byte)); err != nil {
					return nil, err
				}
			}
			rbat.SetZs(rbat.Vecs[0].Length(), p.proc.GetMPool())
			return rbat, nil
		} else {
			bat = p.inserts[0].GetSubBatch(colNames)
			p.inserts = p.inserts[1:]
			b := batch.NewWithSize(len(colNames))
			b.SetAttributes(colNames)
			for i, name := range colNames {
				b.Vecs[i] = vector.New(p.typsMap[name])
			}
			if _, err := b.Append(ctx, mp, bat); err != nil {
				return nil, err
			}
			return b, nil
		}
	}
	b := batch.NewWithSize(len(colNames))
	b.SetAttributes(colNames)
	for i, name := range colNames {
		b.Vecs[i] = vector.New(p.typsMap[name])
	}
	rows := 0
	if len(p.index) > 0 {
		// An index tuple was supplied: switch from the sequential iterator to
		// an index lookup, filter out deleted rows and skipped blocks, and
		// finish the reader in this single call.
		p.iter.Close()
		itr := p.data.NewIndexIter(p.tx, p.index, p.index)
		defer itr.Close()
		for ok := itr.First(); ok; ok = itr.Next() {
			entry := itr.Item()
			if _, ok := p.deletes[types.Rowid(entry.Key)]; ok {
				continue
			}
			if p.skipBlocks != nil {
				if _, ok := p.skipBlocks[rowIDToBlockID(entry.Key)]; ok {
					continue
				}
			}
			dataValue, err := p.data.Get(p.tx, entry.Key)
			if err != nil {
				p.end = true
				return nil, err
			}
			if dataValue.op == opDelete {
				continue
			}
			for i, name := range b.Attrs {
				if name == catalog.Row_ID {
					if err := b.Vecs[i].Append(types.Rowid(entry.Key), false, mp); err != nil {
						return nil, err
					}
					continue
				}
				value, ok := dataValue.value[name]
				if !ok {
					panic(fmt.Sprintf("invalid column name: %v", name))
				}
				if err := value.AppendVector(b.Vecs[i], mp); err != nil {
					return nil, err
				}
			}
			rows++
		}
		if rows > 0 {
			b.SetZs(rows, mp)
		}
		p.end = true
		if rows == 0 {
			return nil, nil
		}
		return b, nil
	}
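	// No index tuple was supplied, so fall back to a sequential scan of the
	// memtable, resuming where the previous Read call stopped; firstCalled
	// records whether the shared iterator has been positioned yet.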
	fn := p.iter.Next
	if !p.firstCalled {
		fn = p.iter.First
		p.firstCalled = true
	}

	maxRows := 8192 // cap rows per batch; 8192 amortizes per-call overhead better than 4096
	for ok := fn(); ok; ok = p.iter.Next() {
		dataKey, dataValue, err := p.iter.Read()
		if err != nil {
			return nil, err
		}

		if _, ok := p.deletes[types.Rowid(dataKey)]; ok {
			continue
		}

		if dataValue.op == opDelete {
			continue
		}

		if p.skipBlocks != nil {
			if _, ok := p.skipBlocks[rowIDToBlockID(dataKey)]; ok {
				continue
			}
		}

		for i, name := range b.Attrs {
			if name == catalog.Row_ID {
				if err := b.Vecs[i].Append(types.Rowid(dataKey), false, mp); err != nil {
					return nil, err
				}
				continue
			}
			value, ok := dataValue.value[name]
			if !ok {
				panic(fmt.Sprintf("invalid column name: %v", name))
			}
			if err := value.AppendVector(b.Vecs[i], mp); err != nil {
				return nil, err
			}
		}

		rows++
		if rows == maxRows {
			break
		}
	}

	if rows > 0 {
		b.SetZs(rows, mp)
	}
	if rows == 0 {
		return nil, nil
	}

	return b, nil
}
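// The sketch below is not part of the original file; it is a minimal,
// hypothetical illustration of how a caller might drain any engine.Reader,
// including *PartitionReader. It relies only on behavior visible above: Read
// reports exhaustion by returning a nil batch with a nil error, and expr is
// unused by this reader, so nil is passed for it. The name drainReader is
// invented for this example.
func drainReader(ctx context.Context, r engine.Reader, colNames []string, mp *mpool.MPool) ([]*batch.Batch, error) {
	defer r.Close()
	var bats []*batch.Batch
	for {
		bat, err := r.Read(ctx, colNames, nil, mp)
		if err != nil {
			return nil, err
		}
		if bat == nil {
			// a nil batch with a nil error signals that no rows remain
			return bats, nil
		}
		bats = append(bats, bat)
	}
}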