github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/dataio/blockio/read.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package blockio

import (
	"context"
	"time"

	"github.com/RoaringBitmap/roaring"
	pkgcatalog "github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/plan"
	"github.com/matrixorigin/matrixone/pkg/pb/timestamp"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/model"
)

// BlockRead reads block data from storage and applies deletes according to
// the given timestamp. The caller must make sure metaloc is not empty.
func BlockRead(
	ctx context.Context,
	info *pkgcatalog.BlockInfo,
	columns []string,
	colIdxs []uint16,
	colTypes []types.Type,
	colNulls []bool,
	tableDef *plan.TableDef,
	ts timestamp.Timestamp,
	fs fileservice.FileService,
	pool *mpool.MPool) (*batch.Batch, error) {

	// read
	columnBatch, err := BlockReadInner(
		ctx, info, len(tableDef.Cols), /*including rowid*/
		columns, colIdxs, colTypes, colNulls,
		types.TimestampToTS(ts), fs, pool,
	)
	if err != nil {
		return nil, err
	}

	bat := batch.NewWithSize(len(columns))
	bat.Attrs = columns
	for i, vec := range columnBatch.Vecs {
		// If the vector uses mpool to allocate memory internally,
		// it needs to be freed here
		if vec.Allocated() > 0 {
			bat.Vecs[i] = containers.CopyToMoVec(vec)
		} else {
			bat.Vecs[i] = containers.UnmarshalToMoVec(vec)
		}
		vec.Close()
	}
	bat.SetZs(bat.Vecs[0].Length(), pool)

	return bat, nil
}

// BlockReadInner reads the requested columns, applies any deletes recorded at
// the block's delta location, and compacts the deleted rows out of the batch.
func BlockReadInner(
	ctx context.Context,
	info *pkgcatalog.BlockInfo,
	schemaColCnt int,
	colNames []string,
	colIdxs []uint16,
	colTyps []types.Type,
	colNulls []bool,
	ts types.TS,
	fs fileservice.FileService,
	pool *mpool.MPool) (*containers.Batch, error) {
	columnBatch, err := readColumnBatchByMetaloc(
		ctx, info, ts, schemaColCnt,
		colNames, colIdxs, colTyps, colNulls,
		fs, pool,
	)
	if err != nil {
		return nil, err
	}
	if info.DeltaLoc != "" {
		deleteBatch, err := readDeleteBatchByDeltaloc(ctx, info.DeltaLoc, fs)
		if err != nil {
			return nil, err
		}
		recordDeletes(columnBatch, deleteBatch, ts)
		logutil.Infof(
			"blockread %d read delete %d: base %s filter out %v\n",
			info.BlockID, deleteBatch.Length(), ts.ToString(), columnBatch.DeleteCnt())
		deleteBatch.Close()
	}
	// remove deleted rows from columns
	if columnBatch.Deletes != nil {
		for _, col := range columnBatch.Vecs {
			col.Compact(columnBatch.Deletes)
		}
	}
	return columnBatch, nil
}

// readColumnBatchByMetaloc reads the requested columns from the object
// addressed by the block's meta location, builds the rowid column locally,
// and, for appendable blocks, marks rows that are aborted or not yet
// committed at ts as deleted.
func readColumnBatchByMetaloc(
	ctx context.Context,
	info *pkgcatalog.BlockInfo,
	ts types.TS,
	schemaColCnt int,
	colNames []string,
	colIdxs []uint16,
	colTyps []types.Type,
	colNulls []bool,
	fs fileservice.FileService,
	pool *mpool.MPool) (*containers.Batch, error) {
	var bat *containers.Batch
	var err error
	name, extent, rows := DecodeMetaLoc(info.MetaLoc)
	idxsWithouRowid := make([]uint16, 0, len(colIdxs))
	var rowidData containers.Vector
	// sift out the rowid column
	for i, typ := range colTyps {
		if typ.Oid == types.T_Rowid {
			// generate rowid data
			prefix := model.EncodeBlockKeyPrefix(info.SegmentID, info.BlockID)
			rowidData, err = model.PreparePhyAddrDataWithPool(
				types.T_Rowid.ToType(),
				prefix,
				0,
				rows,
				pool,
			)
			if err != nil {
				return nil, err
			}
			defer func() {
				if err != nil {
					rowidData.Close()
				}
			}()
		} else {
			idxsWithouRowid = append(idxsWithouRowid, colIdxs[i])
		}
	}

	bat = containers.NewBatch()
	defer func() {
		if err != nil {
			bat.Close()
		}
	}()

	// if only the rowid column is requested and the block is non-appendable, return early
	if len(idxsWithouRowid) == 0 && !info.EntryState {
		for _, name := range colNames {
			bat.AddVector(name, rowidData)
		}
		return bat, nil
	}

	if info.EntryState { // appendable block should be filtered by committs
		idxsWithouRowid = append(idxsWithouRowid, uint16(schemaColCnt))   // committs
		idxsWithouRowid = append(idxsWithouRowid, uint16(schemaColCnt+1)) // aborted
	}

	// read from s3
	reader, err := objectio.NewObjectReader(name, fs)
	if err != nil {
		return nil, err
	}

	ioResult, err := reader.Read(ctx, extent, idxsWithouRowid, nil)
	if err != nil {
		return nil, err
	}

	entry := ioResult.Entries
	for i, typ := range colTyps {
		if typ.Oid == types.T_Rowid {
			bat.AddVector(colNames[i], rowidData)
		} else {
			vec := vector.New(colTyps[i])
			data := make([]byte, len(entry[0].Object.([]byte)))
			copy(data, entry[0].Object.([]byte))
			err := vec.Read(data)
			if err != nil {
				return nil, err
			}
			bat.AddVector(colNames[i], containers.NewVectorWithSharedMemory(vec, colNulls[i]))
			entry = entry[1:]
		}
	}

	// generate the filter map: mark rows that are aborted or committed after ts
	if info.EntryState {
		t0 := time.Now()
		v1 := vector.New(types.T_TS.ToType())
		err := v1.Read(entry[0].Object.([]byte))
		if err != nil {
			return nil, err
		}
		commits := containers.NewVectorWithSharedMemory(v1, false)
		defer commits.Close()
		v2 := vector.New(types.T_bool.ToType())
		err = v2.Read(entry[1].Object.([]byte))
		if err != nil {
			return nil, err
		}
		abort := containers.NewVectorWithSharedMemory(v2, false)
		defer abort.Close()
		for i := 0; i < commits.Length(); i++ {
			if abort.Get(i).(bool) || commits.Get(i).(types.TS).Greater(ts) {
				if bat.Deletes == nil {
					bat.Deletes = roaring.NewBitmap()
				}
				bat.Deletes.Add(uint32(i))
			}
		}
		logutil.Infof(
			"blockread %d scan filter cost %v: base %s filter out %v\n",
			info.BlockID, time.Since(t0), ts.ToString(), bat.DeleteCnt())
	}

	return bat, nil
}

// readDeleteBatchByDeltaloc reads the delete batch (rowid, committs, aborted)
// from the object addressed by the delta location.
func readDeleteBatchByDeltaloc(ctx context.Context, deltaloc string, fs fileservice.FileService) (*containers.Batch, error) {
	bat := containers.NewBatch()
	colNames := []string{catalog.PhyAddrColumnName, catalog.AttrCommitTs, catalog.AttrAborted}
	colTypes := []types.Type{types.T_Rowid.ToType(), types.T_TS.ToType(), types.T_bool.ToType()}

	name, extent, _ := DecodeMetaLoc(deltaloc)
	reader, err := objectio.NewObjectReader(name, fs)
	if err != nil {
		return nil, err
	}
	ioResult, err := reader.Read(ctx, extent, []uint16{0, 1, 2}, nil)
	if err != nil {
		return nil, err
	}
	for i, entry := range ioResult.Entries {
		vec := vector.New(colTypes[i])
		data := make([]byte, len(entry.Object.([]byte)))
		copy(data, entry.Object.([]byte))
		err := vec.Read(data)
		if err != nil {
			return nil, err
		}
		bat.AddVector(colNames[i], containers.NewVectorWithSharedMemory(vec, false))
	}
	return bat, nil
}

// recordDeletes adds to columnBatch.Deletes the rows referenced by deleteBatch
// entries that are committed and visible at ts.
func recordDeletes(columnBatch *containers.Batch, deleteBatch *containers.Batch, ts types.TS) {
	if deleteBatch == nil {
		return
	}

	// record visible delete rows
	for i := 0; i < deleteBatch.Length(); i++ {
		abort := deleteBatch.GetVectorByName(catalog.AttrAborted).Get(i).(bool)
		if abort {
			continue
		}
		commitTS := deleteBatch.GetVectorByName(catalog.AttrCommitTs).Get(i).(types.TS)
		if commitTS.Greater(ts) {
			continue
		}
		rowid := deleteBatch.GetVectorByName(catalog.PhyAddrColumnName).Get(i).(types.Rowid)
		_, _, row := model.DecodePhyAddrKey(rowid)
		if columnBatch.Deletes == nil {
			columnBatch.Deletes = roaring.NewBitmap()
		}
		columnBatch.Deletes.Add(row)
	}
}