github.com/matrixorigin/matrixone@v1.2.0/pkg/objectio/funcs.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package objectio 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 22 "github.com/matrixorigin/matrixone/pkg/container/batch" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 25 "github.com/matrixorigin/matrixone/pkg/common/mpool" 26 "github.com/matrixorigin/matrixone/pkg/container/types" 27 "github.com/matrixorigin/matrixone/pkg/fileservice" 28 "github.com/matrixorigin/matrixone/pkg/logutil" 29 ) 30 31 func ReleaseIOEntry(entry *fileservice.IOEntry) { 32 entry.CachedData.Release() 33 } 34 35 func ReleaseIOVector(vector *fileservice.IOVector) { 36 vector.Release() 37 } 38 39 func ReadExtent( 40 ctx context.Context, 41 name string, 42 extent *Extent, 43 policy fileservice.Policy, 44 fs fileservice.FileService, 45 factory CacheConstructorFactory, 46 ) (buf []byte, err error) { 47 ioVec := &fileservice.IOVector{ 48 FilePath: name, 49 Entries: make([]fileservice.IOEntry, 1), 50 Policy: policy, 51 } 52 53 ioVec.Entries[0] = fileservice.IOEntry{ 54 Offset: int64(extent.Offset()), 55 Size: int64(extent.Length()), 56 ToCacheData: factory(int64(extent.OriginSize()), extent.Alg()), 57 } 58 if err = fs.Read(ctx, ioVec); err != nil { 59 return 60 } 61 //TODO when to call ioVec.Release? 62 v := ioVec.Entries[0].CachedData.Bytes() 63 buf = make([]byte, len(v)) 64 copy(buf, v) 65 ReleaseIOEntry(&ioVec.Entries[0]) 66 return 67 } 68 69 func ReadBloomFilter( 70 ctx context.Context, 71 name string, 72 extent *Extent, 73 policy fileservice.Policy, 74 fs fileservice.FileService, 75 ) (filters BloomFilter, err error) { 76 var v []byte 77 if v, err = ReadExtent( 78 ctx, 79 name, 80 extent, 81 policy, 82 fs, 83 constructorFactory); err != nil { 84 return 85 } 86 87 var obj any 88 obj, err = Decode(v) 89 if err != nil { 90 return 91 } 92 93 filters = obj.([]byte) 94 return 95 } 96 97 func ReadObjectMeta( 98 ctx context.Context, 99 name string, 100 extent *Extent, 101 policy fileservice.Policy, 102 fs fileservice.FileService, 103 ) (meta ObjectMeta, err error) { 104 var v []byte 105 if v, err = ReadExtent(ctx, name, extent, policy, fs, constructorFactory); err != nil { 106 return 107 } 108 109 var obj any 110 obj, err = Decode(v) 111 if err != nil { 112 return 113 } 114 115 meta = obj.(ObjectMeta) 116 return 117 } 118 119 func ReadOneBlock( 120 ctx context.Context, 121 meta *ObjectDataMeta, 122 name string, 123 blk uint16, 124 seqnums []uint16, 125 typs []types.Type, 126 m *mpool.MPool, 127 fs fileservice.FileService, 128 policy fileservice.Policy, 129 ) (ioVec *fileservice.IOVector, err error) { 130 return ReadOneBlockWithMeta(ctx, meta, name, blk, seqnums, typs, m, fs, constructorFactory, policy) 131 } 132 133 func ReadOneBlockWithMeta( 134 ctx context.Context, 135 meta *ObjectDataMeta, 136 name string, 137 blk uint16, 138 seqnums []uint16, 139 typs []types.Type, 140 m *mpool.MPool, 141 fs fileservice.FileService, 142 factory CacheConstructorFactory, 143 policy fileservice.Policy, 144 ) (ioVec *fileservice.IOVector, err error) { 145 ioVec = &fileservice.IOVector{ 146 FilePath: name, 147 Entries: make([]fileservice.IOEntry, 0), 148 Policy: policy, 149 } 150 151 var filledEntries []fileservice.IOEntry 152 blkmeta := meta.GetBlockMeta(uint32(blk)) 153 maxSeqnum := blkmeta.GetMaxSeqnum() 154 for i, seqnum := range seqnums { 155 // special columns 156 if seqnum >= SEQNUM_UPPER { 157 metaColCnt := blkmeta.GetMetaColumnCount() 158 // read appendable block file, the last columns is commits and abort 159 if seqnum == SEQNUM_COMMITTS { 160 seqnum = metaColCnt - 2 161 } else if seqnum == SEQNUM_ABORT { 162 seqnum = metaColCnt - 1 163 } else { 164 panic(fmt.Sprintf("bad path to read special column %d", seqnum)) 165 } 166 col := blkmeta.ColumnMeta(seqnum) 167 ext := col.Location() 168 ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{ 169 Offset: int64(ext.Offset()), 170 Size: int64(ext.Length()), 171 ToCacheData: factory(int64(ext.OriginSize()), ext.Alg()), 172 }) 173 continue 174 } 175 176 // need fill vector 177 if seqnum > maxSeqnum || blkmeta.ColumnMeta(seqnum).DataType() == 0 { 178 if filledEntries == nil { 179 filledEntries = make([]fileservice.IOEntry, len(seqnums)) 180 } 181 filledEntries[i] = fileservice.IOEntry{ 182 Size: int64(seqnum), // a marker, it can not be zero 183 } 184 continue 185 } 186 187 // read written normal column 188 col := blkmeta.ColumnMeta(seqnum) 189 ext := col.Location() 190 ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{ 191 Offset: int64(ext.Offset()), 192 Size: int64(ext.Length()), 193 ToCacheData: factory(int64(ext.OriginSize()), ext.Alg()), 194 }) 195 } 196 if len(ioVec.Entries) > 0 { 197 err = fs.Read(ctx, ioVec) 198 if err != nil { 199 return 200 } 201 //TODO when to call ioVec.Release? 202 } 203 204 // need to generate vector 205 if filledEntries != nil { 206 if len(typs) == 0 { 207 panic(fmt.Sprintf("block %s generate need typs", meta.BlockHeader().BlockID().String())) 208 } 209 length := int(blkmeta.GetRows()) 210 readed := ioVec.Entries 211 for i := range filledEntries { 212 if filledEntries[i].Size == 0 { // we can tell it is the placeholder for the readed column 213 filledEntries[i] = readed[0] 214 readed = readed[1:] 215 } else { 216 logutil.Infof("block %s generate seqnum %d %v", 217 meta.BlockHeader().BlockID().String(), filledEntries[i].Size, typs[i]) 218 buf := &bytes.Buffer{} 219 buf.Write(EncodeIOEntryHeader(&IOEntryHeader{Type: IOET_ColData, Version: IOET_ColumnData_CurrVer})) 220 if err = vector.NewConstNull(typs[i], length, m).MarshalBinaryWithBuffer(buf); err != nil { 221 return 222 } 223 cacheData := fileservice.DefaultCacheDataAllocator.Alloc(buf.Len()) 224 copy(cacheData.Bytes(), buf.Bytes()) 225 filledEntries[i].CachedData = cacheData 226 } 227 } 228 ioVec.Entries = filledEntries 229 } 230 231 return 232 } 233 234 func ReadMultiBlocksWithMeta( 235 ctx context.Context, 236 name string, 237 meta ObjectMeta, 238 options map[uint16]*ReadBlockOptions, 239 fs fileservice.FileService, 240 factory CacheConstructorFactory, 241 ) (ioVec *fileservice.IOVector, err error) { 242 ioVec = &fileservice.IOVector{ 243 FilePath: name, 244 Entries: make([]fileservice.IOEntry, 0), 245 } 246 var dataMeta ObjectDataMeta 247 for _, opt := range options { 248 for seqnum := range opt.Idxes { 249 if DataMetaType(opt.DataType) == SchemaData { 250 dataMeta = meta.MustDataMeta() 251 } else if DataMetaType(opt.DataType) == SchemaTombstone { 252 dataMeta = meta.MustTombstoneMeta() 253 } else { 254 dataMeta, _ = meta.SubMeta(ConvertToCkpIdx(opt.DataType)) 255 } 256 blkmeta := dataMeta.GetBlockMeta(uint32(opt.Id)) 257 if seqnum > blkmeta.GetMaxSeqnum() || blkmeta.ColumnMeta(seqnum).DataType() == 0 { 258 // prefetch, do not generate 259 continue 260 } 261 col := blkmeta.ColumnMeta(seqnum) 262 ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{ 263 Offset: int64(col.Location().Offset()), 264 Size: int64(col.Location().Length()), 265 266 ToCacheData: factory(int64(col.Location().OriginSize()), col.Location().Alg()), 267 }) 268 } 269 } 270 271 err = fs.Read(ctx, ioVec) 272 //TODO when to call ioVec.Release? 273 return 274 } 275 276 func ReadAllBlocksWithMeta( 277 ctx context.Context, 278 meta *ObjectDataMeta, 279 name string, 280 cols []uint16, 281 policy fileservice.Policy, 282 m *mpool.MPool, 283 fs fileservice.FileService, 284 factory CacheConstructorFactory, 285 ) (ioVec *fileservice.IOVector, err error) { 286 ioVec = &fileservice.IOVector{ 287 FilePath: name, 288 Entries: make([]fileservice.IOEntry, 0, len(cols)*int(meta.BlockCount())), 289 Policy: policy, 290 } 291 for blk := uint32(0); blk < meta.BlockCount(); blk++ { 292 for _, seqnum := range cols { 293 blkmeta := meta.GetBlockMeta(blk) 294 if seqnum > blkmeta.GetMaxSeqnum() || blkmeta.ColumnMeta(seqnum).DataType() == 0 { 295 // prefetch, do not generate 296 panic("ReadAllBlocksWithMeta expect no schema changes") 297 } 298 col := blkmeta.ColumnMeta(seqnum) 299 ext := col.Location() 300 ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{ 301 Offset: int64(ext.Offset()), 302 Size: int64(ext.Length()), 303 304 ToCacheData: factory(int64(ext.OriginSize()), ext.Alg()), 305 }) 306 } 307 } 308 309 err = fs.Read(ctx, ioVec) 310 //TODO when to call ioVec.Release? 311 return 312 } 313 314 func ReadOneBlockAllColumns( 315 ctx context.Context, 316 meta *ObjectDataMeta, 317 name string, 318 id uint32, 319 cols []uint16, 320 cachePolicy fileservice.Policy, 321 fs fileservice.FileService, 322 ) (bat *batch.Batch, err error) { 323 ioVec := &fileservice.IOVector{ 324 FilePath: name, 325 Entries: make([]fileservice.IOEntry, 0), 326 Policy: cachePolicy, 327 } 328 for _, seqnum := range cols { 329 blkmeta := meta.GetBlockMeta(id) 330 col := blkmeta.ColumnMeta(seqnum) 331 ext := col.Location() 332 ioVec.Entries = append(ioVec.Entries, fileservice.IOEntry{ 333 Offset: int64(ext.Offset()), 334 Size: int64(ext.Length()), 335 336 ToCacheData: constructorFactory(int64(ext.OriginSize()), ext.Alg()), 337 }) 338 } 339 340 err = fs.Read(ctx, ioVec) 341 //TODO when to call ioVec.Release? 342 bat = batch.NewWithSize(len(cols)) 343 var obj any 344 for i := range cols { 345 obj, err = Decode(ioVec.Entries[i].CachedData.Bytes()) 346 if err != nil { 347 return nil, err 348 } 349 bat.Vecs[i] = obj.(*vector.Vector) 350 bat.SetRowCount(bat.Vecs[i].Length()) 351 } 352 return 353 }