github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/containers/batch.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package containers 16 17 import ( 18 "bytes" 19 "fmt" 20 "io" 21 "unsafe" 22 23 "github.com/RoaringBitmap/roaring" 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/common/mpool" 26 "github.com/matrixorigin/matrixone/pkg/container/types" 27 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 28 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/stl/containers" 29 ) 30 31 func NewBatch() *Batch { 32 return &Batch{ 33 Attrs: make([]string, 0), 34 nameidx: make(map[string]int), 35 Vecs: make([]Vector, 0), 36 } 37 } 38 39 func (bat *Batch) AddVector(attr string, vec Vector) { 40 if _, exist := bat.nameidx[attr]; exist { 41 panic(moerr.NewInternalErrorNoCtx("duplicate vector %s", attr)) 42 } 43 idx := len(bat.Vecs) 44 bat.nameidx[attr] = idx 45 bat.Attrs = append(bat.Attrs, attr) 46 bat.Vecs = append(bat.Vecs, vec) 47 } 48 49 func (bat *Batch) GetVectorByName(name string) Vector { 50 pos := bat.nameidx[name] 51 return bat.Vecs[pos] 52 } 53 54 func (bat *Batch) RangeDelete(start, end int) { 55 if bat.Deletes == nil { 56 bat.Deletes = roaring.New() 57 } 58 bat.Deletes.AddRange(uint64(start), uint64(end)) 59 } 60 61 func (bat *Batch) Delete(i int) { 62 if bat.Deletes == nil { 63 bat.Deletes = roaring.BitmapOf(uint32(i)) 64 } else { 65 bat.Deletes.Add(uint32(i)) 66 } 67 } 68 69 func (bat *Batch) HasDelete() bool { 70 return bat.Deletes != nil && !bat.Deletes.IsEmpty() 71 } 72 73 func (bat *Batch) IsDeleted(i int) bool { 74 if !bat.HasDelete() { 75 return false 76 } 77 return bat.Deletes.ContainsInt(i) 78 } 79 80 func (bat *Batch) DeleteCnt() int { 81 if !bat.HasDelete() { 82 return 0 83 } 84 return int(bat.Deletes.GetCardinality()) 85 } 86 87 func (bat *Batch) Compact() { 88 if !bat.HasDelete() { 89 return 90 } 91 for _, vec := range bat.Vecs { 92 vec.Compact(bat.Deletes) 93 } 94 bat.Deletes = nil 95 } 96 97 func (bat *Batch) Length() int { 98 return bat.Vecs[0].Length() 99 } 100 101 func (bat *Batch) Capacity() int { 102 return bat.Vecs[0].Capacity() 103 } 104 105 func (bat *Batch) Allocated() int { 106 allocated := 0 107 for _, vec := range bat.Vecs { 108 allocated += vec.Allocated() 109 } 110 return allocated 111 } 112 113 func (bat *Batch) Window(offset, length int) *Batch { 114 win := new(Batch) 115 win.Attrs = bat.Attrs 116 win.nameidx = bat.nameidx 117 if bat.Deletes != nil && offset+length != bat.Length() { 118 win.Deletes = common.BM32Window(bat.Deletes, offset, offset+length) 119 } else { 120 win.Deletes = bat.Deletes 121 } 122 win.Vecs = make([]Vector, len(bat.Vecs)) 123 for i := range win.Vecs { 124 win.Vecs[i] = bat.Vecs[i].Window(offset, length) 125 } 126 return win 127 } 128 129 func (bat *Batch) CloneWindow(offset, length int, allocator ...*mpool.MPool) (cloned *Batch) { 130 cloned = new(Batch) 131 cloned.Attrs = make([]string, len(bat.Attrs)) 132 copy(cloned.Attrs, bat.Attrs) 133 cloned.nameidx = make(map[string]int, len(bat.nameidx)) 134 for k, v := range bat.nameidx { 135 cloned.nameidx[k] = v 136 } 137 if bat.Deletes != nil { 138 cloned.Deletes = common.BM32Window(bat.Deletes, offset, offset+length) 139 } 140 cloned.Vecs = make([]Vector, len(bat.Vecs)) 141 for i := range cloned.Vecs { 142 cloned.Vecs[i] = bat.Vecs[i].CloneWindow(offset, length, allocator...) 143 } 144 return 145 } 146 147 func (bat *Batch) String() string { 148 return bat.PPString(10) 149 } 150 151 func (bat *Batch) PPString(num int) string { 152 var w bytes.Buffer 153 for i, vec := range bat.Vecs { 154 _, _ = w.WriteString(fmt.Sprintf("[Name=%s]", bat.Attrs[i])) 155 _, _ = w.WriteString(vec.PPString(num)) 156 _ = w.WriteByte('\n') 157 } 158 return w.String() 159 } 160 161 func (bat *Batch) Close() { 162 for _, vec := range bat.Vecs { 163 vec.Close() 164 } 165 } 166 167 func (bat *Batch) Equals(o *Batch) bool { 168 if bat.Length() != o.Length() { 169 return false 170 } 171 if bat.DeleteCnt() != o.DeleteCnt() { 172 return false 173 } 174 if bat.HasDelete() { 175 if !bat.Deletes.Equals(o.Deletes) { 176 return false 177 } 178 } 179 for i := range bat.Vecs { 180 if bat.Attrs[i] != o.Attrs[i] { 181 return false 182 } 183 if !bat.Vecs[i].Equals(o.Vecs[i]) { 184 return false 185 } 186 } 187 return true 188 } 189 190 func (bat *Batch) WriteTo(w io.Writer) (n int64, err error) { 191 var nr int 192 var tmpn int64 193 buffer := containers.NewVector[[]byte]() 194 defer buffer.Close() 195 // 1. Vector cnt 196 // if nr, err = w.Write(types.EncodeFixed(uint16(len(bat.Vecs)))); err != nil { 197 // return 198 // } 199 // n += int64(nr) 200 buffer.Append(types.EncodeFixed(uint16(len(bat.Vecs)))) 201 202 // 2. Types and Names 203 for i, vec := range bat.Vecs { 204 buffer.Append([]byte(bat.Attrs[i])) 205 vt := vec.GetType() 206 buffer.Append(types.EncodeType(&vt)) 207 } 208 if tmpn, err = buffer.WriteTo(w); err != nil { 209 return 210 } 211 n += tmpn 212 213 // 3. Vectors 214 for _, vec := range bat.Vecs { 215 if tmpn, err = vec.WriteTo(w); err != nil { 216 return 217 } 218 n += tmpn 219 } 220 // 4. Deletes 221 var buf []byte 222 if bat.Deletes != nil { 223 if buf, err = bat.Deletes.ToBytes(); err != nil { 224 return 225 } 226 } 227 if nr, err = w.Write(types.EncodeFixed(uint32(len(buf)))); err != nil { 228 return 229 } 230 n += int64(nr) 231 if len(buf) == 0 { 232 return 233 } 234 if nr, err = w.Write(buf); err != nil { 235 return 236 } 237 n += int64(nr) 238 239 return 240 } 241 242 func (bat *Batch) ReadFrom(r io.Reader) (n int64, err error) { 243 var tmpn int64 244 buffer := containers.NewVector[[]byte]() 245 defer buffer.Close() 246 if tmpn, err = buffer.ReadFrom(r); err != nil { 247 return 248 } 249 n += tmpn 250 pos := 0 251 buf := buffer.Get(pos) 252 pos++ 253 cnt := types.DecodeFixed[uint16](buf) 254 vecTypes := make([]types.Type, cnt) 255 bat.Attrs = make([]string, cnt) 256 for i := 0; i < int(cnt); i++ { 257 buf = buffer.Get(pos) 258 pos++ 259 bat.Attrs[i] = string(buf) 260 bat.nameidx[bat.Attrs[i]] = i 261 buf = buffer.Get(pos) 262 vecTypes[i] = types.DecodeType(buf) 263 pos++ 264 } 265 for _, vecType := range vecTypes { 266 vec := MakeVector(vecType, true) 267 if tmpn, err = vec.ReadFrom(r); err != nil { 268 return 269 } 270 bat.Vecs = append(bat.Vecs, vec) 271 n += tmpn 272 } 273 // Read Deletes 274 buf = make([]byte, int(unsafe.Sizeof(uint32(0)))) 275 if _, err = r.Read(buf); err != nil { 276 return 277 } 278 n += int64(len(buf)) 279 size := types.DecodeFixed[uint32](buf) 280 if size == 0 { 281 return 282 } 283 bat.Deletes = roaring.New() 284 if tmpn, err = bat.Deletes.ReadFrom(r); err != nil { 285 return 286 } 287 n += tmpn 288 289 return 290 } 291 292 func (bat *Batch) Split(cnt int) []*Batch { 293 if cnt == 1 { 294 return []*Batch{bat} 295 } 296 length := bat.Length() 297 rows := length / cnt 298 if length%cnt == 0 { 299 bats := make([]*Batch, 0, cnt) 300 for i := 0; i < cnt; i++ { 301 newBat := bat.Window(i*rows, rows) 302 bats = append(bats, newBat) 303 } 304 return bats 305 } 306 rowArray := make([]int, 0) 307 if length/cnt == 0 { 308 for i := 0; i < length; i++ { 309 rowArray = append(rowArray, 1) 310 } 311 } else { 312 left := length 313 for i := 0; i < cnt; i++ { 314 if left >= rows && i < cnt-1 { 315 rowArray = append(rowArray, rows) 316 } else { 317 rowArray = append(rowArray, left) 318 } 319 left -= rows 320 } 321 } 322 start := 0 323 bats := make([]*Batch, 0, cnt) 324 for _, row := range rowArray { 325 newBat := bat.Window(start, row) 326 start += row 327 bats = append(bats, newBat) 328 } 329 return bats 330 } 331 332 func (bat *Batch) Append(src *Batch) (err error) { 333 for i, vec := range bat.Vecs { 334 vec.Extend(src.Vecs[i]) 335 } 336 return 337 } 338 339 // extend vector with same name, consume src batch 340 func (bat *Batch) Extend(src *Batch) { 341 for i, vec := range bat.Vecs { 342 attr := bat.Attrs[i] 343 if idx, ok := src.nameidx[attr]; ok { 344 vec.Extend(src.Vecs[idx]) 345 } 346 } 347 src.Close() 348 }