github.com/matrixorigin/matrixone@v1.2.0/pkg/container/batch/batch.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package batch 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "github.com/matrixorigin/matrixone/pkg/sql/colexec/aggexec" 22 "sync/atomic" 23 24 "github.com/matrixorigin/matrixone/pkg/common/hashmap" 25 "github.com/matrixorigin/matrixone/pkg/common/moerr" 26 "github.com/matrixorigin/matrixone/pkg/common/mpool" 27 "github.com/matrixorigin/matrixone/pkg/container/types" 28 "github.com/matrixorigin/matrixone/pkg/container/vector" 29 "github.com/matrixorigin/matrixone/pkg/logutil" 30 ) 31 32 func New(ro bool, attrs []string) *Batch { 33 return &Batch{ 34 Ro: ro, 35 Cnt: 1, 36 Attrs: attrs, 37 Vecs: make([]*vector.Vector, len(attrs)), 38 rowCount: 0, 39 } 40 } 41 42 func NewWithSize(n int) *Batch { 43 return &Batch{ 44 Cnt: 1, 45 Vecs: make([]*vector.Vector, n), 46 rowCount: 0, 47 } 48 } 49 50 func SetLength(bat *Batch, n int) { 51 for _, vec := range bat.Vecs { 52 vec.SetLength(n) 53 } 54 bat.rowCount = n 55 } 56 57 func (bat *Batch) MarshalBinary() ([]byte, error) { 58 aggInfos := make([][]byte, len(bat.Aggs)) 59 for i, exec := range bat.Aggs { 60 data, err := aggexec.MarshalAggFuncExec(exec) 61 if err != nil { 62 return nil, err 63 } 64 aggInfos[i] = data 65 } 66 67 return types.Encode(&EncodeBatch{ 68 rowCount: int64(bat.rowCount), 69 Vecs: bat.Vecs, 70 Attrs: bat.Attrs, 71 AggInfos: aggInfos, 72 Recursive: bat.Recursive, 73 }) 74 } 75 76 func (bat *Batch) UnmarshalBinary(data []byte) (err error) { 77 return bat.unmarshalBinaryWithAnyMp(data, nil) 78 } 79 80 func (bat *Batch) UnmarshalBinaryWithCopy(data []byte, mp *mpool.MPool) error { 81 return bat.unmarshalBinaryWithAnyMp(data, mp) 82 } 83 84 func (bat *Batch) unmarshalBinaryWithAnyMp(data []byte, mp *mpool.MPool) (err error) { 85 rbat := new(EncodeBatch) 86 if err = rbat.UnmarshalBinaryWithCopy(data, mp); err != nil { 87 return err 88 } 89 90 bat.Recursive = rbat.Recursive 91 bat.Cnt = 1 92 bat.rowCount = int(rbat.rowCount) 93 bat.Vecs = rbat.Vecs 94 bat.Attrs = append(bat.Attrs, rbat.Attrs...) 95 96 if len(rbat.AggInfos) > 0 { 97 bat.Aggs = make([]aggexec.AggFuncExec, len(rbat.AggInfos)) 98 var aggMemoryManager aggexec.AggMemoryManager = nil 99 if mp != nil { 100 aggMemoryManager = aggexec.NewSimpleAggMemoryManager(mp) 101 } 102 103 for i, info := range rbat.AggInfos { 104 if bat.Aggs[i], err = aggexec.UnmarshalAggFuncExec(aggMemoryManager, info); err != nil { 105 return err 106 } 107 } 108 } 109 return nil 110 } 111 112 func (bat *Batch) Shrink(sels []int64, negate bool) { 113 if !negate { 114 if len(sels) == bat.rowCount { 115 return 116 } 117 } 118 for _, vec := range bat.Vecs { 119 vec.Shrink(sels, negate) 120 } 121 if negate { 122 bat.rowCount -= len(sels) 123 return 124 } 125 bat.rowCount = len(sels) 126 } 127 128 func (bat *Batch) Shuffle(sels []int64, m *mpool.MPool) error { 129 if len(sels) > 0 { 130 mp := make(map[*vector.Vector]uint8) 131 for _, vec := range bat.Vecs { 132 if _, ok := mp[vec]; ok { 133 continue 134 } 135 mp[vec]++ 136 if err := vec.Shuffle(sels, m); err != nil { 137 return err 138 } 139 } 140 bat.rowCount = len(sels) 141 } 142 return nil 143 } 144 145 func (bat *Batch) Size() int { 146 var size int 147 148 for _, vec := range bat.Vecs { 149 size += vec.Size() 150 } 151 return size 152 } 153 154 func (bat *Batch) RowCount() int { 155 return bat.rowCount 156 } 157 158 func (bat *Batch) VectorCount() int { 159 return len(bat.Vecs) 160 } 161 162 func (bat *Batch) Prefetch(poses []int32, vecs []*vector.Vector) { 163 for i, pos := range poses { 164 vecs[i] = bat.GetVector(pos) 165 } 166 } 167 168 func (bat *Batch) SetAttributes(attrs []string) { 169 bat.Attrs = attrs 170 } 171 172 func (bat *Batch) SetVector(pos int32, vec *vector.Vector) { 173 bat.Vecs[pos] = vec 174 } 175 176 func (bat *Batch) GetVector(pos int32) *vector.Vector { 177 return bat.Vecs[pos] 178 } 179 180 func (bat *Batch) GetSubBatch(cols []string) *Batch { 181 mp := make(map[string]int) 182 for i, attr := range bat.Attrs { 183 mp[attr] = i 184 } 185 rbat := NewWithSize(len(cols)) 186 for i, col := range cols { 187 rbat.Vecs[i] = bat.Vecs[mp[col]] 188 } 189 rbat.rowCount = bat.rowCount 190 return rbat 191 } 192 193 func (bat *Batch) Clean(m *mpool.MPool) { 194 if bat == EmptyBatch { 195 return 196 } 197 if atomic.LoadInt64(&bat.Cnt) == 0 { 198 // panic("batch is already cleaned") 199 return 200 } 201 if atomic.AddInt64(&bat.Cnt, -1) > 0 { 202 return 203 } 204 for _, vec := range bat.Vecs { 205 if vec != nil { 206 vec.Free(m) 207 } 208 } 209 for _, agg := range bat.Aggs { 210 if agg != nil { 211 agg.Free() 212 } 213 } 214 bat.Attrs = nil 215 bat.rowCount = 0 216 bat.Vecs = nil 217 } 218 219 func (bat *Batch) Last() bool { 220 return bat.Recursive > 0 221 } 222 223 func (bat *Batch) SetEnd() { 224 bat.Recursive = 2 225 } 226 227 func (bat *Batch) SetLast() { 228 bat.Recursive = 1 229 } 230 231 func (bat *Batch) End() bool { 232 return bat.Recursive == 2 233 } 234 235 func (bat *Batch) CleanOnlyData() { 236 for _, vec := range bat.Vecs { 237 if vec != nil { 238 vec.CleanOnlyData() 239 } 240 } 241 bat.rowCount = 0 242 } 243 244 func (bat *Batch) String() string { 245 var buf bytes.Buffer 246 247 for i, vec := range bat.Vecs { 248 buf.WriteString(fmt.Sprintf("%d : %s\n", i, vec.String())) 249 } 250 return buf.String() 251 } 252 253 func (bat *Batch) Log(tag string) { 254 if bat == nil || bat.rowCount < 1 { 255 return 256 } 257 logutil.Infof("\n" + tag + "\n" + bat.String()) 258 } 259 260 func (bat *Batch) Dup(mp *mpool.MPool) (*Batch, error) { 261 var err error 262 263 rbat := NewWithSize(len(bat.Vecs)) 264 rbat.SetAttributes(bat.Attrs) 265 rbat.Recursive = bat.Recursive 266 for j, vec := range bat.Vecs { 267 typ := *bat.GetVector(int32(j)).GetType() 268 rvec := vector.NewVec(typ) 269 if err = vector.GetUnionAllFunction(typ, mp)(rvec, vec); err != nil { 270 rbat.Clean(mp) 271 return nil, err 272 } 273 rbat.SetVector(int32(j), rvec) 274 } 275 rbat.rowCount = bat.rowCount 276 277 //if len(bat.Aggs) > 0 { 278 // rbat.Aggs = make([]aggexec.AggFuncExec, len(bat.Aggs)) 279 // aggMemoryManager := aggexec.NewSimpleAggMemoryManager(mp) 280 // 281 // for i, agg := range bat.Aggs { 282 // rbat.Aggs[i], err = aggexec.CopyAggFuncExec(aggMemoryManager, agg) 283 // if err != nil { 284 // rbat.Clean(mp) 285 // return nil, err 286 // } 287 // } 288 //} 289 // if bat.AuxData != nil { 290 // if m, ok := bat.AuxData.(*hashmap.JoinMap); ok { 291 // rbat.AuxData = &hashmap.JoinMap{ 292 // cnt: m 293 // } 294 // } 295 // } 296 return rbat, nil 297 } 298 299 func (bat *Batch) PreExtend(m *mpool.MPool, rows int) error { 300 for i := range bat.Vecs { 301 if err := bat.Vecs[i].PreExtend(rows, m); err != nil { 302 return err 303 } 304 } 305 return nil 306 } 307 308 func (bat *Batch) AppendWithCopy(ctx context.Context, mh *mpool.MPool, b *Batch) (*Batch, error) { 309 if bat == nil { 310 return b.Dup(mh) 311 } 312 if len(bat.Vecs) != len(b.Vecs) { 313 return nil, moerr.NewInternalError(ctx, "unexpected error happens in batch append") 314 } 315 if len(bat.Vecs) == 0 { 316 return bat, nil 317 } 318 319 for i := range bat.Vecs { 320 if err := bat.Vecs[i].UnionBatch(b.Vecs[i], 0, b.Vecs[i].Length(), nil, mh); err != nil { 321 return bat, err 322 } 323 bat.Vecs[i].SetSorted(false) 324 } 325 bat.rowCount += b.rowCount 326 return bat, nil 327 } 328 329 func (bat *Batch) Append(ctx context.Context, mh *mpool.MPool, b *Batch) (*Batch, error) { 330 if bat == nil { 331 return b, nil 332 } 333 if len(bat.Vecs) != len(b.Vecs) { 334 return nil, moerr.NewInternalError(ctx, "unexpected error happens in batch append") 335 } 336 if len(bat.Vecs) == 0 { 337 return bat, nil 338 } 339 340 for i := range bat.Vecs { 341 if err := bat.Vecs[i].UnionBatch(b.Vecs[i], 0, b.Vecs[i].Length(), nil, mh); err != nil { 342 return bat, err 343 } 344 bat.Vecs[i].SetSorted(false) 345 } 346 bat.rowCount += b.rowCount 347 return bat, nil 348 } 349 350 func (bat *Batch) AddRowCount(rowCount int) { 351 bat.rowCount += rowCount 352 } 353 354 func (bat *Batch) SetRowCount(rowCount int) { 355 bat.rowCount = rowCount 356 } 357 358 func (bat *Batch) AddCnt(cnt int) { 359 atomic.AddInt64(&bat.Cnt, int64(cnt)) 360 } 361 362 // func (bat *Batch) SubCnt(cnt int) { 363 // atomic.StoreInt64(&bat.Cnt, bat.Cnt-int64(cnt)) 364 // } 365 366 func (bat *Batch) SetCnt(cnt int64) { 367 atomic.StoreInt64(&bat.Cnt, cnt) 368 } 369 370 func (bat *Batch) GetCnt() int64 { 371 return atomic.LoadInt64(&bat.Cnt) 372 } 373 374 func (bat *Batch) ReplaceVector(oldVec *vector.Vector, newVec *vector.Vector) { 375 for i, vec := range bat.Vecs { 376 if vec == oldVec { 377 bat.SetVector(int32(i), newVec) 378 } 379 } 380 } 381 382 func (bat *Batch) IsEmpty() bool { 383 return bat.rowCount == 0 && bat.AuxData == nil && len(bat.Aggs) == 0 384 } 385 386 func (bat *Batch) DupJmAuxData() (ret *hashmap.JoinMap) { 387 if bat.AuxData == nil { 388 return 389 } 390 jm := bat.AuxData.(*hashmap.JoinMap) 391 if jm.IsDup() { 392 ret = jm.Dup() 393 } else { 394 ret = jm 395 bat.AuxData = nil 396 } 397 return 398 }