github.com/matrixorigin/matrixone@v0.7.0/pkg/container/batch/batch.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package batch 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "sync/atomic" 22 23 "github.com/matrixorigin/matrixone/pkg/sql/colexec/agg" 24 25 "github.com/matrixorigin/matrixone/pkg/common/moerr" 26 "github.com/matrixorigin/matrixone/pkg/common/mpool" 27 "github.com/matrixorigin/matrixone/pkg/container/index" 28 "github.com/matrixorigin/matrixone/pkg/container/types" 29 "github.com/matrixorigin/matrixone/pkg/container/vector" 30 "github.com/matrixorigin/matrixone/pkg/util/fault" 31 "github.com/matrixorigin/matrixone/pkg/vectorize/shuffle" 32 ) 33 34 func New(ro bool, attrs []string) *Batch { 35 return &Batch{ 36 Ro: ro, 37 Attrs: attrs, 38 Vecs: make([]*vector.Vector, len(attrs)), 39 } 40 } 41 42 func Reorder(bat *Batch, attrs []string) { 43 if bat.Ro { 44 Cow(bat) 45 } 46 for i, name := range attrs { 47 for j, attr := range bat.Attrs { 48 if name == attr { 49 bat.Vecs[i], bat.Vecs[j] = bat.Vecs[j], bat.Vecs[i] 50 bat.Attrs[i], bat.Attrs[j] = bat.Attrs[j], bat.Attrs[i] 51 } 52 } 53 } 54 } 55 56 func SetLength(bat *Batch, n int) { 57 for _, vec := range bat.Vecs { 58 vector.SetLength(vec, n) 59 } 60 bat.Zs = bat.Zs[:n] 61 } 62 63 func Length(bat *Batch) int { 64 return len(bat.Zs) 65 } 66 67 func Cow(bat *Batch) { 68 attrs := make([]string, len(bat.Attrs)) 69 copy(attrs, bat.Attrs) 70 bat.Ro = false 71 bat.Attrs = attrs 72 } 73 74 func NewWithSize(n int) *Batch { 75 return &Batch{ 76 Cnt: 1, 77 Vecs: make([]*vector.Vector, n), 78 } 79 } 80 81 func (info *aggInfo) MarshalBinary() ([]byte, error) { 82 var buf bytes.Buffer 83 i32 := int32(info.Op) 84 buf.Write(types.EncodeInt32(&i32)) 85 buf.Write(types.EncodeBool(&info.Dist)) 86 buf.Write(types.EncodeType(&info.inputTypes)) 87 data, err := types.Encode(info.Agg) 88 if err != nil { 89 return nil, err 90 } 91 buf.Write(data) 92 return buf.Bytes(), nil 93 } 94 95 func (info *aggInfo) UnmarshalBinary(data []byte) error { 96 info.Op = int(types.DecodeInt32(data[:4])) 97 data = data[4:] 98 info.Dist = types.DecodeBool(data[:1]) 99 data = data[1:] 100 info.inputTypes = types.DecodeType(data[:types.TSize]) 101 data = data[types.TSize:] 102 aggregate, err := agg.New(info.Op, info.Dist, info.inputTypes) 103 if err != nil { 104 return err 105 } 106 info.Agg = aggregate 107 return types.Decode(data, info.Agg) 108 } 109 110 func (bat *Batch) MarshalBinary() ([]byte, error) { 111 aggInfo := make([]aggInfo, len(bat.Aggs)) 112 for i := range aggInfo { 113 aggInfo[i].Op = bat.Aggs[i].GetOperatorId() 114 aggInfo[i].inputTypes = bat.Aggs[i].GetInputTypes()[0] 115 aggInfo[i].Dist = bat.Aggs[i].IsDistinct() 116 aggInfo[i].Agg = bat.Aggs[i] 117 } 118 return types.Encode(&EncodeBatch{ 119 Zs: bat.Zs, 120 Vecs: bat.Vecs, 121 Attrs: bat.Attrs, 122 AggInfos: aggInfo, 123 }) 124 } 125 126 func (bat *Batch) UnmarshalBinary(data []byte) error { 127 rbat := new(EncodeBatch) 128 129 if err := types.Decode(data, rbat); err != nil { 130 return err 131 } 132 bat.Cnt = 1 133 bat.Zs = rbat.Zs // if you drop rbat.Zs is ok, if you need return rbat, you must deepcopy Zs. 134 bat.Vecs = rbat.Vecs 135 bat.Attrs = rbat.Attrs 136 bat.Aggs = make([]agg.Agg[any], len(rbat.AggInfos)) 137 for i, info := range rbat.AggInfos { 138 bat.Aggs[i] = info.Agg 139 } 140 return nil 141 } 142 143 func (bat *Batch) ExpandNulls() { 144 if len(bat.Zs) > 0 { 145 for i := range bat.Vecs { 146 bat.Vecs[i].TryExpandNulls(len(bat.Zs)) 147 } 148 } 149 } 150 151 func (bat *Batch) Shrink(sels []int64) { 152 mp := make(map[*vector.Vector]uint8) 153 for _, vec := range bat.Vecs { 154 if _, ok := mp[vec]; ok { 155 continue 156 } 157 mp[vec]++ 158 vector.Shrink(vec, sels) 159 } 160 vs := bat.Zs 161 for i, sel := range sels { 162 vs[i] = vs[sel] 163 } 164 bat.Zs = bat.Zs[:len(sels)] 165 } 166 167 func (bat *Batch) Shuffle(sels []int64, m *mpool.MPool) error { 168 if len(sels) > 0 { 169 mp := make(map[*vector.Vector]uint8) 170 for _, vec := range bat.Vecs { 171 if _, ok := mp[vec]; ok { 172 continue 173 } 174 mp[vec]++ 175 if err := vector.Shuffle(vec, sels, m); err != nil { 176 return err 177 } 178 } 179 180 ws := make([]int64, len(sels)) 181 bat.Zs = shuffle.FixedLengthShuffle(bat.Zs, ws, sels) 182 } 183 return nil 184 } 185 186 func (bat *Batch) Size() int { 187 var size int 188 189 for _, vec := range bat.Vecs { 190 size += vec.Size() 191 } 192 return size 193 } 194 195 func (bat *Batch) Length() int { 196 return len(bat.Zs) 197 } 198 199 func (bat *Batch) VectorCount() int { 200 return len(bat.Vecs) 201 } 202 203 func (bat *Batch) Prefetch(poses []int32, vecs []*vector.Vector) { 204 for i, pos := range poses { 205 vecs[i] = bat.GetVector(pos) 206 } 207 } 208 209 func (bat *Batch) SetAttributes(attrs []string) { 210 bat.Attrs = attrs 211 } 212 213 func (bat *Batch) SetVector(pos int32, vec *vector.Vector) { 214 bat.Vecs[pos] = vec 215 } 216 217 func (bat *Batch) GetVector(pos int32) *vector.Vector { 218 return bat.Vecs[pos] 219 } 220 221 func (bat *Batch) GetSubBatch(cols []string) *Batch { 222 mp := make(map[string]int) 223 for i, attr := range bat.Attrs { 224 mp[attr] = i 225 } 226 rbat := NewWithSize(len(cols)) 227 for i, col := range cols { 228 rbat.Vecs[i] = bat.Vecs[mp[col]] 229 } 230 rbat.Zs = append([]int64{}, bat.Zs...) 231 return rbat 232 } 233 234 func (bat *Batch) Clean(m *mpool.MPool) { 235 if atomic.AddInt64(&bat.Cnt, -1) != 0 { 236 return 237 } 238 for _, vec := range bat.Vecs { 239 if vec != nil { 240 vec.Free(m) 241 if vec.IsLowCardinality() { 242 vec.Index().(*index.LowCardinalityIndex).Free() 243 } 244 } 245 } 246 for _, agg := range bat.Aggs { 247 if agg != nil { 248 agg.Free(m) 249 } 250 } 251 if len(bat.Zs) != 0 { 252 m.PutSels(bat.Zs) 253 bat.Zs = nil 254 } 255 bat.Vecs = nil 256 } 257 258 func (bat *Batch) String() string { 259 var buf bytes.Buffer 260 261 for i, vec := range bat.Vecs { 262 buf.WriteString(fmt.Sprintf("%v\n", i)) 263 if len(bat.Zs) > 0 { 264 buf.WriteString(fmt.Sprintf("\t%s\n", vec)) 265 } 266 } 267 return buf.String() 268 } 269 270 func (bat *Batch) Append(ctx context.Context, mh *mpool.MPool, b *Batch) (*Batch, error) { 271 if bat == nil { 272 return b, nil 273 } 274 if len(bat.Vecs) != len(b.Vecs) { 275 return nil, moerr.NewInternalError(ctx, "unexpected error happens in batch append") 276 } 277 if len(bat.Vecs) == 0 { 278 return bat, nil 279 } 280 281 // XXX Here is a good place to trigger an panic for fault injection. 282 // fault.AddFaultPoint("panic_in_batch_append", ":::", "PANIC", 0, "") 283 fault.TriggerFault("panic_in_batch_append") 284 285 flags := make([]uint8, vector.Length(b.Vecs[0])) 286 for i := range flags { 287 flags[i]++ 288 } 289 for i := range bat.Vecs { 290 if err := vector.UnionBatch(bat.Vecs[i], b.Vecs[i], 0, vector.Length(b.Vecs[i]), flags[:vector.Length(b.Vecs[i])], mh); err != nil { 291 return bat, err 292 } 293 if b.Vecs[i].IsLowCardinality() { 294 idx := b.Vecs[i].Index().(*index.LowCardinalityIndex) 295 if bat.Vecs[i].Index() == nil { 296 bat.Vecs[i].SetIndex(idx.Dup()) 297 } else { 298 appendIdx := bat.Vecs[i].Index().(*index.LowCardinalityIndex) 299 dst, src := appendIdx.GetPoses(), idx.GetPoses() 300 if err := vector.UnionBatch(dst, src, 0, vector.Length(src), flags[:vector.Length(src)], mh); err != nil { 301 return bat, err 302 } 303 } 304 } 305 } 306 bat.Zs = append(bat.Zs, b.Zs...) 307 return bat, nil 308 } 309 310 // XXX I will slowly remove all code that uses InitZsone. 311 func (bat *Batch) SetZs(len int, m *mpool.MPool) { 312 bat.Zs = m.GetSels() 313 for i := 0; i < len; i++ { 314 bat.Zs = append(bat.Zs, 1) 315 } 316 } 317 318 // InitZsOne init Batch.Zs and values are all 1 319 func (bat *Batch) InitZsOne(len int) { 320 bat.Zs = make([]int64, len) 321 for i := range bat.Zs { 322 bat.Zs[i]++ 323 } 324 }