github.com/viant/toolbox@v0.34.5/data/compacted.go (about) 1 package data 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "github.com/viant/toolbox" 8 "reflect" 9 "sync" 10 "sync/atomic" 11 ) 12 13 type Field struct { 14 Name string 15 Type reflect.Type 16 index int 17 } 18 19 type nilGroup int 20 21 //CompactedSlice represented a compacted slice to represent object collection 22 type CompactedSlice struct { 23 omitEmpty bool 24 compressNils bool 25 lock *sync.RWMutex 26 fieldNames map[string]*Field 27 fields []*Field 28 data [][]interface{} 29 size int64 30 RawEncoding bool 31 } 32 33 34 func (d CompactedSlice) MarshalJSON() ([]byte, error) { 35 buf := new(bytes.Buffer) 36 _, err := buf.Write([]byte("[")) 37 if err != nil { 38 return nil, err 39 } 40 i := 0 41 if err = d.Range(func(item interface{}) (b bool, err error) { 42 if i > 0 { 43 _, err := buf.Write([]byte(",")) 44 if err != nil { 45 return false, err 46 } 47 } 48 i++ 49 data, err :=json.Marshal(item) 50 if err != nil { 51 return false, err 52 } 53 _, err = buf.Write(data) 54 return err == nil, err 55 });err != nil { 56 return nil, err 57 } 58 if _, err := buf.Write([]byte("]")); err != nil { 59 return nil, err 60 } 61 return buf.Bytes(), nil 62 } 63 64 65 66 func (s *CompactedSlice) Fields() []*Field { 67 return s.fields 68 } 69 70 //Size returns size of collection 71 func (s *CompactedSlice) Size() int { 72 return int(atomic.LoadInt64(&s.size)) 73 } 74 75 func (s *CompactedSlice) index(fieldName string, value interface{}) int { 76 s.lock.RLock() 77 f, ok := s.fieldNames[fieldName] 78 s.lock.RUnlock() 79 if ok { 80 return f.index 81 } 82 f = &Field{Name: fieldName, index: len(s.fieldNames), Type: reflect.TypeOf(value)} 83 s.lock.Lock() 84 defer s.lock.Unlock() 85 s.fieldNames[fieldName] = f 86 s.fields = append(s.fields, f) 87 return f.index 88 } 89 90 func expandIfNeeded(size int, data []interface{}) []interface{} { 91 if size >= len(data) { 92 for i := len(data); i < size; i++ { 93 data = append(data, nil) 94 } 95 } 96 return data 97 } 98 99 func (s *CompactedSlice) compress(data []interface{}) []interface{} { 100 var compressed = make([]interface{}, 0) 101 var nilCount = 0 102 for _, item := range data { 103 if item != nil { 104 switch nilCount { 105 case 0: 106 case 1: 107 compressed = append(compressed, nil) 108 default: 109 compressed = append(compressed, nilGroup(nilCount)) 110 } 111 compressed = append(compressed, item) 112 nilCount = 0 113 continue 114 } 115 nilCount++ 116 } 117 return compressed 118 } 119 120 func (s *CompactedSlice) uncompress(in, out []interface{}) { 121 var index = 0 122 for i := 0; i < len(in); i++ { 123 var item = in[i] 124 nilGroup, ok := item.(nilGroup) 125 if !ok { 126 out[index] = item 127 index++ 128 continue 129 } 130 for j := 0; j < int(nilGroup); j++ { 131 out[index] = nil 132 index++ 133 } 134 } 135 for i := index; i < len(out); i++ { 136 out[i] = nil 137 } 138 } 139 140 //Add adds data to a collection 141 func (s *CompactedSlice) Add(data map[string]interface{}) { 142 var initSize = len(s.fieldNames) 143 if initSize < len(data) { 144 initSize = len(data) 145 } 146 atomic.AddInt64(&s.size, 1) 147 var record = make([]interface{}, initSize) 148 for k, v := range data { 149 i := s.index(k, v) 150 if !(i < len(record)) { 151 record = expandIfNeeded(i+1, record) 152 } 153 if s.omitEmpty { 154 if toolbox.IsString(v) { 155 if toolbox.AsString(v) == "" { 156 v = nil 157 } 158 } else if toolbox.IsInt(v) { 159 if toolbox.AsInt(v) == 0 { 160 v = nil 161 } 162 } else if toolbox.IsFloat(v) { 163 if toolbox.AsFloat(v) == 0.0 { 164 v = nil 165 } 166 } 167 } 168 record[i] = v 169 } 170 if s.compressNils { 171 record = s.compress(record) 172 } 173 s.data = append(s.data, record) 174 } 175 176 func (s *CompactedSlice) mapNamesToFieldPositions(names []string) ([]int, error) { 177 var result = make([]int, 0) 178 for _, name := range names { 179 field, ok := s.fieldNames[name] 180 if !ok { 181 return nil, fmt.Errorf("failed to lookup Field: %v", name) 182 } 183 result = append(result, field.index) 184 } 185 return result, nil 186 } 187 188 //SortedRange sort collection by supplied index and then call for each item supplied handler callback 189 func (s *CompactedSlice) SortedRange(indexBy []string, handler func(item interface{}) (bool, error)) error { 190 s.lock.Lock() 191 fields := s.fields 192 data := s.data 193 s.data = [][]interface{}{} 194 s.lock.Unlock() 195 indexByPositions, err := s.mapNamesToFieldPositions(indexBy) 196 if err != nil { 197 return err 198 } 199 200 var indexedRecords = make(map[interface{}][]interface{}) 201 var record = make([]interface{}, len(s.fields)) 202 var key interface{} 203 for _, item := range data { 204 atomic.AddInt64(&s.size, -1) 205 if s.compressNils { 206 s.uncompress(item, record) 207 } else { 208 record = item 209 } 210 key = indexValue(indexByPositions, item) 211 indexedRecords[key] = item 212 } 213 214 keys, err := sortKeys(key, indexedRecords) 215 if err != nil { 216 return err 217 } 218 for _, key := range keys { 219 item := indexedRecords[key] 220 if s.compressNils { 221 s.uncompress(item, record) 222 } else { 223 record = item 224 } 225 226 var aMap = map[string]interface{}{} 227 recordToMap(fields, record, aMap) 228 if next, err := handler(aMap); !next || err != nil { 229 return err 230 } 231 232 } 233 return nil 234 } 235 236 //SortedIterator returns sorted iterator 237 func (s *CompactedSlice) SortedIterator(indexBy []string) (toolbox.Iterator, error) { 238 s.lock.Lock() 239 fields := s.fields 240 data := s.data 241 s.data = [][]interface{}{} 242 s.lock.Unlock() 243 if len(indexBy) == 0 { 244 return nil, fmt.Errorf("indexBy was empty") 245 } 246 indexByPositions, err := s.mapNamesToFieldPositions(indexBy) 247 if err != nil { 248 return nil, err 249 } 250 var record = make([]interface{}, len(fields)) 251 var indexedRecords = make(map[interface{}][]interface{}) 252 var key interface{} 253 for _, item := range data { 254 atomic.AddInt64(&s.size, -1) 255 if s.compressNils { 256 s.uncompress(item, record) 257 } else { 258 record = item 259 } 260 key = indexValue(indexByPositions, record) 261 indexedRecords[key] = item 262 } 263 264 data = nil 265 keys, err := sortKeys(key, indexedRecords) 266 if err != nil { 267 return nil, err 268 } 269 atomic.AddInt64(&s.size, int64(-len(data))) 270 return &iterator{ 271 size: len(indexedRecords), 272 provider: func(index int) (map[string]interface{}, error) { 273 if index >= len(indexedRecords) { 274 return nil, fmt.Errorf("index: %d out bounds:%d", index, len(data)) 275 } 276 key := keys[index] 277 item := indexedRecords[key] 278 if s.compressNils { 279 s.uncompress(item, record) 280 } else { 281 record = item 282 } 283 var aMap = map[string]interface{}{} 284 recordToMap(fields, record, aMap) 285 return aMap, nil 286 }, 287 }, nil 288 } 289 290 //Range iterate over slice, and remove processed data from the compacted slice 291 func (s *CompactedSlice) Range(handler func(item interface{}) (bool, error)) error { 292 s.lock.Lock() 293 fields := s.fields 294 data := s.data 295 s.data = [][]interface{}{} 296 s.lock.Unlock() 297 298 var record = make([]interface{}, len(s.fields)) 299 for _, item := range data { 300 atomic.AddInt64(&s.size, -1) 301 if s.compressNils { 302 s.uncompress(item, record) 303 } else { 304 record = item 305 } 306 var aMap = map[string]interface{}{} 307 recordToMap(fields, record, aMap) 308 if next, err := handler(aMap); !next || err != nil { 309 return err 310 } 311 } 312 return nil 313 } 314 315 //Ranger moves data from slice to ranger 316 func (s *CompactedSlice) Ranger() toolbox.Ranger { 317 s.lock.Lock() 318 clone := &CompactedSlice{ 319 data: s.data, 320 fields: s.fields, 321 size: s.size, 322 omitEmpty: s.omitEmpty, 323 compressNils: s.compressNils, 324 lock: &sync.RWMutex{}, 325 fieldNames: s.fieldNames, 326 } 327 s.data = [][]interface{}{} 328 atomic.StoreInt64(&s.size, 0) 329 s.lock.Unlock() 330 return clone 331 } 332 333 //Iterator returns a slice iterator 334 func (s *CompactedSlice) Iterator() toolbox.Iterator { 335 s.lock.Lock() 336 fields := s.fields 337 data := s.data 338 s.data = [][]interface{}{} 339 s.lock.Unlock() 340 atomic.AddInt64(&s.size, int64(-len(data))) 341 342 var record = make([]interface{}, len(fields)) 343 return &iterator{ 344 size: len(data), 345 provider: func(index int) (map[string]interface{}, error) { 346 if index >= len(data) { 347 return nil, fmt.Errorf("index: %d out bounds:%d", index, len(data)) 348 } 349 item := data[index] 350 if s.compressNils { 351 s.uncompress(item, record) 352 } else { 353 record = item 354 } 355 var aMap = map[string]interface{}{} 356 recordToMap(fields, record, aMap) 357 return aMap, nil 358 }, 359 } 360 } 361 362 type iterator struct { 363 size int 364 provider func(index int) (map[string]interface{}, error) 365 index int 366 } 367 368 //HasNext returns true if iterator has next element. 369 func (i *iterator) HasNext() bool { 370 return i.index < i.size 371 } 372 373 //Next sets item pointer with next element. 374 func (i *iterator) Next(itemPointer interface{}) error { 375 record, err := i.provider(i.index) 376 if err != nil { 377 return err 378 } 379 switch pointer := itemPointer.(type) { 380 case *map[string]interface{}: 381 *pointer = record 382 case *interface{}: 383 *pointer = record 384 default: 385 return fmt.Errorf("unsupported type: %T, expected *map[string]interface{}", itemPointer) 386 } 387 i.index++ 388 return nil 389 } 390 391 //NewCompactedSlice create new compacted slice 392 func NewCompactedSlice(omitEmpty, compressNils bool) *CompactedSlice { 393 return &CompactedSlice{ 394 omitEmpty: omitEmpty, 395 compressNils: compressNils, 396 fields: make([]*Field, 0), 397 fieldNames: make(map[string]*Field), 398 data: make([][]interface{}, 0), 399 lock: &sync.RWMutex{}, 400 } 401 }