github.com/viant/toolbox@v0.34.5/data/compacted.go (about)

     1  package data
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"github.com/viant/toolbox"
     8  	"reflect"
     9  	"sync"
    10  	"sync/atomic"
    11  )
    12  
    13  type Field struct {
    14  	Name  string
    15  	Type  reflect.Type
    16  	index int
    17  }
    18  
    19  type nilGroup int
    20  
    21  //CompactedSlice represented a compacted slice to represent object collection
    22  type CompactedSlice struct {
    23  	omitEmpty    bool
    24  	compressNils bool
    25  	lock         *sync.RWMutex
    26  	fieldNames   map[string]*Field
    27  	fields       []*Field
    28  	data         [][]interface{}
    29  	size         int64
    30  	RawEncoding bool
    31  }
    32  
    33  
    34  func (d CompactedSlice) MarshalJSON() ([]byte, error) {
    35  	buf := new(bytes.Buffer)
    36  	_, err := buf.Write([]byte("["))
    37  	if err != nil {
    38  		return nil, err
    39  	}
    40  	i := 0
    41  	if err = d.Range(func(item interface{}) (b bool, err error) {
    42  		if i > 0 {
    43  			_, err := buf.Write([]byte(","))
    44  			if err != nil {
    45  				return false, err
    46  			}
    47  		}
    48  		i++
    49  		data, err :=json.Marshal(item)
    50  		if err != nil {
    51  			return false, err
    52  		}
    53  		_, err = buf.Write(data)
    54  		return err == nil, err
    55  	});err != nil {
    56  		return nil, err
    57  	}
    58  	if _, err := buf.Write([]byte("]")); err != nil {
    59  		return nil, err
    60  	}
    61  	return buf.Bytes(), nil
    62  }
    63  
    64  
    65  
    66  func (s *CompactedSlice) Fields() []*Field {
    67  	return s.fields
    68  }
    69  
    70  //Size returns size of collection
    71  func (s *CompactedSlice) Size() int {
    72  	return int(atomic.LoadInt64(&s.size))
    73  }
    74  
    75  func (s *CompactedSlice) index(fieldName string, value interface{}) int {
    76  	s.lock.RLock()
    77  	f, ok := s.fieldNames[fieldName]
    78  	s.lock.RUnlock()
    79  	if ok {
    80  		return f.index
    81  	}
    82  	f = &Field{Name: fieldName, index: len(s.fieldNames), Type: reflect.TypeOf(value)}
    83  	s.lock.Lock()
    84  	defer s.lock.Unlock()
    85  	s.fieldNames[fieldName] = f
    86  	s.fields = append(s.fields, f)
    87  	return f.index
    88  }
    89  
    90  func expandIfNeeded(size int, data []interface{}) []interface{} {
    91  	if size >= len(data) {
    92  		for i := len(data); i < size; i++ {
    93  			data = append(data, nil)
    94  		}
    95  	}
    96  	return data
    97  }
    98  
    99  func (s *CompactedSlice) compress(data []interface{}) []interface{} {
   100  	var compressed = make([]interface{}, 0)
   101  	var nilCount = 0
   102  	for _, item := range data {
   103  		if item != nil {
   104  			switch nilCount {
   105  			case 0:
   106  			case 1:
   107  				compressed = append(compressed, nil)
   108  			default:
   109  				compressed = append(compressed, nilGroup(nilCount))
   110  			}
   111  			compressed = append(compressed, item)
   112  			nilCount = 0
   113  			continue
   114  		}
   115  		nilCount++
   116  	}
   117  	return compressed
   118  }
   119  
   120  func (s *CompactedSlice) uncompress(in, out []interface{}) {
   121  	var index = 0
   122  	for i := 0; i < len(in); i++ {
   123  		var item = in[i]
   124  		nilGroup, ok := item.(nilGroup)
   125  		if !ok {
   126  			out[index] = item
   127  			index++
   128  			continue
   129  		}
   130  		for j := 0; j < int(nilGroup); j++ {
   131  			out[index] = nil
   132  			index++
   133  		}
   134  	}
   135  	for i := index; i < len(out); i++ {
   136  		out[i] = nil
   137  	}
   138  }
   139  
   140  //Add adds data to a collection
   141  func (s *CompactedSlice) Add(data map[string]interface{}) {
   142  	var initSize = len(s.fieldNames)
   143  	if initSize < len(data) {
   144  		initSize = len(data)
   145  	}
   146  	atomic.AddInt64(&s.size, 1)
   147  	var record = make([]interface{}, initSize)
   148  	for k, v := range data {
   149  		i := s.index(k, v)
   150  		if !(i < len(record)) {
   151  			record = expandIfNeeded(i+1, record)
   152  		}
   153  		if s.omitEmpty {
   154  			if toolbox.IsString(v) {
   155  				if toolbox.AsString(v) == "" {
   156  					v = nil
   157  				}
   158  			} else if toolbox.IsInt(v) {
   159  				if toolbox.AsInt(v) == 0 {
   160  					v = nil
   161  				}
   162  			} else if toolbox.IsFloat(v) {
   163  				if toolbox.AsFloat(v) == 0.0 {
   164  					v = nil
   165  				}
   166  			}
   167  		}
   168  		record[i] = v
   169  	}
   170  	if s.compressNils {
   171  		record = s.compress(record)
   172  	}
   173  	s.data = append(s.data, record)
   174  }
   175  
   176  func (s *CompactedSlice) mapNamesToFieldPositions(names []string) ([]int, error) {
   177  	var result = make([]int, 0)
   178  	for _, name := range names {
   179  		field, ok := s.fieldNames[name]
   180  		if !ok {
   181  			return nil, fmt.Errorf("failed to lookup Field: %v", name)
   182  		}
   183  		result = append(result, field.index)
   184  	}
   185  	return result, nil
   186  }
   187  
   188  //SortedRange sort collection by supplied index and then call for each item supplied handler callback
   189  func (s *CompactedSlice) SortedRange(indexBy []string, handler func(item interface{}) (bool, error)) error {
   190  	s.lock.Lock()
   191  	fields := s.fields
   192  	data := s.data
   193  	s.data = [][]interface{}{}
   194  	s.lock.Unlock()
   195  	indexByPositions, err := s.mapNamesToFieldPositions(indexBy)
   196  	if err != nil {
   197  		return err
   198  	}
   199  
   200  	var indexedRecords = make(map[interface{}][]interface{})
   201  	var record = make([]interface{}, len(s.fields))
   202  	var key interface{}
   203  	for _, item := range data {
   204  		atomic.AddInt64(&s.size, -1)
   205  		if s.compressNils {
   206  			s.uncompress(item, record)
   207  		} else {
   208  			record = item
   209  		}
   210  		key = indexValue(indexByPositions, item)
   211  		indexedRecords[key] = item
   212  	}
   213  
   214  	keys, err := sortKeys(key, indexedRecords)
   215  	if err != nil {
   216  		return err
   217  	}
   218  	for _, key := range keys {
   219  		item := indexedRecords[key]
   220  		if s.compressNils {
   221  			s.uncompress(item, record)
   222  		} else {
   223  			record = item
   224  		}
   225  
   226  		var aMap = map[string]interface{}{}
   227  		recordToMap(fields, record, aMap)
   228  		if next, err := handler(aMap); !next || err != nil {
   229  			return err
   230  		}
   231  
   232  	}
   233  	return nil
   234  }
   235  
   236  //SortedIterator returns sorted iterator
   237  func (s *CompactedSlice) SortedIterator(indexBy []string) (toolbox.Iterator, error) {
   238  	s.lock.Lock()
   239  	fields := s.fields
   240  	data := s.data
   241  	s.data = [][]interface{}{}
   242  	s.lock.Unlock()
   243  	if len(indexBy) == 0 {
   244  		return nil, fmt.Errorf("indexBy was empty")
   245  	}
   246  	indexByPositions, err := s.mapNamesToFieldPositions(indexBy)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	var record = make([]interface{}, len(fields))
   251  	var indexedRecords = make(map[interface{}][]interface{})
   252  	var key interface{}
   253  	for _, item := range data {
   254  		atomic.AddInt64(&s.size, -1)
   255  		if s.compressNils {
   256  			s.uncompress(item, record)
   257  		} else {
   258  			record = item
   259  		}
   260  		key = indexValue(indexByPositions, record)
   261  		indexedRecords[key] = item
   262  	}
   263  
   264  	data = nil
   265  	keys, err := sortKeys(key, indexedRecords)
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  	atomic.AddInt64(&s.size, int64(-len(data)))
   270  	return &iterator{
   271  		size: len(indexedRecords),
   272  		provider: func(index int) (map[string]interface{}, error) {
   273  			if index >= len(indexedRecords) {
   274  				return nil, fmt.Errorf("index: %d out bounds:%d", index, len(data))
   275  			}
   276  			key := keys[index]
   277  			item := indexedRecords[key]
   278  			if s.compressNils {
   279  				s.uncompress(item, record)
   280  			} else {
   281  				record = item
   282  			}
   283  			var aMap = map[string]interface{}{}
   284  			recordToMap(fields, record, aMap)
   285  			return aMap, nil
   286  		},
   287  	}, nil
   288  }
   289  
   290  //Range iterate over slice, and remove processed data from the compacted slice
   291  func (s *CompactedSlice) Range(handler func(item interface{}) (bool, error)) error {
   292  	s.lock.Lock()
   293  	fields := s.fields
   294  	data := s.data
   295  	s.data = [][]interface{}{}
   296  	s.lock.Unlock()
   297  
   298  	var record = make([]interface{}, len(s.fields))
   299  	for _, item := range data {
   300  		atomic.AddInt64(&s.size, -1)
   301  		if s.compressNils {
   302  			s.uncompress(item, record)
   303  		} else {
   304  			record = item
   305  		}
   306  		var aMap = map[string]interface{}{}
   307  		recordToMap(fields, record, aMap)
   308  		if next, err := handler(aMap); !next || err != nil {
   309  			return err
   310  		}
   311  	}
   312  	return nil
   313  }
   314  
   315  //Ranger moves data from slice to ranger
   316  func (s *CompactedSlice) Ranger() toolbox.Ranger {
   317  	s.lock.Lock()
   318  	clone := &CompactedSlice{
   319  		data:         s.data,
   320  		fields:       s.fields,
   321  		size:         s.size,
   322  		omitEmpty:    s.omitEmpty,
   323  		compressNils: s.compressNils,
   324  		lock:         &sync.RWMutex{},
   325  		fieldNames:   s.fieldNames,
   326  	}
   327  	s.data = [][]interface{}{}
   328  	atomic.StoreInt64(&s.size, 0)
   329  	s.lock.Unlock()
   330  	return clone
   331  }
   332  
   333  //Iterator returns a slice iterator
   334  func (s *CompactedSlice) Iterator() toolbox.Iterator {
   335  	s.lock.Lock()
   336  	fields := s.fields
   337  	data := s.data
   338  	s.data = [][]interface{}{}
   339  	s.lock.Unlock()
   340  	atomic.AddInt64(&s.size, int64(-len(data)))
   341  
   342  	var record = make([]interface{}, len(fields))
   343  	return &iterator{
   344  		size: len(data),
   345  		provider: func(index int) (map[string]interface{}, error) {
   346  			if index >= len(data) {
   347  				return nil, fmt.Errorf("index: %d out bounds:%d", index, len(data))
   348  			}
   349  			item := data[index]
   350  			if s.compressNils {
   351  				s.uncompress(item, record)
   352  			} else {
   353  				record = item
   354  			}
   355  			var aMap = map[string]interface{}{}
   356  			recordToMap(fields, record, aMap)
   357  			return aMap, nil
   358  		},
   359  	}
   360  }
   361  
   362  type iterator struct {
   363  	size     int
   364  	provider func(index int) (map[string]interface{}, error)
   365  	index    int
   366  }
   367  
   368  //HasNext returns true if iterator has next element.
   369  func (i *iterator) HasNext() bool {
   370  	return i.index < i.size
   371  }
   372  
   373  //Next sets item pointer with next element.
   374  func (i *iterator) Next(itemPointer interface{}) error {
   375  	record, err := i.provider(i.index)
   376  	if err != nil {
   377  		return err
   378  	}
   379  	switch pointer := itemPointer.(type) {
   380  	case *map[string]interface{}:
   381  		*pointer = record
   382  	case *interface{}:
   383  		*pointer = record
   384  	default:
   385  		return fmt.Errorf("unsupported type: %T, expected *map[string]interface{}", itemPointer)
   386  	}
   387  	i.index++
   388  	return nil
   389  }
   390  
   391  //NewCompactedSlice create new compacted slice
   392  func NewCompactedSlice(omitEmpty, compressNils bool) *CompactedSlice {
   393  	return &CompactedSlice{
   394  		omitEmpty:    omitEmpty,
   395  		compressNils: compressNils,
   396  		fields:       make([]*Field, 0),
   397  		fieldNames:   make(map[string]*Field),
   398  		data:         make([][]interface{}, 0),
   399  		lock:         &sync.RWMutex{},
   400  	}
   401  }