go-ml.dev/pkg/base@v0.0.0-20200610162856-60c38abac71b/tables/batch.go (about) 1 package tables 2 3 import ( 4 "go-ml.dev/pkg/base/fu" 5 "go-ml.dev/pkg/base/fu/lazy" 6 "reflect" 7 ) 8 9 /* 10 FeaturesMapper interface is a features transformation abstraction 11 */ 12 type FeaturesMapper interface { 13 // MapFeature returns new table with all original columns except features 14 // adding one new column with prediction/calculation 15 MapFeatures(*Table) (*Table, error) 16 // Cloase releases all bounded resources 17 Close() error 18 } 19 20 type LambdaMapper func(table *Table) (*Table, error) 21 22 func (LambdaMapper) Close() error { return nil } 23 func (l LambdaMapper) MapFeatures(t *Table) (*Table, error) { return l(t) } 24 25 /* 26 Batch is batching abstraction to process lazy streams 27 */ 28 type Batch struct { 29 int 30 lazy.Source 31 } 32 33 /* 34 Batch transforms lazy stream to a batching flow 35 */ 36 func (zf Lazy) Batch(length int) Batch { 37 return Batch{length, func() lazy.Stream { 38 z := zf() 39 wc := fu.WaitCounter{Value: 0} 40 columns := []reflect.Value{} 41 na := []fu.Bits{} 42 names := []string{} 43 ac := fu.AtomicCounter{Value: 0} 44 45 return func(index uint64) (v reflect.Value, err error) { 46 v, err = z(index) 47 if index == lazy.STOP || err != nil { 48 wc.Stop() 49 return 50 } 51 52 x := fu.True 53 if wc.Wait(index) { 54 if v.Kind() == reflect.Bool { 55 if !v.Bool() { 56 n := int(ac.Value % uint64(length)) 57 if ac.Value != 0 { 58 if n == 0 { 59 n = length 60 } 61 v = reflect.ValueOf(MakeTable(names, columns, na, n)) 62 } 63 wc.Stop() 64 } 65 wc.Inc() 66 return v, nil 67 } 68 69 lr := v.Interface().(fu.Struct) 70 ndx := ac.PostInc() 71 n := int(ndx % uint64(length)) 72 73 if n == 0 { 74 if ndx != 0 { 75 x = reflect.ValueOf(MakeTable(names, columns, na, length)) 76 } 77 names = lr.Names 78 width := len(names) 79 columns = make([]reflect.Value, width) 80 for i := range columns { 81 columns[i] = reflect.MakeSlice(reflect.SliceOf(lr.Columns[i].Type()), 0, length) 82 } 83 na = make([]fu.Bits, width) 84 } 85 86 for i := range lr.Names { 87 columns[i] = reflect.Append(columns[i], lr.Columns[i]) 88 na[i].Set(n, lr.Na.Bit(i)) 89 } 90 91 wc.Inc() 92 return x, nil 93 } 94 return fu.False, nil 95 } 96 }} 97 } 98 99 /* 100 Flat transforms batching to the normal lazy stream 101 */ 102 func (zf Batch) Flat() Lazy { 103 return func() lazy.Stream { 104 z := zf.Source() 105 wc := fu.WaitCounter{Value: 0} 106 ac := fu.AtomicCounter{Value: 0} 107 t := (*Table)(nil) 108 row := 0 109 return func(index uint64) (v reflect.Value, err error) { 110 v = fu.False 111 if index == lazy.STOP { 112 wc.Stop() 113 return 114 } 115 if wc.Wait(index) { 116 if t == nil { 117 v, err = z(ac.PostInc()) 118 if err != nil || (v.Kind() == reflect.Bool && !v.Bool()) { 119 wc.Stop() 120 return 121 } 122 if v.Kind() != reflect.Bool { 123 t = v.Interface().(*Table) 124 row = 0 125 } 126 } 127 if t != nil { 128 v = reflect.ValueOf(t.Index(row)) 129 row++ 130 if row >= t.Len() { 131 t = nil 132 } 133 } 134 wc.Inc() 135 return v, nil 136 } 137 return fu.False, nil 138 } 139 } 140 } 141 142 /* 143 Transform transforms streamed data by batches 144 */ 145 func (zf Batch) Transform(tf func(int) (FeaturesMapper, error)) Batch { 146 return Batch{zf.int, func() lazy.Stream { 147 f := fu.AtomicFlag{Value: 0} 148 tx, err := tf(zf.int) 149 if err != nil { 150 return lazy.Error(err) 151 } 152 z := zf.Source() 153 return func(index uint64) (v reflect.Value, err error) { 154 v, err = z(index) 155 if index == lazy.STOP || err != nil { 156 f.Set() 157 tx.Close() 158 return 159 } 160 if !f.State() { 161 if v.Kind() != reflect.Bool { 162 lr := v.Interface().(*Table) 163 t, err := tx.MapFeatures(lr) 164 if err != nil { 165 f.Set() 166 return fu.False, err 167 } 168 return reflect.ValueOf(t), nil 169 } 170 if v.Bool() { 171 return fu.True, nil 172 } 173 f.Set() 174 } 175 return fu.False, nil 176 } 177 }} 178 } 179 180 /* 181 Reduce batches to values 182 */ 183 func (zf Batch) Reduce(tf func(t *Table) (fu.Struct, bool, error)) Lazy { 184 return func() lazy.Stream { 185 f := fu.AtomicFlag{Value: 0} 186 z := zf.Source() 187 return func(index uint64) (v reflect.Value, err error) { 188 v, err = z(index) 189 if index == lazy.STOP || err != nil { 190 f.Set() 191 return 192 } 193 if !f.State() { 194 if v.Kind() != reflect.Bool { 195 t := v.Interface().(*Table) 196 lr, ok, err := tf(t) 197 if err != nil { 198 f.Set() 199 return fu.False, err 200 } 201 if !ok { 202 return fu.True, nil 203 } 204 return reflect.ValueOf(lr), nil 205 } 206 if v.Bool() { 207 return fu.True, nil 208 } 209 f.Set() 210 } 211 return fu.False, nil 212 } 213 } 214 }