go-ml.dev/pkg/base@v0.0.0-20200610162856-60c38abac71b/tables/matrix.go (about)

     1  package tables
     2  
     3  import (
     4  	"go-ml.dev/pkg/base/fu"
     5  	"golang.org/x/xerrors"
     6  	"reflect"
     7  )
     8  
     9  /*
    10  Matrix the presentation of features and labels as plane []float32 slices
    11  */
    12  type Matrix struct {
    13  	Features      []float32
    14  	Labels        []float32
    15  	Width, Length int
    16  	LabelsWidth   int // 0 means no labels defined
    17  }
    18  
    19  /*
    20  Matrix returns matrix without labels
    21  */
    22  func (t *Table) Matrix(features []string, least ...int) (m Matrix, err error) {
    23  	_, m, err = t.MatrixWithLabelIf(features, "", "", nil, least...)
    24  	return
    25  }
    26  
    27  /*
    28  MatrixWithLabel returns matrix with labels
    29  */
    30  func (t *Table) MatrixWithLabel(features []string, label string, least ...int) (m Matrix, err error) {
    31  	_, m, err = t.MatrixWithLabelIf(features, label, "", nil, least...)
    32  	return
    33  }
    34  
    35  /*
    36  MatrixIf returns two matrices without labels
    37  the first one contains samples with column ifName equal ifValue
    38  the second one - samples with column ifName not equal ifValue
    39  */
    40  func (t *Table) MatrixIf(features []string, ifName string, ifValue interface{}) (m0, m1 Matrix, err error) {
    41  	return t.MatrixWithLabelIf(features, "", ifName, ifValue)
    42  }
    43  
    44  /*
    45  MatrixWithLabelIf returns two matrices with labels
    46  the first one contains samples with column ifName equal ifValue
    47  the second one - samples with column ifName not equal ifValue
    48  */
    49  func (t *Table) MatrixWithLabelIf(features []string, label string, ifName string, ifValue interface{}, least ...int) (test, train Matrix, err error) {
    50  	L := [2]int{0, t.Len()}
    51  	filter := func(int) int { return 1 }
    52  	if ifName != "" {
    53  		if tc, ok := t.ColIfExists(ifName); ok {
    54  			if a, ok := tc.Inspect().([]bool); ok {
    55  				vt := ifValue.(bool)
    56  				filter = func(i int) int {
    57  					if a[i] == vt {
    58  						return 0
    59  					}
    60  					return 1
    61  				}
    62  			} else if a, ok := tc.Inspect().([]int); ok {
    63  				vt := ifValue.(int)
    64  				filter = func(i int) int {
    65  					if a[i] == vt {
    66  						return 0
    67  					}
    68  					return 1
    69  				}
    70  			} else {
    71  				filter = func(i int) int {
    72  					if tc.Index(i).Value == ifValue {
    73  						return 0
    74  					}
    75  					return 1
    76  				}
    77  			}
    78  			l := t.Len()
    79  			L = [2]int{0, 0}
    80  			for i := 0; i < l; i++ {
    81  				L[filter(i)]++
    82  			}
    83  		}
    84  	}
    85  
    86  	width := 0
    87  	for _, n := range features {
    88  		c := t.Col(n)
    89  		if c.Type() == fu.TensorType {
    90  			width += c.Inspect().([]fu.Tensor)[0].Volume()
    91  		} else {
    92  			width++
    93  		}
    94  	}
    95  
    96  	lwidth := 0
    97  
    98  	if label != "" {
    99  		lc := t.Col(label)
   100  		lwidth = 1
   101  		if lc.Type() == fu.TensorType {
   102  			lwidth = lc.Inspect().([]fu.Tensor)[0].Volume()
   103  		}
   104  	}
   105  
   106  	for i := range L {
   107  		if L[i] > 0 {
   108  			L[i] = fu.Maxi(L[i], least...)
   109  		}
   110  	}
   111  
   112  	mx := []Matrix{
   113  		{make([]float32, L[0]*width), make([]float32, L[0]*lwidth), width, L[0], lwidth},
   114  		{make([]float32, L[1]*width), make([]float32, L[1]*lwidth), width, L[1], lwidth},
   115  	}
   116  
   117  	wc := 0
   118  
   119  	for _, n := range features {
   120  		if wc, err = t.addToMatrix(filter, mx, t.Col(n), false, wc, width, t.Len()); err != nil {
   121  			return
   122  		}
   123  	}
   124  
   125  	if lwidth > 0 {
   126  		if _, err = t.addToMatrix(filter, mx, t.Col(label), true, 0, lwidth, t.Len()); err != nil {
   127  			return
   128  		}
   129  	}
   130  
   131  	for i, l := range L {
   132  		if t.Len() < l && t.Len() > 0 {
   133  			for j := t.Len() - 1; j < l; j++ {
   134  				m := mx[i]
   135  				copy(m.Features[j*m.Width:(j+1)*m.Width], m.Features[0:m.Width])
   136  				if m.LabelsWidth > 0 {
   137  					copy(m.Labels[j*m.LabelsWidth:(j+1)*m.LabelsWidth], m.Labels[0:m.LabelsWidth])
   138  				}
   139  			}
   140  		}
   141  	}
   142  
   143  	return mx[0], mx[1], nil
   144  }
   145  
   146  func (t *Table) addToMatrix(f func(int) int, matrix []Matrix, c *Column, label bool, xc, width, length int) (wc int, err error) {
   147  	where := [2][]float32{
   148  		fu.Ife(label, matrix[0].Labels, matrix[0].Features).([]float32),
   149  		fu.Ife(label, matrix[1].Labels, matrix[1].Features).([]float32),
   150  	}
   151  	wc = xc
   152  	z := [2]int{}
   153  	switch c.Type() {
   154  	case fu.Float32:
   155  		x := c.Inspect().([]float32)
   156  		for j := 0; j < length; j++ {
   157  			jf := f(j)
   158  			where[jf][z[jf]*width+wc] = x[j]
   159  			z[jf]++
   160  		}
   161  		wc++
   162  	case fu.Float64:
   163  		x := c.Inspect().([]float64)
   164  		for j := 0; j < length; j++ {
   165  			jf := f(j)
   166  			where[jf][z[jf]*width+wc] = float32(x[j])
   167  			z[jf]++
   168  		}
   169  		wc++
   170  	case fu.Fixed8Type:
   171  		x := c.Inspect().([]fu.Fixed8)
   172  		for j := 0; j < length; j++ {
   173  			jf := f(j)
   174  			where[jf][z[jf]*width+wc] = x[j].Float32()
   175  			z[jf]++
   176  		}
   177  		wc++
   178  	case fu.Int:
   179  		x := c.Inspect().([]int)
   180  		for j := 0; j < length; j++ {
   181  			jf := f(j)
   182  			where[jf][z[jf]*width+wc] = float32(x[j])
   183  			z[jf]++
   184  		}
   185  		wc++
   186  	case fu.TensorType:
   187  		x := c.Inspect().([]fu.Tensor)
   188  		vol := x[0].Volume()
   189  		for j := 0; j < length; j++ {
   190  			if x[j].Volume() != vol {
   191  				err = xerrors.Errorf("tensors with different volumes found in one column")
   192  			}
   193  			jf := f(j)
   194  			m := z[jf]
   195  			t := where[jf]
   196  			switch x[j].Type() {
   197  			case fu.Float32:
   198  				y := x[j].Values().([]float32)
   199  				copy(t[m*width+wc:m*width+wc+vol], y)
   200  			case fu.Float64:
   201  				y := x[j].Values().([]float64)
   202  				for k := 0; k < vol; k++ {
   203  					t[m*width+wc+k] = float32(y[k])
   204  				}
   205  			case fu.Byte:
   206  				y := x[j].Values().([]byte)
   207  				for k := 0; k < vol; k++ {
   208  					t[m*width+wc+k] = float32(y[k]) / 256
   209  				}
   210  			case fu.Fixed8Type:
   211  				y := x[j].Values().([]fu.Fixed8)
   212  				for k := 0; k < vol; k++ {
   213  					t[m*width+wc+k] = y[k].Float32()
   214  				}
   215  			case fu.Int:
   216  				y := x[j].Values().([]int)
   217  				for k := 0; k < vol; k++ {
   218  					t[m*width+wc+k] = float32(y[k])
   219  				}
   220  			default:
   221  				return width, xerrors.Errorf("unsupported tensor type %v", x[j].Type)
   222  			}
   223  			z[jf]++
   224  		}
   225  		wc += vol
   226  	default:
   227  		x := c.ExtractAs(fu.Float32, true).([]float32)
   228  		for j := 0; j < length; j++ {
   229  			jf := f(j)
   230  			where[jf][z[jf]*width+wc] = x[j]
   231  			z[jf]++
   232  		}
   233  		wc++
   234  	}
   235  	return
   236  }
   237  
   238  /*
   239  AsTable converts raw features representation into Table
   240  */
   241  func (m Matrix) AsTable(names ...string) *Table {
   242  	columns := make([]reflect.Value, m.Width)
   243  	na := make([]fu.Bits, m.Width)
   244  	for i := range columns {
   245  		c := make([]float32, m.Length, m.Length)
   246  		for j := 0; j < m.Length; j++ {
   247  			c[j] = m.Features[m.Width*j+i]
   248  		}
   249  		columns[i] = reflect.ValueOf(c)
   250  	}
   251  	return MakeTable(names, columns, na, m.Length)
   252  }
   253  
   254  /*
   255  AsColumn converts raw features representation into Column
   256  */
   257  func (m Matrix) AsColumn() *Column {
   258  	if m.Width == 1 {
   259  		return &Column{column: reflect.ValueOf(m.Features[0:m.Length])}
   260  	}
   261  	column := make([]fu.Tensor, m.Length)
   262  	for i := 0; i < m.Length; i++ {
   263  		column[i] = fu.MakeFloat32Tensor(1, 1, m.Width, m.Features[m.Width*i:m.Width*(i+1)])
   264  	}
   265  	return &Column{column: reflect.ValueOf(column)}
   266  }
   267  
   268  /*
   269  AsLabelColumn converts raw labels representation into Column
   270  */
   271  func (m Matrix) AsLabelColumn() *Column {
   272  	if m.LabelsWidth == 1 {
   273  		return &Column{column: reflect.ValueOf(m.Labels[0:m.Length])}
   274  	}
   275  	column := make([]fu.Tensor, m.Length)
   276  	for i := 0; i < m.Length; i++ {
   277  		column[i] = fu.MakeFloat32Tensor(1, 1, m.LabelsWidth, m.Labels[m.LabelsWidth*i:m.LabelsWidth*(i+1)])
   278  	}
   279  	return &Column{column: reflect.ValueOf(column)}
   280  }
   281  
   282  func MatrixColumn(dat []float32, length int) *Column {
   283  	if length > 0 {
   284  		return Matrix{dat, nil, len(dat) / length, length, 0}.AsColumn()
   285  	}
   286  	return Col([]float32{})
   287  }