gorgonia.org/gorgonia@v0.9.17/weights.go (about)

     1  package gorgonia
     2  
     3  import (
     4  	"math"
     5  	"reflect"
     6  	"time"
     7  
     8  	rng "github.com/leesper/go_rng"
     9  	"github.com/pkg/errors"
    10  	"gorgonia.org/tensor"
    11  )
    12  
    13  // This file provides several weight initialization utility functions.
    14  // It uses the rng package by leesper
    15  
// InitWFn is a type of helper function to help initialize weights vector/matrices.
// It generates the backing slice (e.g. []float64) required for tensors of the
// given dtype and shape.
//
// It's typically used in closures.
type InitWFn func(dt tensor.Dtype, s ...int) interface{}
    21  
    22  // Zeroes creates an InitWfn that populates a Value with... zeroes. I don't know what you expected.
    23  func Zeroes() InitWFn {
    24  	f := func(dt tensor.Dtype, s ...int) interface{} {
    25  		size := tensor.Shape(s).TotalSize()
    26  		switch dt {
    27  		case tensor.Float64:
    28  			return make([]float64, size)
    29  		case tensor.Float32:
    30  			return make([]float32, size)
    31  		case tensor.Int:
    32  			return make([]int, size)
    33  		default:
    34  			return reflect.MakeSlice(reflect.SliceOf(dt.Type), size, size).Interface()
    35  		}
    36  	}
    37  	return f
    38  }
    39  
    40  // Ones creates an InitWfn that populates a Value with ones. See Zeroes() for more explanation.
    41  func Ones() InitWFn {
    42  	return func(dt tensor.Dtype, s ...int) interface{} { return ones(dt, s...).Data() }
    43  }
    44  
    45  // RangedFrom creates an InitWFn that populates a Value starting with the provided start, increamenting the number for each element in the value by 1
    46  func RangedFrom(start int) InitWFn {
    47  	f := func(dt tensor.Dtype, s ...int) interface{} {
    48  		size := tensor.Shape(s).TotalSize()
    49  		return tensor.Range(dt, start, start+size)
    50  	}
    51  	return f
    52  }
    53  
    54  // ValuesOf creates an InitWrn that populates a value with val. This function will cause a panic if val's type is incompatible with the values type.
    55  func ValuesOf(val interface{}) InitWFn {
    56  	f := func(dt tensor.Dtype, s ...int) interface{} {
    57  		size := tensor.Shape(s).TotalSize()
    58  
    59  		switch dt {
    60  		case tensor.Float64:
    61  			v := val.(float64)
    62  			retVal := make([]float64, size)
    63  			for i := range retVal {
    64  				retVal[i] = v
    65  			}
    66  			return retVal
    67  		case tensor.Float32:
    68  			v := val.(float32)
    69  			retVal := make([]float32, size)
    70  			for i := range retVal {
    71  				retVal[i] = v
    72  			}
    73  			return retVal
    74  		case tensor.Int:
    75  			v := val.(int)
    76  			retVal := make([]int, size)
    77  			for i := range retVal {
    78  				retVal[i] = v
    79  			}
    80  			return retVal
    81  		default:
    82  			err := errors.Errorf(nyiTypeFail, "Zeroes", dt)
    83  			panic(err)
    84  		}
    85  	}
    86  	return f
    87  }
    88  
    89  // Gaussian creates a InitWFn with the specified parameters.
    90  // Example Usage:
    91  //		w := NewMatrix(g, Float64, WithName("w"), WithShape(2,2), WithInit(Gaussian(0, 1)))
    92  // This will create a backing slice of []float64, with the length of 4, and its values are drawn from a gaussian distro
    93  func Gaussian(mean, stdev float64) InitWFn {
    94  	f := func(dt tensor.Dtype, s ...int) interface{} {
    95  		switch dt {
    96  		case tensor.Float64:
    97  			return Gaussian64(mean, stdev, s...)
    98  		case tensor.Float32:
    99  			return Gaussian32(mean, stdev, s...)
   100  		default:
   101  			err := errors.Errorf(nyiTypeFail, "Gaussian init", dt)
   102  			panic(err)
   103  		}
   104  	}
   105  	return f
   106  }
   107  
   108  // Uniform creates a InitWFn with the specified parameters.
   109  // Example Usage:
   110  //		w := NewMatrix(g, Float64, WithName("w"), WithShape(2,2), WithInit(Uniform(-1, 1)))
   111  // This will create a backing slice of []float64, with the length of 4, and its values are drawn from a uniform distro
   112  func Uniform(low, high float64) InitWFn {
   113  	f := func(dt tensor.Dtype, s ...int) interface{} {
   114  		switch dt {
   115  		case tensor.Float64:
   116  			return Uniform64(low, high, s...)
   117  		case tensor.Float32:
   118  			return Uniform32(low, high, s...)
   119  		default:
   120  			err := errors.Errorf(nyiTypeFail, "Uniform init", dt)
   121  			panic(err)
   122  		}
   123  	}
   124  	return f
   125  }
   126  
   127  // GlorotN creates a InitWFn that populates a Value with weights normally sampled using Glorot et al.'s algorithm
   128  func GlorotN(gain float64) InitWFn {
   129  	f := func(dt tensor.Dtype, s ...int) interface{} {
   130  		switch dt {
   131  		case tensor.Float64:
   132  			return GlorotEtAlN64(gain, s...)
   133  		case tensor.Float32:
   134  			return GlorotEtAlN32(gain, s...)
   135  		default:
   136  			err := errors.Errorf(nyiTypeFail, "GlorotN", dt)
   137  			panic(err)
   138  		}
   139  	}
   140  	return f
   141  }
   142  
   143  // GlorotU creates a InitWFn that populates a Value with weights uniformly sampled using Glorot et al.'s algorithm
   144  func GlorotU(gain float64) InitWFn {
   145  	f := func(dt tensor.Dtype, s ...int) interface{} {
   146  		switch dt {
   147  		case tensor.Float64:
   148  			return GlorotEtAlU64(gain, s...)
   149  		case tensor.Float32:
   150  			return GlorotEtAlU32(gain, s...)
   151  		default:
   152  			err := errors.Errorf(nyiTypeFail, "GlorotU", dt)
   153  			panic(err)
   154  		}
   155  	}
   156  	return f
   157  }
   158  
   159  func HeN(gain float64) InitWFn {
   160  	f := func(dt tensor.Dtype, s ...int) interface{} {
   161  		switch dt {
   162  		case tensor.Float64:
   163  			return HeEtAlN64(gain, s...)
   164  		default:
   165  			err := errors.Errorf(nyiTypeFail, "HeNormal", dt)
   166  			panic(err)
   167  		}
   168  	}
   169  	return f
   170  }
   171  
   172  func HeU(gain float64) InitWFn {
   173  	f := func(dt tensor.Dtype, s ...int) interface{} {
   174  		switch dt {
   175  		case tensor.Float64:
   176  			return HeEtAlU64(gain, s...)
   177  		default:
   178  			err := errors.Errorf(nyiTypeFail, "HeUniform", dt)
   179  			panic(err)
   180  		}
   181  	}
   182  	return f
   183  }
   184  
   185  // Gaussian64 returns a []float64 drawn from a gaussian distribution as defined by the mean and stdev
   186  func Gaussian64(mean, stdev float64, s ...int) []float64 {
   187  	size := tensor.Shape(s).TotalSize()
   188  
   189  	rand := rng.NewGaussianGenerator(time.Now().UnixNano())
   190  	retVal := make([]float64, size)
   191  	for i := range retVal {
   192  		retVal[i] = rand.Gaussian(mean, stdev)
   193  	}
   194  	return retVal
   195  }
   196  
   197  // Gaussian32 returns a []float32 drawn from a gaussian distribution as defined by the mean and stdev
   198  func Gaussian32(mean, stdev float64, s ...int) []float32 {
   199  	size := tensor.Shape(s).TotalSize()
   200  
   201  	rand := rng.NewGaussianGenerator(time.Now().UnixNano())
   202  	retVal := make([]float32, size)
   203  	for i := range retVal {
   204  		retVal[i] = float32(rand.Gaussian(mean, stdev))
   205  	}
   206  	return retVal
   207  }
   208  
   209  // Uniform64 returns a []float64 drawn from a uniform distribution between [low, high) that is provided
   210  func Uniform64(low, high float64, s ...int) []float64 {
   211  	size := tensor.Shape(s).TotalSize()
   212  
   213  	rand := rng.NewUniformGenerator(time.Now().UnixNano())
   214  	retVal := make([]float64, size)
   215  	for i := range retVal {
   216  		retVal[i] = rand.Float64Range(low, high)
   217  	}
   218  	return retVal
   219  }
   220  
   221  // Uniform32 returns a []float64 drawn from a uniform distribution between [low, high) that is provided
   222  func Uniform32(low, high float64, s ...int) []float32 {
   223  	size := tensor.Shape(s).TotalSize()
   224  	l := float32(low)
   225  	h := float32(high)
   226  
   227  	rand := rng.NewUniformGenerator(time.Now().UnixNano())
   228  	retVal := make([]float32, size)
   229  	for i := range retVal {
   230  		retVal[i] = rand.Float32Range(l, h)
   231  	}
   232  	return retVal
   233  }
   234  
   235  // Binomial64 returns a []float64 drawn from a binomial distribution given the trial and probability parameters.
   236  func Binomial64(trials, prob float64, s ...int) []float64 {
   237  	size := tensor.Shape(s).TotalSize()
   238  	t := int64(trials)
   239  
   240  	rand := rng.NewBinomialGenerator(time.Now().UnixNano())
   241  	retVal := make([]float64, size)
   242  	for i := range retVal {
   243  		retVal[i] = float64(rand.Binomial(t, prob))
   244  	}
   245  	return retVal
   246  }
   247  
   248  // Binomial32 returns a []float32 drawn from a binomial distribution given the trial and probability parameters.
   249  func Binomial32(trials, prob float64, s ...int) []float32 {
   250  	size := tensor.Shape(s).TotalSize()
   251  	t := int64(trials)
   252  
   253  	rand := rng.NewBinomialGenerator(time.Now().UnixNano())
   254  	retVal := make([]float32, size)
   255  	for i := range retVal {
   256  		retVal[i] = float32(rand.Binomial(t, prob))
   257  	}
   258  	return retVal
   259  }
   260  
   261  /* SOPHISTICATED INITIALIZATION STRATEGIES */
   262  
   263  // GlorotEtAlN64 returns float64 weights sampled from a normal distribution
   264  // using the methods specified in Glorot et. al (2010).
   265  // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
   266  func GlorotEtAlN64(gain float64, s ...int) []float64 {
   267  	var n1, n2 int
   268  	fieldSize := 1
   269  	switch len(s) {
   270  	case 0:
   271  		panic("Glorot Normal only works with Tensors of dimensions >= 1")
   272  	case 1:
   273  		// treat it as a col vec
   274  		n1 = 1
   275  		n2 = s[0]
   276  	default:
   277  		n1, n2 = s[0], s[1]
   278  		for _, v := range s[2:] {
   279  			fieldSize *= v
   280  		}
   281  	}
   282  
   283  	size := tensor.Shape(s).TotalSize()
   284  	fanIn := float64((n1 + n2) * fieldSize)
   285  
   286  	stdev := gain * math.Sqrt(2.0/fanIn)
   287  
   288  	rand := rng.NewGaussianGenerator(time.Now().UnixNano())
   289  	retVal := make([]float64, size)
   290  	for i := range retVal {
   291  		retVal[i] = rand.Gaussian(0.0, stdev)
   292  	}
   293  	return retVal
   294  }
   295  
   296  // GlorotEtAlN32 returns float32 weights sampled from a normal distribution
   297  // using the methods specified in Glorot et. al (2010).
   298  // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
   299  func GlorotEtAlN32(gain float64, s ...int) []float32 {
   300  	f64 := GlorotEtAlN64(gain, s...)
   301  	retVal := make([]float32, len(f64))
   302  	for i, v := range f64 {
   303  		retVal[i] = float32(v)
   304  	}
   305  	return retVal
   306  }
   307  
   308  // GlorotEtAlU64 returns float64 weights sampled from a uniform distribution
   309  // using the methods specified in Glorot et. al (2010).
   310  // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
   311  //
   312  // For best results, use:
   313  // 		1.0 for gain for weights that will be used in linear and/or sigmoid units
   314  //		math.Sqrt(2.0) for gain for weights that will be used in ReLU units
   315  //		math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha
   316  func GlorotEtAlU64(gain float64, s ...int) []float64 {
   317  	var n1, n2 int
   318  	fieldSize := 1
   319  	switch len(s) {
   320  	case 0:
   321  		panic("Glorot Uniform only works with Tensors of dimensions >= 1")
   322  	case 1:
   323  		// treat it as a col vec
   324  		n1 = 1
   325  		n2 = s[0]
   326  	default:
   327  		n1, n2 = s[0], s[1]
   328  		for _, v := range s[2:] {
   329  			fieldSize *= v
   330  		}
   331  	}
   332  
   333  	size := tensor.Shape(s).TotalSize()
   334  	fanIn := float64((n1 + n2) * fieldSize)
   335  
   336  	stdev := gain * math.Sqrt(2.0/fanIn)
   337  	lo := 0.0 - math.Sqrt(3.0)*stdev
   338  	hi := 0.0 + math.Sqrt(3.0)*stdev
   339  
   340  	rand := rng.NewUniformGenerator(time.Now().UnixNano())
   341  	retVal := make([]float64, size)
   342  	for i := range retVal {
   343  		retVal[i] = rand.Float64Range(lo, hi)
   344  	}
   345  	return retVal
   346  }
   347  
   348  // GlorotEtAlU32 returns float32 weights sampled from a uniform distribution
   349  // using the methods specified in Glorot et. al (2010).
   350  // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
   351  //
   352  // For best results, use:
   353  // 		1.0 for gain for weights that will be used in linear and/or sigmoid units
   354  //		math.Sqrt(2.0) for gain for weights that will be used in ReLU units
   355  //		math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha
   356  func GlorotEtAlU32(gain float64, s ...int) []float32 {
   357  	f64 := GlorotEtAlN64(gain, s...)
   358  	retVal := make([]float32, len(f64))
   359  	for i, v := range f64 {
   360  		retVal[i] = float32(v)
   361  	}
   362  	return retVal
   363  }
   364  
   365  // HeEtAlN64 returns float64 weights sampled from a normal distro, using the methods
   366  // described in He et al (2015). The formula is:
   367  //		randn(n) * sqrt(2/n)
   368  // See also https://arxiv.org/abs/1502.01852
   369  //
   370  // For best results, use:
   371  // 		1.0 for gain for weights that will be used in linear and/or sigmoid units
   372  //		math.Sqrt(2.0) for gain for weights that will be used in ReLU units
   373  //		math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha
   374  func HeEtAlN64(gain float64, s ...int) []float64 {
   375  	var fanIn float64
   376  
   377  	switch len(s) {
   378  	case 0, 1:
   379  		panic("He et al only works with Tensors of dimensions >= 2")
   380  	case 2:
   381  		fanIn = float64(s[0])
   382  	default:
   383  		fanIn = 1.0
   384  		for _, v := range s[1:] {
   385  			fanIn *= float64(v)
   386  		}
   387  	}
   388  
   389  	size := tensor.Shape(s).TotalSize()
   390  	stdev := gain * math.Sqrt(1.0/fanIn)
   391  
   392  	rand := rng.NewGaussianGenerator(time.Now().UnixNano())
   393  	retVal := make([]float64, size)
   394  	for i := range retVal {
   395  		retVal[i] = rand.Gaussian(0.0, stdev)
   396  	}
   397  	return retVal
   398  }
   399  
   400  // HeEtAlU64 returns float64 weights sampled from a uniform distro, using the methods
   401  // described in He et al (2015). The formula is:
   402  //		randn(n) * sqrt(2/n)
   403  // See also https://arxiv.org/abs/1502.01852
   404  //
   405  // For best results, use:
   406  // 		1.0 for gain for weights that will be used in linear and/or sigmoid units
   407  //		math.Sqrt(2.0) for gain for weights that will be used in ReLU units
   408  //		math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha
   409  func HeEtAlU64(gain float64, s ...int) []float64 {
   410  	var fanIn float64
   411  
   412  	switch len(s) {
   413  	case 0, 1:
   414  		panic("He et al only works with Tensors of dimensions >= 2")
   415  	case 2:
   416  		fanIn = float64(s[0])
   417  	default:
   418  		fanIn = 1.0
   419  		for _, v := range s[1:] {
   420  			fanIn *= float64(v)
   421  		}
   422  	}
   423  
   424  	size := tensor.Shape(s).TotalSize()
   425  	stdev := gain * math.Sqrt(1.0/fanIn)
   426  
   427  	lo := 0.0 - math.Sqrt(3.0)*stdev
   428  	hi := 0.0 + math.Sqrt(3.0)*stdev
   429  
   430  	rand := rng.NewUniformGenerator(time.Now().UnixNano())
   431  	retVal := make([]float64, size)
   432  	for i := range retVal {
   433  		retVal[i] = rand.Float64Range(lo, hi)
   434  	}
   435  	return retVal
   436  }