gorgonia.org/gorgonia@v0.9.17/weights.go (about) 1 package gorgonia 2 3 import ( 4 "math" 5 "reflect" 6 "time" 7 8 rng "github.com/leesper/go_rng" 9 "github.com/pkg/errors" 10 "gorgonia.org/tensor" 11 ) 12 13 // This file provides several weight initialization utility functions. 14 // It uses the rng package by leesper 15 16 // InitWFn is a type of helper function to help initialize weights vector/matrices. 17 // It generates the backing required for the tensors. 18 // 19 // It's typically used in closures 20 type InitWFn func(dt tensor.Dtype, s ...int) interface{} 21 22 // Zeroes creates an InitWfn that populates a Value with... zeroes. I don't know what you expected. 23 func Zeroes() InitWFn { 24 f := func(dt tensor.Dtype, s ...int) interface{} { 25 size := tensor.Shape(s).TotalSize() 26 switch dt { 27 case tensor.Float64: 28 return make([]float64, size) 29 case tensor.Float32: 30 return make([]float32, size) 31 case tensor.Int: 32 return make([]int, size) 33 default: 34 return reflect.MakeSlice(reflect.SliceOf(dt.Type), size, size).Interface() 35 } 36 } 37 return f 38 } 39 40 // Ones creates an InitWfn that populates a Value with ones. See Zeroes() for more explanation. 41 func Ones() InitWFn { 42 return func(dt tensor.Dtype, s ...int) interface{} { return ones(dt, s...).Data() } 43 } 44 45 // RangedFrom creates an InitWFn that populates a Value starting with the provided start, increamenting the number for each element in the value by 1 46 func RangedFrom(start int) InitWFn { 47 f := func(dt tensor.Dtype, s ...int) interface{} { 48 size := tensor.Shape(s).TotalSize() 49 return tensor.Range(dt, start, start+size) 50 } 51 return f 52 } 53 54 // ValuesOf creates an InitWrn that populates a value with val. This function will cause a panic if val's type is incompatible with the values type. 55 func ValuesOf(val interface{}) InitWFn { 56 f := func(dt tensor.Dtype, s ...int) interface{} { 57 size := tensor.Shape(s).TotalSize() 58 59 switch dt { 60 case tensor.Float64: 61 v := val.(float64) 62 retVal := make([]float64, size) 63 for i := range retVal { 64 retVal[i] = v 65 } 66 return retVal 67 case tensor.Float32: 68 v := val.(float32) 69 retVal := make([]float32, size) 70 for i := range retVal { 71 retVal[i] = v 72 } 73 return retVal 74 case tensor.Int: 75 v := val.(int) 76 retVal := make([]int, size) 77 for i := range retVal { 78 retVal[i] = v 79 } 80 return retVal 81 default: 82 err := errors.Errorf(nyiTypeFail, "Zeroes", dt) 83 panic(err) 84 } 85 } 86 return f 87 } 88 89 // Gaussian creates a InitWFn with the specified parameters. 90 // Example Usage: 91 // w := NewMatrix(g, Float64, WithName("w"), WithShape(2,2), WithInit(Gaussian(0, 1))) 92 // This will create a backing slice of []float64, with the length of 4, and its values are drawn from a gaussian distro 93 func Gaussian(mean, stdev float64) InitWFn { 94 f := func(dt tensor.Dtype, s ...int) interface{} { 95 switch dt { 96 case tensor.Float64: 97 return Gaussian64(mean, stdev, s...) 98 case tensor.Float32: 99 return Gaussian32(mean, stdev, s...) 100 default: 101 err := errors.Errorf(nyiTypeFail, "Gaussian init", dt) 102 panic(err) 103 } 104 } 105 return f 106 } 107 108 // Uniform creates a InitWFn with the specified parameters. 109 // Example Usage: 110 // w := NewMatrix(g, Float64, WithName("w"), WithShape(2,2), WithInit(Uniform(-1, 1))) 111 // This will create a backing slice of []float64, with the length of 4, and its values are drawn from a uniform distro 112 func Uniform(low, high float64) InitWFn { 113 f := func(dt tensor.Dtype, s ...int) interface{} { 114 switch dt { 115 case tensor.Float64: 116 return Uniform64(low, high, s...) 117 case tensor.Float32: 118 return Uniform32(low, high, s...) 119 default: 120 err := errors.Errorf(nyiTypeFail, "Uniform init", dt) 121 panic(err) 122 } 123 } 124 return f 125 } 126 127 // GlorotN creates a InitWFn that populates a Value with weights normally sampled using Glorot et al.'s algorithm 128 func GlorotN(gain float64) InitWFn { 129 f := func(dt tensor.Dtype, s ...int) interface{} { 130 switch dt { 131 case tensor.Float64: 132 return GlorotEtAlN64(gain, s...) 133 case tensor.Float32: 134 return GlorotEtAlN32(gain, s...) 135 default: 136 err := errors.Errorf(nyiTypeFail, "GlorotN", dt) 137 panic(err) 138 } 139 } 140 return f 141 } 142 143 // GlorotU creates a InitWFn that populates a Value with weights uniformly sampled using Glorot et al.'s algorithm 144 func GlorotU(gain float64) InitWFn { 145 f := func(dt tensor.Dtype, s ...int) interface{} { 146 switch dt { 147 case tensor.Float64: 148 return GlorotEtAlU64(gain, s...) 149 case tensor.Float32: 150 return GlorotEtAlU32(gain, s...) 151 default: 152 err := errors.Errorf(nyiTypeFail, "GlorotU", dt) 153 panic(err) 154 } 155 } 156 return f 157 } 158 159 func HeN(gain float64) InitWFn { 160 f := func(dt tensor.Dtype, s ...int) interface{} { 161 switch dt { 162 case tensor.Float64: 163 return HeEtAlN64(gain, s...) 164 default: 165 err := errors.Errorf(nyiTypeFail, "HeNormal", dt) 166 panic(err) 167 } 168 } 169 return f 170 } 171 172 func HeU(gain float64) InitWFn { 173 f := func(dt tensor.Dtype, s ...int) interface{} { 174 switch dt { 175 case tensor.Float64: 176 return HeEtAlU64(gain, s...) 177 default: 178 err := errors.Errorf(nyiTypeFail, "HeUniform", dt) 179 panic(err) 180 } 181 } 182 return f 183 } 184 185 // Gaussian64 returns a []float64 drawn from a gaussian distribution as defined by the mean and stdev 186 func Gaussian64(mean, stdev float64, s ...int) []float64 { 187 size := tensor.Shape(s).TotalSize() 188 189 rand := rng.NewGaussianGenerator(time.Now().UnixNano()) 190 retVal := make([]float64, size) 191 for i := range retVal { 192 retVal[i] = rand.Gaussian(mean, stdev) 193 } 194 return retVal 195 } 196 197 // Gaussian32 returns a []float32 drawn from a gaussian distribution as defined by the mean and stdev 198 func Gaussian32(mean, stdev float64, s ...int) []float32 { 199 size := tensor.Shape(s).TotalSize() 200 201 rand := rng.NewGaussianGenerator(time.Now().UnixNano()) 202 retVal := make([]float32, size) 203 for i := range retVal { 204 retVal[i] = float32(rand.Gaussian(mean, stdev)) 205 } 206 return retVal 207 } 208 209 // Uniform64 returns a []float64 drawn from a uniform distribution between [low, high) that is provided 210 func Uniform64(low, high float64, s ...int) []float64 { 211 size := tensor.Shape(s).TotalSize() 212 213 rand := rng.NewUniformGenerator(time.Now().UnixNano()) 214 retVal := make([]float64, size) 215 for i := range retVal { 216 retVal[i] = rand.Float64Range(low, high) 217 } 218 return retVal 219 } 220 221 // Uniform32 returns a []float64 drawn from a uniform distribution between [low, high) that is provided 222 func Uniform32(low, high float64, s ...int) []float32 { 223 size := tensor.Shape(s).TotalSize() 224 l := float32(low) 225 h := float32(high) 226 227 rand := rng.NewUniformGenerator(time.Now().UnixNano()) 228 retVal := make([]float32, size) 229 for i := range retVal { 230 retVal[i] = rand.Float32Range(l, h) 231 } 232 return retVal 233 } 234 235 // Binomial64 returns a []float64 drawn from a binomial distribution given the trial and probability parameters. 236 func Binomial64(trials, prob float64, s ...int) []float64 { 237 size := tensor.Shape(s).TotalSize() 238 t := int64(trials) 239 240 rand := rng.NewBinomialGenerator(time.Now().UnixNano()) 241 retVal := make([]float64, size) 242 for i := range retVal { 243 retVal[i] = float64(rand.Binomial(t, prob)) 244 } 245 return retVal 246 } 247 248 // Binomial32 returns a []float32 drawn from a binomial distribution given the trial and probability parameters. 249 func Binomial32(trials, prob float64, s ...int) []float32 { 250 size := tensor.Shape(s).TotalSize() 251 t := int64(trials) 252 253 rand := rng.NewBinomialGenerator(time.Now().UnixNano()) 254 retVal := make([]float32, size) 255 for i := range retVal { 256 retVal[i] = float32(rand.Binomial(t, prob)) 257 } 258 return retVal 259 } 260 261 /* SOPHISTICATED INITIALIZATION STRATEGIES */ 262 263 // GlorotEtAlN64 returns float64 weights sampled from a normal distribution 264 // using the methods specified in Glorot et. al (2010). 265 // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf 266 func GlorotEtAlN64(gain float64, s ...int) []float64 { 267 var n1, n2 int 268 fieldSize := 1 269 switch len(s) { 270 case 0: 271 panic("Glorot Normal only works with Tensors of dimensions >= 1") 272 case 1: 273 // treat it as a col vec 274 n1 = 1 275 n2 = s[0] 276 default: 277 n1, n2 = s[0], s[1] 278 for _, v := range s[2:] { 279 fieldSize *= v 280 } 281 } 282 283 size := tensor.Shape(s).TotalSize() 284 fanIn := float64((n1 + n2) * fieldSize) 285 286 stdev := gain * math.Sqrt(2.0/fanIn) 287 288 rand := rng.NewGaussianGenerator(time.Now().UnixNano()) 289 retVal := make([]float64, size) 290 for i := range retVal { 291 retVal[i] = rand.Gaussian(0.0, stdev) 292 } 293 return retVal 294 } 295 296 // GlorotEtAlN32 returns float32 weights sampled from a normal distribution 297 // using the methods specified in Glorot et. al (2010). 298 // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf 299 func GlorotEtAlN32(gain float64, s ...int) []float32 { 300 f64 := GlorotEtAlN64(gain, s...) 301 retVal := make([]float32, len(f64)) 302 for i, v := range f64 { 303 retVal[i] = float32(v) 304 } 305 return retVal 306 } 307 308 // GlorotEtAlU64 returns float64 weights sampled from a uniform distribution 309 // using the methods specified in Glorot et. al (2010). 310 // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf 311 // 312 // For best results, use: 313 // 1.0 for gain for weights that will be used in linear and/or sigmoid units 314 // math.Sqrt(2.0) for gain for weights that will be used in ReLU units 315 // math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha 316 func GlorotEtAlU64(gain float64, s ...int) []float64 { 317 var n1, n2 int 318 fieldSize := 1 319 switch len(s) { 320 case 0: 321 panic("Glorot Uniform only works with Tensors of dimensions >= 1") 322 case 1: 323 // treat it as a col vec 324 n1 = 1 325 n2 = s[0] 326 default: 327 n1, n2 = s[0], s[1] 328 for _, v := range s[2:] { 329 fieldSize *= v 330 } 331 } 332 333 size := tensor.Shape(s).TotalSize() 334 fanIn := float64((n1 + n2) * fieldSize) 335 336 stdev := gain * math.Sqrt(2.0/fanIn) 337 lo := 0.0 - math.Sqrt(3.0)*stdev 338 hi := 0.0 + math.Sqrt(3.0)*stdev 339 340 rand := rng.NewUniformGenerator(time.Now().UnixNano()) 341 retVal := make([]float64, size) 342 for i := range retVal { 343 retVal[i] = rand.Float64Range(lo, hi) 344 } 345 return retVal 346 } 347 348 // GlorotEtAlU32 returns float32 weights sampled from a uniform distribution 349 // using the methods specified in Glorot et. al (2010). 350 // See also: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf 351 // 352 // For best results, use: 353 // 1.0 for gain for weights that will be used in linear and/or sigmoid units 354 // math.Sqrt(2.0) for gain for weights that will be used in ReLU units 355 // math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha 356 func GlorotEtAlU32(gain float64, s ...int) []float32 { 357 f64 := GlorotEtAlN64(gain, s...) 358 retVal := make([]float32, len(f64)) 359 for i, v := range f64 { 360 retVal[i] = float32(v) 361 } 362 return retVal 363 } 364 365 // HeEtAlN64 returns float64 weights sampled from a normal distro, using the methods 366 // described in He et al (2015). The formula is: 367 // randn(n) * sqrt(2/n) 368 // See also https://arxiv.org/abs/1502.01852 369 // 370 // For best results, use: 371 // 1.0 for gain for weights that will be used in linear and/or sigmoid units 372 // math.Sqrt(2.0) for gain for weights that will be used in ReLU units 373 // math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha 374 func HeEtAlN64(gain float64, s ...int) []float64 { 375 var fanIn float64 376 377 switch len(s) { 378 case 0, 1: 379 panic("He et al only works with Tensors of dimensions >= 2") 380 case 2: 381 fanIn = float64(s[0]) 382 default: 383 fanIn = 1.0 384 for _, v := range s[1:] { 385 fanIn *= float64(v) 386 } 387 } 388 389 size := tensor.Shape(s).TotalSize() 390 stdev := gain * math.Sqrt(1.0/fanIn) 391 392 rand := rng.NewGaussianGenerator(time.Now().UnixNano()) 393 retVal := make([]float64, size) 394 for i := range retVal { 395 retVal[i] = rand.Gaussian(0.0, stdev) 396 } 397 return retVal 398 } 399 400 // HeEtAlU64 returns float64 weights sampled from a uniform distro, using the methods 401 // described in He et al (2015). The formula is: 402 // randn(n) * sqrt(2/n) 403 // See also https://arxiv.org/abs/1502.01852 404 // 405 // For best results, use: 406 // 1.0 for gain for weights that will be used in linear and/or sigmoid units 407 // math.Sqrt(2.0) for gain for weights that will be used in ReLU units 408 // math.Sqrt(2.0 / (1+alpha*alpha)) for ReLU that are leaky with alpha 409 func HeEtAlU64(gain float64, s ...int) []float64 { 410 var fanIn float64 411 412 switch len(s) { 413 case 0, 1: 414 panic("He et al only works with Tensors of dimensions >= 2") 415 case 2: 416 fanIn = float64(s[0]) 417 default: 418 fanIn = 1.0 419 for _, v := range s[1:] { 420 fanIn *= float64(v) 421 } 422 } 423 424 size := tensor.Shape(s).TotalSize() 425 stdev := gain * math.Sqrt(1.0/fanIn) 426 427 lo := 0.0 - math.Sqrt(3.0)*stdev 428 hi := 0.0 + math.Sqrt(3.0)*stdev 429 430 rand := rng.NewUniformGenerator(time.Now().UnixNano()) 431 retVal := make([]float64, size) 432 for i := range retVal { 433 retVal[i] = rand.Float64Range(lo, hi) 434 } 435 return retVal 436 }