gorgonia.org/gorgonia@v0.9.17/op_yolo.go

gorgonia.org/gorgonia@v0.9.17/op_yolo.go (about)

     1  package gorgonia
     2  
     3  import (
     4  	"fmt"
     5  	"hash"
     6  	"image"
     7  	"math"
     8  
     9  	"github.com/chewxy/hm"
    10  	"github.com/chewxy/math32"
    11  	"github.com/pkg/errors"
    12  	"gorgonia.org/tensor"
    13  )
    14  
    15  type yoloOp struct {
    16  	anchors     []float32
    17  	masks       []int
    18  	ignoreTresh float32
    19  	dimensions  int
    20  	numClasses  int
    21  	trainMode   bool
    22  }
    23  
    24  func newYoloOp(anchors []float32, masks []int, netSize, numClasses int, ignoreTresh float32, trainMode bool) *yoloOp {
    25  	yoloOp := &yoloOp{
    26  		anchors:     anchors,
    27  		dimensions:  netSize,
    28  		numClasses:  numClasses,
    29  		ignoreTresh: ignoreTresh,
    30  		masks:       masks,
    31  		trainMode:   trainMode,
    32  	}
    33  	return yoloOp
    34  }
    35  
    36  // YOLOv3 https://arxiv.org/abs/1804.02767
    37  func YOLOv3(input *Node, anchors []float32, masks []int, netSize, numClasses int, ignoreTresh float32, targets ...*Node) (*Node, error) {
    38  	if len(targets) > 0 {
    39  		inputSlice, err := Slice(input, S(0), nil, nil, nil)
    40  		if err != nil {
    41  			return nil, errors.Wrap(err, "Can't prepare YOLOv3 node for training mode due Slice() on input node error")
    42  		}
    43  		targetsSlice, err := Slice(targets[0], S(0), nil, nil, nil)
    44  		if err != nil {
    45  			return nil, errors.Wrap(err, "Can't prepare YOLOv3 node for training mode due Slice() on first node in target nodes slice error")
    46  		}
    47  		inputTargetConcat, err := Concat(0, inputSlice, targetsSlice)
    48  		if err != nil {
    49  			return nil, errors.Wrap(err, "Can't prepare YOLOv3 node for training mode due Concat() error")
    50  		}
    51  		concatShp := inputTargetConcat.Shape()
    52  		inputTargetConcat, err = Reshape(inputTargetConcat, []int{1, concatShp[0], concatShp[1], concatShp[2]})
    53  		if err != nil {
    54  			return nil, errors.Wrap(err, "Can't prepare YOLOv3 node for training mode due Reshape() error")
    55  		}
    56  		op := newYoloOp(anchors, masks, netSize, numClasses, ignoreTresh, true)
    57  		return ApplyOp(op, inputTargetConcat)
    58  	}
    59  	op := newYoloOp(anchors, masks, netSize, numClasses, ignoreTresh, false)
    60  	return ApplyOp(op, input)
    61  }
    62  
    63  func (op *yoloOp) Arity() int {
    64  	return 1
    65  }
    66  
    67  func (op *yoloOp) ReturnsPtr() bool { return false }
    68  
    69  func (op *yoloOp) CallsExtern() bool { return false }
    70  
    71  func (op *yoloOp) WriteHash(h hash.Hash) {
    72  	fmt.Fprintf(h, "YOLO{}(anchors: (%v))", op.anchors)
    73  }
    74  func (op *yoloOp) Hashcode() uint32 { return simpleHash(op) }
    75  
    76  func (op *yoloOp) String() string {
    77  	return fmt.Sprintf("YOLO{}(anchors: (%v))", op.anchors)
    78  }
    79  func (op *yoloOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) {
    80  	shp := inputs[0].(tensor.Shape)
    81  	if len(shp) < 4 {
    82  		return nil, fmt.Errorf("InferShape() for YOLO must contain 4 dimensions")
    83  	}
    84  	s := shp.Clone()
    85  	if op.trainMode {
    86  		return []int{s[0], s[2] * s[3] * len(op.masks), (s[1] - 1) / len(op.masks)}, nil
    87  	}
    88  	return []int{s[0], s[2] * s[3] * len(op.masks), s[1] / len(op.masks)}, nil
    89  }
    90  
    91  func (op *yoloOp) Type() hm.Type {
    92  	a := hm.TypeVariable('a')
    93  	t := newTensorType(4, a)
    94  	o := newTensorType(3, a)
    95  	return hm.NewFnType(t, o)
    96  }
    97  
    98  func (op *yoloOp) OverwritesInput() int { return -1 }
    99  
   100  func (op *yoloOp) checkInput(inputs ...Value) (tensor.Tensor, error) {
   101  	if err := checkArity(op, len(inputs)); err != nil {
   102  		return nil, errors.Wrap(err, "Can't check arity for YOLO operation")
   103  	}
   104  	var in tensor.Tensor
   105  	var ok bool
   106  	if in, ok = inputs[0].(tensor.Tensor); !ok {
   107  		return nil, errors.Errorf("Can't check YOLO input: expected input has to be a tensor")
   108  	}
   109  	if in.Shape().Dims() != 4 {
   110  		return nil, errors.Errorf("Can't check YOLO input: expected input must have 4 dimensions")
   111  	}
   112  	return in, nil
   113  }
   114  
   115  func sigmoidSlice(v tensor.View) error {
   116  	switch v.Dtype() {
   117  	case Float32:
   118  		_, err := v.Apply(_sigmoidf32, tensor.UseUnsafe())
   119  		if err != nil {
   120  			return errors.Wrap(err, "Can't apply _sigmoidf32 as activation function to YOLO operation")
   121  		}
   122  	case Float64:
   123  		_, err := v.Apply(_sigmoidf64, tensor.UseUnsafe())
   124  		if err != nil {
   125  			return errors.Wrap(err, "Can't apply _sigmoidf64 as activation function to YOLO operation")
   126  		}
   127  	default:
   128  		return fmt.Errorf("Unsupported numeric type for YOLO sigmoid function. Please use float64 or float32")
   129  	}
   130  	return nil
   131  }
   132  
   133  func expSlice(v tensor.View) error {
   134  	switch v.Dtype() {
   135  	case Float32:
   136  		_, err := v.Apply(math32.Exp, tensor.UseUnsafe())
   137  		if err != nil {
   138  			return errors.Wrap(err, "Can't apply exp32 to YOLO operation")
   139  		}
   140  	case Float64:
   141  		_, err := v.Apply(math.Exp, tensor.UseUnsafe())
   142  		if err != nil {
   143  			return errors.Wrap(err, "Can't apply exp64 to YOLO operation")
   144  		}
   145  	default:
   146  		return fmt.Errorf("Unsupported numeric type for YOLO for exp function. Please use float64 or float32")
   147  	}
   148  	return nil
   149  }
   150  
   151  func (op *yoloOp) Do(inputs ...Value) (retVal Value, err error) {
   152  	if !op.trainMode {
   153  		inputTensor, err := op.checkInput(inputs...)
   154  		if err != nil {
   155  			return nil, errors.Wrap(err, "Can't check YOLO input")
   156  		}
   157  		batchSize := inputTensor.Shape()[0]
   158  		stride := op.dimensions / inputTensor.Shape()[2]
   159  		gridSize := inputTensor.Shape()[2]
   160  		bboxAttributes := 5 + op.numClasses
   161  		numAnchors := len(op.anchors) / 2
   162  		currentAnchors := []float32{}
   163  		for i := range op.masks {
   164  			if op.masks[i] >= numAnchors {
   165  				return nil, fmt.Errorf("Incorrect mask %v for anchors in YOLO layer", op.masks)
   166  			}
   167  			currentAnchors = append(currentAnchors, op.anchors[i*2], op.anchors[i*2+1])
   168  		}
   169  		return op.evaluateYOLO_f32(inputTensor, batchSize, stride, gridSize, bboxAttributes, len(op.masks), currentAnchors)
   170  	}
   171  
   172  	// Training mode
   173  	input, err := op.checkInput(inputs...)
   174  	if err != nil {
   175  		return nil, errors.Wrap(err, "Can't check YOLO input [Training mode]")
   176  	}
   177  	inv, err := input.Slice(nil, S(0, input.Shape()[1]-1), nil, nil)
   178  	if err != nil {
   179  		return nil, errors.Wrap(err, "Can't prepare slice in YOLO (1) [Training mode]")
   180  	}
   181  	numTargets, err := input.At(0, input.Shape()[1]-1, 0, 0)
   182  	if err != nil {
   183  		return nil, errors.Wrap(err, "Can't select targets from YOLO input [Training mode]")
   184  	}
   185  
   186  	batchSize := input.Shape()[0]
   187  	stride := op.dimensions / input.Shape()[2]
   188  	grid := input.Shape()[2]
   189  	bboxAttributes := 5 + op.numClasses
   190  	numAnchors := len(op.masks)
   191  	currentAnchors := []float32{}
   192  	for i := range op.masks {
   193  		if op.masks[i] >= (len(op.anchors) / 2) {
   194  			return nil, fmt.Errorf("Incorrect mask %v for anchors in YOLO layer [Training mode]", op.masks)
   195  		}
   196  		currentAnchors = append(currentAnchors, op.anchors[i*2], op.anchors[i*2+1])
   197  	}
   198  
   199  	targets := []float32{}
   200  	inputNumericType := input.Dtype()
   201  
   202  	switch inputNumericType {
   203  	case Float32:
   204  		lt := int(numTargets.(float32))
   205  		targets = make([]float32, lt)
   206  		for i := 1; i <= lt; i++ {
   207  			valAt, err := input.At(0, input.Shape()[1]-1, i/grid, i%grid)
   208  			if err != nil {
   209  				return nil, fmt.Errorf("Can't select float32 targets for YOLO [Training mode]")
   210  			}
   211  			targets[i-1] = valAt.(float32)
   212  		}
   213  		break
   214  	case Float64:
   215  		lt := int(numTargets.(float64))
   216  		targets = make([]float32, lt)
   217  		for i := 1; i <= lt; i++ {
   218  			valAt, err := input.At(0, input.Shape()[1]-1, i/grid, i%grid)
   219  			if err != nil {
   220  				return nil, fmt.Errorf("Can't select float64 targets for YOLO [Training mode]")
   221  			}
   222  			targets[i-1] = float32(valAt.(float64))
   223  		}
   224  		break
   225  	default:
   226  		return nil, fmt.Errorf("Unsupported numeric type while preparing targets for YOLO Please use float64 or float32 [Training mode]")
   227  	}
   228  
   229  	input = inv.Materialize()
   230  
   231  	err = input.Reshape(batchSize, bboxAttributes*numAnchors, grid*grid)
   232  	if err != nil {
   233  		return nil, errors.Wrap(err, "Can't reshape in YOLO (1) [Training mode]")
   234  	}
   235  	err = input.T(0, 2, 1)
   236  	if err != nil {
   237  		return nil, errors.Wrap(err, "Can't safely transponse in YOLO (1) [Training mode]")
   238  	}
   239  	err = input.Transpose()
   240  	if err != nil {
   241  		return nil, errors.Wrap(err, "Can't transponse in YOLO (1) [Training mode]")
   242  	}
   243  	err = input.Reshape(batchSize, grid*grid*numAnchors, bboxAttributes)
   244  	if err != nil {
   245  		return nil, errors.Wrap(err, "Can't reshape in YOLO (2) [Training mode]")
   246  	}
   247  
   248  	clonedInput := input.Clone().(tensor.Tensor)
   249  	outyolo, err := op.evaluateYOLO_f32(input, batchSize, stride, grid, bboxAttributes, numAnchors, currentAnchors)
   250  	if err != nil {
   251  		return nil, errors.Wrap(err, "Can't evaluate YOLO operation [Training mode]")
   252  	}
   253  
   254  	yoloNumericType := outyolo.Dtype()
   255  	result := &tensor.Dense{}
   256  
   257  	switch yoloNumericType {
   258  	case Float32:
   259  		yoloBBoxesF32 := make([]float32, 0)
   260  		inputF32 := make([]float32, 0)
   261  		err = clonedInput.Reshape(input.Shape()[0] * input.Shape()[1] * input.Shape()[2])
   262  		if err != nil {
   263  			return nil, errors.Wrap(err, "Can't reshape in YOLO (3) [Training mode]")
   264  		}
   265  		err = outyolo.Reshape(outyolo.Shape()[0] * outyolo.Shape()[1] * outyolo.Shape()[2])
   266  		if err != nil {
   267  			return nil, errors.Wrap(err, "Can't reshape in YOLO (3) [Training mode]")
   268  		}
   269  		for i := 0; i < outyolo.Shape()[0]; i++ {
   270  			buf, err := outyolo.At(i)
   271  			if err != nil {
   272  				return nil, errors.Wrap(err, "Can't select value from YOLO output [Training mode]")
   273  			}
   274  			yoloBBoxesF32 = append(yoloBBoxesF32, buf.(float32))
   275  			buf, err = clonedInput.At(i)
   276  			if err != nil {
   277  				return nil, errors.Wrap(err, "Can't select value from YOLO bounding boxes [Training mode]")
   278  			}
   279  			inputF32 = append(inputF32, buf.(float32))
   280  		}
   281  		preparedOut := prepareOutputYOLO_f32(inputF32, yoloBBoxesF32, targets, op.anchors, op.masks, op.numClasses, op.dimensions, grid, op.ignoreTresh)
   282  		result = tensor.New(tensor.WithShape(1, grid*grid*len(op.masks), 5+op.numClasses), tensor.Of(tensor.Float32), tensor.WithBacking(preparedOut))
   283  		break
   284  	case Float64:
   285  		// @todo
   286  		return nil, fmt.Errorf("float64 numeric type is not implemented for preparing result for YOLO [Training mode]")
   287  	default:
   288  		return nil, fmt.Errorf("Unsupported numeric type for preparing result for YOLO. Please use float64 or float32 [Training mode]")
   289  	}
   290  
   291  	return result, nil
   292  }
   293  
   294  func (op *yoloOp) evaluateYOLO_f32(input tensor.Tensor, batchSize, stride, grid, bboxAttrs, numAnchors int, currentAnchors []float32) (retVal tensor.Tensor, err error) {
   295  
   296  	inputNumericType := input.Dtype()
   297  	if inputNumericType != Float32 {
   298  		return nil, fmt.Errorf("evaluateYOLO_f32() called with input tensor of type %v. Float32 is required", inputNumericType)
   299  	}
   300  
   301  	err = input.Reshape(batchSize, bboxAttrs*numAnchors, grid*grid)
   302  	if err != nil {
   303  		return nil, errors.Wrap(err, "Can't make reshape grid^2 for YOLO")
   304  	}
   305  
   306  	err = input.T(0, 2, 1)
   307  	if err != nil {
   308  		return nil, errors.Wrap(err, "Can't safely transponse input for YOLO")
   309  	}
   310  	err = input.Transpose()
   311  	if err != nil {
   312  		return nil, errors.Wrap(err, "Can't transponse input for YOLO")
   313  	}
   314  	err = input.Reshape(batchSize, grid*grid*numAnchors, bboxAttrs)
   315  	if err != nil {
   316  		return nil, errors.Wrap(err, "Can't reshape bbox for YOLO")
   317  	}
   318  
   319  	// Activation of x, y, and objects via sigmoid function
   320  	slXY, err := input.Slice(nil, nil, S(0, 2))
   321  	err = sigmoidSlice(slXY)
   322  	if err != nil {
   323  		return nil, errors.Wrap(err, "Can't activate XY")
   324  	}
   325  	slClasses, err := input.Slice(nil, nil, S(4, 5+op.numClasses))
   326  	err = sigmoidSlice(slClasses)
   327  	if err != nil {
   328  		return nil, errors.Wrap(err, "Can't activate classes")
   329  	}
   330  
   331  	step := grid * numAnchors
   332  	for i := 0; i < grid; i++ {
   333  
   334  		vy, err := input.Slice(nil, S(i*step, i*step+step), S(1))
   335  		if err != nil {
   336  			return nil, errors.Wrap(err, "Can't slice while doing steps for grid")
   337  		}
   338  
   339  		_, err = tensor.Add(vy, float32(i), tensor.UseUnsafe())
   340  		if err != nil {
   341  			return nil, errors.Wrap(err, "Can't do tensor.Add(...) for float32; (1)")
   342  		}
   343  
   344  		for n := 0; n < numAnchors; n++ {
   345  			anchorsSlice, err := input.Slice(nil, S(i*numAnchors+n, input.Shape()[1], step), S(0))
   346  			if err != nil {
   347  				return nil, errors.Wrap(err, "Can't slice anchors while doing steps for grid")
   348  			}
   349  			_, err = tensor.Add(anchorsSlice, float32(i), tensor.UseUnsafe())
   350  			if err != nil {
   351  				return nil, errors.Wrap(err, "Can't do tensor.Add(...) for float32; (1)")
   352  			}
   353  		}
   354  
   355  	}
   356  
   357  	anchors := []float32{}
   358  	for i := 0; i < grid*grid; i++ {
   359  		anchors = append(anchors, currentAnchors...)
   360  	}
   361  
   362  	anchorsTensor := tensor.New(tensor.Of(inputNumericType), tensor.WithShape(1, grid*grid*numAnchors, 2))
   363  	for i := range anchors {
   364  		anchorsTensor.Set(i, anchors[i])
   365  	}
   366  
   367  	_, err = tensor.Div(anchorsTensor, float32(stride), tensor.UseUnsafe())
   368  	if err != nil {
   369  		return nil, errors.Wrap(err, "Can't do tensor.Div(...) for float32")
   370  	}
   371  
   372  	vhw, err := input.Slice(nil, nil, S(2, 4))
   373  	if err != nil {
   374  		return nil, errors.Wrap(err, "Can't do slice on input S(2,4)")
   375  	}
   376  
   377  	_, err = vhw.Apply(math32.Exp, tensor.UseUnsafe())
   378  	if err != nil {
   379  		return nil, errors.Wrap(err, "Can't apply exp32 to YOLO operation")
   380  	}
   381  
   382  	_, err = tensor.Mul(vhw, anchorsTensor, tensor.UseUnsafe())
   383  	if err != nil {
   384  		return nil, errors.Wrap(err, "Can't do tensor.Mul(...) for anchors")
   385  	}
   386  
   387  	vv, err := input.Slice(nil, nil, S(0, 4))
   388  	if err != nil {
   389  		return nil, errors.Wrap(err, "Can't do slice on input S(0,4)")
   390  	}
   391  
   392  	_, err = tensor.Mul(vv, float32(stride), tensor.UseUnsafe())
   393  	if err != nil {
   394  		return nil, errors.Wrap(err, "Can't do tensor.Mul(...) for float32")
   395  	}
   396  
   397  	return input, nil
   398  }
   399  
   400  func iou_f32(r1, r2 image.Rectangle) float32 {
   401  	intersection := r1.Intersect(r2)
   402  	interArea := intersection.Dx() * intersection.Dy()
   403  	r1Area := r1.Dx() * r1.Dy()
   404  	r2Area := r2.Dx() * r2.Dy()
   405  	return float32(interArea) / float32(r1Area+r2Area-interArea)
   406  }
   407  
   408  func getBestIOU_f32(input, target []float32, numClasses, dims int) [][]float32 {
   409  	ious := make([][]float32, 0)
   410  	imgsize := float32(dims)
   411  	for i := 0; i < len(input); i = i + numClasses + 5 {
   412  		ious = append(ious, []float32{0, -1})
   413  		r1 := rectifyBox_f32(input[i], input[i+1], input[i+2], input[i+3], dims)
   414  		for j := 0; j < len(target); j = j + 5 {
   415  			r2 := rectifyBox_f32(target[j+1]*imgsize, target[j+2]*imgsize, target[j+3]*imgsize, target[j+4]*imgsize, dims)
   416  			curiou := iou_f32(r1, r2)
   417  			if curiou > ious[i/(5+numClasses)][0] {
   418  				ious[i/(5+numClasses)][0] = curiou
   419  				ious[i/(5+numClasses)][1] = float32(j / 5)
   420  			}
   421  		}
   422  	}
   423  	return ious
   424  }
   425  
   426  func getBestAnchors_f32(target []float32, anchors []float32, masks []int, dims int, gridSize float32) [][]int {
   427  	bestAnchors := make([][]int, len(target)/5)
   428  	imgsize := float32(dims)
   429  	for j := 0; j < len(target); j = j + 5 {
   430  		targetRect := rectifyBox_f32(0, 0, target[j+3]*imgsize, target[j+4]*imgsize, dims) //not absolutely confident in rectangle sizes
   431  		bestIOU := float32(0.0)
   432  		bestAnchors[j/5] = make([]int, 3)
   433  		for i := 0; i < len(anchors); i = i + 2 {
   434  			anchorRect := rectifyBox_f32(0, 0, anchors[i], anchors[i+1], dims)
   435  			currentIOU := iou_f32(anchorRect, targetRect)
   436  			if currentIOU >= bestIOU {
   437  				bestAnchors[j/5][0] = i
   438  				bestIOU = currentIOU
   439  			}
   440  		}
   441  		bestAnchors[j/5][0] = findIntElement(masks, bestAnchors[j/5][0]/2)
   442  		if bestAnchors[j/5][0] != -1 {
   443  			bestAnchors[j/5][1] = int(target[j+1] * gridSize)
   444  			bestAnchors[j/5][2] = int(target[j+2] * gridSize)
   445  		}
   446  	}
   447  	return bestAnchors
   448  }
   449  
   450  func prepareOutputYOLO_f32(input, yoloBoxes, target, anchors []float32, masks []int, numClasses, dims, gridSize int, ignoreTresh float32) []float32 {
   451  	yoloBBoxes := make([]float32, len(yoloBoxes))
   452  	gridSizeF32 := float32(gridSize)
   453  	bestAnchors := getBestAnchors_f32(target, anchors, masks, dims, gridSizeF32)
   454  	bestIous := getBestIOU_f32(yoloBoxes, target, numClasses, dims)
   455  	for i := 0; i < len(yoloBoxes); i = i + (5 + numClasses) {
   456  		if bestIous[i/(5+numClasses)][0] <= ignoreTresh {
   457  			yoloBBoxes[i+4] = bceLoss32(0, yoloBoxes[i+4])
   458  		}
   459  	}
   460  	for i := 0; i < len(bestAnchors); i++ {
   461  		if bestAnchors[i][0] != -1 {
   462  			scale := (2 - target[i*5+3]*target[i*5+4])
   463  			giInt := bestAnchors[i][1]
   464  			gjInt := bestAnchors[i][2]
   465  			gx := invsigm32(target[i*5+1]*gridSizeF32 - float32(giInt))
   466  			gy := invsigm32(target[i*5+2]*gridSizeF32 - float32(gjInt))
   467  			gw := math32.Log(target[i*5+3]/anchors[bestAnchors[i][0]] + 1e-16)
   468  			gh := math32.Log(target[i*5+4]/anchors[bestAnchors[i][0]+1] + 1e-16)
   469  			bboxIdx := gjInt*gridSize*len(masks) + giInt*len(masks) + bestAnchors[i][0]
   470  			yoloBBoxes[bboxIdx] = mseLoss32(gx, input[bboxIdx], scale)
   471  			yoloBBoxes[bboxIdx+1] = mseLoss32(gy, input[bboxIdx+1], scale)
   472  			yoloBBoxes[bboxIdx+2] = mseLoss32(gw, input[bboxIdx+2], scale)
   473  			yoloBBoxes[bboxIdx+3] = mseLoss32(gh, input[bboxIdx+3], scale)
   474  			yoloBBoxes[bboxIdx+4] = bceLoss32(1, yoloBoxes[bboxIdx+4])
   475  			for j := 0; j < numClasses; j++ {
   476  				if j == int(target[i]) {
   477  					yoloBBoxes[bboxIdx+5+j] = bceLoss32(1, yoloBoxes[bboxIdx+4])
   478  				} else {
   479  					yoloBBoxes[bboxIdx+5+j] = bceLoss32(0, yoloBoxes[bboxIdx+4])
   480  				}
   481  			}
   482  		}
   483  	}
   484  	return yoloBBoxes
   485  }
   486  
   487  func findIntElement(arr []int, ele int) int {
   488  	for i := range arr {
   489  		if arr[i] == ele {
   490  			return i
   491  		}
   492  	}
   493  	return -1
   494  }
   495  
   496  func rectifyBox_f32(x, y, h, w float32, imgSize int) image.Rectangle {
   497  	return image.Rect(maxInt(int(x-w/2), 0), maxInt(int(y-h/2), 0), minInt(int(x+w/2+1), imgSize), minInt(int(y+h/2+1), imgSize))
   498  }
   499  
   500  func bceLoss32(target, pred float32) float32 {
   501  	if target == 1.0 {
   502  		return -(math32.Log(pred + 1e-16))
   503  	}
   504  	return -(math32.Log((1.0 - pred) + 1e-16))
   505  }
   506  
   507  func mseLoss32(target, pred, scale float32) float32 {
   508  	return math32.Pow(scale*(target-pred), 2) / 2.0
   509  }
   510  
   511  func invsigm32(target float32) float32 {
   512  	return -math32.Log(1-target+1e-16) + math32.Log(target+1e-16)
   513  }
   514  
   515  func (op *yoloOp) evaluateYOLO_f64(input tensor.Tensor, batchSize, stride, grid, bboxAttrs, numAnchors int, currentAnchors []float64) (retVal tensor.Tensor, err error) {
   516  	inputNumericType := input.Dtype()
   517  	if inputNumericType != Float64 {
   518  		return nil, fmt.Errorf("evaluateYOLO_f64() called with input tensor of type %v. Float64 is required", inputNumericType)
   519  	}
   520  	err = input.Reshape(batchSize, bboxAttrs*numAnchors, grid*grid)
   521  	if err != nil {
   522  		return nil, errors.Wrap(err, "Can't make reshape grid^2 for YOLO")
   523  	}
   524  	err = input.T(0, 2, 1)
   525  	if err != nil {
   526  		return nil, errors.Wrap(err, "Can't safely transponse input for YOLO")
   527  	}
   528  	err = input.Transpose()
   529  	if err != nil {
   530  		return nil, errors.Wrap(err, "Can't transponse input for YOLO")
   531  	}
   532  	err = input.Reshape(batchSize, grid*grid*numAnchors, bboxAttrs)
   533  	if err != nil {
   534  		return nil, errors.Wrap(err, "Can't reshape bbox for YOLO")
   535  	}
   536  
   537  	// Activation of x, y, and objects via sigmoid function
   538  	slXY, err := input.Slice(nil, nil, S(0, 2))
   539  	err = sigmoidSlice(slXY)
   540  	if err != nil {
   541  		return nil, errors.Wrap(err, "Can't activate XY")
   542  	}
   543  	slClasses, err := input.Slice(nil, nil, S(4, 5+op.numClasses))
   544  	err = sigmoidSlice(slClasses)
   545  	if err != nil {
   546  		return nil, errors.Wrap(err, "Can't activate classes")
   547  	}
   548  
   549  	step := grid * numAnchors
   550  	for i := 0; i < grid; i++ {
   551  		vy, err := input.Slice(nil, S(i*step, i*step+step), S(1))
   552  		if err != nil {
   553  			return nil, errors.Wrap(err, "Can't slice while doing steps for grid")
   554  		}
   555  		_, err = tensor.Add(vy, float64(i), tensor.UseUnsafe())
   556  		if err != nil {
   557  			return nil, errors.Wrap(err, "Can't do tensor.Add(...) for float64; (1)")
   558  		}
   559  		for n := 0; n < numAnchors; n++ {
   560  			anchorsSlice, err := input.Slice(nil, S(i*numAnchors+n, input.Shape()[1], step), S(0))
   561  			if err != nil {
   562  				return nil, errors.Wrap(err, "Can't slice anchors while doing steps for grid")
   563  			}
   564  			_, err = tensor.Add(anchorsSlice, float64(i), tensor.UseUnsafe())
   565  			if err != nil {
   566  				return nil, errors.Wrap(err, "Can't do tensor.Add(...) for float64; (2)")
   567  			}
   568  		}
   569  
   570  	}
   571  
   572  	anchors := []float64{}
   573  	for i := 0; i < grid*grid; i++ {
   574  		anchors = append(anchors, currentAnchors...)
   575  	}
   576  
   577  	anchorsTensor := tensor.New(tensor.Of(inputNumericType), tensor.WithShape(1, grid*grid*numAnchors, 2))
   578  	for i := range anchors {
   579  		anchorsTensor.Set(i, anchors[i])
   580  	}
   581  
   582  	_, err = tensor.Div(anchorsTensor, float64(stride), tensor.UseUnsafe())
   583  	if err != nil {
   584  		return nil, errors.Wrap(err, "Can't do tensor.Div(...) for float64")
   585  	}
   586  
   587  	vhw, err := input.Slice(nil, nil, S(2, 4))
   588  	if err != nil {
   589  		return nil, errors.Wrap(err, "Can't do slice on input S(2,4)")
   590  	}
   591  
   592  	_, err = vhw.Apply(math.Exp, tensor.UseUnsafe())
   593  	if err != nil {
   594  		return nil, errors.Wrap(err, "Can't apply exp64 to YOLO operation")
   595  	}
   596  
   597  	_, err = tensor.Mul(vhw, anchorsTensor, tensor.UseUnsafe())
   598  	if err != nil {
   599  		return nil, errors.Wrap(err, "Can't do tensor.Mul(...) for anchors")
   600  	}
   601  
   602  	vv, err := input.Slice(nil, nil, S(0, 4))
   603  	if err != nil {
   604  		return nil, errors.Wrap(err, "Can't do slice on input S(0,4)")
   605  	}
   606  
   607  	_, err = tensor.Mul(vv, float64(stride), tensor.UseUnsafe())
   608  	if err != nil {
   609  		return nil, errors.Wrap(err, "Can't do tensor.Mul(...) for float64")
   610  	}
   611  
   612  	return input, nil
   613  }
   614  
   615  func iou_f64(r1, r2 image.Rectangle) float64 {
   616  	intersection := r1.Intersect(r2)
   617  	interArea := intersection.Dx() * intersection.Dy()
   618  	r1Area := r1.Dx() * r1.Dy()
   619  	r2Area := r2.Dx() * r2.Dy()
   620  	return float64(interArea) / float64(r1Area+r2Area-interArea)
   621  }
   622  
   623  func getBestIOU_f64(input, target []float64, numClasses, dims int) [][]float64 {
   624  	ious := make([][]float64, 0)
   625  	imgsize := float64(dims)
   626  	for i := 0; i < len(input); i = i + numClasses + 5 {
   627  		ious = append(ious, []float64{0, -1})
   628  		r1 := rectifyBox_f64(input[i], input[i+1], input[i+2], input[i+3], dims)
   629  		for j := 0; j < len(target); j = j + 5 {
   630  			r2 := rectifyBox_f64(target[j+1]*imgsize, target[j+2]*imgsize, target[j+3]*imgsize, target[j+4]*imgsize, dims)
   631  			curiou := iou_f64(r1, r2)
   632  			if curiou > ious[i/(5+numClasses)][0] {
   633  				ious[i/(5+numClasses)][0] = curiou
   634  				ious[i/(5+numClasses)][1] = float64(j / 5)
   635  			}
   636  		}
   637  	}
   638  	return ious
   639  }
   640  
   641  func getBestAnchors_f64(target []float64, anchors []float64, masks []int, dims int, gridSize float64) [][]int {
   642  	bestAnchors := make([][]int, len(target)/5)
   643  	imgsize := float64(dims)
   644  	for j := 0; j < len(target); j = j + 5 {
   645  		targetRect := rectifyBox_f64(0, 0, target[j+3]*imgsize, target[j+4]*imgsize, dims) //not absolutely confident in rectangle sizes
   646  		bestIOU := float64(0.0)
   647  		bestAnchors[j/5] = make([]int, 3)
   648  		for i := 0; i < len(anchors); i = i + 2 {
   649  			anchorRect := rectifyBox_f64(0, 0, anchors[i], anchors[i+1], dims)
   650  			currentIOU := iou_f64(anchorRect, targetRect)
   651  			if currentIOU >= bestIOU {
   652  				bestAnchors[j/5][0] = i
   653  				bestIOU = currentIOU
   654  			}
   655  		}
   656  		bestAnchors[j/5][0] = findIntElement(masks, bestAnchors[j/5][0]/2)
   657  		if bestAnchors[j/5][0] != -1 {
   658  			bestAnchors[j/5][1] = int(target[j+1] * gridSize)
   659  			bestAnchors[j/5][2] = int(target[j+2] * gridSize)
   660  		}
   661  	}
   662  	return bestAnchors
   663  }
   664  
   665  func prepareOutputYOLO_f64(input, yoloBoxes, target, anchors []float64, masks []int, numClasses, dims, gridSize int, ignoreTresh float64) []float64 {
   666  	yoloBBoxes := make([]float64, len(yoloBoxes))
   667  	gridSizeF64 := float64(gridSize)
   668  	bestAnchors := getBestAnchors_f64(target, anchors, masks, dims, gridSizeF64)
   669  	bestIous := getBestIOU_f64(yoloBoxes, target, numClasses, dims)
   670  	for i := 0; i < len(yoloBoxes); i = i + (5 + numClasses) {
   671  		if bestIous[i/(5+numClasses)][0] <= ignoreTresh {
   672  			yoloBBoxes[i+4] = bceLoss64(0, yoloBoxes[i+4])
   673  		}
   674  	}
   675  	for i := 0; i < len(bestAnchors); i++ {
   676  		if bestAnchors[i][0] != -1 {
   677  			scale := (2 - target[i*5+3]*target[i*5+4])
   678  			giInt := bestAnchors[i][1]
   679  			gjInt := bestAnchors[i][2]
   680  			gx := invsigm64(target[i*5+1]*gridSizeF64 - float64(giInt))
   681  			gy := invsigm64(target[i*5+2]*gridSizeF64 - float64(gjInt))
   682  			gw := math.Log(target[i*5+3]/anchors[bestAnchors[i][0]] + 1e-16)
   683  			gh := math.Log(target[i*5+4]/anchors[bestAnchors[i][0]+1] + 1e-16)
   684  			bboxIdx := gjInt*gridSize*len(masks) + giInt*len(masks) + bestAnchors[i][0]
   685  			yoloBBoxes[bboxIdx] = mseLoss64(gx, input[bboxIdx], scale)
   686  			yoloBBoxes[bboxIdx+1] = mseLoss64(gy, input[bboxIdx+1], scale)
   687  			yoloBBoxes[bboxIdx+2] = mseLoss64(gw, input[bboxIdx+2], scale)
   688  			yoloBBoxes[bboxIdx+3] = mseLoss64(gh, input[bboxIdx+3], scale)
   689  			yoloBBoxes[bboxIdx+4] = bceLoss64(1, yoloBoxes[bboxIdx+4])
   690  			for j := 0; j < numClasses; j++ {
   691  				if j == int(target[i]) {
   692  					yoloBBoxes[bboxIdx+5+j] = bceLoss64(1, yoloBoxes[bboxIdx+4])
   693  				} else {
   694  					yoloBBoxes[bboxIdx+5+j] = bceLoss64(0, yoloBoxes[bboxIdx+4])
   695  				}
   696  			}
   697  		}
   698  	}
   699  	return yoloBBoxes
   700  }
   701  
   702  func rectifyBox_f64(x, y, h, w float64, imgSize int) image.Rectangle {
   703  	return image.Rect(maxInt(int(x-w/2), 0), maxInt(int(y-h/2), 0), minInt(int(x+w/2+1), imgSize), minInt(int(y+h/2+1), imgSize))
   704  }
   705  
   706  func bceLoss64(target, pred float64) float64 {
   707  	if target == 1.0 {
   708  		return -(math.Log(pred + 1e-16))
   709  	}
   710  	return -(math.Log((1.0 - pred) + 1e-16))
   711  }
   712  
   713  func mseLoss64(target, pred, scale float64) float64 {
   714  	return math.Pow(scale*(target-pred), 2) / 2.0
   715  }
   716  
   717  func invsigm64(target float64) float64 {
   718  	return -math.Log(1-target+1e-16) + math.Log(target+1e-16)
   719  }