github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/gemv.go (about)

     1  // Copyright ©2018 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"github.com/jingcheng-WU/gonum/blas"
     9  	"github.com/jingcheng-WU/gonum/internal/asm/f32"
    10  	"github.com/jingcheng-WU/gonum/internal/asm/f64"
    11  )
    12  
    13  // TODO(Kunde21):  Merge these methods back into level2double/level2single when Sgemv assembly kernels are merged into f32.
    14  
    15  // Dgemv computes
    16  //  y = alpha * A * x + beta * y   if tA = blas.NoTrans
    17  //  y = alpha * Aᵀ * x + beta * y  if tA = blas.Trans or blas.ConjTrans
    18  // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
    19  func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
    20  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
    21  		panic(badTranspose)
    22  	}
    23  	if m < 0 {
    24  		panic(mLT0)
    25  	}
    26  	if n < 0 {
    27  		panic(nLT0)
    28  	}
    29  	if lda < max(1, n) {
    30  		panic(badLdA)
    31  	}
    32  	if incX == 0 {
    33  		panic(zeroIncX)
    34  	}
    35  	if incY == 0 {
    36  		panic(zeroIncY)
    37  	}
    38  	// Set up indexes
    39  	lenX := m
    40  	lenY := n
    41  	if tA == blas.NoTrans {
    42  		lenX = n
    43  		lenY = m
    44  	}
    45  
    46  	// Quick return if possible
    47  	if m == 0 || n == 0 {
    48  		return
    49  	}
    50  
    51  	if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
    52  		panic(shortX)
    53  	}
    54  	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
    55  		panic(shortY)
    56  	}
    57  	if len(a) < lda*(m-1)+n {
    58  		panic(shortA)
    59  	}
    60  
    61  	// Quick return if possible
    62  	if alpha == 0 && beta == 1 {
    63  		return
    64  	}
    65  
    66  	if alpha == 0 {
    67  		// First form y = beta * y
    68  		if incY > 0 {
    69  			Implementation{}.Dscal(lenY, beta, y, incY)
    70  		} else {
    71  			Implementation{}.Dscal(lenY, beta, y, -incY)
    72  		}
    73  		return
    74  	}
    75  
    76  	// Form y = alpha * A * x + y
    77  	if tA == blas.NoTrans {
    78  		f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
    79  		return
    80  	}
    81  	// Cases where a is transposed.
    82  	f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
    83  }
    84  
    85  // Sgemv computes
    86  //  y = alpha * A * x + beta * y   if tA = blas.NoTrans
    87  //  y = alpha * Aᵀ * x + beta * y  if tA = blas.Trans or blas.ConjTrans
    88  // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
    89  //
    90  // Float32 implementations are autogenerated and not directly tested.
    91  func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
    92  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
    93  		panic(badTranspose)
    94  	}
    95  	if m < 0 {
    96  		panic(mLT0)
    97  	}
    98  	if n < 0 {
    99  		panic(nLT0)
   100  	}
   101  	if lda < max(1, n) {
   102  		panic(badLdA)
   103  	}
   104  	if incX == 0 {
   105  		panic(zeroIncX)
   106  	}
   107  	if incY == 0 {
   108  		panic(zeroIncY)
   109  	}
   110  
   111  	// Quick return if possible.
   112  	if m == 0 || n == 0 {
   113  		return
   114  	}
   115  
   116  	// Set up indexes
   117  	lenX := m
   118  	lenY := n
   119  	if tA == blas.NoTrans {
   120  		lenX = n
   121  		lenY = m
   122  	}
   123  	if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
   124  		panic(shortX)
   125  	}
   126  	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
   127  		panic(shortY)
   128  	}
   129  	if len(a) < lda*(m-1)+n {
   130  		panic(shortA)
   131  	}
   132  
   133  	// Quick return if possible.
   134  	if alpha == 0 && beta == 1 {
   135  		return
   136  	}
   137  
   138  	// First form y = beta * y
   139  	if incY > 0 {
   140  		Implementation{}.Sscal(lenY, beta, y, incY)
   141  	} else {
   142  		Implementation{}.Sscal(lenY, beta, y, -incY)
   143  	}
   144  
   145  	if alpha == 0 {
   146  		return
   147  	}
   148  
   149  	var kx, ky int
   150  	if incX < 0 {
   151  		kx = -(lenX - 1) * incX
   152  	}
   153  	if incY < 0 {
   154  		ky = -(lenY - 1) * incY
   155  	}
   156  
   157  	// Form y = alpha * A * x + y
   158  	if tA == blas.NoTrans {
   159  		if incX == 1 && incY == 1 {
   160  			for i := 0; i < m; i++ {
   161  				y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x[:n])
   162  			}
   163  			return
   164  		}
   165  		iy := ky
   166  		for i := 0; i < m; i++ {
   167  			y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
   168  			iy += incY
   169  		}
   170  		return
   171  	}
   172  	// Cases where a is transposed.
   173  	if incX == 1 && incY == 1 {
   174  		for i := 0; i < m; i++ {
   175  			tmp := alpha * x[i]
   176  			if tmp != 0 {
   177  				f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y[:n])
   178  			}
   179  		}
   180  		return
   181  	}
   182  	ix := kx
   183  	for i := 0; i < m; i++ {
   184  		tmp := alpha * x[ix]
   185  		if tmp != 0 {
   186  			f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
   187  		}
   188  		ix += incX
   189  	}
   190  }