github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/gemv.go (about) 1 // Copyright ©2018 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "github.com/jingcheng-WU/gonum/blas" 9 "github.com/jingcheng-WU/gonum/internal/asm/f32" 10 "github.com/jingcheng-WU/gonum/internal/asm/f64" 11 ) 12 13 // TODO(Kunde21): Merge these methods back into level2double/level2single when Sgemv assembly kernels are merged into f32. 14 15 // Dgemv computes 16 // y = alpha * A * x + beta * y if tA = blas.NoTrans 17 // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans 18 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 19 func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 20 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 21 panic(badTranspose) 22 } 23 if m < 0 { 24 panic(mLT0) 25 } 26 if n < 0 { 27 panic(nLT0) 28 } 29 if lda < max(1, n) { 30 panic(badLdA) 31 } 32 if incX == 0 { 33 panic(zeroIncX) 34 } 35 if incY == 0 { 36 panic(zeroIncY) 37 } 38 // Set up indexes 39 lenX := m 40 lenY := n 41 if tA == blas.NoTrans { 42 lenX = n 43 lenY = m 44 } 45 46 // Quick return if possible 47 if m == 0 || n == 0 { 48 return 49 } 50 51 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { 52 panic(shortX) 53 } 54 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { 55 panic(shortY) 56 } 57 if len(a) < lda*(m-1)+n { 58 panic(shortA) 59 } 60 61 // Quick return if possible 62 if alpha == 0 && beta == 1 { 63 return 64 } 65 66 if alpha == 0 { 67 // First form y = beta * y 68 if incY > 0 { 69 Implementation{}.Dscal(lenY, beta, y, incY) 70 } else { 71 Implementation{}.Dscal(lenY, beta, y, -incY) 72 } 73 return 74 } 75 76 // Form y = alpha * A * x + y 77 if tA == blas.NoTrans { 78 f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 79 return 80 } 81 // Cases where a is transposed. 82 f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 83 } 84 85 // Sgemv computes 86 // y = alpha * A * x + beta * y if tA = blas.NoTrans 87 // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans 88 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 89 // 90 // Float32 implementations are autogenerated and not directly tested. 91 func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 92 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 93 panic(badTranspose) 94 } 95 if m < 0 { 96 panic(mLT0) 97 } 98 if n < 0 { 99 panic(nLT0) 100 } 101 if lda < max(1, n) { 102 panic(badLdA) 103 } 104 if incX == 0 { 105 panic(zeroIncX) 106 } 107 if incY == 0 { 108 panic(zeroIncY) 109 } 110 111 // Quick return if possible. 112 if m == 0 || n == 0 { 113 return 114 } 115 116 // Set up indexes 117 lenX := m 118 lenY := n 119 if tA == blas.NoTrans { 120 lenX = n 121 lenY = m 122 } 123 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { 124 panic(shortX) 125 } 126 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { 127 panic(shortY) 128 } 129 if len(a) < lda*(m-1)+n { 130 panic(shortA) 131 } 132 133 // Quick return if possible. 134 if alpha == 0 && beta == 1 { 135 return 136 } 137 138 // First form y = beta * y 139 if incY > 0 { 140 Implementation{}.Sscal(lenY, beta, y, incY) 141 } else { 142 Implementation{}.Sscal(lenY, beta, y, -incY) 143 } 144 145 if alpha == 0 { 146 return 147 } 148 149 var kx, ky int 150 if incX < 0 { 151 kx = -(lenX - 1) * incX 152 } 153 if incY < 0 { 154 ky = -(lenY - 1) * incY 155 } 156 157 // Form y = alpha * A * x + y 158 if tA == blas.NoTrans { 159 if incX == 1 && incY == 1 { 160 for i := 0; i < m; i++ { 161 y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x[:n]) 162 } 163 return 164 } 165 iy := ky 166 for i := 0; i < m; i++ { 167 y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0) 168 iy += incY 169 } 170 return 171 } 172 // Cases where a is transposed. 173 if incX == 1 && incY == 1 { 174 for i := 0; i < m; i++ { 175 tmp := alpha * x[i] 176 if tmp != 0 { 177 f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y[:n]) 178 } 179 } 180 return 181 } 182 ix := kx 183 for i := 0; i < m; i++ { 184 tmp := alpha * x[ix] 185 if tmp != 0 { 186 f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) 187 } 188 ix += incX 189 } 190 }