github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dormqr.go (about) 1 // Copyright ©2015 The gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package native 6 7 import ( 8 "github.com/gonum/blas" 9 "github.com/gonum/lapack" 10 ) 11 12 // Dormqr multiplies an m×n matrix C by an orthogonal matrix Q as 13 // C = Q * C, if side == blas.Left and trans == blas.NoTrans, 14 // C = Q^T * C, if side == blas.Left and trans == blas.Trans, 15 // C = C * Q, if side == blas.Right and trans == blas.NoTrans, 16 // C = C * Q^T, if side == blas.Right and trans == blas.Trans, 17 // where Q is defined as the product of k elementary reflectors 18 // Q = H_0 * H_1 * ... * H_{k-1}. 19 // 20 // If side == blas.Left, A is an m×k matrix and 0 <= k <= m. 21 // If side == blas.Right, A is an n×k matrix and 0 <= k <= n. 22 // The ith column of A contains the vector which defines the elementary 23 // reflector H_i and tau[i] contains its scalar factor. tau must have length k 24 // and Dormqr will panic otherwise. Dgeqrf returns A and tau in the required 25 // form. 26 // 27 // work must have length at least max(1,lwork), and lwork must be at least n if 28 // side == blas.Left and at least m if side == blas.Right, otherwise Dormqr will 29 // panic. 30 // 31 // work is temporary storage, and lwork specifies the usable memory length. At 32 // minimum, lwork >= m if side == blas.Left and lwork >= n if side == 33 // blas.Right, and this function will panic otherwise. Larger values of lwork 34 // will generally give better performance. On return, work[0] will contain the 35 // optimal value of lwork. 36 // 37 // If lwork is -1, instead of performing Dormqr, the optimal workspace size will 38 // be stored into work[0]. 39 func (impl Implementation) Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) { 40 var nq, nw int 41 switch side { 42 default: 43 panic(badSide) 44 case blas.Left: 45 nq = m 46 nw = n 47 case blas.Right: 48 nq = n 49 nw = m 50 } 51 switch { 52 case trans != blas.NoTrans && trans != blas.Trans: 53 panic(badTrans) 54 case m < 0 || n < 0: 55 panic(negDimension) 56 case k < 0 || nq < k: 57 panic("lapack: invalid value of k") 58 case len(work) < lwork: 59 panic(shortWork) 60 case lwork < max(1, nw) && lwork != -1: 61 panic(badWork) 62 } 63 if lwork != -1 { 64 checkMatrix(nq, k, a, lda) 65 checkMatrix(m, n, c, ldc) 66 if len(tau) != k { 67 panic(badTau) 68 } 69 } 70 71 if m == 0 || n == 0 || k == 0 { 72 work[0] = 1 73 return 74 } 75 76 const ( 77 nbmax = 64 78 ldt = nbmax 79 tsize = nbmax * ldt 80 ) 81 opts := string(side) + string(trans) 82 nb := min(nbmax, impl.Ilaenv(1, "DORMQR", opts, m, n, k, -1)) 83 lworkopt := max(1, nw)*nb + tsize 84 if lwork == -1 { 85 work[0] = float64(lworkopt) 86 return 87 } 88 89 nbmin := 2 90 if 1 < nb && nb < k { 91 if lwork < nw*nb+tsize { 92 nb = (lwork - tsize) / nw 93 nbmin = max(2, impl.Ilaenv(2, "DORMQR", opts, m, n, k, -1)) 94 } 95 } 96 97 if nb < nbmin || k <= nb { 98 // Call unblocked code. 99 impl.Dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work) 100 work[0] = float64(lworkopt) 101 return 102 } 103 104 var ( 105 ldwork = nb 106 left = side == blas.Left 107 notran = trans == blas.NoTrans 108 ) 109 switch { 110 case left && notran: 111 for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { 112 ib := min(nb, k-i) 113 impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib, 114 a[i*lda+i:], lda, 115 tau[i:], 116 work[:tsize], ldt) 117 impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib, 118 a[i*lda+i:], lda, 119 work[:tsize], ldt, 120 c[i*ldc:], ldc, 121 work[tsize:], ldwork) 122 } 123 124 case left && !notran: 125 for i := 0; i < k; i += nb { 126 ib := min(nb, k-i) 127 impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib, 128 a[i*lda+i:], lda, 129 tau[i:], 130 work[:tsize], ldt) 131 impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib, 132 a[i*lda+i:], lda, 133 work[:tsize], ldt, 134 c[i*ldc:], ldc, 135 work[tsize:], ldwork) 136 } 137 138 case !left && notran: 139 for i := 0; i < k; i += nb { 140 ib := min(nb, k-i) 141 impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib, 142 a[i*lda+i:], lda, 143 tau[i:], 144 work[:tsize], ldt) 145 impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib, 146 a[i*lda+i:], lda, 147 work[:tsize], ldt, 148 c[i:], ldc, 149 work[tsize:], ldwork) 150 } 151 152 case !left && !notran: 153 for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { 154 ib := min(nb, k-i) 155 impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib, 156 a[i*lda+i:], lda, 157 tau[i:], 158 work[:tsize], ldt) 159 impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib, 160 a[i*lda+i:], lda, 161 work[:tsize], ldt, 162 c[i:], ldc, 163 work[tsize:], ldwork) 164 } 165 } 166 work[0] = float64(lworkopt) 167 }