github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dgehrd.go (about) 1 // Copyright ©2016 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "github.com/jingcheng-WU/gonum/blas" 9 "github.com/jingcheng-WU/gonum/blas/blas64" 10 "github.com/jingcheng-WU/gonum/lapack" 11 ) 12 13 // Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg 14 // form H by an orthogonal similarity transformation Qᵀ * A * Q = H. 15 // 16 // The matrix Q is represented as a product of (ihi-ilo) elementary 17 // reflectors 18 // Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}. 19 // Each H_i has the form 20 // H_i = I - tau[i] * v * vᵀ 21 // where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0. 22 // v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i]. 23 // 24 // On entry, a contains the n×n general matrix to be reduced. On return, the 25 // upper triangle and the first subdiagonal of A will be overwritten with the 26 // upper Hessenberg matrix H, and the elements below the first subdiagonal, with 27 // the slice tau, represent the orthogonal matrix Q as a product of elementary 28 // reflectors. 29 // 30 // The contents of a are illustrated by the following example, with n = 7, ilo = 31 // 1 and ihi = 5. 32 // On entry, 33 // [ a a a a a a a ] 34 // [ a a a a a a ] 35 // [ a a a a a a ] 36 // [ a a a a a a ] 37 // [ a a a a a a ] 38 // [ a a a a a a ] 39 // [ a ] 40 // on return, 41 // [ a a h h h h a ] 42 // [ a h h h h a ] 43 // [ h h h h h h ] 44 // [ v1 h h h h h ] 45 // [ v1 v2 h h h h ] 46 // [ v1 v2 v3 h h h ] 47 // [ a ] 48 // where a denotes an element of the original matrix A, h denotes a 49 // modified element of the upper Hessenberg matrix H, and vi denotes an 50 // element of the vector defining H_i. 51 // 52 // ilo and ihi determine the block of A that will be reduced to upper Hessenberg 53 // form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi == 54 // -1 if n == 0, otherwise Dgehrd will panic. 55 // 56 // On return, tau will contain the scalar factors of the elementary reflectors. 57 // Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length 58 // equal to n-1 if n > 0, otherwise Dgehrd will panic. 59 // 60 // work must have length at least lwork and lwork must be at least max(1,n), 61 // otherwise Dgehrd will panic. On return, work[0] contains the optimal value of 62 // lwork. 63 // 64 // If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork 65 // will be stored in work[0]. 66 // 67 // Dgehrd is an internal routine. It is exported for testing purposes. 68 func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) { 69 switch { 70 case n < 0: 71 panic(nLT0) 72 case ilo < 0 || max(0, n-1) < ilo: 73 panic(badIlo) 74 case ihi < min(ilo, n-1) || n <= ihi: 75 panic(badIhi) 76 case lda < max(1, n): 77 panic(badLdA) 78 case lwork < max(1, n) && lwork != -1: 79 panic(badLWork) 80 case len(work) < lwork: 81 panic(shortWork) 82 } 83 84 // Quick return if possible. 85 if n == 0 { 86 work[0] = 1 87 return 88 } 89 90 const ( 91 nbmax = 64 92 ldt = nbmax + 1 93 tsize = ldt * nbmax 94 ) 95 // Compute the workspace requirements. 96 nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1)) 97 lwkopt := n*nb + tsize 98 if lwork == -1 { 99 work[0] = float64(lwkopt) 100 return 101 } 102 103 if len(a) < (n-1)*lda+n { 104 panic(shortA) 105 } 106 if len(tau) != n-1 { 107 panic(badLenTau) 108 } 109 110 // Set tau[:ilo] and tau[ihi:] to zero. 111 for i := 0; i < ilo; i++ { 112 tau[i] = 0 113 } 114 for i := ihi; i < n-1; i++ { 115 tau[i] = 0 116 } 117 118 // Quick return if possible. 119 nh := ihi - ilo + 1 120 if nh <= 1 { 121 work[0] = 1 122 return 123 } 124 125 // Determine the block size. 126 nbmin := 2 127 var nx int 128 if 1 < nb && nb < nh { 129 // Determine when to cross over from blocked to unblocked code 130 // (last block is always handled by unblocked code). 131 nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1)) 132 if nx < nh { 133 // Determine if workspace is large enough for blocked code. 134 if lwork < n*nb+tsize { 135 // Not enough workspace to use optimal nb: 136 // determine the minimum value of nb, and reduce 137 // nb or force use of unblocked code. 138 nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1)) 139 if lwork >= n*nbmin+tsize { 140 nb = (lwork - tsize) / n 141 } else { 142 nb = 1 143 } 144 } 145 } 146 } 147 ldwork := nb // work is used as an n×nb matrix. 148 149 var i int 150 if nb < nbmin || nh <= nb { 151 // Use unblocked code below. 152 i = ilo 153 } else { 154 // Use blocked code. 155 bi := blas64.Implementation() 156 iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work. 157 for i = ilo; i < ihi-nx; i += nb { 158 ib := min(nb, ihi-i) 159 160 // Reduce columns [i:i+ib] to Hessenberg form, returning the 161 // matrices V and T of the block reflector H = I - V*T*Vᵀ 162 // which performs the reduction, and also the matrix Y = A*V*T. 163 impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork) 164 165 // Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the 166 // right, computing A := A - Y * Vᵀ. V[i+ib,i+ib-1] must be set 167 // to 1. 168 ei := a[(i+ib)*lda+i+ib-1] 169 a[(i+ib)*lda+i+ib-1] = 1 170 bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib, 171 -1, work, ldwork, 172 a[(i+ib)*lda+i:], lda, 173 1, a[i+ib:], lda) 174 a[(i+ib)*lda+i+ib-1] = ei 175 176 // Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the 177 // right. 178 bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1, 179 1, a[(i+1)*lda+i:], lda, work, ldwork) 180 for j := 0; j <= ib-2; j++ { 181 bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda) 182 } 183 184 // Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the 185 // left. 186 impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise, 187 ihi-i, n-i-ib, ib, 188 a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork) 189 } 190 } 191 // Use unblocked code to reduce the rest of the matrix. 192 impl.Dgehd2(n, i, ihi, a, lda, tau, work) 193 work[0] = float64(lwkopt) 194 }