github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqps.go

github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqps.go (about)

     1  // Copyright ©2017 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gonum/blas"
    11  	"github.com/gonum/blas/blas64"
    12  )
    13  
    14  // Dlaqps computes a step of QR factorization with column pivoting
    15  // of an m×n matrix A by using Blas-3. It tries to factorize nb
    16  // columns from A starting from the row offset, and updates all
    17  // of the matrix with Dgemm.
    18  //
    19  // In some cases, due to catastrophic cancellations, it cannot
    20  // factorize nb columns. Hence, the actual number of factorized
    21  // columns is returned in kb.
    22  //
    23  // Dlaqps computes a QR factorization with column pivoting of the
    24  // block A[offset:m, 0:nb] of the m×n matrix A. The block
    25  // A[0:offset, 0:n] is accordingly pivoted, but not factorized.
    26  //
    27  // On exit, the upper triangle of block A[offset:m, 0:kb] is the
    28  // triangular factor obtained. The elements in block A[offset:m, 0:n]
    29  // below the diagonal, together with tau, represent the orthogonal
    30  // matrix Q as a product of elementary reflectors.
    31  //
    32  // offset is number of rows of the matrix A that must be pivoted but
    33  // not factorized. offset must not be negative otherwise Dlaqps will panic.
    34  //
    35  // On exit, jpvt holds the permutation that was applied; the jth column
    36  // of A*P was the jpvt[j] column of A. jpvt must have length n,
    37  // otherwise Dlapqs will panic.
    38  //
    39  // On exit tau holds the scalar factors of the elementary reflectors.
    40  // It must have length nb, otherwise Dlapqs will panic.
    41  //
    42  // vn1 and vn2 hold the partial and complete column norms respectively.
    43  // They must have length n, otherwise Dlapqs will panic.
    44  //
    45  // auxv must have length nb, otherwise Dlaqps will panic.
    46  //
    47  // f and ldf represent an n×nb matrix F that is overwritten during the
    48  // call.
    49  //
    50  // Dlaqps is an internal routine. It is exported for testing purposes.
    51  func (impl Implementation) Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) {
    52  	checkMatrix(m, n, a, lda)
    53  	checkMatrix(n, nb, f, ldf)
    54  	if offset > m {
    55  		panic(offsetGTM)
    56  	}
    57  	if n < 0 || nb > n {
    58  		panic(badNb)
    59  	}
    60  	if len(jpvt) != n {
    61  		panic(badIpiv)
    62  	}
    63  	if len(tau) < nb {
    64  		panic(badTau)
    65  	}
    66  	if len(vn1) < n {
    67  		panic(badVn1)
    68  	}
    69  	if len(vn2) < n {
    70  		panic(badVn2)
    71  	}
    72  	if len(auxv) < nb {
    73  		panic(badAuxv)
    74  	}
    75  
    76  	lastrk := min(m, n+offset)
    77  	lsticc := -1
    78  	tol3z := math.Sqrt(dlamchE)
    79  
    80  	bi := blas64.Implementation()
    81  
    82  	var k, rk int
    83  	for ; k < nb && lsticc == -1; k++ {
    84  		rk = offset + k
    85  
    86  		// Determine kth pivot column and swap if necessary.
    87  		p := k + bi.Idamax(n-k, vn1[k:], 1)
    88  		if p != k {
    89  			bi.Dswap(m, a[p:], lda, a[k:], lda)
    90  			bi.Dswap(k, f[p*ldf:], 1, f[k*ldf:], 1)
    91  			jpvt[p], jpvt[k] = jpvt[k], jpvt[p]
    92  			vn1[p] = vn1[k]
    93  			vn2[p] = vn2[k]
    94  		}
    95  
    96  		// Apply previous Householder reflectors to column K:
    97  		//
    98  		// A[rk:m, k] = A[rk:m, k] - A[rk:m, 0:k-1]*F[k, 0:k-1]^T.
    99  		if k > 0 {
   100  			bi.Dgemv(blas.NoTrans, m-rk, k, -1,
   101  				a[rk*lda:], lda,
   102  				f[k*ldf:], 1,
   103  				1,
   104  				a[rk*lda+k:], lda)
   105  		}
   106  
   107  		// Generate elementary reflector H_k.
   108  		if rk < m-1 {
   109  			a[rk*lda+k], tau[k] = impl.Dlarfg(m-rk, a[rk*lda+k], a[(rk+1)*lda+k:], lda)
   110  		} else {
   111  			tau[k] = 0
   112  		}
   113  
   114  		akk := a[rk*lda+k]
   115  		a[rk*lda+k] = 1
   116  
   117  		// Compute kth column of F:
   118  		//
   119  		// Compute F[k+1:n, k] = tau[k]*A[rk:m, k+1:n]^T*A[rk:m, k].
   120  		if k < n-1 {
   121  			bi.Dgemv(blas.Trans, m-rk, n-k-1, tau[k],
   122  				a[rk*lda+k+1:], lda,
   123  				a[rk*lda+k:], lda,
   124  				0,
   125  				f[(k+1)*ldf+k:], ldf)
   126  		}
   127  
   128  		// Padding F[0:k, k] with zeros.
   129  		for j := 0; j < k; j++ {
   130  			f[j*ldf+k] = 0
   131  		}
   132  
   133  		// Incremental updating of F:
   134  		//
   135  		// F[0:n, k] := F[0:n, k] - tau[k]*F[0:n, 0:k-1]*A[rk:m, 0:k-1]^T*A[rk:m,k].
   136  		if k > 0 {
   137  			bi.Dgemv(blas.Trans, m-rk, k, -tau[k],
   138  				a[rk*lda:], lda,
   139  				a[rk*lda+k:], lda,
   140  				0,
   141  				auxv, 1)
   142  			bi.Dgemv(blas.NoTrans, n, k, 1,
   143  				f, ldf,
   144  				auxv, 1,
   145  				1,
   146  				f[k:], ldf)
   147  		}
   148  
   149  		// Update the current row of A:
   150  		//
   151  		// A[rk, k+1:n] = A[rk, k+1:n] - A[rk, 0:k]*F[k+1:n, 0:k]^T.
   152  		if k < n-1 {
   153  			bi.Dgemv(blas.NoTrans, n-k-1, k+1, -1,
   154  				f[(k+1)*ldf:], ldf,
   155  				a[rk*lda:], 1,
   156  				1,
   157  				a[rk*lda+k+1:], 1)
   158  		}
   159  
   160  		// Update partial column norms.
   161  		if rk < lastrk-1 {
   162  			for j := k + 1; j < n; j++ {
   163  				if vn1[j] == 0 {
   164  					continue
   165  				}
   166  
   167  				// The following marked lines follow from the
   168  				// analysis in Lapack Working Note 176.
   169  				r := math.Abs(a[rk*lda+j]) / vn1[j] // *
   170  				temp := math.Max(0, 1-r*r)          // *
   171  				r = vn1[j] / vn2[j]                 // *
   172  				temp2 := temp * r * r               // *
   173  				if temp2 < tol3z {
   174  					// vn2 is used here as a collection of
   175  					// indices into vn2 and also a collection
   176  					// of column norms.
   177  					vn2[j] = float64(lsticc)
   178  					lsticc = j
   179  				} else {
   180  					vn1[j] *= math.Sqrt(temp) // *
   181  				}
   182  			}
   183  		}
   184  
   185  		a[rk*lda+k] = akk
   186  	}
   187  	kb = k
   188  	rk = offset + kb
   189  
   190  	// Apply the block reflector to the rest of the matrix:
   191  	//
   192  	// A[offset+kb+1:m, kb+1:n] := A[offset+kb+1:m, kb+1:n] - A[offset+kb+1:m, 1:kb]*F[kb+1:n, 1:kb]^T.
   193  	if kb < min(n, m-offset) {
   194  		bi.Dgemm(blas.NoTrans, blas.Trans,
   195  			m-rk, n-kb, kb, -1,
   196  			a[rk*lda:], lda,
   197  			f[kb*ldf:], ldf,
   198  			1,
   199  			a[rk*lda+kb:], lda)
   200  	}
   201  
   202  	// Recomputation of difficult columns.
   203  	for lsticc >= 0 {
   204  		itemp := int(vn2[lsticc])
   205  
   206  		// NOTE: The computation of vn1[lsticc] relies on the fact that
   207  		// Dnrm2 does not fail on vectors with norm below the value of
   208  		// sqrt(dlamchS)
   209  		v := bi.Dnrm2(m-rk, a[rk*lda+lsticc:], lda)
   210  		vn1[lsticc] = v
   211  		vn2[lsticc] = v
   212  
   213  		lsticc = itemp
   214  	}
   215  
   216  	return kb
   217  }