gonum.org/v1/gonum@v0.14.0/lapack/gonum/dsytrd.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/blas/blas64"
    10  )
    11  
    12  // Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an
    13  // orthogonal similarity transformation
    14  //
    15  //	Qᵀ * A * Q = T
    16  //
    17  // where Q is an orthonormal matrix and T is symmetric and tridiagonal.
    18  //
    19  // On entry, a contains the elements of the input matrix in the triangle specified
    20  // by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the
    21  // corresponding elements of the tridiagonal matrix T. The remaining elements in
    22  // the triangle, along with the array tau, contain the data to construct Q as
    23  // the product of elementary reflectors.
    24  //
    25  // If uplo == blas.Upper, Q is constructed with
    26  //
    27  //	Q = H_{n-2} * ... * H_1 * H_0
    28  //
    29  // where
    30  //
    31  //	H_i = I - tau_i * v * vᵀ
    32  //
    33  // v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1].
    34  // The elements of A are
    35  //
    36  //	[ d   e  v1  v2  v3]
    37  //	[     d   e  v2  v3]
    38  //	[         d   e  v3]
    39  //	[             d   e]
    40  //	[                 e]
    41  //
    42  // If uplo == blas.Lower, Q is constructed with
    43  //
    44  //	Q = H_0 * H_1 * ... * H_{n-2}
    45  //
    46  // where
    47  //
    48  //	H_i = I - tau_i * v * vᵀ
    49  //
    50  // v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i].
    51  // The elements of A are
    52  //
    53  //	[ d                ]
    54  //	[ e   d            ]
    55  //	[v0   e   d        ]
    56  //	[v0  v1   e   d    ]
    57  //	[v0  v1  v2   e   d]
    58  //
    59  // d must have length n, and e and tau must have length n-1. Dsytrd will panic if
    60  // these conditions are not met.
    61  //
    62  // work is temporary storage, and lwork specifies the usable memory length. At minimum,
    63  // lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is
    64  // limited by the usable length.
    65  // If lwork == -1, instead of computing Dsytrd the optimal work length is stored
    66  // into work[0].
    67  //
    68  // Dsytrd is an internal routine. It is exported for testing purposes.
    69  func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) {
    70  	switch {
    71  	case uplo != blas.Upper && uplo != blas.Lower:
    72  		panic(badUplo)
    73  	case n < 0:
    74  		panic(nLT0)
    75  	case lda < max(1, n):
    76  		panic(badLdA)
    77  	case lwork < 1 && lwork != -1:
    78  		panic(badLWork)
    79  	case len(work) < max(1, lwork):
    80  		panic(shortWork)
    81  	}
    82  
    83  	// Quick return if possible.
    84  	if n == 0 {
    85  		work[0] = 1
    86  		return
    87  	}
    88  
    89  	nb := impl.Ilaenv(1, "DSYTRD", string(uplo), n, -1, -1, -1)
    90  	lworkopt := n * nb
    91  	if lwork == -1 {
    92  		work[0] = float64(lworkopt)
    93  		return
    94  	}
    95  
    96  	switch {
    97  	case len(a) < (n-1)*lda+n:
    98  		panic(shortA)
    99  	case len(d) < n:
   100  		panic(shortD)
   101  	case len(e) < n-1:
   102  		panic(shortE)
   103  	case len(tau) < n-1:
   104  		panic(shortTau)
   105  	}
   106  
   107  	bi := blas64.Implementation()
   108  
   109  	nx := n
   110  	iws := 1
   111  	var ldwork int
   112  	if 1 < nb && nb < n {
   113  		// Determine when to cross over from blocked to unblocked code. The last
   114  		// block is always handled by unblocked code.
   115  		nx = max(nb, impl.Ilaenv(3, "DSYTRD", string(uplo), n, -1, -1, -1))
   116  		if nx < n {
   117  			// Determine if workspace is large enough for blocked code.
   118  			ldwork = nb
   119  			iws = n * ldwork
   120  			if lwork < iws {
   121  				// Not enough workspace to use optimal nb: determine the minimum
   122  				// value of nb and reduce nb or force use of unblocked code by
   123  				// setting nx = n.
   124  				nb = max(lwork/n, 1)
   125  				nbmin := impl.Ilaenv(2, "DSYTRD", string(uplo), n, -1, -1, -1)
   126  				if nb < nbmin {
   127  					nx = n
   128  				}
   129  			}
   130  		} else {
   131  			nx = n
   132  		}
   133  	} else {
   134  		nb = 1
   135  	}
   136  	ldwork = nb
   137  
   138  	if uplo == blas.Upper {
   139  		// Reduce the upper triangle of A. Columns 0:kk are handled by the
   140  		// unblocked method.
   141  		var i int
   142  		kk := n - ((n-nx+nb-1)/nb)*nb
   143  		for i = n - nb; i >= kk; i -= nb {
   144  			// Reduce columns i:i+nb to tridiagonal form and form the matrix W
   145  			// which is needed to update the unreduced part of the matrix.
   146  			impl.Dlatrd(uplo, i+nb, nb, a, lda, e, tau, work, ldwork)
   147  
   148  			// Update the unreduced submatrix A[0:i-1,0:i-1], using an update
   149  			// of the form A = A - V*Wᵀ - W*Vᵀ.
   150  			bi.Dsyr2k(uplo, blas.NoTrans, i, nb, -1, a[i:], lda, work, ldwork, 1, a, lda)
   151  
   152  			// Copy superdiagonal elements back into A, and diagonal elements into D.
   153  			for j := i; j < i+nb; j++ {
   154  				a[(j-1)*lda+j] = e[j-1]
   155  				d[j] = a[j*lda+j]
   156  			}
   157  		}
   158  		// Use unblocked code to reduce the last or only block
   159  		// check that i == kk.
   160  		impl.Dsytd2(uplo, kk, a, lda, d, e, tau)
   161  	} else {
   162  		var i int
   163  		// Reduce the lower triangle of A.
   164  		for i = 0; i < n-nx; i += nb {
   165  			// Reduce columns 0:i+nb to tridiagonal form and form the matrix W
   166  			// which is needed to update the unreduced part of the matrix.
   167  			impl.Dlatrd(uplo, n-i, nb, a[i*lda+i:], lda, e[i:], tau[i:], work, ldwork)
   168  
   169  			// Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
   170  			// of the form A = A + V*Wᵀ - W*Vᵀ.
   171  			bi.Dsyr2k(uplo, blas.NoTrans, n-i-nb, nb, -1, a[(i+nb)*lda+i:], lda,
   172  				work[nb*ldwork:], ldwork, 1, a[(i+nb)*lda+i+nb:], lda)
   173  
   174  			// Copy subdiagonal elements back into A, and diagonal elements into D.
   175  			for j := i; j < i+nb; j++ {
   176  				a[(j+1)*lda+j] = e[j]
   177  				d[j] = a[j*lda+j]
   178  			}
   179  		}
   180  		// Use unblocked code to reduce the last or only block.
   181  		impl.Dsytd2(uplo, n-i, a[i*lda+i:], lda, d[i:], e[i:], tau[i:])
   182  	}
   183  	work[0] = float64(iws)
   184  }