github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dsytrd.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"github.com/jingcheng-WU/gonum/blas"
     9  	"github.com/jingcheng-WU/gonum/blas/blas64"
    10  )
    11  
    12  // Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an
    13  // orthogonal similarity transformation
    14  //  Qᵀ * A * Q = T
    15  // where Q is an orthonormal matrix and T is symmetric and tridiagonal.
    16  //
    17  // On entry, a contains the elements of the input matrix in the triangle specified
    18  // by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the
    19  // corresponding elements of the tridiagonal matrix T. The remaining elements in
    20  // the triangle, along with the array tau, contain the data to construct Q as
    21  // the product of elementary reflectors.
    22  //
    23  // If uplo == blas.Upper, Q is constructed with
    24  //  Q = H_{n-2} * ... * H_1 * H_0
    25  // where
    26  //  H_i = I - tau_i * v * vᵀ
    27  // v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1].
    28  // The elements of A are
    29  //  [ d   e  v1  v2  v3]
    30  //  [     d   e  v2  v3]
    31  //  [         d   e  v3]
    32  //  [             d   e]
    33  //  [                 e]
    34  //
    35  // If uplo == blas.Lower, Q is constructed with
    36  //  Q = H_0 * H_1 * ... * H_{n-2}
    37  // where
    38  //  H_i = I - tau_i * v * vᵀ
    39  // v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i].
    40  // The elements of A are
    41  //  [ d                ]
    42  //  [ e   d            ]
    43  //  [v0   e   d        ]
    44  //  [v0  v1   e   d    ]
    45  //  [v0  v1  v2   e   d]
    46  //
    47  // d must have length n, and e and tau must have length n-1. Dsytrd will panic if
    48  // these conditions are not met.
    49  //
    50  // work is temporary storage, and lwork specifies the usable memory length. At minimum,
    51  // lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is
    52  // limited by the usable length.
    53  // If lwork == -1, instead of computing Dsytrd the optimal work length is stored
    54  // into work[0].
    55  //
    56  // Dsytrd is an internal routine. It is exported for testing purposes.
    57  func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) {
    58  	switch {
    59  	case uplo != blas.Upper && uplo != blas.Lower:
    60  		panic(badUplo)
    61  	case n < 0:
    62  		panic(nLT0)
    63  	case lda < max(1, n):
    64  		panic(badLdA)
    65  	case lwork < 1 && lwork != -1:
    66  		panic(badLWork)
    67  	case len(work) < max(1, lwork):
    68  		panic(shortWork)
    69  	}
    70  
    71  	// Quick return if possible.
    72  	if n == 0 {
    73  		work[0] = 1
    74  		return
    75  	}
    76  
    77  	nb := impl.Ilaenv(1, "DSYTRD", string(uplo), n, -1, -1, -1)
    78  	lworkopt := n * nb
    79  	if lwork == -1 {
    80  		work[0] = float64(lworkopt)
    81  		return
    82  	}
    83  
    84  	switch {
    85  	case len(a) < (n-1)*lda+n:
    86  		panic(shortA)
    87  	case len(d) < n:
    88  		panic(shortD)
    89  	case len(e) < n-1:
    90  		panic(shortE)
    91  	case len(tau) < n-1:
    92  		panic(shortTau)
    93  	}
    94  
    95  	bi := blas64.Implementation()
    96  
    97  	nx := n
    98  	iws := 1
    99  	var ldwork int
   100  	if 1 < nb && nb < n {
   101  		// Determine when to cross over from blocked to unblocked code. The last
   102  		// block is always handled by unblocked code.
   103  		nx = max(nb, impl.Ilaenv(3, "DSYTRD", string(uplo), n, -1, -1, -1))
   104  		if nx < n {
   105  			// Determine if workspace is large enough for blocked code.
   106  			ldwork = nb
   107  			iws = n * ldwork
   108  			if lwork < iws {
   109  				// Not enough workspace to use optimal nb: determine the minimum
   110  				// value of nb and reduce nb or force use of unblocked code by
   111  				// setting nx = n.
   112  				nb = max(lwork/n, 1)
   113  				nbmin := impl.Ilaenv(2, "DSYTRD", string(uplo), n, -1, -1, -1)
   114  				if nb < nbmin {
   115  					nx = n
   116  				}
   117  			}
   118  		} else {
   119  			nx = n
   120  		}
   121  	} else {
   122  		nb = 1
   123  	}
   124  	ldwork = nb
   125  
   126  	if uplo == blas.Upper {
   127  		// Reduce the upper triangle of A. Columns 0:kk are handled by the
   128  		// unblocked method.
   129  		var i int
   130  		kk := n - ((n-nx+nb-1)/nb)*nb
   131  		for i = n - nb; i >= kk; i -= nb {
   132  			// Reduce columns i:i+nb to tridiagonal form and form the matrix W
   133  			// which is needed to update the unreduced part of the matrix.
   134  			impl.Dlatrd(uplo, i+nb, nb, a, lda, e, tau, work, ldwork)
   135  
   136  			// Update the unreduced submatrix A[0:i-1,0:i-1], using an update
   137  			// of the form A = A - V*Wᵀ - W*Vᵀ.
   138  			bi.Dsyr2k(uplo, blas.NoTrans, i, nb, -1, a[i:], lda, work, ldwork, 1, a, lda)
   139  
   140  			// Copy superdiagonal elements back into A, and diagonal elements into D.
   141  			for j := i; j < i+nb; j++ {
   142  				a[(j-1)*lda+j] = e[j-1]
   143  				d[j] = a[j*lda+j]
   144  			}
   145  		}
   146  		// Use unblocked code to reduce the last or only block
   147  		// check that i == kk.
   148  		impl.Dsytd2(uplo, kk, a, lda, d, e, tau)
   149  	} else {
   150  		var i int
   151  		// Reduce the lower triangle of A.
   152  		for i = 0; i < n-nx; i += nb {
   153  			// Reduce columns 0:i+nb to tridiagonal form and form the matrix W
   154  			// which is needed to update the unreduced part of the matrix.
   155  			impl.Dlatrd(uplo, n-i, nb, a[i*lda+i:], lda, e[i:], tau[i:], work, ldwork)
   156  
   157  			// Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
   158  			// of the form A = A + V*Wᵀ - W*Vᵀ.
   159  			bi.Dsyr2k(uplo, blas.NoTrans, n-i-nb, nb, -1, a[(i+nb)*lda+i:], lda,
   160  				work[nb*ldwork:], ldwork, 1, a[(i+nb)*lda+i+nb:], lda)
   161  
   162  			// Copy subdiagonal elements back into A, and diagonal elements into D.
   163  			for j := i; j < i+nb; j++ {
   164  				a[(j+1)*lda+j] = e[j]
   165  				d[j] = a[j*lda+j]
   166  			}
   167  		}
   168  		// Use unblocked code to reduce the last or only block.
   169  		impl.Dsytd2(uplo, n-i, a[i*lda+i:], lda, d[i:], e[i:], tau[i:])
   170  	}
   171  	work[0] = float64(iws)
   172  }