github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaexc.go (about)

     1  // Copyright ©2016 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gonum/blas"
    11  	"github.com/gonum/blas/blas64"
    12  	"github.com/gonum/lapack"
    13  )
    14  
    15  // Dlaexc swaps two adjacent diagonal blocks of order 1 or 2 in an n×n upper
    16  // quasi-triangular matrix T by an orthogonal similarity transformation.
    17  //
    18  // T must be in Schur canonical form, that is, block upper triangular with 1×1
    19  // and 2×2 diagonal blocks; each 2×2 diagonal block has its diagonal elements
    20  // equal and its off-diagonal elements of opposite sign. On return, T will
    21  // contain the updated matrix again in Schur canonical form.
    22  //
    23  // If wantq is true, the transformation is accumulated in the n×n matrix Q,
    24  // otherwise Q is not referenced.
    25  //
    26  // j1 is the index of the first row of the first block. n1 and n2 are the order
    27  // of the first and second block, respectively.
    28  //
    29  // work must have length at least n, otherwise Dlaexc will panic.
    30  //
    31  // If ok is false, the transformed matrix T would be too far from Schur form.
    32  // The blocks are not swapped, and T and Q are not modified.
    33  //
    34  // If n1 and n2 are both equal to 1, Dlaexc will always return true.
    35  //
    36  // Dlaexc is an internal routine. It is exported for testing purposes.
    37  func (impl Implementation) Dlaexc(wantq bool, n int, t []float64, ldt int, q []float64, ldq int, j1, n1, n2 int, work []float64) (ok bool) {
    38  	checkMatrix(n, n, t, ldt)
    39  	if wantq {
    40  		checkMatrix(n, n, q, ldq)
    41  	}
    42  	if j1 < 0 || n <= j1 {
    43  		panic("lapack: index j1 out of range")
    44  	}
    45  	if len(work) < n {
    46  		panic(badWork)
    47  	}
    48  	if n1 < 0 || 2 < n1 {
    49  		panic("lapack: invalid value of n1")
    50  	}
    51  	if n2 < 0 || 2 < n2 {
    52  		panic("lapack: invalid value of n2")
    53  	}
    54  
    55  	if n == 0 || n1 == 0 || n2 == 0 {
    56  		return true
    57  	}
    58  	if j1+n1 >= n {
    59  		// TODO(vladimir-ch): Reference LAPACK does this check whether
    60  		// the start of the second block is in the matrix T. It returns
    61  		// true if it is not and moreover it does not check whether the
    62  		// whole second block fits into T. This does not feel
    63  		// satisfactory. The only caller of Dlaexc is Dtrexc, so if the
    64  		// caller makes sure that this does not happen, we could be
    65  		// stricter here.
    66  		return true
    67  	}
    68  
    69  	j2 := j1 + 1
    70  	j3 := j1 + 2
    71  
    72  	bi := blas64.Implementation()
    73  
    74  	if n1 == 1 && n2 == 1 {
    75  		// Swap two 1×1 blocks.
    76  		t11 := t[j1*ldt+j1]
    77  		t22 := t[j2*ldt+j2]
    78  
    79  		// Determine the transformation to perform the interchange.
    80  		cs, sn, _ := impl.Dlartg(t[j1*ldt+j2], t22-t11)
    81  
    82  		// Apply transformation to the matrix T.
    83  		if n-j3 > 0 {
    84  			bi.Drot(n-j3, t[j1*ldt+j3:], 1, t[j2*ldt+j3:], 1, cs, sn)
    85  		}
    86  		if j1 > 0 {
    87  			bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn)
    88  		}
    89  
    90  		t[j1*ldt+j1] = t22
    91  		t[j2*ldt+j2] = t11
    92  
    93  		if wantq {
    94  			// Accumulate transformation in the matrix Q.
    95  			bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn)
    96  		}
    97  
    98  		return true
    99  	}
   100  
   101  	// Swapping involves at least one 2×2 block.
   102  	//
   103  	// Copy the diagonal block of order n1+n2 to the local array d and
   104  	// compute its norm.
   105  	nd := n1 + n2
   106  	var d [16]float64
   107  	const ldd = 4
   108  	impl.Dlacpy(blas.All, nd, nd, t[j1*ldt+j1:], ldt, d[:], ldd)
   109  	dnorm := impl.Dlange(lapack.MaxAbs, nd, nd, d[:], ldd, work)
   110  
   111  	// Compute machine-dependent threshold for test for accepting swap.
   112  	eps := dlamchP
   113  	thresh := math.Max(10*eps*dnorm, dlamchS/eps)
   114  
   115  	// Solve T11*X - X*T22 = scale*T12 for X.
   116  	var x [4]float64
   117  	const ldx = 2
   118  	scale, _, _ := impl.Dlasy2(false, false, -1, n1, n2, d[:], ldd, d[n1*ldd+n1:], ldd, d[n1:], ldd, x[:], ldx)
   119  
   120  	// Swap the adjacent diagonal blocks.
   121  	switch {
   122  	case n1 == 1 && n2 == 2:
   123  		// Generate elementary reflector H so that
   124  		//  ( scale, X11, X12 ) H = ( 0, 0, * )
   125  		u := [3]float64{scale, x[0], 1}
   126  		_, tau := impl.Dlarfg(3, x[1], u[:2], 1)
   127  		t11 := t[j1*ldt+j1]
   128  
   129  		// Perform swap provisionally on diagonal block in d.
   130  		impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work)
   131  		impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work)
   132  
   133  		// Test whether to reject swap.
   134  		if math.Max(math.Abs(d[2*ldd]), math.Max(math.Abs(d[2*ldd+1]), math.Abs(d[2*ldd+2]-t11))) > thresh {
   135  			return false
   136  		}
   137  
   138  		// Accept swap: apply transformation to the entire matrix T.
   139  		impl.Dlarfx(blas.Left, 3, n-j1, u[:], tau, t[j1*ldt+j1:], ldt, work)
   140  		impl.Dlarfx(blas.Right, j2+1, 3, u[:], tau, t[j1:], ldt, work)
   141  
   142  		t[j3*ldt+j1] = 0
   143  		t[j3*ldt+j2] = 0
   144  		t[j3*ldt+j3] = t11
   145  
   146  		if wantq {
   147  			// Accumulate transformation in the matrix Q.
   148  			impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work)
   149  		}
   150  
   151  	case n1 == 2 && n2 == 1:
   152  		//  Generate elementary reflector H so that:
   153  		//   H (  -X11 ) = ( * )
   154  		//     (  -X21 ) = ( 0 )
   155  		//     ( scale ) = ( 0 )
   156  		u := [3]float64{1, -x[ldx], scale}
   157  		_, tau := impl.Dlarfg(3, -x[0], u[1:], 1)
   158  		t33 := t[j3*ldt+j3]
   159  
   160  		// Perform swap provisionally on diagonal block in D.
   161  		impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work)
   162  		impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work)
   163  
   164  		// Test whether to reject swap.
   165  		if math.Max(math.Abs(d[ldd]), math.Max(math.Abs(d[2*ldd]), math.Abs(d[0]-t33))) > thresh {
   166  			return false
   167  		}
   168  
   169  		// Accept swap: apply transformation to the entire matrix T.
   170  		impl.Dlarfx(blas.Right, j3+1, 3, u[:], tau, t[j1:], ldt, work)
   171  		impl.Dlarfx(blas.Left, 3, n-j1-1, u[:], tau, t[j1*ldt+j2:], ldt, work)
   172  
   173  		t[j1*ldt+j1] = t33
   174  		t[j2*ldt+j1] = 0
   175  		t[j3*ldt+j1] = 0
   176  
   177  		if wantq {
   178  			// Accumulate transformation in the matrix Q.
   179  			impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work)
   180  		}
   181  
   182  	default: // n1 == 2 && n2 == 2
   183  		// Generate elementary reflectors H_1 and H_2 so that:
   184  		//  H_2 H_1 (  -X11  -X12 ) = (  *  * )
   185  		//          (  -X21  -X22 )   (  0  * )
   186  		//          ( scale    0  )   (  0  0 )
   187  		//          (    0  scale )   (  0  0 )
   188  		u1 := [3]float64{1, -x[ldx], scale}
   189  		_, tau1 := impl.Dlarfg(3, -x[0], u1[1:], 1)
   190  
   191  		temp := -tau1 * (x[1] + u1[1]*x[ldx+1])
   192  		u2 := [3]float64{1, -temp * u1[2], scale}
   193  		_, tau2 := impl.Dlarfg(3, -temp*u1[1]-x[ldx+1], u2[1:], 1)
   194  
   195  		// Perform swap provisionally on diagonal block in D.
   196  		impl.Dlarfx(blas.Left, 3, 4, u1[:], tau1, d[:], ldd, work)
   197  		impl.Dlarfx(blas.Right, 4, 3, u1[:], tau1, d[:], ldd, work)
   198  		impl.Dlarfx(blas.Left, 3, 4, u2[:], tau2, d[ldd:], ldd, work)
   199  		impl.Dlarfx(blas.Right, 4, 3, u2[:], tau2, d[1:], ldd, work)
   200  
   201  		// Test whether to reject swap.
   202  		m1 := math.Max(math.Abs(d[2*ldd]), math.Abs(d[2*ldd+1]))
   203  		m2 := math.Max(math.Abs(d[3*ldd]), math.Abs(d[3*ldd+1]))
   204  		if math.Max(m1, m2) > thresh {
   205  			return false
   206  		}
   207  
   208  		// Accept swap: apply transformation to the entire matrix T.
   209  		j4 := j1 + 3
   210  		impl.Dlarfx(blas.Left, 3, n-j1, u1[:], tau1, t[j1*ldt+j1:], ldt, work)
   211  		impl.Dlarfx(blas.Right, j4+1, 3, u1[:], tau1, t[j1:], ldt, work)
   212  		impl.Dlarfx(blas.Left, 3, n-j1, u2[:], tau2, t[j2*ldt+j1:], ldt, work)
   213  		impl.Dlarfx(blas.Right, j4+1, 3, u2[:], tau2, t[j2:], ldt, work)
   214  
   215  		t[j3*ldt+j1] = 0
   216  		t[j3*ldt+j2] = 0
   217  		t[j4*ldt+j1] = 0
   218  		t[j4*ldt+j2] = 0
   219  
   220  		if wantq {
   221  			// Accumulate transformation in the matrix Q.
   222  			impl.Dlarfx(blas.Right, n, 3, u1[:], tau1, q[j1:], ldq, work)
   223  			impl.Dlarfx(blas.Right, n, 3, u2[:], tau2, q[j2:], ldq, work)
   224  		}
   225  	}
   226  
   227  	if n2 == 2 {
   228  		// Standardize new 2×2 block T11.
   229  		a, b := t[j1*ldt+j1], t[j1*ldt+j2]
   230  		c, d := t[j2*ldt+j1], t[j2*ldt+j2]
   231  		var cs, sn float64
   232  		t[j1*ldt+j1], t[j1*ldt+j2], t[j2*ldt+j1], t[j2*ldt+j2], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d)
   233  		if n-j1-2 > 0 {
   234  			bi.Drot(n-j1-2, t[j1*ldt+j1+2:], 1, t[j2*ldt+j1+2:], 1, cs, sn)
   235  		}
   236  		if j1 > 0 {
   237  			bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn)
   238  		}
   239  		if wantq {
   240  			bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn)
   241  		}
   242  	}
   243  	if n1 == 2 {
   244  		// Standardize new 2×2 block T22.
   245  		j3 := j1 + n2
   246  		j4 := j3 + 1
   247  		a, b := t[j3*ldt+j3], t[j3*ldt+j4]
   248  		c, d := t[j4*ldt+j3], t[j4*ldt+j4]
   249  		var cs, sn float64
   250  		t[j3*ldt+j3], t[j3*ldt+j4], t[j4*ldt+j3], t[j4*ldt+j4], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d)
   251  		if n-j3-2 > 0 {
   252  			bi.Drot(n-j3-2, t[j3*ldt+j3+2:], 1, t[j4*ldt+j3+2:], 1, cs, sn)
   253  		}
   254  		bi.Drot(j3, t[j3:], ldt, t[j4:], ldt, cs, sn)
   255  		if wantq {
   256  			bi.Drot(n, q[j3:], ldq, q[j4:], ldq, cs, sn)
   257  		}
   258  	}
   259  
   260  	return true
   261  }