github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/internal/testdata/netlib/dlaqr5.f (about) 1 *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep. 2 * 3 * =========== DOCUMENTATION =========== 4 * 5 * Online html documentation available at 6 * http://www.netlib.org/lapack/explore-html/ 7 * 8 *> \htmlonly 9 *> Download DLAQR5 + dependencies 10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f"> 11 *> [TGZ]</a> 12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f"> 13 *> [ZIP]</a> 14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f"> 15 *> [TXT]</a> 16 *> \endhtmlonly 17 * 18 * Definition: 19 * =========== 20 * 21 * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, 22 * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, 23 * LDU, NV, WV, LDWV, NH, WH, LDWH ) 24 * 25 * .. Scalar Arguments .. 26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 28 * LOGICAL WANTT, WANTZ 29 * .. 30 * .. Array Arguments .. 31 * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), 32 * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), 33 * $ Z( LDZ, * ) 34 * .. 35 * 36 * 37 *> \par Purpose: 38 * ============= 39 *> 40 *> \verbatim 41 *> 42 *> DLAQR5, called by DLAQR0, performs a 43 *> single small-bulge multi-shift QR sweep. 44 *> \endverbatim 45 * 46 * Arguments: 47 * ========== 48 * 49 *> \param[in] WANTT 50 *> \verbatim 51 *> WANTT is logical scalar 52 *> WANTT = .true. if the quasi-triangular Schur factor 53 *> is being computed. WANTT is set to .false. otherwise. 54 *> \endverbatim 55 *> 56 *> \param[in] WANTZ 57 *> \verbatim 58 *> WANTZ is logical scalar 59 *> WANTZ = .true. if the orthogonal Schur factor is being 60 *> computed. WANTZ is set to .false. otherwise. 61 *> \endverbatim 62 *> 63 *> \param[in] KACC22 64 *> \verbatim 65 *> KACC22 is integer with value 0, 1, or 2. 66 *> Specifies the computation mode of far-from-diagonal 67 *> orthogonal updates. 68 *> = 0: DLAQR5 does not accumulate reflections and does not 69 *> use matrix-matrix multiply to update far-from-diagonal 70 *> matrix entries. 71 *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix 72 *> multiply to update the far-from-diagonal matrix entries. 73 *> = 2: DLAQR5 accumulates reflections, uses matrix-matrix 74 *> multiply to update the far-from-diagonal matrix entries, 75 *> and takes advantage of 2-by-2 block structure during 76 *> matrix multiplies. 77 *> \endverbatim 78 *> 79 *> \param[in] N 80 *> \verbatim 81 *> N is integer scalar 82 *> N is the order of the Hessenberg matrix H upon which this 83 *> subroutine operates. 84 *> \endverbatim 85 *> 86 *> \param[in] KTOP 87 *> \verbatim 88 *> KTOP is integer scalar 89 *> \endverbatim 90 *> 91 *> \param[in] KBOT 92 *> \verbatim 93 *> KBOT is integer scalar 94 *> These are the first and last rows and columns of an 95 *> isolated diagonal block upon which the QR sweep is to be 96 *> applied. It is assumed without a check that 97 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0 98 *> and 99 *> either KBOT = N or H(KBOT+1,KBOT) = 0. 100 *> \endverbatim 101 *> 102 *> \param[in] NSHFTS 103 *> \verbatim 104 *> NSHFTS is integer scalar 105 *> NSHFTS gives the number of simultaneous shifts. NSHFTS 106 *> must be positive and even. 107 *> \endverbatim 108 *> 109 *> \param[in,out] SR 110 *> \verbatim 111 *> SR is DOUBLE PRECISION array of size (NSHFTS) 112 *> \endverbatim 113 *> 114 *> \param[in,out] SI 115 *> \verbatim 116 *> SI is DOUBLE PRECISION array of size (NSHFTS) 117 *> SR contains the real parts and SI contains the imaginary 118 *> parts of the NSHFTS shifts of origin that define the 119 *> multi-shift QR sweep. On output SR and SI may be 120 *> reordered. 121 *> \endverbatim 122 *> 123 *> \param[in,out] H 124 *> \verbatim 125 *> H is DOUBLE PRECISION array of size (LDH,N) 126 *> On input H contains a Hessenberg matrix. On output a 127 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied 128 *> to the isolated diagonal block in rows and columns KTOP 129 *> through KBOT. 130 *> \endverbatim 131 *> 132 *> \param[in] LDH 133 *> \verbatim 134 *> LDH is integer scalar 135 *> LDH is the leading dimension of H just as declared in the 136 *> calling procedure. LDH.GE.MAX(1,N). 137 *> \endverbatim 138 *> 139 *> \param[in] ILOZ 140 *> \verbatim 141 *> ILOZ is INTEGER 142 *> \endverbatim 143 *> 144 *> \param[in] IHIZ 145 *> \verbatim 146 *> IHIZ is INTEGER 147 *> Specify the rows of Z to which transformations must be 148 *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N 149 *> \endverbatim 150 *> 151 *> \param[in,out] Z 152 *> \verbatim 153 *> Z is DOUBLE PRECISION array of size (LDZ,IHI) 154 *> If WANTZ = .TRUE., then the QR Sweep orthogonal 155 *> similarity transformation is accumulated into 156 *> Z(ILOZ:IHIZ,ILO:IHI) from the right. 157 *> If WANTZ = .FALSE., then Z is unreferenced. 158 *> \endverbatim 159 *> 160 *> \param[in] LDZ 161 *> \verbatim 162 *> LDZ is integer scalar 163 *> LDA is the leading dimension of Z just as declared in 164 *> the calling procedure. LDZ.GE.N. 165 *> \endverbatim 166 *> 167 *> \param[out] V 168 *> \verbatim 169 *> V is DOUBLE PRECISION array of size (LDV,NSHFTS/2) 170 *> \endverbatim 171 *> 172 *> \param[in] LDV 173 *> \verbatim 174 *> LDV is integer scalar 175 *> LDV is the leading dimension of V as declared in the 176 *> calling procedure. LDV.GE.3. 177 *> \endverbatim 178 *> 179 *> \param[out] U 180 *> \verbatim 181 *> U is DOUBLE PRECISION array of size 182 *> (LDU,3*NSHFTS-3) 183 *> \endverbatim 184 *> 185 *> \param[in] LDU 186 *> \verbatim 187 *> LDU is integer scalar 188 *> LDU is the leading dimension of U just as declared in the 189 *> in the calling subroutine. LDU.GE.3*NSHFTS-3. 190 *> \endverbatim 191 *> 192 *> \param[in] NH 193 *> \verbatim 194 *> NH is integer scalar 195 *> NH is the number of columns in array WH available for 196 *> workspace. NH.GE.1. 197 *> \endverbatim 198 *> 199 *> \param[out] WH 200 *> \verbatim 201 *> WH is DOUBLE PRECISION array of size (LDWH,NH) 202 *> \endverbatim 203 *> 204 *> \param[in] LDWH 205 *> \verbatim 206 *> LDWH is integer scalar 207 *> Leading dimension of WH just as declared in the 208 *> calling procedure. LDWH.GE.3*NSHFTS-3. 209 *> \endverbatim 210 *> 211 *> \param[in] NV 212 *> \verbatim 213 *> NV is integer scalar 214 *> NV is the number of rows in WV agailable for workspace. 215 *> NV.GE.1. 216 *> \endverbatim 217 *> 218 *> \param[out] WV 219 *> \verbatim 220 *> WV is DOUBLE PRECISION array of size 221 *> (LDWV,3*NSHFTS-3) 222 *> \endverbatim 223 *> 224 *> \param[in] LDWV 225 *> \verbatim 226 *> LDWV is integer scalar 227 *> LDWV is the leading dimension of WV as declared in the 228 *> in the calling subroutine. LDWV.GE.NV. 229 *> \endverbatim 230 * 231 * Authors: 232 * ======== 233 * 234 *> \author Univ. of Tennessee 235 *> \author Univ. of California Berkeley 236 *> \author Univ. of Colorado Denver 237 *> \author NAG Ltd. 238 * 239 *> \date September 2012 240 * 241 *> \ingroup doubleOTHERauxiliary 242 * 243 *> \par Contributors: 244 * ================== 245 *> 246 *> Karen Braman and Ralph Byers, Department of Mathematics, 247 *> University of Kansas, USA 248 * 249 *> \par References: 250 * ================ 251 *> 252 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR 253 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 254 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages 255 *> 929--947, 2002. 256 *> 257 * ===================================================================== 258 SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, 259 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, 260 $ LDU, NV, WV, LDWV, NH, WH, LDWH ) 261 * 262 * -- LAPACK auxiliary routine (version 3.4.2) -- 263 * -- LAPACK is a software package provided by Univ. of Tennessee, -- 264 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 265 * September 2012 266 * 267 * .. Scalar Arguments .. 268 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 269 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 270 LOGICAL WANTT, WANTZ 271 * .. 272 * .. Array Arguments .. 273 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), 274 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), 275 $ Z( LDZ, * ) 276 * .. 277 * 278 * ================================================================ 279 * .. Parameters .. 280 DOUBLE PRECISION ZERO, ONE 281 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) 282 * .. 283 * .. Local Scalars .. 284 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM, 285 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, 286 $ ULP 287 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, 288 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, 289 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, 290 $ NS, NU 291 LOGICAL ACCUM, BLK22, BMP22 292 * .. 293 * .. External Functions .. 294 DOUBLE PRECISION DLAMCH 295 EXTERNAL DLAMCH 296 * .. 297 * .. Intrinsic Functions .. 298 * 299 INTRINSIC ABS, DBLE, MAX, MIN, MOD 300 * .. 301 * .. Local Arrays .. 302 DOUBLE PRECISION VT( 3 ) 303 * .. 304 * .. External Subroutines .. 305 EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET, 306 $ DTRMM 307 * .. 308 * .. Executable Statements .. 309 * 310 * ==== If there are no shifts, then there is nothing to do. ==== 311 * 312 IF( NSHFTS.LT.2 ) 313 $ RETURN 314 * 315 * ==== If the active block is empty or 1-by-1, then there 316 * . is nothing to do. ==== 317 * 318 IF( KTOP.GE.KBOT ) 319 $ RETURN 320 * 321 * ==== Shuffle shifts into pairs of real shifts and pairs 322 * . of complex conjugate shifts assuming complex 323 * . conjugate shifts are already adjacent to one 324 * . another. ==== 325 * 326 DO 10 I = 1, NSHFTS - 2, 2 327 IF( SI( I ).NE.-SI( I+1 ) ) THEN 328 * 329 SWAP = SR( I ) 330 SR( I ) = SR( I+1 ) 331 SR( I+1 ) = SR( I+2 ) 332 SR( I+2 ) = SWAP 333 * 334 SWAP = SI( I ) 335 SI( I ) = SI( I+1 ) 336 SI( I+1 ) = SI( I+2 ) 337 SI( I+2 ) = SWAP 338 END IF 339 10 CONTINUE 340 * 341 * ==== NSHFTS is supposed to be even, but if it is odd, 342 * . then simply reduce it by one. The shuffle above 343 * . ensures that the dropped shift is real and that 344 * . the remaining shifts are paired. ==== 345 * 346 NS = NSHFTS - MOD( NSHFTS, 2 ) 347 * 348 * ==== Machine constants for deflation ==== 349 * 350 SAFMIN = DLAMCH( 'SAFE MINIMUM' ) 351 SAFMAX = ONE / SAFMIN 352 CALL DLABAD( SAFMIN, SAFMAX ) 353 ULP = DLAMCH( 'PRECISION' ) 354 SMLNUM = SAFMIN*( DBLE( N ) / ULP ) 355 * 356 * ==== Use accumulated reflections to update far-from-diagonal 357 * . entries ? ==== 358 * 359 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) 360 * 361 * ==== If so, exploit the 2-by-2 block structure? ==== 362 * 363 BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) 364 * 365 * ==== clear trash ==== 366 * 367 IF( KTOP+2.LE.KBOT ) 368 $ H( KTOP+2, KTOP ) = ZERO 369 * 370 * ==== NBMPS = number of 2-shift bulges in the chain ==== 371 * 372 NBMPS = NS / 2 373 * 374 * ==== KDU = width of slab ==== 375 * 376 KDU = 6*NBMPS - 3 377 * 378 * ==== Create and chase chains of NBMPS bulges ==== 379 * 380 DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 381 NDCOL = INCOL + KDU 382 IF( ACCUM ) 383 $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) 384 * 385 * ==== Near-the-diagonal bulge chase. The following loop 386 * . performs the near-the-diagonal part of a small bulge 387 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal 388 * . chunk extends from column INCOL to column NDCOL 389 * . (including both column INCOL and column NDCOL). The 390 * . following loop chases a 3*NBMPS column long chain of 391 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL 392 * . may be less than KTOP and and NDCOL may be greater than 393 * . KBOT indicating phantom columns from which to chase 394 * . bulges before they are actually introduced or to which 395 * . to chase bulges beyond column KBOT.) ==== 396 * 397 DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) 398 * 399 * ==== Bulges number MTOP to MBOT are active double implicit 400 * . shift bulges. There may or may not also be small 401 * . 2-by-2 bulge, if there is room. The inactive bulges 402 * . (if any) must wait until the active bulges have moved 403 * . down the diagonal to make room. The phantom matrix 404 * . paradigm described above helps keep track. ==== 405 * 406 MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) 407 MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) 408 M22 = MBOT + 1 409 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. 410 $ ( KBOT-2 ) 411 * 412 * ==== Generate reflections to chase the chain right 413 * . one column. (The minimum value of K is KTOP-1.) ==== 414 * 415 DO 20 M = MTOP, MBOT 416 K = KRCOL + 3*( M-1 ) 417 IF( K.EQ.KTOP-1 ) THEN 418 CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ), 419 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), 420 $ V( 1, M ) ) 421 ALPHA = V( 1, M ) 422 CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) 423 ELSE 424 BETA = H( K+1, K ) 425 V( 2, M ) = H( K+2, K ) 426 V( 3, M ) = H( K+3, K ) 427 CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) 428 * 429 * ==== A Bulge may collapse because of vigilant 430 * . deflation or destructive underflow. In the 431 * . underflow case, try the two-small-subdiagonals 432 * . trick to try to reinflate the bulge. ==== 433 * 434 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. 435 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN 436 * 437 * ==== Typical case: not collapsed (yet). ==== 438 * 439 H( K+1, K ) = BETA 440 H( K+2, K ) = ZERO 441 H( K+3, K ) = ZERO 442 ELSE 443 * 444 * ==== Atypical case: collapsed. Attempt to 445 * . reintroduce ignoring H(K+1,K) and H(K+2,K). 446 * . If the fill resulting from the new 447 * . reflector is too large, then abandon it. 448 * . Otherwise, use the new one. ==== 449 * 450 CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ), 451 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), 452 $ VT ) 453 ALPHA = VT( 1 ) 454 CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) 455 REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* 456 $ H( K+2, K ) ) 457 * 458 IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ 459 $ ABS( REFSUM*VT( 3 ) ).GT.ULP* 460 $ ( ABS( H( K, K ) )+ABS( H( K+1, 461 $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN 462 * 463 * ==== Starting a new bulge here would 464 * . create non-negligible fill. Use 465 * . the old one with trepidation. ==== 466 * 467 H( K+1, K ) = BETA 468 H( K+2, K ) = ZERO 469 H( K+3, K ) = ZERO 470 ELSE 471 * 472 * ==== Stating a new bulge here would 473 * . create only negligible fill. 474 * . Replace the old reflector with 475 * . the new one. ==== 476 * 477 H( K+1, K ) = H( K+1, K ) - REFSUM 478 H( K+2, K ) = ZERO 479 H( K+3, K ) = ZERO 480 V( 1, M ) = VT( 1 ) 481 V( 2, M ) = VT( 2 ) 482 V( 3, M ) = VT( 3 ) 483 END IF 484 END IF 485 END IF 486 20 CONTINUE 487 * 488 * ==== Generate a 2-by-2 reflection, if needed. ==== 489 * 490 K = KRCOL + 3*( M22-1 ) 491 IF( BMP22 ) THEN 492 IF( K.EQ.KTOP-1 ) THEN 493 CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ), 494 $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ), 495 $ V( 1, M22 ) ) 496 BETA = V( 1, M22 ) 497 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 498 ELSE 499 BETA = H( K+1, K ) 500 V( 2, M22 ) = H( K+2, K ) 501 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 502 H( K+1, K ) = BETA 503 H( K+2, K ) = ZERO 504 END IF 505 END IF 506 * 507 * ==== Multiply H by reflections from the left ==== 508 * 509 IF( ACCUM ) THEN 510 JBOT = MIN( NDCOL, KBOT ) 511 ELSE IF( WANTT ) THEN 512 JBOT = N 513 ELSE 514 JBOT = KBOT 515 END IF 516 DO 40 J = MAX( KTOP, KRCOL ), JBOT 517 MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) 518 DO 30 M = MTOP, MEND 519 K = KRCOL + 3*( M-1 ) 520 REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* 521 $ H( K+2, J )+V( 3, M )*H( K+3, J ) ) 522 H( K+1, J ) = H( K+1, J ) - REFSUM 523 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) 524 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) 525 30 CONTINUE 526 40 CONTINUE 527 IF( BMP22 ) THEN 528 K = KRCOL + 3*( M22-1 ) 529 DO 50 J = MAX( K+1, KTOP ), JBOT 530 REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* 531 $ H( K+2, J ) ) 532 H( K+1, J ) = H( K+1, J ) - REFSUM 533 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) 534 50 CONTINUE 535 END IF 536 * 537 * ==== Multiply H by reflections from the right. 538 * . Delay filling in the last row until the 539 * . vigilant deflation check is complete. ==== 540 * 541 IF( ACCUM ) THEN 542 JTOP = MAX( KTOP, INCOL ) 543 ELSE IF( WANTT ) THEN 544 JTOP = 1 545 ELSE 546 JTOP = KTOP 547 END IF 548 DO 90 M = MTOP, MBOT 549 IF( V( 1, M ).NE.ZERO ) THEN 550 K = KRCOL + 3*( M-1 ) 551 DO 60 J = JTOP, MIN( KBOT, K+3 ) 552 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* 553 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) 554 H( J, K+1 ) = H( J, K+1 ) - REFSUM 555 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) 556 H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) 557 60 CONTINUE 558 * 559 IF( ACCUM ) THEN 560 * 561 * ==== Accumulate U. (If necessary, update Z later 562 * . with with an efficient matrix-matrix 563 * . multiply.) ==== 564 * 565 KMS = K - INCOL 566 DO 70 J = MAX( 1, KTOP-INCOL ), KDU 567 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* 568 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) 569 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 570 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) 571 U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) 572 70 CONTINUE 573 ELSE IF( WANTZ ) THEN 574 * 575 * ==== U is not accumulated, so update Z 576 * . now by multiplying by reflections 577 * . from the right. ==== 578 * 579 DO 80 J = ILOZ, IHIZ 580 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* 581 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) 582 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 583 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) 584 Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) 585 80 CONTINUE 586 END IF 587 END IF 588 90 CONTINUE 589 * 590 * ==== Special case: 2-by-2 reflection (if needed) ==== 591 * 592 K = KRCOL + 3*( M22-1 ) 593 IF( BMP22 ) THEN 594 IF ( V( 1, M22 ).NE.ZERO ) THEN 595 DO 100 J = JTOP, MIN( KBOT, K+3 ) 596 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* 597 $ H( J, K+2 ) ) 598 H( J, K+1 ) = H( J, K+1 ) - REFSUM 599 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 ) 600 100 CONTINUE 601 * 602 IF( ACCUM ) THEN 603 KMS = K - INCOL 604 DO 110 J = MAX( 1, KTOP-INCOL ), KDU 605 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ 606 $ V( 2, M22 )*U( J, KMS+2 ) ) 607 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 608 U( J, KMS+2 ) = U( J, KMS+2 ) - 609 $ REFSUM*V( 2, M22 ) 610 110 CONTINUE 611 ELSE IF( WANTZ ) THEN 612 DO 120 J = ILOZ, IHIZ 613 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* 614 $ Z( J, K+2 ) ) 615 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 616 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 ) 617 120 CONTINUE 618 END IF 619 END IF 620 END IF 621 * 622 * ==== Vigilant deflation check ==== 623 * 624 MSTART = MTOP 625 IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) 626 $ MSTART = MSTART + 1 627 MEND = MBOT 628 IF( BMP22 ) 629 $ MEND = MEND + 1 630 IF( KRCOL.EQ.KBOT-2 ) 631 $ MEND = MEND + 1 632 DO 130 M = MSTART, MEND 633 K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) 634 * 635 * ==== The following convergence test requires that 636 * . the tradition small-compared-to-nearby-diagonals 637 * . criterion and the Ahues & Tisseur (LAWN 122, 1997) 638 * . criteria both be satisfied. The latter improves 639 * . accuracy in some examples. Falling back on an 640 * . alternate convergence criterion when TST1 or TST2 641 * . is zero (as done here) is traditional but probably 642 * . unnecessary. ==== 643 * 644 IF( H( K+1, K ).NE.ZERO ) THEN 645 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) 646 IF( TST1.EQ.ZERO ) THEN 647 IF( K.GE.KTOP+1 ) 648 $ TST1 = TST1 + ABS( H( K, K-1 ) ) 649 IF( K.GE.KTOP+2 ) 650 $ TST1 = TST1 + ABS( H( K, K-2 ) ) 651 IF( K.GE.KTOP+3 ) 652 $ TST1 = TST1 + ABS( H( K, K-3 ) ) 653 IF( K.LE.KBOT-2 ) 654 $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) 655 IF( K.LE.KBOT-3 ) 656 $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) 657 IF( K.LE.KBOT-4 ) 658 $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) 659 END IF 660 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) 661 $ THEN 662 H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) 663 H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) 664 H11 = MAX( ABS( H( K+1, K+1 ) ), 665 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 666 H22 = MIN( ABS( H( K+1, K+1 ) ), 667 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 668 SCL = H11 + H12 669 TST2 = H22*( H11 / SCL ) 670 * 671 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. 672 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO 673 END IF 674 END IF 675 130 CONTINUE 676 * 677 * ==== Fill in the last row of each bulge. ==== 678 * 679 MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) 680 DO 140 M = MTOP, MEND 681 K = KRCOL + 3*( M-1 ) 682 REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) 683 H( K+4, K+1 ) = -REFSUM 684 H( K+4, K+2 ) = -REFSUM*V( 2, M ) 685 H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M ) 686 140 CONTINUE 687 * 688 * ==== End of near-the-diagonal bulge chase. ==== 689 * 690 150 CONTINUE 691 * 692 * ==== Use U (if accumulated) to update far-from-diagonal 693 * . entries in H. If required, use U to update Z as 694 * . well. ==== 695 * 696 IF( ACCUM ) THEN 697 IF( WANTT ) THEN 698 JTOP = 1 699 JBOT = N 700 ELSE 701 JTOP = KTOP 702 JBOT = KBOT 703 END IF 704 IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. 705 $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN 706 * 707 * ==== Updates not exploiting the 2-by-2 block 708 * . structure of U. K1 and NU keep track of 709 * . the location and size of U in the special 710 * . cases of introducing bulges and chasing 711 * . bulges off the bottom. In these special 712 * . cases and in case the number of shifts 713 * . is NS = 2, there is no 2-by-2 block 714 * . structure to exploit. ==== 715 * 716 K1 = MAX( 1, KTOP-INCOL ) 717 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 718 * 719 * ==== Horizontal Multiply ==== 720 * 721 DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 722 JLEN = MIN( NH, JBOT-JCOL+1 ) 723 CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), 724 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, 725 $ LDWH ) 726 CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH, 727 $ H( INCOL+K1, JCOL ), LDH ) 728 160 CONTINUE 729 * 730 * ==== Vertical multiply ==== 731 * 732 DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV 733 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) 734 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE, 735 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), 736 $ LDU, ZERO, WV, LDWV ) 737 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV, 738 $ H( JROW, INCOL+K1 ), LDH ) 739 170 CONTINUE 740 * 741 * ==== Z multiply (also vertical) ==== 742 * 743 IF( WANTZ ) THEN 744 DO 180 JROW = ILOZ, IHIZ, NV 745 JLEN = MIN( NV, IHIZ-JROW+1 ) 746 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE, 747 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), 748 $ LDU, ZERO, WV, LDWV ) 749 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV, 750 $ Z( JROW, INCOL+K1 ), LDZ ) 751 180 CONTINUE 752 END IF 753 ELSE 754 * 755 * ==== Updates exploiting U's 2-by-2 block structure. 756 * . (I2, I4, J2, J4 are the last rows and columns 757 * . of the blocks.) ==== 758 * 759 I2 = ( KDU+1 ) / 2 760 I4 = KDU 761 J2 = I4 - I2 762 J4 = KDU 763 * 764 * ==== KZS and KNZ deal with the band of zeros 765 * . along the diagonal of one of the triangular 766 * . blocks. ==== 767 * 768 KZS = ( J4-J2 ) - ( NS+1 ) 769 KNZ = NS + 1 770 * 771 * ==== Horizontal multiply ==== 772 * 773 DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 774 JLEN = MIN( NH, JBOT-JCOL+1 ) 775 * 776 * ==== Copy bottom of H to top+KZS of scratch ==== 777 * (The first KZS rows get multiplied by zero.) ==== 778 * 779 CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), 780 $ LDH, WH( KZS+1, 1 ), LDWH ) 781 * 782 * ==== Multiply by U21**T ==== 783 * 784 CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) 785 CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, 786 $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), 787 $ LDWH ) 788 * 789 * ==== Multiply top of H by U11**T ==== 790 * 791 CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, 792 $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) 793 * 794 * ==== Copy top of H to bottom of WH ==== 795 * 796 CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, 797 $ WH( I2+1, 1 ), LDWH ) 798 * 799 * ==== Multiply by U21**T ==== 800 * 801 CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, 802 $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) 803 * 804 * ==== Multiply by U22 ==== 805 * 806 CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, 807 $ U( J2+1, I2+1 ), LDU, 808 $ H( INCOL+1+J2, JCOL ), LDH, ONE, 809 $ WH( I2+1, 1 ), LDWH ) 810 * 811 * ==== Copy it back ==== 812 * 813 CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH, 814 $ H( INCOL+1, JCOL ), LDH ) 815 190 CONTINUE 816 * 817 * ==== Vertical multiply ==== 818 * 819 DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV 820 JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) 821 * 822 * ==== Copy right of H to scratch (the first KZS 823 * . columns get multiplied by zero) ==== 824 * 825 CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), 826 $ LDH, WV( 1, 1+KZS ), LDWV ) 827 * 828 * ==== Multiply by U21 ==== 829 * 830 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) 831 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, 832 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), 833 $ LDWV ) 834 * 835 * ==== Multiply by U11 ==== 836 * 837 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE, 838 $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, 839 $ LDWV ) 840 * 841 * ==== Copy left of H to right of scratch ==== 842 * 843 CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, 844 $ WV( 1, 1+I2 ), LDWV ) 845 * 846 * ==== Multiply by U21 ==== 847 * 848 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, 849 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) 850 * 851 * ==== Multiply by U22 ==== 852 * 853 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, 854 $ H( JROW, INCOL+1+J2 ), LDH, 855 $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), 856 $ LDWV ) 857 * 858 * ==== Copy it back ==== 859 * 860 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV, 861 $ H( JROW, INCOL+1 ), LDH ) 862 200 CONTINUE 863 * 864 * ==== Multiply Z (also vertical) ==== 865 * 866 IF( WANTZ ) THEN 867 DO 210 JROW = ILOZ, IHIZ, NV 868 JLEN = MIN( NV, IHIZ-JROW+1 ) 869 * 870 * ==== Copy right of Z to left of scratch (first 871 * . KZS columns get multiplied by zero) ==== 872 * 873 CALL DLACPY( 'ALL', JLEN, KNZ, 874 $ Z( JROW, INCOL+1+J2 ), LDZ, 875 $ WV( 1, 1+KZS ), LDWV ) 876 * 877 * ==== Multiply by U12 ==== 878 * 879 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, 880 $ LDWV ) 881 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, 882 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), 883 $ LDWV ) 884 * 885 * ==== Multiply by U11 ==== 886 * 887 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE, 888 $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, 889 $ WV, LDWV ) 890 * 891 * ==== Copy left of Z to right of scratch ==== 892 * 893 CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), 894 $ LDZ, WV( 1, 1+I2 ), LDWV ) 895 * 896 * ==== Multiply by U21 ==== 897 * 898 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, 899 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), 900 $ LDWV ) 901 * 902 * ==== Multiply by U22 ==== 903 * 904 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, 905 $ Z( JROW, INCOL+1+J2 ), LDZ, 906 $ U( J2+1, I2+1 ), LDU, ONE, 907 $ WV( 1, 1+I2 ), LDWV ) 908 * 909 * ==== Copy the result back to Z ==== 910 * 911 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV, 912 $ Z( JROW, INCOL+1 ), LDZ ) 913 210 CONTINUE 914 END IF 915 END IF 916 END IF 917 220 CONTINUE 918 * 919 * ==== End of DLAQR5 ==== 920 * 921 END