// Code generated by command: go run gen.go -out blake3_amd64.s. DO NOT EDIT.

// Listing origin: git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/crypto/internal/blake3/blake3_amd64.s

#include "textflag.h"

// iv: four 32-bit constants. The routines in this file broadcast them into
// Z16, Z18, Z20 and Z22 to fill state words 8..11 before the rounds.
// NOTE(review): presumably the first four BLAKE3 IV words (the file path names
// blake3) - confirm against the specification.
DATA iv<>+0(SB)/4, $0x6a09e667
DATA iv<>+4(SB)/4, $0xbb67ae85
DATA iv<>+8(SB)/4, $0x3c6ef372
DATA iv<>+12(SB)/4, $0xa54ff53a
GLOBL iv<>(SB), RODATA|NOPTR, $16

// seq: the sequence 0..15 as sixteen 32-bit values, one per ZMM lane.
// It is used as a per-lane index: added to the low counter word, and shifted
// left to form the byte offsets of the gather/scatter instructions.
// seq<>+4 (the value 1) is also used as the broadcast carry increment for the
// high counter word.
DATA seq<>+0(SB)/4, $0x00000000
DATA seq<>+4(SB)/4, $0x00000001
DATA seq<>+8(SB)/4, $0x00000002
DATA seq<>+12(SB)/4, $0x00000003
DATA seq<>+16(SB)/4, $0x00000004
DATA seq<>+20(SB)/4, $0x00000005
DATA seq<>+24(SB)/4, $0x00000006
DATA seq<>+28(SB)/4, $0x00000007
DATA seq<>+32(SB)/4, $0x00000008
DATA seq<>+36(SB)/4, $0x00000009
DATA seq<>+40(SB)/4, $0x0000000a
DATA seq<>+44(SB)/4, $0x0000000b
DATA seq<>+48(SB)/4, $0x0000000c
DATA seq<>+52(SB)/4, $0x0000000d
DATA seq<>+56(SB)/4, $0x0000000e
DATA seq<>+60(SB)/4, $0x0000000f
GLOBL seq<>(SB), RODATA|NOPTR, $64

// seq64: the sequence 0..7 as eight 64-bit values.
// No reference to this table is visible in this part of the file.
DATA seq64<>+0(SB)/8, $0x0000000000000000
DATA seq64<>+8(SB)/8, $0x0000000000000001
DATA seq64<>+16(SB)/8, $0x0000000000000002
DATA seq64<>+24(SB)/8, $0x0000000000000003
DATA seq64<>+32(SB)/8, $0x0000000000000004
DATA seq64<>+40(SB)/8, $0x0000000000000005
DATA seq64<>+48(SB)/8, $0x0000000000000006
DATA seq64<>+56(SB)/8, $0x0000000000000007
GLOBL seq64<>(SB), RODATA|NOPTR, $64

// shuffle_rot8: 32 byte indices; within every 4-byte group the order is
// (1, 2, 3, 0).
// NOTE(review): by name and layout this looks like a byte-shuffle control mask
// that rotates each 32-bit word right by 8 bits; no use is visible in this
// part of the file - confirm against the non-AVX-512 code paths.
DATA shuffle_rot8<>+0(SB)/4, $0x00030201
DATA shuffle_rot8<>+4(SB)/4, $0x04070605
DATA shuffle_rot8<>+8(SB)/4, $0x080b0a09
DATA shuffle_rot8<>+12(SB)/4, $0x0c0f0e0d
DATA shuffle_rot8<>+16(SB)/4, $0x10131211
DATA shuffle_rot8<>+20(SB)/4, $0x14171615
DATA shuffle_rot8<>+24(SB)/4, $0x181b1a19
DATA shuffle_rot8<>+28(SB)/4, $0x1c1f1e1d
GLOBL shuffle_rot8<>(SB), RODATA|NOPTR, $32

// shuffle_rot16: 32 byte indices; within every 4-byte group the order is
// (2, 3, 0, 1). The remaining entries and the GLOBL directive follow.
// NOTE(review): presumably the rotate-right-by-16 counterpart of shuffle_rot8;
// no use is visible in this part of the file.
DATA shuffle_rot16<>+0(SB)/4, $0x01000302
DATA shuffle_rot16<>+4(SB)/4, $0x05040706
DATA shuffle_rot16<>+8(SB)/4, $0x09080b0a
DATA shuffle_rot16<>+12(SB)/4, $0x0d0c0f0e
// shuffle_rot16, upper 16 bytes (same (2, 3, 0, 1) order per 4-byte group).
DATA shuffle_rot16<>+16(SB)/4, $0x11101312
DATA shuffle_rot16<>+20(SB)/4, $0x15141716
DATA shuffle_rot16<>+24(SB)/4, $0x19181b1a
DATA shuffle_rot16<>+28(SB)/4, $0x1d1c1f1e
GLOBL shuffle_rot16<>(SB), RODATA|NOPTR, $32

// func compressBlocksAVX512(out *[1024]byte, block *[16]uint32, cv *[8]uint32, counter uint64, blockLen uint32, flags uint32)
// Requires: AVX512BW, AVX512F
//
// Runs the same 7-round compression on sixteen 32-bit lanes at once. Every
// lane sees the same block, cv, blockLen and flags; lane i uses counter+i.
// Lane i writes its sixteen 32-bit result words to out[64*i : 64*i+64].
//
// Register map (each ZMM holds one word, replicated or varied per lane):
//   AX, CX, DX     out, block, cv pointers
//   Z1,  Z3 .. Z31 message words 0..15 (word i in Z(2i+1))
//   Z0,  Z2 .. Z14 state words 0..7  = cv[0..7]
//   Z16 .. Z22     state words 8..11 = iv[0..3]
//   Z24, Z26       state words 12,13 = low / high 32 bits of counter+lane
//   Z28, Z30       state words 14,15 = blockLen, flags
//   K1             carry mask for the counter, later the scatter mask
//
// Frame is $0-40: no locals, 40 bytes of arguments, no return value.
TEXT ·compressBlocksAVX512(SB), NOSPLIT, $0-40
	MOVQ out+0(FP), AX
	MOVQ block+8(FP), CX
	MOVQ cv+16(FP), DX

	// Initialize block vectors
	// Message word i is broadcast to all lanes of Z(2i+1).
	VPBROADCASTD (CX), Z1
	VPBROADCASTD 4(CX), Z3
	VPBROADCASTD 8(CX), Z5
	VPBROADCASTD 12(CX), Z7
	VPBROADCASTD 16(CX), Z9
	VPBROADCASTD 20(CX), Z11
	VPBROADCASTD 24(CX), Z13
	VPBROADCASTD 28(CX), Z15
	VPBROADCASTD 32(CX), Z17
	VPBROADCASTD 36(CX), Z19
	VPBROADCASTD 40(CX), Z21
	VPBROADCASTD 44(CX), Z23
	VPBROADCASTD 48(CX), Z25
	VPBROADCASTD 52(CX), Z27
	VPBROADCASTD 56(CX), Z29
	VPBROADCASTD 60(CX), Z31

	// Initialize state vectors
	// State words 0..7 come from cv, 8..11 from iv.
	VPBROADCASTD (DX), Z0
	VPBROADCASTD 4(DX), Z2
	VPBROADCASTD 8(DX), Z4
	VPBROADCASTD 12(DX), Z6
	VPBROADCASTD 16(DX), Z8
	VPBROADCASTD 20(DX), Z10
	VPBROADCASTD 24(DX), Z12
	VPBROADCASTD 28(DX), Z14
	VPBROADCASTD iv<>+0(SB), Z16
	VPBROADCASTD iv<>+4(SB), Z18
	VPBROADCASTD iv<>+8(SB), Z20
	VPBROADCASTD iv<>+12(SB), Z22
	// Z24 = low 32 bits of counter, plus the lane index 0..15.
	VPBROADCASTD counter+24(FP), Z24
	VPADDD seq<>+0(SB), Z24, Z24
	// Predicate $0x01 is unsigned less-than: K1 is set for lanes where the sum
	// is below the addend, i.e. where the 32-bit add above wrapped around.
	VPCMPUD $0x01, seq<>+0(SB), Z24, K1
	// Z26 = high 32 bits of counter; add the broadcast constant 1 (seq<>+4)
	// only in the lanes flagged by K1 to propagate the carry.
	VPBROADCASTD counter+28(FP), Z26
	VPADDD.BCST seq<>+4(SB), Z26, K1, Z26
	VPBROADCASTD blockLen+32(FP), Z28
	VPBROADCASTD flags+36(FP), Z30

	// Round 1
	// Every round is eight 14-instruction groups. A group works on four state
	// registers (a, b, c, d) and two message registers (mx, my):
	//   a += b + mx; d = ror(d ^ a, 16); c += d; b = ror(b ^ c, 12)
	//   a += b + my; d = ror(d ^ a, 8);  c += d; b = ror(b ^ c, 7)
	// The first four groups take the state columns, the last four the
	// diagonals. Rounds differ only in which message registers are used.
	// Message order: Z1 Z3 | Z5 Z7 | Z9 Z11 | Z13 Z15 | Z17 Z19 | Z21 Z23 | Z25 Z27 | Z29 Z31

	// column 0: state Z0/Z8/Z16/Z24, message Z1, Z3
	VPADDD Z0, Z8, Z0
	VPADDD Z1, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z3, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z5, Z7
	VPADDD Z2, Z10, Z2
	VPADDD Z5, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z7, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z9, Z11
	VPADDD Z4, Z12, Z4
	VPADDD Z9, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z11, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z13, Z15
	VPADDD Z6, Z14, Z6
	VPADDD Z13, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z15, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z17, Z19
	VPADDD Z0, Z10, Z0
	VPADDD Z17, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z19, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z21, Z23
	VPADDD Z2, Z12, Z2
	VPADDD Z21, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z23, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z25, Z27
	VPADDD Z4, Z14, Z4
	VPADDD Z25, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z27, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z29, Z31
	VPADDD Z6, Z8, Z6
	VPADDD Z29, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z31, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Round 2
	// Message order: Z5 Z13 | Z7 Z21 | Z15 Z1 | Z9 Z27 | Z3 Z23 | Z25 Z11 | Z19 Z29 | Z31 Z17

	// column 0: state Z0/Z8/Z16/Z24, message Z5, Z13
	VPADDD Z0, Z8, Z0
	VPADDD Z5, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z13, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z7, Z21
	VPADDD Z2, Z10, Z2
	VPADDD Z7, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z21, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z15, Z1
	VPADDD Z4, Z12, Z4
	VPADDD Z15, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z1, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z9, Z27
	VPADDD Z6, Z14, Z6
	VPADDD Z9, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z27, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z3, Z23
	VPADDD Z0, Z10, Z0
	VPADDD Z3, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z23, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z25, Z11
	VPADDD Z2, Z12, Z2
	VPADDD Z25, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z11, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z19, Z29
	VPADDD Z4, Z14, Z4
	VPADDD Z19, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z29, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z31, Z17
	VPADDD Z6, Z8, Z6
	VPADDD Z31, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z17, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Round 3
	// Message order: Z7 Z9 | Z21 Z25 | Z27 Z5 | Z15 Z29 | Z13 Z11 | Z19 Z1 | Z23 Z31 | Z17 Z3

	// column 0: state Z0/Z8/Z16/Z24, message Z7, Z9
	VPADDD Z0, Z8, Z0
	VPADDD Z7, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z9, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z21, Z25
	VPADDD Z2, Z10, Z2
	VPADDD Z21, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z25, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z27, Z5
	VPADDD Z4, Z12, Z4
	VPADDD Z27, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z5, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z15, Z29
	VPADDD Z6, Z14, Z6
	VPADDD Z15, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z29, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z13, Z11
	VPADDD Z0, Z10, Z0
	VPADDD Z13, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z11, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z19, Z1
	VPADDD Z2, Z12, Z2
	VPADDD Z19, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z1, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z23, Z31
	VPADDD Z4, Z14, Z4
	VPADDD Z23, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z31, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z17, Z3
	VPADDD Z6, Z8, Z6
	VPADDD Z17, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z3, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Round 4
	// Message order: Z21 Z15 | Z25 Z19 | Z29 Z7 | Z27 Z31 | Z9 Z1 | Z23 Z5 | Z11 Z17 | Z3 Z13

	// column 0: state Z0/Z8/Z16/Z24, message Z21, Z15
	VPADDD Z0, Z8, Z0
	VPADDD Z21, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z15, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z25, Z19
	VPADDD Z2, Z10, Z2
	VPADDD Z25, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z19, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z29, Z7
	VPADDD Z4, Z12, Z4
	VPADDD Z29, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z7, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z27, Z31
	VPADDD Z6, Z14, Z6
	VPADDD Z27, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z31, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z9, Z1
	VPADDD Z0, Z10, Z0
	VPADDD Z9, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z1, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z23, Z5
	VPADDD Z2, Z12, Z2
	VPADDD Z23, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z5, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z11, Z17
	VPADDD Z4, Z14, Z4
	VPADDD Z11, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z17, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z3, Z13
	VPADDD Z6, Z8, Z6
	VPADDD Z3, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z13, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Round 5
	// Message order: Z25 Z27 | Z19 Z23 | Z31 Z21 | Z29 Z17 | Z15 Z5 | Z11 Z7 | Z1 Z3 | Z13 Z9

	// column 0: state Z0/Z8/Z16/Z24, message Z25, Z27
	VPADDD Z0, Z8, Z0
	VPADDD Z25, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z27, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z19, Z23
	VPADDD Z2, Z10, Z2
	VPADDD Z19, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z23, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z31, Z21
	VPADDD Z4, Z12, Z4
	VPADDD Z31, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z21, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z29, Z17
	VPADDD Z6, Z14, Z6
	VPADDD Z29, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z17, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z15, Z5
	VPADDD Z0, Z10, Z0
	VPADDD Z15, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z5, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z11, Z7
	VPADDD Z2, Z12, Z2
	VPADDD Z11, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z7, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z1, Z3
	VPADDD Z4, Z14, Z4
	VPADDD Z1, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z3, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z13, Z9
	VPADDD Z6, Z8, Z6
	VPADDD Z13, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z9, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Round 6
	// Message order: Z19 Z29 | Z23 Z11 | Z17 Z25 | Z31 Z3 | Z27 Z7 | Z1 Z21 | Z5 Z13 | Z9 Z15

	// column 0: state Z0/Z8/Z16/Z24, message Z19, Z29
	VPADDD Z0, Z8, Z0
	VPADDD Z19, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z29, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z23, Z11
	VPADDD Z2, Z10, Z2
	VPADDD Z23, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z11, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z17, Z25
	VPADDD Z4, Z12, Z4
	VPADDD Z17, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z25, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z31, Z3
	VPADDD Z6, Z14, Z6
	VPADDD Z31, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z3, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z27, Z7
	VPADDD Z0, Z10, Z0
	VPADDD Z27, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z7, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z1, Z21
	VPADDD Z2, Z12, Z2
	VPADDD Z1, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z21, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z5, Z13
	VPADDD Z4, Z14, Z4
	VPADDD Z5, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z13, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z9, Z15
	VPADDD Z6, Z8, Z6
	VPADDD Z9, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z15, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Round 7
	// Message order: Z23 Z31 | Z11 Z1 | Z3 Z19 | Z17 Z13 | Z29 Z21 | Z5 Z25 | Z7 Z9 | Z15 Z27

	// column 0: state Z0/Z8/Z16/Z24, message Z23, Z31
	VPADDD Z0, Z8, Z0
	VPADDD Z23, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z0, Z8, Z0
	VPADDD Z31, Z0, Z0
	VPXORD Z24, Z0, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z16, Z24, Z16
	VPXORD Z8, Z16, Z8
	VPRORD $0x07, Z8, Z8
	// column 1: state Z2/Z10/Z18/Z26, message Z11, Z1
	VPADDD Z2, Z10, Z2
	VPADDD Z11, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z2, Z10, Z2
	VPADDD Z1, Z2, Z2
	VPXORD Z26, Z2, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z18, Z26, Z18
	VPXORD Z10, Z18, Z10
	VPRORD $0x07, Z10, Z10
	// column 2: state Z4/Z12/Z20/Z28, message Z3, Z19
	VPADDD Z4, Z12, Z4
	VPADDD Z3, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z4, Z12, Z4
	VPADDD Z19, Z4, Z4
	VPXORD Z28, Z4, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z20, Z28, Z20
	VPXORD Z12, Z20, Z12
	VPRORD $0x07, Z12, Z12
	// column 3: state Z6/Z14/Z22/Z30, message Z17, Z13
	VPADDD Z6, Z14, Z6
	VPADDD Z17, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z6, Z14, Z6
	VPADDD Z13, Z6, Z6
	VPXORD Z30, Z6, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z22, Z30, Z22
	VPXORD Z14, Z22, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 0: state Z0/Z10/Z20/Z30, message Z29, Z21
	VPADDD Z0, Z10, Z0
	VPADDD Z29, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x10, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x0c, Z10, Z10
	VPADDD Z0, Z10, Z0
	VPADDD Z21, Z0, Z0
	VPXORD Z30, Z0, Z30
	VPRORD $0x08, Z30, Z30
	VPADDD Z20, Z30, Z20
	VPXORD Z10, Z20, Z10
	VPRORD $0x07, Z10, Z10
	// diagonal 1: state Z2/Z12/Z22/Z24, message Z5, Z25
	VPADDD Z2, Z12, Z2
	VPADDD Z5, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x10, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x0c, Z12, Z12
	VPADDD Z2, Z12, Z2
	VPADDD Z25, Z2, Z2
	VPXORD Z24, Z2, Z24
	VPRORD $0x08, Z24, Z24
	VPADDD Z22, Z24, Z22
	VPXORD Z12, Z22, Z12
	VPRORD $0x07, Z12, Z12
	// diagonal 2: state Z4/Z14/Z16/Z26, message Z7, Z9
	VPADDD Z4, Z14, Z4
	VPADDD Z7, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x10, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x0c, Z14, Z14
	VPADDD Z4, Z14, Z4
	VPADDD Z9, Z4, Z4
	VPXORD Z26, Z4, Z26
	VPRORD $0x08, Z26, Z26
	VPADDD Z16, Z26, Z16
	VPXORD Z14, Z16, Z14
	VPRORD $0x07, Z14, Z14
	// diagonal 3: state Z6/Z8/Z18/Z28, message Z15, Z27
	VPADDD Z6, Z8, Z6
	VPADDD Z15, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x10, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x0c, Z8, Z8
	VPADDD Z6, Z8, Z6
	VPADDD Z27, Z6, Z6
	VPXORD Z28, Z6, Z28
	VPRORD $0x08, Z28, Z28
	VPADDD Z18, Z28, Z18
	VPXORD Z8, Z18, Z8
	VPRORD $0x07, Z8, Z8

	// Finalize CVs
	// Output words 0..7 = state[0..7] XOR state[8..15].
	VPXORD Z0, Z16, Z0
	VPXORD Z2, Z18, Z2
	VPXORD Z4, Z20, Z4
	VPXORD Z6, Z22, Z6
	VPXORD Z8, Z24, Z8
	VPXORD Z10, Z26, Z10
	VPXORD Z12, Z28, Z12
	VPXORD Z14, Z30, Z14
	// Output words 8..15 = state[8..15] XOR the input cv words, re-read from
	// memory with an embedded broadcast.
	VPXORD.BCST (DX), Z16, Z16
	VPXORD.BCST 4(DX), Z18, Z18
	VPXORD.BCST 8(DX), Z20, Z20
	VPXORD.BCST 12(DX), Z22, Z22
	VPXORD.BCST 16(DX), Z24, Z24
	VPXORD.BCST 20(DX), Z26, Z26
	VPXORD.BCST 24(DX), Z28, Z28
	VPXORD.BCST 28(DX), Z30, Z30
	// Z1 = lane index * 64: the byte offset of each lane's 64-byte output.
	// The message in Z1 is no longer needed at this point.
	VMOVDQU32 seq<>+0(SB), Z1
	VPSLLD $0x06, Z1, Z1
	// Scatter output word j (held in Z(2j)) to out + 4*j + 64*lane.
	// K1 is set back to all ones before every scatter.
	KXNORD K1, K1, K1
	VPSCATTERDD Z0, K1, (AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z2, K1, 4(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z4, K1, 8(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z6, K1, 12(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z8, K1, 16(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z10, K1, 20(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z12, K1, 24(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z14, K1, 28(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z16, K1, 32(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z18, K1, 36(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z20, K1, 40(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z22, K1, 44(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z24, K1, 48(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z26, K1, 52(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z28, K1, 56(AX)(Z1*1)
	KXNORD K1, K1, K1
	VPSCATTERDD Z30, K1, 60(AX)(Z1*1)
	RET

// func compressChunksAVX512(cvs *[16][8]uint32, buf *[16384]byte, key *[8]uint32, counter uint64, flags uint32)
// Requires: AVX512BW, AVX512F
TEXT ·compressChunksAVX512(SB), NOSPLIT, $192-36
	MOVQ cvs+0(FP), AX
	MOVQ buf+8(FP), CX
	MOVQ key+16(FP), DX

	// Initialize counter
	VPBROADCASTD counter+24(FP), Z0
	VPADDD seq<>+0(SB), Z0, Z0
	VPCMPUD $0x01, seq<>+0(SB), Z0, K1
	VPBROADCASTD counter+28(FP), Z2
	VPADDD.BCST seq<>+4(SB), Z2, K1, Z2
	VMOVDQU32 Z0, (SP)
	VMOVDQU32 Z2, 64(SP)

	// Initialize flags
	VPBROADCASTD flags+32(FP), Z0
	VMOVDQU32 Z0, 128(SP)
	ORL $0x01, 128(SP)
	ORL $0x02, 188(SP)

	// Load key
	VPBROADCASTD (DX), Z0
	VPBROADCASTD 4(DX), Z2
	VPBROADCASTD 8(DX), Z4
	VPBROADCASTD 12(DX), Z6
	VPBROADCASTD 16(DX), Z8
	VPBROADCASTD 20(DX), Z10
	VPBROADCASTD 24(DX), Z12
	VPBROADCASTD 28(DX), Z14

	// Loop index
	XORQ DX, DX

loop:
	// Load transposed block
	VMOVDQU32 seq<>+0(SB), Z16
	VPSLLD $0x0a, Z16, Z16
	KXNORD K1, K1, K1
	VPGATHERDD (CX)(Z16*1), K1, Z1
	KXNORD K1, K1, K1
	VPGATHERDD 4(CX)(Z16*1), K1, Z3
	KXNORD K1, K1, K1
	VPGATHERDD 8(CX)(Z16*1), K1, Z5
	KXNORD K1, K1, K1
	VPGATHERDD 12(CX)(Z16*1), K1, Z7
	KXNORD K1, K1, K1
	VPGATHERDD 16(CX)(Z16*1), K1, Z9
	KXNORD K1, K1, K1
	VPGATHERDD 20(CX)(Z16*1), K1, Z11
1007 KXNORD K1, K1, K1 1008 VPGATHERDD 24(CX)(Z16*1), K1, Z13 1009 KXNORD K1, K1, K1 1010 VPGATHERDD 28(CX)(Z16*1), K1, Z15 1011 KXNORD K1, K1, K1 1012 VPGATHERDD 32(CX)(Z16*1), K1, Z17 1013 KXNORD K1, K1, K1 1014 VPGATHERDD 36(CX)(Z16*1), K1, Z19 1015 KXNORD K1, K1, K1 1016 VPGATHERDD 40(CX)(Z16*1), K1, Z21 1017 KXNORD K1, K1, K1 1018 VPGATHERDD 44(CX)(Z16*1), K1, Z23 1019 KXNORD K1, K1, K1 1020 VPGATHERDD 48(CX)(Z16*1), K1, Z25 1021 KXNORD K1, K1, K1 1022 VPGATHERDD 52(CX)(Z16*1), K1, Z27 1023 KXNORD K1, K1, K1 1024 VPGATHERDD 56(CX)(Z16*1), K1, Z29 1025 KXNORD K1, K1, K1 1026 VPGATHERDD 60(CX)(Z16*1), K1, Z31 1027 ADDQ $0x40, CX 1028 1029 // Reload state vectors (other than CVs) 1030 VPBROADCASTD iv<>+0(SB), Z16 1031 VPBROADCASTD iv<>+4(SB), Z18 1032 VPBROADCASTD iv<>+8(SB), Z20 1033 VPBROADCASTD iv<>+12(SB), Z22 1034 VMOVDQU32 (SP), Z24 1035 VMOVDQU32 64(SP), Z26 1036 VPBROADCASTD seq<>+4(SB), Z28 1037 VPSLLD $0x06, Z28, Z28 1038 VPBROADCASTD 128(SP)(DX*4), Z30 1039 1040 // Round 1 1041 VPADDD Z0, Z8, Z0 1042 VPADDD Z1, Z0, Z0 1043 VPXORD Z24, Z0, Z24 1044 VPRORD $0x10, Z24, Z24 1045 VPADDD Z16, Z24, Z16 1046 VPXORD Z8, Z16, Z8 1047 VPRORD $0x0c, Z8, Z8 1048 VPADDD Z0, Z8, Z0 1049 VPADDD Z3, Z0, Z0 1050 VPXORD Z24, Z0, Z24 1051 VPRORD $0x08, Z24, Z24 1052 VPADDD Z16, Z24, Z16 1053 VPXORD Z8, Z16, Z8 1054 VPRORD $0x07, Z8, Z8 1055 VPADDD Z2, Z10, Z2 1056 VPADDD Z5, Z2, Z2 1057 VPXORD Z26, Z2, Z26 1058 VPRORD $0x10, Z26, Z26 1059 VPADDD Z18, Z26, Z18 1060 VPXORD Z10, Z18, Z10 1061 VPRORD $0x0c, Z10, Z10 1062 VPADDD Z2, Z10, Z2 1063 VPADDD Z7, Z2, Z2 1064 VPXORD Z26, Z2, Z26 1065 VPRORD $0x08, Z26, Z26 1066 VPADDD Z18, Z26, Z18 1067 VPXORD Z10, Z18, Z10 1068 VPRORD $0x07, Z10, Z10 1069 VPADDD Z4, Z12, Z4 1070 VPADDD Z9, Z4, Z4 1071 VPXORD Z28, Z4, Z28 1072 VPRORD $0x10, Z28, Z28 1073 VPADDD Z20, Z28, Z20 1074 VPXORD Z12, Z20, Z12 1075 VPRORD $0x0c, Z12, Z12 1076 VPADDD Z4, Z12, Z4 1077 VPADDD Z11, Z4, Z4 1078 VPXORD Z28, Z4, Z28 1079 VPRORD $0x08, Z28, Z28 1080 
VPADDD Z20, Z28, Z20 1081 VPXORD Z12, Z20, Z12 1082 VPRORD $0x07, Z12, Z12 1083 VPADDD Z6, Z14, Z6 1084 VPADDD Z13, Z6, Z6 1085 VPXORD Z30, Z6, Z30 1086 VPRORD $0x10, Z30, Z30 1087 VPADDD Z22, Z30, Z22 1088 VPXORD Z14, Z22, Z14 1089 VPRORD $0x0c, Z14, Z14 1090 VPADDD Z6, Z14, Z6 1091 VPADDD Z15, Z6, Z6 1092 VPXORD Z30, Z6, Z30 1093 VPRORD $0x08, Z30, Z30 1094 VPADDD Z22, Z30, Z22 1095 VPXORD Z14, Z22, Z14 1096 VPRORD $0x07, Z14, Z14 1097 VPADDD Z0, Z10, Z0 1098 VPADDD Z17, Z0, Z0 1099 VPXORD Z30, Z0, Z30 1100 VPRORD $0x10, Z30, Z30 1101 VPADDD Z20, Z30, Z20 1102 VPXORD Z10, Z20, Z10 1103 VPRORD $0x0c, Z10, Z10 1104 VPADDD Z0, Z10, Z0 1105 VPADDD Z19, Z0, Z0 1106 VPXORD Z30, Z0, Z30 1107 VPRORD $0x08, Z30, Z30 1108 VPADDD Z20, Z30, Z20 1109 VPXORD Z10, Z20, Z10 1110 VPRORD $0x07, Z10, Z10 1111 VPADDD Z2, Z12, Z2 1112 VPADDD Z21, Z2, Z2 1113 VPXORD Z24, Z2, Z24 1114 VPRORD $0x10, Z24, Z24 1115 VPADDD Z22, Z24, Z22 1116 VPXORD Z12, Z22, Z12 1117 VPRORD $0x0c, Z12, Z12 1118 VPADDD Z2, Z12, Z2 1119 VPADDD Z23, Z2, Z2 1120 VPXORD Z24, Z2, Z24 1121 VPRORD $0x08, Z24, Z24 1122 VPADDD Z22, Z24, Z22 1123 VPXORD Z12, Z22, Z12 1124 VPRORD $0x07, Z12, Z12 1125 VPADDD Z4, Z14, Z4 1126 VPADDD Z25, Z4, Z4 1127 VPXORD Z26, Z4, Z26 1128 VPRORD $0x10, Z26, Z26 1129 VPADDD Z16, Z26, Z16 1130 VPXORD Z14, Z16, Z14 1131 VPRORD $0x0c, Z14, Z14 1132 VPADDD Z4, Z14, Z4 1133 VPADDD Z27, Z4, Z4 1134 VPXORD Z26, Z4, Z26 1135 VPRORD $0x08, Z26, Z26 1136 VPADDD Z16, Z26, Z16 1137 VPXORD Z14, Z16, Z14 1138 VPRORD $0x07, Z14, Z14 1139 VPADDD Z6, Z8, Z6 1140 VPADDD Z29, Z6, Z6 1141 VPXORD Z28, Z6, Z28 1142 VPRORD $0x10, Z28, Z28 1143 VPADDD Z18, Z28, Z18 1144 VPXORD Z8, Z18, Z8 1145 VPRORD $0x0c, Z8, Z8 1146 VPADDD Z6, Z8, Z6 1147 VPADDD Z31, Z6, Z6 1148 VPXORD Z28, Z6, Z28 1149 VPRORD $0x08, Z28, Z28 1150 VPADDD Z18, Z28, Z18 1151 VPXORD Z8, Z18, Z8 1152 VPRORD $0x07, Z8, Z8 1153 1154 // Round 2 1155 VPADDD Z0, Z8, Z0 1156 VPADDD Z5, Z0, Z0 1157 VPXORD Z24, Z0, Z24 1158 VPRORD $0x10, Z24, Z24 1159 
VPADDD Z16, Z24, Z16 1160 VPXORD Z8, Z16, Z8 1161 VPRORD $0x0c, Z8, Z8 1162 VPADDD Z0, Z8, Z0 1163 VPADDD Z13, Z0, Z0 1164 VPXORD Z24, Z0, Z24 1165 VPRORD $0x08, Z24, Z24 1166 VPADDD Z16, Z24, Z16 1167 VPXORD Z8, Z16, Z8 1168 VPRORD $0x07, Z8, Z8 1169 VPADDD Z2, Z10, Z2 1170 VPADDD Z7, Z2, Z2 1171 VPXORD Z26, Z2, Z26 1172 VPRORD $0x10, Z26, Z26 1173 VPADDD Z18, Z26, Z18 1174 VPXORD Z10, Z18, Z10 1175 VPRORD $0x0c, Z10, Z10 1176 VPADDD Z2, Z10, Z2 1177 VPADDD Z21, Z2, Z2 1178 VPXORD Z26, Z2, Z26 1179 VPRORD $0x08, Z26, Z26 1180 VPADDD Z18, Z26, Z18 1181 VPXORD Z10, Z18, Z10 1182 VPRORD $0x07, Z10, Z10 1183 VPADDD Z4, Z12, Z4 1184 VPADDD Z15, Z4, Z4 1185 VPXORD Z28, Z4, Z28 1186 VPRORD $0x10, Z28, Z28 1187 VPADDD Z20, Z28, Z20 1188 VPXORD Z12, Z20, Z12 1189 VPRORD $0x0c, Z12, Z12 1190 VPADDD Z4, Z12, Z4 1191 VPADDD Z1, Z4, Z4 1192 VPXORD Z28, Z4, Z28 1193 VPRORD $0x08, Z28, Z28 1194 VPADDD Z20, Z28, Z20 1195 VPXORD Z12, Z20, Z12 1196 VPRORD $0x07, Z12, Z12 1197 VPADDD Z6, Z14, Z6 1198 VPADDD Z9, Z6, Z6 1199 VPXORD Z30, Z6, Z30 1200 VPRORD $0x10, Z30, Z30 1201 VPADDD Z22, Z30, Z22 1202 VPXORD Z14, Z22, Z14 1203 VPRORD $0x0c, Z14, Z14 1204 VPADDD Z6, Z14, Z6 1205 VPADDD Z27, Z6, Z6 1206 VPXORD Z30, Z6, Z30 1207 VPRORD $0x08, Z30, Z30 1208 VPADDD Z22, Z30, Z22 1209 VPXORD Z14, Z22, Z14 1210 VPRORD $0x07, Z14, Z14 1211 VPADDD Z0, Z10, Z0 1212 VPADDD Z3, Z0, Z0 1213 VPXORD Z30, Z0, Z30 1214 VPRORD $0x10, Z30, Z30 1215 VPADDD Z20, Z30, Z20 1216 VPXORD Z10, Z20, Z10 1217 VPRORD $0x0c, Z10, Z10 1218 VPADDD Z0, Z10, Z0 1219 VPADDD Z23, Z0, Z0 1220 VPXORD Z30, Z0, Z30 1221 VPRORD $0x08, Z30, Z30 1222 VPADDD Z20, Z30, Z20 1223 VPXORD Z10, Z20, Z10 1224 VPRORD $0x07, Z10, Z10 1225 VPADDD Z2, Z12, Z2 1226 VPADDD Z25, Z2, Z2 1227 VPXORD Z24, Z2, Z24 1228 VPRORD $0x10, Z24, Z24 1229 VPADDD Z22, Z24, Z22 1230 VPXORD Z12, Z22, Z12 1231 VPRORD $0x0c, Z12, Z12 1232 VPADDD Z2, Z12, Z2 1233 VPADDD Z11, Z2, Z2 1234 VPXORD Z24, Z2, Z24 1235 VPRORD $0x08, Z24, Z24 1236 VPADDD Z22, Z24, Z22 
1237 VPXORD Z12, Z22, Z12 1238 VPRORD $0x07, Z12, Z12 1239 VPADDD Z4, Z14, Z4 1240 VPADDD Z19, Z4, Z4 1241 VPXORD Z26, Z4, Z26 1242 VPRORD $0x10, Z26, Z26 1243 VPADDD Z16, Z26, Z16 1244 VPXORD Z14, Z16, Z14 1245 VPRORD $0x0c, Z14, Z14 1246 VPADDD Z4, Z14, Z4 1247 VPADDD Z29, Z4, Z4 1248 VPXORD Z26, Z4, Z26 1249 VPRORD $0x08, Z26, Z26 1250 VPADDD Z16, Z26, Z16 1251 VPXORD Z14, Z16, Z14 1252 VPRORD $0x07, Z14, Z14 1253 VPADDD Z6, Z8, Z6 1254 VPADDD Z31, Z6, Z6 1255 VPXORD Z28, Z6, Z28 1256 VPRORD $0x10, Z28, Z28 1257 VPADDD Z18, Z28, Z18 1258 VPXORD Z8, Z18, Z8 1259 VPRORD $0x0c, Z8, Z8 1260 VPADDD Z6, Z8, Z6 1261 VPADDD Z17, Z6, Z6 1262 VPXORD Z28, Z6, Z28 1263 VPRORD $0x08, Z28, Z28 1264 VPADDD Z18, Z28, Z18 1265 VPXORD Z8, Z18, Z8 1266 VPRORD $0x07, Z8, Z8 1267 1268 // Round 3 1269 VPADDD Z0, Z8, Z0 1270 VPADDD Z7, Z0, Z0 1271 VPXORD Z24, Z0, Z24 1272 VPRORD $0x10, Z24, Z24 1273 VPADDD Z16, Z24, Z16 1274 VPXORD Z8, Z16, Z8 1275 VPRORD $0x0c, Z8, Z8 1276 VPADDD Z0, Z8, Z0 1277 VPADDD Z9, Z0, Z0 1278 VPXORD Z24, Z0, Z24 1279 VPRORD $0x08, Z24, Z24 1280 VPADDD Z16, Z24, Z16 1281 VPXORD Z8, Z16, Z8 1282 VPRORD $0x07, Z8, Z8 1283 VPADDD Z2, Z10, Z2 1284 VPADDD Z21, Z2, Z2 1285 VPXORD Z26, Z2, Z26 1286 VPRORD $0x10, Z26, Z26 1287 VPADDD Z18, Z26, Z18 1288 VPXORD Z10, Z18, Z10 1289 VPRORD $0x0c, Z10, Z10 1290 VPADDD Z2, Z10, Z2 1291 VPADDD Z25, Z2, Z2 1292 VPXORD Z26, Z2, Z26 1293 VPRORD $0x08, Z26, Z26 1294 VPADDD Z18, Z26, Z18 1295 VPXORD Z10, Z18, Z10 1296 VPRORD $0x07, Z10, Z10 1297 VPADDD Z4, Z12, Z4 1298 VPADDD Z27, Z4, Z4 1299 VPXORD Z28, Z4, Z28 1300 VPRORD $0x10, Z28, Z28 1301 VPADDD Z20, Z28, Z20 1302 VPXORD Z12, Z20, Z12 1303 VPRORD $0x0c, Z12, Z12 1304 VPADDD Z4, Z12, Z4 1305 VPADDD Z5, Z4, Z4 1306 VPXORD Z28, Z4, Z28 1307 VPRORD $0x08, Z28, Z28 1308 VPADDD Z20, Z28, Z20 1309 VPXORD Z12, Z20, Z12 1310 VPRORD $0x07, Z12, Z12 1311 VPADDD Z6, Z14, Z6 1312 VPADDD Z15, Z6, Z6 1313 VPXORD Z30, Z6, Z30 1314 VPRORD $0x10, Z30, Z30 1315 VPADDD Z22, Z30, Z22 1316 
VPXORD Z14, Z22, Z14 1317 VPRORD $0x0c, Z14, Z14 1318 VPADDD Z6, Z14, Z6 1319 VPADDD Z29, Z6, Z6 1320 VPXORD Z30, Z6, Z30 1321 VPRORD $0x08, Z30, Z30 1322 VPADDD Z22, Z30, Z22 1323 VPXORD Z14, Z22, Z14 1324 VPRORD $0x07, Z14, Z14 1325 VPADDD Z0, Z10, Z0 1326 VPADDD Z13, Z0, Z0 1327 VPXORD Z30, Z0, Z30 1328 VPRORD $0x10, Z30, Z30 1329 VPADDD Z20, Z30, Z20 1330 VPXORD Z10, Z20, Z10 1331 VPRORD $0x0c, Z10, Z10 1332 VPADDD Z0, Z10, Z0 1333 VPADDD Z11, Z0, Z0 1334 VPXORD Z30, Z0, Z30 1335 VPRORD $0x08, Z30, Z30 1336 VPADDD Z20, Z30, Z20 1337 VPXORD Z10, Z20, Z10 1338 VPRORD $0x07, Z10, Z10 1339 VPADDD Z2, Z12, Z2 1340 VPADDD Z19, Z2, Z2 1341 VPXORD Z24, Z2, Z24 1342 VPRORD $0x10, Z24, Z24 1343 VPADDD Z22, Z24, Z22 1344 VPXORD Z12, Z22, Z12 1345 VPRORD $0x0c, Z12, Z12 1346 VPADDD Z2, Z12, Z2 1347 VPADDD Z1, Z2, Z2 1348 VPXORD Z24, Z2, Z24 1349 VPRORD $0x08, Z24, Z24 1350 VPADDD Z22, Z24, Z22 1351 VPXORD Z12, Z22, Z12 1352 VPRORD $0x07, Z12, Z12 1353 VPADDD Z4, Z14, Z4 1354 VPADDD Z23, Z4, Z4 1355 VPXORD Z26, Z4, Z26 1356 VPRORD $0x10, Z26, Z26 1357 VPADDD Z16, Z26, Z16 1358 VPXORD Z14, Z16, Z14 1359 VPRORD $0x0c, Z14, Z14 1360 VPADDD Z4, Z14, Z4 1361 VPADDD Z31, Z4, Z4 1362 VPXORD Z26, Z4, Z26 1363 VPRORD $0x08, Z26, Z26 1364 VPADDD Z16, Z26, Z16 1365 VPXORD Z14, Z16, Z14 1366 VPRORD $0x07, Z14, Z14 1367 VPADDD Z6, Z8, Z6 1368 VPADDD Z17, Z6, Z6 1369 VPXORD Z28, Z6, Z28 1370 VPRORD $0x10, Z28, Z28 1371 VPADDD Z18, Z28, Z18 1372 VPXORD Z8, Z18, Z8 1373 VPRORD $0x0c, Z8, Z8 1374 VPADDD Z6, Z8, Z6 1375 VPADDD Z3, Z6, Z6 1376 VPXORD Z28, Z6, Z28 1377 VPRORD $0x08, Z28, Z28 1378 VPADDD Z18, Z28, Z18 1379 VPXORD Z8, Z18, Z8 1380 VPRORD $0x07, Z8, Z8 1381 1382 // Round 4 1383 VPADDD Z0, Z8, Z0 1384 VPADDD Z21, Z0, Z0 1385 VPXORD Z24, Z0, Z24 1386 VPRORD $0x10, Z24, Z24 1387 VPADDD Z16, Z24, Z16 1388 VPXORD Z8, Z16, Z8 1389 VPRORD $0x0c, Z8, Z8 1390 VPADDD Z0, Z8, Z0 1391 VPADDD Z15, Z0, Z0 1392 VPXORD Z24, Z0, Z24 1393 VPRORD $0x08, Z24, Z24 1394 VPADDD Z16, Z24, Z16 1395 
VPXORD Z8, Z16, Z8 1396 VPRORD $0x07, Z8, Z8 1397 VPADDD Z2, Z10, Z2 1398 VPADDD Z25, Z2, Z2 1399 VPXORD Z26, Z2, Z26 1400 VPRORD $0x10, Z26, Z26 1401 VPADDD Z18, Z26, Z18 1402 VPXORD Z10, Z18, Z10 1403 VPRORD $0x0c, Z10, Z10 1404 VPADDD Z2, Z10, Z2 1405 VPADDD Z19, Z2, Z2 1406 VPXORD Z26, Z2, Z26 1407 VPRORD $0x08, Z26, Z26 1408 VPADDD Z18, Z26, Z18 1409 VPXORD Z10, Z18, Z10 1410 VPRORD $0x07, Z10, Z10 1411 VPADDD Z4, Z12, Z4 1412 VPADDD Z29, Z4, Z4 1413 VPXORD Z28, Z4, Z28 1414 VPRORD $0x10, Z28, Z28 1415 VPADDD Z20, Z28, Z20 1416 VPXORD Z12, Z20, Z12 1417 VPRORD $0x0c, Z12, Z12 1418 VPADDD Z4, Z12, Z4 1419 VPADDD Z7, Z4, Z4 1420 VPXORD Z28, Z4, Z28 1421 VPRORD $0x08, Z28, Z28 1422 VPADDD Z20, Z28, Z20 1423 VPXORD Z12, Z20, Z12 1424 VPRORD $0x07, Z12, Z12 1425 VPADDD Z6, Z14, Z6 1426 VPADDD Z27, Z6, Z6 1427 VPXORD Z30, Z6, Z30 1428 VPRORD $0x10, Z30, Z30 1429 VPADDD Z22, Z30, Z22 1430 VPXORD Z14, Z22, Z14 1431 VPRORD $0x0c, Z14, Z14 1432 VPADDD Z6, Z14, Z6 1433 VPADDD Z31, Z6, Z6 1434 VPXORD Z30, Z6, Z30 1435 VPRORD $0x08, Z30, Z30 1436 VPADDD Z22, Z30, Z22 1437 VPXORD Z14, Z22, Z14 1438 VPRORD $0x07, Z14, Z14 1439 VPADDD Z0, Z10, Z0 1440 VPADDD Z9, Z0, Z0 1441 VPXORD Z30, Z0, Z30 1442 VPRORD $0x10, Z30, Z30 1443 VPADDD Z20, Z30, Z20 1444 VPXORD Z10, Z20, Z10 1445 VPRORD $0x0c, Z10, Z10 1446 VPADDD Z0, Z10, Z0 1447 VPADDD Z1, Z0, Z0 1448 VPXORD Z30, Z0, Z30 1449 VPRORD $0x08, Z30, Z30 1450 VPADDD Z20, Z30, Z20 1451 VPXORD Z10, Z20, Z10 1452 VPRORD $0x07, Z10, Z10 1453 VPADDD Z2, Z12, Z2 1454 VPADDD Z23, Z2, Z2 1455 VPXORD Z24, Z2, Z24 1456 VPRORD $0x10, Z24, Z24 1457 VPADDD Z22, Z24, Z22 1458 VPXORD Z12, Z22, Z12 1459 VPRORD $0x0c, Z12, Z12 1460 VPADDD Z2, Z12, Z2 1461 VPADDD Z5, Z2, Z2 1462 VPXORD Z24, Z2, Z24 1463 VPRORD $0x08, Z24, Z24 1464 VPADDD Z22, Z24, Z22 1465 VPXORD Z12, Z22, Z12 1466 VPRORD $0x07, Z12, Z12 1467 VPADDD Z4, Z14, Z4 1468 VPADDD Z11, Z4, Z4 1469 VPXORD Z26, Z4, Z26 1470 VPRORD $0x10, Z26, Z26 1471 VPADDD Z16, Z26, Z16 1472 VPXORD Z14, Z16, 
Z14 1473 VPRORD $0x0c, Z14, Z14 1474 VPADDD Z4, Z14, Z4 1475 VPADDD Z17, Z4, Z4 1476 VPXORD Z26, Z4, Z26 1477 VPRORD $0x08, Z26, Z26 1478 VPADDD Z16, Z26, Z16 1479 VPXORD Z14, Z16, Z14 1480 VPRORD $0x07, Z14, Z14 1481 VPADDD Z6, Z8, Z6 1482 VPADDD Z3, Z6, Z6 1483 VPXORD Z28, Z6, Z28 1484 VPRORD $0x10, Z28, Z28 1485 VPADDD Z18, Z28, Z18 1486 VPXORD Z8, Z18, Z8 1487 VPRORD $0x0c, Z8, Z8 1488 VPADDD Z6, Z8, Z6 1489 VPADDD Z13, Z6, Z6 1490 VPXORD Z28, Z6, Z28 1491 VPRORD $0x08, Z28, Z28 1492 VPADDD Z18, Z28, Z18 1493 VPXORD Z8, Z18, Z8 1494 VPRORD $0x07, Z8, Z8 1495 1496 // Round 5 1497 VPADDD Z0, Z8, Z0 1498 VPADDD Z25, Z0, Z0 1499 VPXORD Z24, Z0, Z24 1500 VPRORD $0x10, Z24, Z24 1501 VPADDD Z16, Z24, Z16 1502 VPXORD Z8, Z16, Z8 1503 VPRORD $0x0c, Z8, Z8 1504 VPADDD Z0, Z8, Z0 1505 VPADDD Z27, Z0, Z0 1506 VPXORD Z24, Z0, Z24 1507 VPRORD $0x08, Z24, Z24 1508 VPADDD Z16, Z24, Z16 1509 VPXORD Z8, Z16, Z8 1510 VPRORD $0x07, Z8, Z8 1511 VPADDD Z2, Z10, Z2 1512 VPADDD Z19, Z2, Z2 1513 VPXORD Z26, Z2, Z26 1514 VPRORD $0x10, Z26, Z26 1515 VPADDD Z18, Z26, Z18 1516 VPXORD Z10, Z18, Z10 1517 VPRORD $0x0c, Z10, Z10 1518 VPADDD Z2, Z10, Z2 1519 VPADDD Z23, Z2, Z2 1520 VPXORD Z26, Z2, Z26 1521 VPRORD $0x08, Z26, Z26 1522 VPADDD Z18, Z26, Z18 1523 VPXORD Z10, Z18, Z10 1524 VPRORD $0x07, Z10, Z10 1525 VPADDD Z4, Z12, Z4 1526 VPADDD Z31, Z4, Z4 1527 VPXORD Z28, Z4, Z28 1528 VPRORD $0x10, Z28, Z28 1529 VPADDD Z20, Z28, Z20 1530 VPXORD Z12, Z20, Z12 1531 VPRORD $0x0c, Z12, Z12 1532 VPADDD Z4, Z12, Z4 1533 VPADDD Z21, Z4, Z4 1534 VPXORD Z28, Z4, Z28 1535 VPRORD $0x08, Z28, Z28 1536 VPADDD Z20, Z28, Z20 1537 VPXORD Z12, Z20, Z12 1538 VPRORD $0x07, Z12, Z12 1539 VPADDD Z6, Z14, Z6 1540 VPADDD Z29, Z6, Z6 1541 VPXORD Z30, Z6, Z30 1542 VPRORD $0x10, Z30, Z30 1543 VPADDD Z22, Z30, Z22 1544 VPXORD Z14, Z22, Z14 1545 VPRORD $0x0c, Z14, Z14 1546 VPADDD Z6, Z14, Z6 1547 VPADDD Z17, Z6, Z6 1548 VPXORD Z30, Z6, Z30 1549 VPRORD $0x08, Z30, Z30 1550 VPADDD Z22, Z30, Z22 1551 VPXORD Z14, Z22, Z14 1552 
VPRORD $0x07, Z14, Z14 1553 VPADDD Z0, Z10, Z0 1554 VPADDD Z15, Z0, Z0 1555 VPXORD Z30, Z0, Z30 1556 VPRORD $0x10, Z30, Z30 1557 VPADDD Z20, Z30, Z20 1558 VPXORD Z10, Z20, Z10 1559 VPRORD $0x0c, Z10, Z10 1560 VPADDD Z0, Z10, Z0 1561 VPADDD Z5, Z0, Z0 1562 VPXORD Z30, Z0, Z30 1563 VPRORD $0x08, Z30, Z30 1564 VPADDD Z20, Z30, Z20 1565 VPXORD Z10, Z20, Z10 1566 VPRORD $0x07, Z10, Z10 1567 VPADDD Z2, Z12, Z2 1568 VPADDD Z11, Z2, Z2 1569 VPXORD Z24, Z2, Z24 1570 VPRORD $0x10, Z24, Z24 1571 VPADDD Z22, Z24, Z22 1572 VPXORD Z12, Z22, Z12 1573 VPRORD $0x0c, Z12, Z12 1574 VPADDD Z2, Z12, Z2 1575 VPADDD Z7, Z2, Z2 1576 VPXORD Z24, Z2, Z24 1577 VPRORD $0x08, Z24, Z24 1578 VPADDD Z22, Z24, Z22 1579 VPXORD Z12, Z22, Z12 1580 VPRORD $0x07, Z12, Z12 1581 VPADDD Z4, Z14, Z4 1582 VPADDD Z1, Z4, Z4 1583 VPXORD Z26, Z4, Z26 1584 VPRORD $0x10, Z26, Z26 1585 VPADDD Z16, Z26, Z16 1586 VPXORD Z14, Z16, Z14 1587 VPRORD $0x0c, Z14, Z14 1588 VPADDD Z4, Z14, Z4 1589 VPADDD Z3, Z4, Z4 1590 VPXORD Z26, Z4, Z26 1591 VPRORD $0x08, Z26, Z26 1592 VPADDD Z16, Z26, Z16 1593 VPXORD Z14, Z16, Z14 1594 VPRORD $0x07, Z14, Z14 1595 VPADDD Z6, Z8, Z6 1596 VPADDD Z13, Z6, Z6 1597 VPXORD Z28, Z6, Z28 1598 VPRORD $0x10, Z28, Z28 1599 VPADDD Z18, Z28, Z18 1600 VPXORD Z8, Z18, Z8 1601 VPRORD $0x0c, Z8, Z8 1602 VPADDD Z6, Z8, Z6 1603 VPADDD Z9, Z6, Z6 1604 VPXORD Z28, Z6, Z28 1605 VPRORD $0x08, Z28, Z28 1606 VPADDD Z18, Z28, Z18 1607 VPXORD Z8, Z18, Z8 1608 VPRORD $0x07, Z8, Z8 1609 1610 // Round 6 1611 VPADDD Z0, Z8, Z0 1612 VPADDD Z19, Z0, Z0 1613 VPXORD Z24, Z0, Z24 1614 VPRORD $0x10, Z24, Z24 1615 VPADDD Z16, Z24, Z16 1616 VPXORD Z8, Z16, Z8 1617 VPRORD $0x0c, Z8, Z8 1618 VPADDD Z0, Z8, Z0 1619 VPADDD Z29, Z0, Z0 1620 VPXORD Z24, Z0, Z24 1621 VPRORD $0x08, Z24, Z24 1622 VPADDD Z16, Z24, Z16 1623 VPXORD Z8, Z16, Z8 1624 VPRORD $0x07, Z8, Z8 1625 VPADDD Z2, Z10, Z2 1626 VPADDD Z23, Z2, Z2 1627 VPXORD Z26, Z2, Z26 1628 VPRORD $0x10, Z26, Z26 1629 VPADDD Z18, Z26, Z18 1630 VPXORD Z10, Z18, Z10 1631 VPRORD 
$0x0c, Z10, Z10 1632 VPADDD Z2, Z10, Z2 1633 VPADDD Z11, Z2, Z2 1634 VPXORD Z26, Z2, Z26 1635 VPRORD $0x08, Z26, Z26 1636 VPADDD Z18, Z26, Z18 1637 VPXORD Z10, Z18, Z10 1638 VPRORD $0x07, Z10, Z10 1639 VPADDD Z4, Z12, Z4 1640 VPADDD Z17, Z4, Z4 1641 VPXORD Z28, Z4, Z28 1642 VPRORD $0x10, Z28, Z28 1643 VPADDD Z20, Z28, Z20 1644 VPXORD Z12, Z20, Z12 1645 VPRORD $0x0c, Z12, Z12 1646 VPADDD Z4, Z12, Z4 1647 VPADDD Z25, Z4, Z4 1648 VPXORD Z28, Z4, Z28 1649 VPRORD $0x08, Z28, Z28 1650 VPADDD Z20, Z28, Z20 1651 VPXORD Z12, Z20, Z12 1652 VPRORD $0x07, Z12, Z12 1653 VPADDD Z6, Z14, Z6 1654 VPADDD Z31, Z6, Z6 1655 VPXORD Z30, Z6, Z30 1656 VPRORD $0x10, Z30, Z30 1657 VPADDD Z22, Z30, Z22 1658 VPXORD Z14, Z22, Z14 1659 VPRORD $0x0c, Z14, Z14 1660 VPADDD Z6, Z14, Z6 1661 VPADDD Z3, Z6, Z6 1662 VPXORD Z30, Z6, Z30 1663 VPRORD $0x08, Z30, Z30 1664 VPADDD Z22, Z30, Z22 1665 VPXORD Z14, Z22, Z14 1666 VPRORD $0x07, Z14, Z14 1667 VPADDD Z0, Z10, Z0 1668 VPADDD Z27, Z0, Z0 1669 VPXORD Z30, Z0, Z30 1670 VPRORD $0x10, Z30, Z30 1671 VPADDD Z20, Z30, Z20 1672 VPXORD Z10, Z20, Z10 1673 VPRORD $0x0c, Z10, Z10 1674 VPADDD Z0, Z10, Z0 1675 VPADDD Z7, Z0, Z0 1676 VPXORD Z30, Z0, Z30 1677 VPRORD $0x08, Z30, Z30 1678 VPADDD Z20, Z30, Z20 1679 VPXORD Z10, Z20, Z10 1680 VPRORD $0x07, Z10, Z10 1681 VPADDD Z2, Z12, Z2 1682 VPADDD Z1, Z2, Z2 1683 VPXORD Z24, Z2, Z24 1684 VPRORD $0x10, Z24, Z24 1685 VPADDD Z22, Z24, Z22 1686 VPXORD Z12, Z22, Z12 1687 VPRORD $0x0c, Z12, Z12 1688 VPADDD Z2, Z12, Z2 1689 VPADDD Z21, Z2, Z2 1690 VPXORD Z24, Z2, Z24 1691 VPRORD $0x08, Z24, Z24 1692 VPADDD Z22, Z24, Z22 1693 VPXORD Z12, Z22, Z12 1694 VPRORD $0x07, Z12, Z12 1695 VPADDD Z4, Z14, Z4 1696 VPADDD Z5, Z4, Z4 1697 VPXORD Z26, Z4, Z26 1698 VPRORD $0x10, Z26, Z26 1699 VPADDD Z16, Z26, Z16 1700 VPXORD Z14, Z16, Z14 1701 VPRORD $0x0c, Z14, Z14 1702 VPADDD Z4, Z14, Z4 1703 VPADDD Z13, Z4, Z4 1704 VPXORD Z26, Z4, Z26 1705 VPRORD $0x08, Z26, Z26 1706 VPADDD Z16, Z26, Z16 1707 VPXORD Z14, Z16, Z14 1708 VPRORD $0x07, Z14, 
Z14 1709 VPADDD Z6, Z8, Z6 1710 VPADDD Z9, Z6, Z6 1711 VPXORD Z28, Z6, Z28 1712 VPRORD $0x10, Z28, Z28 1713 VPADDD Z18, Z28, Z18 1714 VPXORD Z8, Z18, Z8 1715 VPRORD $0x0c, Z8, Z8 1716 VPADDD Z6, Z8, Z6 1717 VPADDD Z15, Z6, Z6 1718 VPXORD Z28, Z6, Z28 1719 VPRORD $0x08, Z28, Z28 1720 VPADDD Z18, Z28, Z18 1721 VPXORD Z8, Z18, Z8 1722 VPRORD $0x07, Z8, Z8 1723 1724 // Round 7 1725 VPADDD Z0, Z8, Z0 1726 VPADDD Z23, Z0, Z0 1727 VPXORD Z24, Z0, Z24 1728 VPRORD $0x10, Z24, Z24 1729 VPADDD Z16, Z24, Z16 1730 VPXORD Z8, Z16, Z8 1731 VPRORD $0x0c, Z8, Z8 1732 VPADDD Z0, Z8, Z0 1733 VPADDD Z31, Z0, Z0 1734 VPXORD Z24, Z0, Z24 1735 VPRORD $0x08, Z24, Z24 1736 VPADDD Z16, Z24, Z16 1737 VPXORD Z8, Z16, Z8 1738 VPRORD $0x07, Z8, Z8 1739 VPADDD Z2, Z10, Z2 1740 VPADDD Z11, Z2, Z2 1741 VPXORD Z26, Z2, Z26 1742 VPRORD $0x10, Z26, Z26 1743 VPADDD Z18, Z26, Z18 1744 VPXORD Z10, Z18, Z10 1745 VPRORD $0x0c, Z10, Z10 1746 VPADDD Z2, Z10, Z2 1747 VPADDD Z1, Z2, Z2 1748 VPXORD Z26, Z2, Z26 1749 VPRORD $0x08, Z26, Z26 1750 VPADDD Z18, Z26, Z18 1751 VPXORD Z10, Z18, Z10 1752 VPRORD $0x07, Z10, Z10 1753 VPADDD Z4, Z12, Z4 1754 VPADDD Z3, Z4, Z4 1755 VPXORD Z28, Z4, Z28 1756 VPRORD $0x10, Z28, Z28 1757 VPADDD Z20, Z28, Z20 1758 VPXORD Z12, Z20, Z12 1759 VPRORD $0x0c, Z12, Z12 1760 VPADDD Z4, Z12, Z4 1761 VPADDD Z19, Z4, Z4 1762 VPXORD Z28, Z4, Z28 1763 VPRORD $0x08, Z28, Z28 1764 VPADDD Z20, Z28, Z20 1765 VPXORD Z12, Z20, Z12 1766 VPRORD $0x07, Z12, Z12 1767 VPADDD Z6, Z14, Z6 1768 VPADDD Z17, Z6, Z6 1769 VPXORD Z30, Z6, Z30 1770 VPRORD $0x10, Z30, Z30 1771 VPADDD Z22, Z30, Z22 1772 VPXORD Z14, Z22, Z14 1773 VPRORD $0x0c, Z14, Z14 1774 VPADDD Z6, Z14, Z6 1775 VPADDD Z13, Z6, Z6 1776 VPXORD Z30, Z6, Z30 1777 VPRORD $0x08, Z30, Z30 1778 VPADDD Z22, Z30, Z22 1779 VPXORD Z14, Z22, Z14 1780 VPRORD $0x07, Z14, Z14 1781 VPADDD Z0, Z10, Z0 1782 VPADDD Z29, Z0, Z0 1783 VPXORD Z30, Z0, Z30 1784 VPRORD $0x10, Z30, Z30 1785 VPADDD Z20, Z30, Z20 1786 VPXORD Z10, Z20, Z10 1787 VPRORD $0x0c, Z10, Z10 1788 
VPADDD Z0, Z10, Z0 1789 VPADDD Z21, Z0, Z0 1790 VPXORD Z30, Z0, Z30 1791 VPRORD $0x08, Z30, Z30 1792 VPADDD Z20, Z30, Z20 1793 VPXORD Z10, Z20, Z10 1794 VPRORD $0x07, Z10, Z10 1795 VPADDD Z2, Z12, Z2 1796 VPADDD Z5, Z2, Z2 1797 VPXORD Z24, Z2, Z24 1798 VPRORD $0x10, Z24, Z24 1799 VPADDD Z22, Z24, Z22 1800 VPXORD Z12, Z22, Z12 1801 VPRORD $0x0c, Z12, Z12 1802 VPADDD Z2, Z12, Z2 1803 VPADDD Z25, Z2, Z2 1804 VPXORD Z24, Z2, Z24 1805 VPRORD $0x08, Z24, Z24 1806 VPADDD Z22, Z24, Z22 1807 VPXORD Z12, Z22, Z12 1808 VPRORD $0x07, Z12, Z12 1809 VPADDD Z4, Z14, Z4 1810 VPADDD Z7, Z4, Z4 1811 VPXORD Z26, Z4, Z26 1812 VPRORD $0x10, Z26, Z26 1813 VPADDD Z16, Z26, Z16 1814 VPXORD Z14, Z16, Z14 1815 VPRORD $0x0c, Z14, Z14 1816 VPADDD Z4, Z14, Z4 1817 VPADDD Z9, Z4, Z4 1818 VPXORD Z26, Z4, Z26 1819 VPRORD $0x08, Z26, Z26 1820 VPADDD Z16, Z26, Z16 1821 VPXORD Z14, Z16, Z14 1822 VPRORD $0x07, Z14, Z14 1823 VPADDD Z6, Z8, Z6 1824 VPADDD Z15, Z6, Z6 1825 VPXORD Z28, Z6, Z28 1826 VPRORD $0x10, Z28, Z28 1827 VPADDD Z18, Z28, Z18 1828 VPXORD Z8, Z18, Z8 1829 VPRORD $0x0c, Z8, Z8 1830 VPADDD Z6, Z8, Z6 1831 VPADDD Z27, Z6, Z6 1832 VPXORD Z28, Z6, Z28 1833 VPRORD $0x08, Z28, Z28 1834 VPADDD Z18, Z28, Z18 1835 VPXORD Z8, Z18, Z8 1836 VPRORD $0x07, Z8, Z8 1837 1838 // Finalize CVs 1839 VPXORD Z0, Z16, Z0 1840 VPXORD Z2, Z18, Z2 1841 VPXORD Z4, Z20, Z4 1842 VPXORD Z6, Z22, Z6 1843 VPXORD Z8, Z24, Z8 1844 VPXORD Z10, Z26, Z10 1845 VPXORD Z12, Z28, Z12 1846 VPXORD Z14, Z30, Z14 1847 1848 // Loop 1849 INCQ DX 1850 CMPQ DX, $0x00000010 1851 JNE loop 1852 1853 // Finished; transpose CVs 1854 VMOVDQU32 seq<>+0(SB), Z16 1855 VPSLLD $0x05, Z16, Z16 1856 KXNORD K1, K1, K1 1857 VPSCATTERDD Z0, K1, (AX)(Z16*1) 1858 KXNORD K1, K1, K1 1859 VPSCATTERDD Z2, K1, 4(AX)(Z16*1) 1860 KXNORD K1, K1, K1 1861 VPSCATTERDD Z4, K1, 8(AX)(Z16*1) 1862 KXNORD K1, K1, K1 1863 VPSCATTERDD Z6, K1, 12(AX)(Z16*1) 1864 KXNORD K1, K1, K1 1865 VPSCATTERDD Z8, K1, 16(AX)(Z16*1) 1866 KXNORD K1, K1, K1 1867 VPSCATTERDD Z10, K1, 
20(AX)(Z16*1) 1868 KXNORD K1, K1, K1 1869 VPSCATTERDD Z12, K1, 24(AX)(Z16*1) 1870 KXNORD K1, K1, K1 1871 VPSCATTERDD Z14, K1, 28(AX)(Z16*1) 1872 RET 1873 1874 // func compressBlocksAVX2(out *[512]byte, block *[16]uint32, cv *[8]uint32, counter uint64, blockLen uint32, flags uint32) 1875 // Requires: AVX, AVX2 1876 TEXT ·compressBlocksAVX2(SB), NOSPLIT, $544-40 1877 MOVQ out+0(FP), AX 1878 MOVQ block+8(FP), CX 1879 MOVQ cv+16(FP), DX 1880 1881 // Load block 1882 VPBROADCASTD (CX), Y0 1883 VMOVDQU Y0, (SP) 1884 VPBROADCASTD 4(CX), Y0 1885 VMOVDQU Y0, 32(SP) 1886 VPBROADCASTD 8(CX), Y0 1887 VMOVDQU Y0, 64(SP) 1888 VPBROADCASTD 12(CX), Y0 1889 VMOVDQU Y0, 96(SP) 1890 VPBROADCASTD 16(CX), Y0 1891 VMOVDQU Y0, 128(SP) 1892 VPBROADCASTD 20(CX), Y0 1893 VMOVDQU Y0, 160(SP) 1894 VPBROADCASTD 24(CX), Y0 1895 VMOVDQU Y0, 192(SP) 1896 VPBROADCASTD 28(CX), Y0 1897 VMOVDQU Y0, 224(SP) 1898 VPBROADCASTD 32(CX), Y0 1899 VMOVDQU Y0, 256(SP) 1900 VPBROADCASTD 36(CX), Y0 1901 VMOVDQU Y0, 288(SP) 1902 VPBROADCASTD 40(CX), Y0 1903 VMOVDQU Y0, 320(SP) 1904 VPBROADCASTD 44(CX), Y0 1905 VMOVDQU Y0, 352(SP) 1906 VPBROADCASTD 48(CX), Y0 1907 VMOVDQU Y0, 384(SP) 1908 VPBROADCASTD 52(CX), Y0 1909 VMOVDQU Y0, 416(SP) 1910 VPBROADCASTD 56(CX), Y0 1911 VMOVDQU Y0, 448(SP) 1912 VPBROADCASTD 60(CX), Y0 1913 VMOVDQU Y0, 480(SP) 1914 1915 // Initialize state vectors 1916 VPBROADCASTD (DX), Y0 1917 VPBROADCASTD 4(DX), Y1 1918 VPBROADCASTD 8(DX), Y2 1919 VPBROADCASTD 12(DX), Y3 1920 VPBROADCASTD 16(DX), Y4 1921 VPBROADCASTD 20(DX), Y5 1922 VPBROADCASTD 24(DX), Y6 1923 VPBROADCASTD 28(DX), Y7 1924 VPBROADCASTD iv<>+0(SB), Y8 1925 VPBROADCASTD iv<>+4(SB), Y9 1926 VPBROADCASTD iv<>+8(SB), Y10 1927 VPBROADCASTD iv<>+12(SB), Y11 1928 VPBROADCASTQ counter+24(FP), Y12 1929 VPBROADCASTQ counter+24(FP), Y13 1930 VPADDQ seq64<>+0(SB), Y12, Y12 1931 VPADDQ seq64<>+32(SB), Y13, Y13 1932 VPUNPCKLDQ Y13, Y12, Y14 1933 VPUNPCKHDQ Y13, Y12, Y15 1934 VPUNPCKLDQ Y15, Y14, Y12 1935 VPUNPCKHDQ Y15, Y14, Y13 1936 VPERMQ 
$0xd8, Y12, Y12 1937 VPERMQ $0xd8, Y13, Y13 1938 VPBROADCASTD blockLen+32(FP), Y14 1939 VPBROADCASTD flags+36(FP), Y15 1940 VMOVDQU Y8, 512(SP) 1941 1942 // Round 1 1943 VPADDD Y0, Y4, Y0 1944 VPADDD (SP), Y0, Y0 1945 VPXOR Y12, Y0, Y12 1946 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 1947 VMOVDQU 512(SP), Y8 1948 VPADDD Y8, Y12, Y8 1949 VPXOR Y4, Y8, Y4 1950 VMOVDQU Y8, 512(SP) 1951 VPSRLD $0x0c, Y4, Y8 1952 VPSLLD $0x14, Y4, Y4 1953 VPOR Y4, Y8, Y4 1954 VPADDD Y0, Y4, Y0 1955 VPADDD 32(SP), Y0, Y0 1956 VPXOR Y12, Y0, Y12 1957 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 1958 VMOVDQU 512(SP), Y8 1959 VPADDD Y8, Y12, Y8 1960 VPXOR Y4, Y8, Y4 1961 VMOVDQU Y8, 512(SP) 1962 VPSRLD $0x07, Y4, Y8 1963 VPSLLD $0x19, Y4, Y4 1964 VPOR Y4, Y8, Y4 1965 VPADDD Y1, Y5, Y1 1966 VPADDD 64(SP), Y1, Y1 1967 VPXOR Y13, Y1, Y13 1968 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 1969 VPADDD Y9, Y13, Y9 1970 VPXOR Y5, Y9, Y5 1971 VPSRLD $0x0c, Y5, Y8 1972 VPSLLD $0x14, Y5, Y5 1973 VPOR Y5, Y8, Y5 1974 VPADDD Y1, Y5, Y1 1975 VPADDD 96(SP), Y1, Y1 1976 VPXOR Y13, Y1, Y13 1977 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 1978 VPADDD Y9, Y13, Y9 1979 VPXOR Y5, Y9, Y5 1980 VPSRLD $0x07, Y5, Y8 1981 VPSLLD $0x19, Y5, Y5 1982 VPOR Y5, Y8, Y5 1983 VPADDD Y2, Y6, Y2 1984 VPADDD 128(SP), Y2, Y2 1985 VPXOR Y14, Y2, Y14 1986 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 1987 VPADDD Y10, Y14, Y10 1988 VPXOR Y6, Y10, Y6 1989 VPSRLD $0x0c, Y6, Y8 1990 VPSLLD $0x14, Y6, Y6 1991 VPOR Y6, Y8, Y6 1992 VPADDD Y2, Y6, Y2 1993 VPADDD 160(SP), Y2, Y2 1994 VPXOR Y14, Y2, Y14 1995 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 1996 VPADDD Y10, Y14, Y10 1997 VPXOR Y6, Y10, Y6 1998 VPSRLD $0x07, Y6, Y8 1999 VPSLLD $0x19, Y6, Y6 2000 VPOR Y6, Y8, Y6 2001 VPADDD Y3, Y7, Y3 2002 VPADDD 192(SP), Y3, Y3 2003 VPXOR Y15, Y3, Y15 2004 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2005 VPADDD Y11, Y15, Y11 2006 VPXOR Y7, Y11, Y7 2007 VPSRLD $0x0c, Y7, Y8 2008 VPSLLD $0x14, Y7, Y7 2009 VPOR Y7, Y8, Y7 2010 VPADDD Y3, Y7, Y3 2011 VPADDD 224(SP), Y3, Y3 2012 VPXOR 
Y15, Y3, Y15 2013 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2014 VPADDD Y11, Y15, Y11 2015 VPXOR Y7, Y11, Y7 2016 VPSRLD $0x07, Y7, Y8 2017 VPSLLD $0x19, Y7, Y7 2018 VPOR Y7, Y8, Y7 2019 VPADDD Y0, Y5, Y0 2020 VPADDD 256(SP), Y0, Y0 2021 VPXOR Y15, Y0, Y15 2022 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2023 VPADDD Y10, Y15, Y10 2024 VPXOR Y5, Y10, Y5 2025 VPSRLD $0x0c, Y5, Y8 2026 VPSLLD $0x14, Y5, Y5 2027 VPOR Y5, Y8, Y5 2028 VPADDD Y0, Y5, Y0 2029 VPADDD 288(SP), Y0, Y0 2030 VPXOR Y15, Y0, Y15 2031 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2032 VPADDD Y10, Y15, Y10 2033 VPXOR Y5, Y10, Y5 2034 VPSRLD $0x07, Y5, Y8 2035 VPSLLD $0x19, Y5, Y5 2036 VPOR Y5, Y8, Y5 2037 VPADDD Y1, Y6, Y1 2038 VPADDD 320(SP), Y1, Y1 2039 VPXOR Y12, Y1, Y12 2040 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2041 VPADDD Y11, Y12, Y11 2042 VPXOR Y6, Y11, Y6 2043 VPSRLD $0x0c, Y6, Y8 2044 VPSLLD $0x14, Y6, Y6 2045 VPOR Y6, Y8, Y6 2046 VPADDD Y1, Y6, Y1 2047 VPADDD 352(SP), Y1, Y1 2048 VPXOR Y12, Y1, Y12 2049 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2050 VPADDD Y11, Y12, Y11 2051 VPXOR Y6, Y11, Y6 2052 VPSRLD $0x07, Y6, Y8 2053 VPSLLD $0x19, Y6, Y6 2054 VPOR Y6, Y8, Y6 2055 VPADDD Y2, Y7, Y2 2056 VPADDD 384(SP), Y2, Y2 2057 VPXOR Y13, Y2, Y13 2058 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2059 VMOVDQU 512(SP), Y8 2060 VPADDD Y8, Y13, Y8 2061 VPXOR Y7, Y8, Y7 2062 VMOVDQU Y8, 512(SP) 2063 VPSRLD $0x0c, Y7, Y8 2064 VPSLLD $0x14, Y7, Y7 2065 VPOR Y7, Y8, Y7 2066 VPADDD Y2, Y7, Y2 2067 VPADDD 416(SP), Y2, Y2 2068 VPXOR Y13, Y2, Y13 2069 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2070 VMOVDQU 512(SP), Y8 2071 VPADDD Y8, Y13, Y8 2072 VPXOR Y7, Y8, Y7 2073 VMOVDQU Y8, 512(SP) 2074 VPSRLD $0x07, Y7, Y8 2075 VPSLLD $0x19, Y7, Y7 2076 VPOR Y7, Y8, Y7 2077 VPADDD Y3, Y4, Y3 2078 VPADDD 448(SP), Y3, Y3 2079 VPXOR Y14, Y3, Y14 2080 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2081 VPADDD Y9, Y14, Y9 2082 VPXOR Y4, Y9, Y4 2083 VPSRLD $0x0c, Y4, Y8 2084 VPSLLD $0x14, Y4, Y4 2085 VPOR Y4, Y8, Y4 2086 VPADDD Y3, Y4, Y3 2087 VPADDD 
480(SP), Y3, Y3 2088 VPXOR Y14, Y3, Y14 2089 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2090 VPADDD Y9, Y14, Y9 2091 VPXOR Y4, Y9, Y4 2092 VPSRLD $0x07, Y4, Y8 2093 VPSLLD $0x19, Y4, Y4 2094 VPOR Y4, Y8, Y4 2095 2096 // Round 2 2097 VPADDD Y0, Y4, Y0 2098 VPADDD 64(SP), Y0, Y0 2099 VPXOR Y12, Y0, Y12 2100 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2101 VMOVDQU 512(SP), Y8 2102 VPADDD Y8, Y12, Y8 2103 VPXOR Y4, Y8, Y4 2104 VMOVDQU Y8, 512(SP) 2105 VPSRLD $0x0c, Y4, Y8 2106 VPSLLD $0x14, Y4, Y4 2107 VPOR Y4, Y8, Y4 2108 VPADDD Y0, Y4, Y0 2109 VPADDD 192(SP), Y0, Y0 2110 VPXOR Y12, Y0, Y12 2111 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2112 VMOVDQU 512(SP), Y8 2113 VPADDD Y8, Y12, Y8 2114 VPXOR Y4, Y8, Y4 2115 VMOVDQU Y8, 512(SP) 2116 VPSRLD $0x07, Y4, Y8 2117 VPSLLD $0x19, Y4, Y4 2118 VPOR Y4, Y8, Y4 2119 VPADDD Y1, Y5, Y1 2120 VPADDD 96(SP), Y1, Y1 2121 VPXOR Y13, Y1, Y13 2122 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2123 VPADDD Y9, Y13, Y9 2124 VPXOR Y5, Y9, Y5 2125 VPSRLD $0x0c, Y5, Y8 2126 VPSLLD $0x14, Y5, Y5 2127 VPOR Y5, Y8, Y5 2128 VPADDD Y1, Y5, Y1 2129 VPADDD 320(SP), Y1, Y1 2130 VPXOR Y13, Y1, Y13 2131 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2132 VPADDD Y9, Y13, Y9 2133 VPXOR Y5, Y9, Y5 2134 VPSRLD $0x07, Y5, Y8 2135 VPSLLD $0x19, Y5, Y5 2136 VPOR Y5, Y8, Y5 2137 VPADDD Y2, Y6, Y2 2138 VPADDD 224(SP), Y2, Y2 2139 VPXOR Y14, Y2, Y14 2140 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2141 VPADDD Y10, Y14, Y10 2142 VPXOR Y6, Y10, Y6 2143 VPSRLD $0x0c, Y6, Y8 2144 VPSLLD $0x14, Y6, Y6 2145 VPOR Y6, Y8, Y6 2146 VPADDD Y2, Y6, Y2 2147 VPADDD (SP), Y2, Y2 2148 VPXOR Y14, Y2, Y14 2149 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2150 VPADDD Y10, Y14, Y10 2151 VPXOR Y6, Y10, Y6 2152 VPSRLD $0x07, Y6, Y8 2153 VPSLLD $0x19, Y6, Y6 2154 VPOR Y6, Y8, Y6 2155 VPADDD Y3, Y7, Y3 2156 VPADDD 128(SP), Y3, Y3 2157 VPXOR Y15, Y3, Y15 2158 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2159 VPADDD Y11, Y15, Y11 2160 VPXOR Y7, Y11, Y7 2161 VPSRLD $0x0c, Y7, Y8 2162 VPSLLD $0x14, Y7, Y7 2163 VPOR Y7, Y8, Y7 
2164 VPADDD Y3, Y7, Y3 2165 VPADDD 416(SP), Y3, Y3 2166 VPXOR Y15, Y3, Y15 2167 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2168 VPADDD Y11, Y15, Y11 2169 VPXOR Y7, Y11, Y7 2170 VPSRLD $0x07, Y7, Y8 2171 VPSLLD $0x19, Y7, Y7 2172 VPOR Y7, Y8, Y7 2173 VPADDD Y0, Y5, Y0 2174 VPADDD 32(SP), Y0, Y0 2175 VPXOR Y15, Y0, Y15 2176 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2177 VPADDD Y10, Y15, Y10 2178 VPXOR Y5, Y10, Y5 2179 VPSRLD $0x0c, Y5, Y8 2180 VPSLLD $0x14, Y5, Y5 2181 VPOR Y5, Y8, Y5 2182 VPADDD Y0, Y5, Y0 2183 VPADDD 352(SP), Y0, Y0 2184 VPXOR Y15, Y0, Y15 2185 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2186 VPADDD Y10, Y15, Y10 2187 VPXOR Y5, Y10, Y5 2188 VPSRLD $0x07, Y5, Y8 2189 VPSLLD $0x19, Y5, Y5 2190 VPOR Y5, Y8, Y5 2191 VPADDD Y1, Y6, Y1 2192 VPADDD 384(SP), Y1, Y1 2193 VPXOR Y12, Y1, Y12 2194 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2195 VPADDD Y11, Y12, Y11 2196 VPXOR Y6, Y11, Y6 2197 VPSRLD $0x0c, Y6, Y8 2198 VPSLLD $0x14, Y6, Y6 2199 VPOR Y6, Y8, Y6 2200 VPADDD Y1, Y6, Y1 2201 VPADDD 160(SP), Y1, Y1 2202 VPXOR Y12, Y1, Y12 2203 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2204 VPADDD Y11, Y12, Y11 2205 VPXOR Y6, Y11, Y6 2206 VPSRLD $0x07, Y6, Y8 2207 VPSLLD $0x19, Y6, Y6 2208 VPOR Y6, Y8, Y6 2209 VPADDD Y2, Y7, Y2 2210 VPADDD 288(SP), Y2, Y2 2211 VPXOR Y13, Y2, Y13 2212 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2213 VMOVDQU 512(SP), Y8 2214 VPADDD Y8, Y13, Y8 2215 VPXOR Y7, Y8, Y7 2216 VMOVDQU Y8, 512(SP) 2217 VPSRLD $0x0c, Y7, Y8 2218 VPSLLD $0x14, Y7, Y7 2219 VPOR Y7, Y8, Y7 2220 VPADDD Y2, Y7, Y2 2221 VPADDD 448(SP), Y2, Y2 2222 VPXOR Y13, Y2, Y13 2223 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2224 VMOVDQU 512(SP), Y8 2225 VPADDD Y8, Y13, Y8 2226 VPXOR Y7, Y8, Y7 2227 VMOVDQU Y8, 512(SP) 2228 VPSRLD $0x07, Y7, Y8 2229 VPSLLD $0x19, Y7, Y7 2230 VPOR Y7, Y8, Y7 2231 VPADDD Y3, Y4, Y3 2232 VPADDD 480(SP), Y3, Y3 2233 VPXOR Y14, Y3, Y14 2234 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2235 VPADDD Y9, Y14, Y9 2236 VPXOR Y4, Y9, Y4 2237 VPSRLD $0x0c, Y4, Y8 2238 VPSLLD $0x14, Y4, 
Y4 2239 VPOR Y4, Y8, Y4 2240 VPADDD Y3, Y4, Y3 2241 VPADDD 256(SP), Y3, Y3 2242 VPXOR Y14, Y3, Y14 2243 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2244 VPADDD Y9, Y14, Y9 2245 VPXOR Y4, Y9, Y4 2246 VPSRLD $0x07, Y4, Y8 2247 VPSLLD $0x19, Y4, Y4 2248 VPOR Y4, Y8, Y4 2249 2250 // Round 3 2251 VPADDD Y0, Y4, Y0 2252 VPADDD 96(SP), Y0, Y0 2253 VPXOR Y12, Y0, Y12 2254 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2255 VMOVDQU 512(SP), Y8 2256 VPADDD Y8, Y12, Y8 2257 VPXOR Y4, Y8, Y4 2258 VMOVDQU Y8, 512(SP) 2259 VPSRLD $0x0c, Y4, Y8 2260 VPSLLD $0x14, Y4, Y4 2261 VPOR Y4, Y8, Y4 2262 VPADDD Y0, Y4, Y0 2263 VPADDD 128(SP), Y0, Y0 2264 VPXOR Y12, Y0, Y12 2265 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2266 VMOVDQU 512(SP), Y8 2267 VPADDD Y8, Y12, Y8 2268 VPXOR Y4, Y8, Y4 2269 VMOVDQU Y8, 512(SP) 2270 VPSRLD $0x07, Y4, Y8 2271 VPSLLD $0x19, Y4, Y4 2272 VPOR Y4, Y8, Y4 2273 VPADDD Y1, Y5, Y1 2274 VPADDD 320(SP), Y1, Y1 2275 VPXOR Y13, Y1, Y13 2276 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2277 VPADDD Y9, Y13, Y9 2278 VPXOR Y5, Y9, Y5 2279 VPSRLD $0x0c, Y5, Y8 2280 VPSLLD $0x14, Y5, Y5 2281 VPOR Y5, Y8, Y5 2282 VPADDD Y1, Y5, Y1 2283 VPADDD 384(SP), Y1, Y1 2284 VPXOR Y13, Y1, Y13 2285 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2286 VPADDD Y9, Y13, Y9 2287 VPXOR Y5, Y9, Y5 2288 VPSRLD $0x07, Y5, Y8 2289 VPSLLD $0x19, Y5, Y5 2290 VPOR Y5, Y8, Y5 2291 VPADDD Y2, Y6, Y2 2292 VPADDD 416(SP), Y2, Y2 2293 VPXOR Y14, Y2, Y14 2294 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2295 VPADDD Y10, Y14, Y10 2296 VPXOR Y6, Y10, Y6 2297 VPSRLD $0x0c, Y6, Y8 2298 VPSLLD $0x14, Y6, Y6 2299 VPOR Y6, Y8, Y6 2300 VPADDD Y2, Y6, Y2 2301 VPADDD 64(SP), Y2, Y2 2302 VPXOR Y14, Y2, Y14 2303 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2304 VPADDD Y10, Y14, Y10 2305 VPXOR Y6, Y10, Y6 2306 VPSRLD $0x07, Y6, Y8 2307 VPSLLD $0x19, Y6, Y6 2308 VPOR Y6, Y8, Y6 2309 VPADDD Y3, Y7, Y3 2310 VPADDD 224(SP), Y3, Y3 2311 VPXOR Y15, Y3, Y15 2312 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2313 VPADDD Y11, Y15, Y11 2314 VPXOR Y7, Y11, Y7 2315 VPSRLD 
$0x0c, Y7, Y8 2316 VPSLLD $0x14, Y7, Y7 2317 VPOR Y7, Y8, Y7 2318 VPADDD Y3, Y7, Y3 2319 VPADDD 448(SP), Y3, Y3 2320 VPXOR Y15, Y3, Y15 2321 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2322 VPADDD Y11, Y15, Y11 2323 VPXOR Y7, Y11, Y7 2324 VPSRLD $0x07, Y7, Y8 2325 VPSLLD $0x19, Y7, Y7 2326 VPOR Y7, Y8, Y7 2327 VPADDD Y0, Y5, Y0 2328 VPADDD 192(SP), Y0, Y0 2329 VPXOR Y15, Y0, Y15 2330 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2331 VPADDD Y10, Y15, Y10 2332 VPXOR Y5, Y10, Y5 2333 VPSRLD $0x0c, Y5, Y8 2334 VPSLLD $0x14, Y5, Y5 2335 VPOR Y5, Y8, Y5 2336 VPADDD Y0, Y5, Y0 2337 VPADDD 160(SP), Y0, Y0 2338 VPXOR Y15, Y0, Y15 2339 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2340 VPADDD Y10, Y15, Y10 2341 VPXOR Y5, Y10, Y5 2342 VPSRLD $0x07, Y5, Y8 2343 VPSLLD $0x19, Y5, Y5 2344 VPOR Y5, Y8, Y5 2345 VPADDD Y1, Y6, Y1 2346 VPADDD 288(SP), Y1, Y1 2347 VPXOR Y12, Y1, Y12 2348 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2349 VPADDD Y11, Y12, Y11 2350 VPXOR Y6, Y11, Y6 2351 VPSRLD $0x0c, Y6, Y8 2352 VPSLLD $0x14, Y6, Y6 2353 VPOR Y6, Y8, Y6 2354 VPADDD Y1, Y6, Y1 2355 VPADDD (SP), Y1, Y1 2356 VPXOR Y12, Y1, Y12 2357 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2358 VPADDD Y11, Y12, Y11 2359 VPXOR Y6, Y11, Y6 2360 VPSRLD $0x07, Y6, Y8 2361 VPSLLD $0x19, Y6, Y6 2362 VPOR Y6, Y8, Y6 2363 VPADDD Y2, Y7, Y2 2364 VPADDD 352(SP), Y2, Y2 2365 VPXOR Y13, Y2, Y13 2366 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2367 VMOVDQU 512(SP), Y8 2368 VPADDD Y8, Y13, Y8 2369 VPXOR Y7, Y8, Y7 2370 VMOVDQU Y8, 512(SP) 2371 VPSRLD $0x0c, Y7, Y8 2372 VPSLLD $0x14, Y7, Y7 2373 VPOR Y7, Y8, Y7 2374 VPADDD Y2, Y7, Y2 2375 VPADDD 480(SP), Y2, Y2 2376 VPXOR Y13, Y2, Y13 2377 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2378 VMOVDQU 512(SP), Y8 2379 VPADDD Y8, Y13, Y8 2380 VPXOR Y7, Y8, Y7 2381 VMOVDQU Y8, 512(SP) 2382 VPSRLD $0x07, Y7, Y8 2383 VPSLLD $0x19, Y7, Y7 2384 VPOR Y7, Y8, Y7 2385 VPADDD Y3, Y4, Y3 2386 VPADDD 256(SP), Y3, Y3 2387 VPXOR Y14, Y3, Y14 2388 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2389 VPADDD Y9, Y14, Y9 2390 VPXOR 
Y4, Y9, Y4 2391 VPSRLD $0x0c, Y4, Y8 2392 VPSLLD $0x14, Y4, Y4 2393 VPOR Y4, Y8, Y4 2394 VPADDD Y3, Y4, Y3 2395 VPADDD 32(SP), Y3, Y3 2396 VPXOR Y14, Y3, Y14 2397 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2398 VPADDD Y9, Y14, Y9 2399 VPXOR Y4, Y9, Y4 2400 VPSRLD $0x07, Y4, Y8 2401 VPSLLD $0x19, Y4, Y4 2402 VPOR Y4, Y8, Y4 2403 2404 // Round 4 2405 VPADDD Y0, Y4, Y0 2406 VPADDD 320(SP), Y0, Y0 2407 VPXOR Y12, Y0, Y12 2408 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2409 VMOVDQU 512(SP), Y8 2410 VPADDD Y8, Y12, Y8 2411 VPXOR Y4, Y8, Y4 2412 VMOVDQU Y8, 512(SP) 2413 VPSRLD $0x0c, Y4, Y8 2414 VPSLLD $0x14, Y4, Y4 2415 VPOR Y4, Y8, Y4 2416 VPADDD Y0, Y4, Y0 2417 VPADDD 224(SP), Y0, Y0 2418 VPXOR Y12, Y0, Y12 2419 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2420 VMOVDQU 512(SP), Y8 2421 VPADDD Y8, Y12, Y8 2422 VPXOR Y4, Y8, Y4 2423 VMOVDQU Y8, 512(SP) 2424 VPSRLD $0x07, Y4, Y8 2425 VPSLLD $0x19, Y4, Y4 2426 VPOR Y4, Y8, Y4 2427 VPADDD Y1, Y5, Y1 2428 VPADDD 384(SP), Y1, Y1 2429 VPXOR Y13, Y1, Y13 2430 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2431 VPADDD Y9, Y13, Y9 2432 VPXOR Y5, Y9, Y5 2433 VPSRLD $0x0c, Y5, Y8 2434 VPSLLD $0x14, Y5, Y5 2435 VPOR Y5, Y8, Y5 2436 VPADDD Y1, Y5, Y1 2437 VPADDD 288(SP), Y1, Y1 2438 VPXOR Y13, Y1, Y13 2439 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2440 VPADDD Y9, Y13, Y9 2441 VPXOR Y5, Y9, Y5 2442 VPSRLD $0x07, Y5, Y8 2443 VPSLLD $0x19, Y5, Y5 2444 VPOR Y5, Y8, Y5 2445 VPADDD Y2, Y6, Y2 2446 VPADDD 448(SP), Y2, Y2 2447 VPXOR Y14, Y2, Y14 2448 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2449 VPADDD Y10, Y14, Y10 2450 VPXOR Y6, Y10, Y6 2451 VPSRLD $0x0c, Y6, Y8 2452 VPSLLD $0x14, Y6, Y6 2453 VPOR Y6, Y8, Y6 2454 VPADDD Y2, Y6, Y2 2455 VPADDD 96(SP), Y2, Y2 2456 VPXOR Y14, Y2, Y14 2457 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2458 VPADDD Y10, Y14, Y10 2459 VPXOR Y6, Y10, Y6 2460 VPSRLD $0x07, Y6, Y8 2461 VPSLLD $0x19, Y6, Y6 2462 VPOR Y6, Y8, Y6 2463 VPADDD Y3, Y7, Y3 2464 VPADDD 416(SP), Y3, Y3 2465 VPXOR Y15, Y3, Y15 2466 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 
2467 VPADDD Y11, Y15, Y11 2468 VPXOR Y7, Y11, Y7 2469 VPSRLD $0x0c, Y7, Y8 2470 VPSLLD $0x14, Y7, Y7 2471 VPOR Y7, Y8, Y7 2472 VPADDD Y3, Y7, Y3 2473 VPADDD 480(SP), Y3, Y3 2474 VPXOR Y15, Y3, Y15 2475 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2476 VPADDD Y11, Y15, Y11 2477 VPXOR Y7, Y11, Y7 2478 VPSRLD $0x07, Y7, Y8 2479 VPSLLD $0x19, Y7, Y7 2480 VPOR Y7, Y8, Y7 2481 VPADDD Y0, Y5, Y0 2482 VPADDD 128(SP), Y0, Y0 2483 VPXOR Y15, Y0, Y15 2484 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2485 VPADDD Y10, Y15, Y10 2486 VPXOR Y5, Y10, Y5 2487 VPSRLD $0x0c, Y5, Y8 2488 VPSLLD $0x14, Y5, Y5 2489 VPOR Y5, Y8, Y5 2490 VPADDD Y0, Y5, Y0 2491 VPADDD (SP), Y0, Y0 2492 VPXOR Y15, Y0, Y15 2493 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2494 VPADDD Y10, Y15, Y10 2495 VPXOR Y5, Y10, Y5 2496 VPSRLD $0x07, Y5, Y8 2497 VPSLLD $0x19, Y5, Y5 2498 VPOR Y5, Y8, Y5 2499 VPADDD Y1, Y6, Y1 2500 VPADDD 352(SP), Y1, Y1 2501 VPXOR Y12, Y1, Y12 2502 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2503 VPADDD Y11, Y12, Y11 2504 VPXOR Y6, Y11, Y6 2505 VPSRLD $0x0c, Y6, Y8 2506 VPSLLD $0x14, Y6, Y6 2507 VPOR Y6, Y8, Y6 2508 VPADDD Y1, Y6, Y1 2509 VPADDD 64(SP), Y1, Y1 2510 VPXOR Y12, Y1, Y12 2511 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2512 VPADDD Y11, Y12, Y11 2513 VPXOR Y6, Y11, Y6 2514 VPSRLD $0x07, Y6, Y8 2515 VPSLLD $0x19, Y6, Y6 2516 VPOR Y6, Y8, Y6 2517 VPADDD Y2, Y7, Y2 2518 VPADDD 160(SP), Y2, Y2 2519 VPXOR Y13, Y2, Y13 2520 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2521 VMOVDQU 512(SP), Y8 2522 VPADDD Y8, Y13, Y8 2523 VPXOR Y7, Y8, Y7 2524 VMOVDQU Y8, 512(SP) 2525 VPSRLD $0x0c, Y7, Y8 2526 VPSLLD $0x14, Y7, Y7 2527 VPOR Y7, Y8, Y7 2528 VPADDD Y2, Y7, Y2 2529 VPADDD 256(SP), Y2, Y2 2530 VPXOR Y13, Y2, Y13 2531 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2532 VMOVDQU 512(SP), Y8 2533 VPADDD Y8, Y13, Y8 2534 VPXOR Y7, Y8, Y7 2535 VMOVDQU Y8, 512(SP) 2536 VPSRLD $0x07, Y7, Y8 2537 VPSLLD $0x19, Y7, Y7 2538 VPOR Y7, Y8, Y7 2539 VPADDD Y3, Y4, Y3 2540 VPADDD 32(SP), Y3, Y3 2541 VPXOR Y14, Y3, Y14 2542 VPSHUFB 
shuffle_rot16<>+0(SB), Y14, Y14 2543 VPADDD Y9, Y14, Y9 2544 VPXOR Y4, Y9, Y4 2545 VPSRLD $0x0c, Y4, Y8 2546 VPSLLD $0x14, Y4, Y4 2547 VPOR Y4, Y8, Y4 2548 VPADDD Y3, Y4, Y3 2549 VPADDD 192(SP), Y3, Y3 2550 VPXOR Y14, Y3, Y14 2551 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2552 VPADDD Y9, Y14, Y9 2553 VPXOR Y4, Y9, Y4 2554 VPSRLD $0x07, Y4, Y8 2555 VPSLLD $0x19, Y4, Y4 2556 VPOR Y4, Y8, Y4 2557 2558 // Round 5 2559 VPADDD Y0, Y4, Y0 2560 VPADDD 384(SP), Y0, Y0 2561 VPXOR Y12, Y0, Y12 2562 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2563 VMOVDQU 512(SP), Y8 2564 VPADDD Y8, Y12, Y8 2565 VPXOR Y4, Y8, Y4 2566 VMOVDQU Y8, 512(SP) 2567 VPSRLD $0x0c, Y4, Y8 2568 VPSLLD $0x14, Y4, Y4 2569 VPOR Y4, Y8, Y4 2570 VPADDD Y0, Y4, Y0 2571 VPADDD 416(SP), Y0, Y0 2572 VPXOR Y12, Y0, Y12 2573 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2574 VMOVDQU 512(SP), Y8 2575 VPADDD Y8, Y12, Y8 2576 VPXOR Y4, Y8, Y4 2577 VMOVDQU Y8, 512(SP) 2578 VPSRLD $0x07, Y4, Y8 2579 VPSLLD $0x19, Y4, Y4 2580 VPOR Y4, Y8, Y4 2581 VPADDD Y1, Y5, Y1 2582 VPADDD 288(SP), Y1, Y1 2583 VPXOR Y13, Y1, Y13 2584 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2585 VPADDD Y9, Y13, Y9 2586 VPXOR Y5, Y9, Y5 2587 VPSRLD $0x0c, Y5, Y8 2588 VPSLLD $0x14, Y5, Y5 2589 VPOR Y5, Y8, Y5 2590 VPADDD Y1, Y5, Y1 2591 VPADDD 352(SP), Y1, Y1 2592 VPXOR Y13, Y1, Y13 2593 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2594 VPADDD Y9, Y13, Y9 2595 VPXOR Y5, Y9, Y5 2596 VPSRLD $0x07, Y5, Y8 2597 VPSLLD $0x19, Y5, Y5 2598 VPOR Y5, Y8, Y5 2599 VPADDD Y2, Y6, Y2 2600 VPADDD 480(SP), Y2, Y2 2601 VPXOR Y14, Y2, Y14 2602 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2603 VPADDD Y10, Y14, Y10 2604 VPXOR Y6, Y10, Y6 2605 VPSRLD $0x0c, Y6, Y8 2606 VPSLLD $0x14, Y6, Y6 2607 VPOR Y6, Y8, Y6 2608 VPADDD Y2, Y6, Y2 2609 VPADDD 320(SP), Y2, Y2 2610 VPXOR Y14, Y2, Y14 2611 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2612 VPADDD Y10, Y14, Y10 2613 VPXOR Y6, Y10, Y6 2614 VPSRLD $0x07, Y6, Y8 2615 VPSLLD $0x19, Y6, Y6 2616 VPOR Y6, Y8, Y6 2617 VPADDD Y3, Y7, Y3 2618 VPADDD 448(SP), Y3, Y3 
2619 VPXOR Y15, Y3, Y15 2620 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2621 VPADDD Y11, Y15, Y11 2622 VPXOR Y7, Y11, Y7 2623 VPSRLD $0x0c, Y7, Y8 2624 VPSLLD $0x14, Y7, Y7 2625 VPOR Y7, Y8, Y7 2626 VPADDD Y3, Y7, Y3 2627 VPADDD 256(SP), Y3, Y3 2628 VPXOR Y15, Y3, Y15 2629 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2630 VPADDD Y11, Y15, Y11 2631 VPXOR Y7, Y11, Y7 2632 VPSRLD $0x07, Y7, Y8 2633 VPSLLD $0x19, Y7, Y7 2634 VPOR Y7, Y8, Y7 2635 VPADDD Y0, Y5, Y0 2636 VPADDD 224(SP), Y0, Y0 2637 VPXOR Y15, Y0, Y15 2638 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2639 VPADDD Y10, Y15, Y10 2640 VPXOR Y5, Y10, Y5 2641 VPSRLD $0x0c, Y5, Y8 2642 VPSLLD $0x14, Y5, Y5 2643 VPOR Y5, Y8, Y5 2644 VPADDD Y0, Y5, Y0 2645 VPADDD 64(SP), Y0, Y0 2646 VPXOR Y15, Y0, Y15 2647 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2648 VPADDD Y10, Y15, Y10 2649 VPXOR Y5, Y10, Y5 2650 VPSRLD $0x07, Y5, Y8 2651 VPSLLD $0x19, Y5, Y5 2652 VPOR Y5, Y8, Y5 2653 VPADDD Y1, Y6, Y1 2654 VPADDD 160(SP), Y1, Y1 2655 VPXOR Y12, Y1, Y12 2656 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2657 VPADDD Y11, Y12, Y11 2658 VPXOR Y6, Y11, Y6 2659 VPSRLD $0x0c, Y6, Y8 2660 VPSLLD $0x14, Y6, Y6 2661 VPOR Y6, Y8, Y6 2662 VPADDD Y1, Y6, Y1 2663 VPADDD 96(SP), Y1, Y1 2664 VPXOR Y12, Y1, Y12 2665 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2666 VPADDD Y11, Y12, Y11 2667 VPXOR Y6, Y11, Y6 2668 VPSRLD $0x07, Y6, Y8 2669 VPSLLD $0x19, Y6, Y6 2670 VPOR Y6, Y8, Y6 2671 VPADDD Y2, Y7, Y2 2672 VPADDD (SP), Y2, Y2 2673 VPXOR Y13, Y2, Y13 2674 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2675 VMOVDQU 512(SP), Y8 2676 VPADDD Y8, Y13, Y8 2677 VPXOR Y7, Y8, Y7 2678 VMOVDQU Y8, 512(SP) 2679 VPSRLD $0x0c, Y7, Y8 2680 VPSLLD $0x14, Y7, Y7 2681 VPOR Y7, Y8, Y7 2682 VPADDD Y2, Y7, Y2 2683 VPADDD 32(SP), Y2, Y2 2684 VPXOR Y13, Y2, Y13 2685 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2686 VMOVDQU 512(SP), Y8 2687 VPADDD Y8, Y13, Y8 2688 VPXOR Y7, Y8, Y7 2689 VMOVDQU Y8, 512(SP) 2690 VPSRLD $0x07, Y7, Y8 2691 VPSLLD $0x19, Y7, Y7 2692 VPOR Y7, Y8, Y7 2693 VPADDD Y3, Y4, Y3 2694 
VPADDD 192(SP), Y3, Y3 2695 VPXOR Y14, Y3, Y14 2696 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2697 VPADDD Y9, Y14, Y9 2698 VPXOR Y4, Y9, Y4 2699 VPSRLD $0x0c, Y4, Y8 2700 VPSLLD $0x14, Y4, Y4 2701 VPOR Y4, Y8, Y4 2702 VPADDD Y3, Y4, Y3 2703 VPADDD 128(SP), Y3, Y3 2704 VPXOR Y14, Y3, Y14 2705 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2706 VPADDD Y9, Y14, Y9 2707 VPXOR Y4, Y9, Y4 2708 VPSRLD $0x07, Y4, Y8 2709 VPSLLD $0x19, Y4, Y4 2710 VPOR Y4, Y8, Y4 2711 2712 // Round 6 2713 VPADDD Y0, Y4, Y0 2714 VPADDD 288(SP), Y0, Y0 2715 VPXOR Y12, Y0, Y12 2716 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2717 VMOVDQU 512(SP), Y8 2718 VPADDD Y8, Y12, Y8 2719 VPXOR Y4, Y8, Y4 2720 VMOVDQU Y8, 512(SP) 2721 VPSRLD $0x0c, Y4, Y8 2722 VPSLLD $0x14, Y4, Y4 2723 VPOR Y4, Y8, Y4 2724 VPADDD Y0, Y4, Y0 2725 VPADDD 448(SP), Y0, Y0 2726 VPXOR Y12, Y0, Y12 2727 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2728 VMOVDQU 512(SP), Y8 2729 VPADDD Y8, Y12, Y8 2730 VPXOR Y4, Y8, Y4 2731 VMOVDQU Y8, 512(SP) 2732 VPSRLD $0x07, Y4, Y8 2733 VPSLLD $0x19, Y4, Y4 2734 VPOR Y4, Y8, Y4 2735 VPADDD Y1, Y5, Y1 2736 VPADDD 352(SP), Y1, Y1 2737 VPXOR Y13, Y1, Y13 2738 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2739 VPADDD Y9, Y13, Y9 2740 VPXOR Y5, Y9, Y5 2741 VPSRLD $0x0c, Y5, Y8 2742 VPSLLD $0x14, Y5, Y5 2743 VPOR Y5, Y8, Y5 2744 VPADDD Y1, Y5, Y1 2745 VPADDD 160(SP), Y1, Y1 2746 VPXOR Y13, Y1, Y13 2747 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2748 VPADDD Y9, Y13, Y9 2749 VPXOR Y5, Y9, Y5 2750 VPSRLD $0x07, Y5, Y8 2751 VPSLLD $0x19, Y5, Y5 2752 VPOR Y5, Y8, Y5 2753 VPADDD Y2, Y6, Y2 2754 VPADDD 256(SP), Y2, Y2 2755 VPXOR Y14, Y2, Y14 2756 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2757 VPADDD Y10, Y14, Y10 2758 VPXOR Y6, Y10, Y6 2759 VPSRLD $0x0c, Y6, Y8 2760 VPSLLD $0x14, Y6, Y6 2761 VPOR Y6, Y8, Y6 2762 VPADDD Y2, Y6, Y2 2763 VPADDD 384(SP), Y2, Y2 2764 VPXOR Y14, Y2, Y14 2765 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2766 VPADDD Y10, Y14, Y10 2767 VPXOR Y6, Y10, Y6 2768 VPSRLD $0x07, Y6, Y8 2769 VPSLLD $0x19, Y6, Y6 2770 VPOR Y6, 
Y8, Y6 2771 VPADDD Y3, Y7, Y3 2772 VPADDD 480(SP), Y3, Y3 2773 VPXOR Y15, Y3, Y15 2774 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2775 VPADDD Y11, Y15, Y11 2776 VPXOR Y7, Y11, Y7 2777 VPSRLD $0x0c, Y7, Y8 2778 VPSLLD $0x14, Y7, Y7 2779 VPOR Y7, Y8, Y7 2780 VPADDD Y3, Y7, Y3 2781 VPADDD 32(SP), Y3, Y3 2782 VPXOR Y15, Y3, Y15 2783 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2784 VPADDD Y11, Y15, Y11 2785 VPXOR Y7, Y11, Y7 2786 VPSRLD $0x07, Y7, Y8 2787 VPSLLD $0x19, Y7, Y7 2788 VPOR Y7, Y8, Y7 2789 VPADDD Y0, Y5, Y0 2790 VPADDD 416(SP), Y0, Y0 2791 VPXOR Y15, Y0, Y15 2792 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2793 VPADDD Y10, Y15, Y10 2794 VPXOR Y5, Y10, Y5 2795 VPSRLD $0x0c, Y5, Y8 2796 VPSLLD $0x14, Y5, Y5 2797 VPOR Y5, Y8, Y5 2798 VPADDD Y0, Y5, Y0 2799 VPADDD 96(SP), Y0, Y0 2800 VPXOR Y15, Y0, Y15 2801 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2802 VPADDD Y10, Y15, Y10 2803 VPXOR Y5, Y10, Y5 2804 VPSRLD $0x07, Y5, Y8 2805 VPSLLD $0x19, Y5, Y5 2806 VPOR Y5, Y8, Y5 2807 VPADDD Y1, Y6, Y1 2808 VPADDD (SP), Y1, Y1 2809 VPXOR Y12, Y1, Y12 2810 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2811 VPADDD Y11, Y12, Y11 2812 VPXOR Y6, Y11, Y6 2813 VPSRLD $0x0c, Y6, Y8 2814 VPSLLD $0x14, Y6, Y6 2815 VPOR Y6, Y8, Y6 2816 VPADDD Y1, Y6, Y1 2817 VPADDD 320(SP), Y1, Y1 2818 VPXOR Y12, Y1, Y12 2819 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2820 VPADDD Y11, Y12, Y11 2821 VPXOR Y6, Y11, Y6 2822 VPSRLD $0x07, Y6, Y8 2823 VPSLLD $0x19, Y6, Y6 2824 VPOR Y6, Y8, Y6 2825 VPADDD Y2, Y7, Y2 2826 VPADDD 64(SP), Y2, Y2 2827 VPXOR Y13, Y2, Y13 2828 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2829 VMOVDQU 512(SP), Y8 2830 VPADDD Y8, Y13, Y8 2831 VPXOR Y7, Y8, Y7 2832 VMOVDQU Y8, 512(SP) 2833 VPSRLD $0x0c, Y7, Y8 2834 VPSLLD $0x14, Y7, Y7 2835 VPOR Y7, Y8, Y7 2836 VPADDD Y2, Y7, Y2 2837 VPADDD 192(SP), Y2, Y2 2838 VPXOR Y13, Y2, Y13 2839 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2840 VMOVDQU 512(SP), Y8 2841 VPADDD Y8, Y13, Y8 2842 VPXOR Y7, Y8, Y7 2843 VMOVDQU Y8, 512(SP) 2844 VPSRLD $0x07, Y7, Y8 2845 VPSLLD $0x19, 
Y7, Y7 2846 VPOR Y7, Y8, Y7 2847 VPADDD Y3, Y4, Y3 2848 VPADDD 128(SP), Y3, Y3 2849 VPXOR Y14, Y3, Y14 2850 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2851 VPADDD Y9, Y14, Y9 2852 VPXOR Y4, Y9, Y4 2853 VPSRLD $0x0c, Y4, Y8 2854 VPSLLD $0x14, Y4, Y4 2855 VPOR Y4, Y8, Y4 2856 VPADDD Y3, Y4, Y3 2857 VPADDD 224(SP), Y3, Y3 2858 VPXOR Y14, Y3, Y14 2859 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2860 VPADDD Y9, Y14, Y9 2861 VPXOR Y4, Y9, Y4 2862 VPSRLD $0x07, Y4, Y8 2863 VPSLLD $0x19, Y4, Y4 2864 VPOR Y4, Y8, Y4 2865 2866 // Round 7 2867 VPADDD Y0, Y4, Y0 2868 VPADDD 352(SP), Y0, Y0 2869 VPXOR Y12, Y0, Y12 2870 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2871 VMOVDQU 512(SP), Y8 2872 VPADDD Y8, Y12, Y8 2873 VPXOR Y4, Y8, Y4 2874 VMOVDQU Y8, 512(SP) 2875 VPSRLD $0x0c, Y4, Y8 2876 VPSLLD $0x14, Y4, Y4 2877 VPOR Y4, Y8, Y4 2878 VPADDD Y0, Y4, Y0 2879 VPADDD 480(SP), Y0, Y0 2880 VPXOR Y12, Y0, Y12 2881 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2882 VMOVDQU 512(SP), Y8 2883 VPADDD Y8, Y12, Y8 2884 VPXOR Y4, Y8, Y4 2885 VMOVDQU Y8, 512(SP) 2886 VPSRLD $0x07, Y4, Y8 2887 VPSLLD $0x19, Y4, Y4 2888 VPOR Y4, Y8, Y4 2889 VPADDD Y1, Y5, Y1 2890 VPADDD 160(SP), Y1, Y1 2891 VPXOR Y13, Y1, Y13 2892 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2893 VPADDD Y9, Y13, Y9 2894 VPXOR Y5, Y9, Y5 2895 VPSRLD $0x0c, Y5, Y8 2896 VPSLLD $0x14, Y5, Y5 2897 VPOR Y5, Y8, Y5 2898 VPADDD Y1, Y5, Y1 2899 VPADDD (SP), Y1, Y1 2900 VPXOR Y13, Y1, Y13 2901 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2902 VPADDD Y9, Y13, Y9 2903 VPXOR Y5, Y9, Y5 2904 VPSRLD $0x07, Y5, Y8 2905 VPSLLD $0x19, Y5, Y5 2906 VPOR Y5, Y8, Y5 2907 VPADDD Y2, Y6, Y2 2908 VPADDD 32(SP), Y2, Y2 2909 VPXOR Y14, Y2, Y14 2910 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 2911 VPADDD Y10, Y14, Y10 2912 VPXOR Y6, Y10, Y6 2913 VPSRLD $0x0c, Y6, Y8 2914 VPSLLD $0x14, Y6, Y6 2915 VPOR Y6, Y8, Y6 2916 VPADDD Y2, Y6, Y2 2917 VPADDD 288(SP), Y2, Y2 2918 VPXOR Y14, Y2, Y14 2919 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 2920 VPADDD Y10, Y14, Y10 2921 VPXOR Y6, Y10, Y6 2922 VPSRLD 
$0x07, Y6, Y8 2923 VPSLLD $0x19, Y6, Y6 2924 VPOR Y6, Y8, Y6 2925 VPADDD Y3, Y7, Y3 2926 VPADDD 256(SP), Y3, Y3 2927 VPXOR Y15, Y3, Y15 2928 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2929 VPADDD Y11, Y15, Y11 2930 VPXOR Y7, Y11, Y7 2931 VPSRLD $0x0c, Y7, Y8 2932 VPSLLD $0x14, Y7, Y7 2933 VPOR Y7, Y8, Y7 2934 VPADDD Y3, Y7, Y3 2935 VPADDD 192(SP), Y3, Y3 2936 VPXOR Y15, Y3, Y15 2937 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2938 VPADDD Y11, Y15, Y11 2939 VPXOR Y7, Y11, Y7 2940 VPSRLD $0x07, Y7, Y8 2941 VPSLLD $0x19, Y7, Y7 2942 VPOR Y7, Y8, Y7 2943 VPADDD Y0, Y5, Y0 2944 VPADDD 448(SP), Y0, Y0 2945 VPXOR Y15, Y0, Y15 2946 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 2947 VPADDD Y10, Y15, Y10 2948 VPXOR Y5, Y10, Y5 2949 VPSRLD $0x0c, Y5, Y8 2950 VPSLLD $0x14, Y5, Y5 2951 VPOR Y5, Y8, Y5 2952 VPADDD Y0, Y5, Y0 2953 VPADDD 320(SP), Y0, Y0 2954 VPXOR Y15, Y0, Y15 2955 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 2956 VPADDD Y10, Y15, Y10 2957 VPXOR Y5, Y10, Y5 2958 VPSRLD $0x07, Y5, Y8 2959 VPSLLD $0x19, Y5, Y5 2960 VPOR Y5, Y8, Y5 2961 VPADDD Y1, Y6, Y1 2962 VPADDD 64(SP), Y1, Y1 2963 VPXOR Y12, Y1, Y12 2964 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 2965 VPADDD Y11, Y12, Y11 2966 VPXOR Y6, Y11, Y6 2967 VPSRLD $0x0c, Y6, Y8 2968 VPSLLD $0x14, Y6, Y6 2969 VPOR Y6, Y8, Y6 2970 VPADDD Y1, Y6, Y1 2971 VPADDD 384(SP), Y1, Y1 2972 VPXOR Y12, Y1, Y12 2973 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 2974 VPADDD Y11, Y12, Y11 2975 VPXOR Y6, Y11, Y6 2976 VPSRLD $0x07, Y6, Y8 2977 VPSLLD $0x19, Y6, Y6 2978 VPOR Y6, Y8, Y6 2979 VPADDD Y2, Y7, Y2 2980 VPADDD 96(SP), Y2, Y2 2981 VPXOR Y13, Y2, Y13 2982 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 2983 VMOVDQU 512(SP), Y8 2984 VPADDD Y8, Y13, Y8 2985 VPXOR Y7, Y8, Y7 2986 VMOVDQU Y8, 512(SP) 2987 VPSRLD $0x0c, Y7, Y8 2988 VPSLLD $0x14, Y7, Y7 2989 VPOR Y7, Y8, Y7 2990 VPADDD Y2, Y7, Y2 2991 VPADDD 128(SP), Y2, Y2 2992 VPXOR Y13, Y2, Y13 2993 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 2994 VMOVDQU 512(SP), Y8 2995 VPADDD Y8, Y13, Y8 2996 VPXOR Y7, Y8, Y7 2997 VMOVDQU 
Y8, 512(SP) 2998 VPSRLD $0x07, Y7, Y8 2999 VPSLLD $0x19, Y7, Y7 3000 VPOR Y7, Y8, Y7 3001 VPADDD Y3, Y4, Y3 3002 VPADDD 224(SP), Y3, Y3 3003 VPXOR Y14, Y3, Y14 3004 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3005 VPADDD Y9, Y14, Y9 3006 VPXOR Y4, Y9, Y4 3007 VPSRLD $0x0c, Y4, Y8 3008 VPSLLD $0x14, Y4, Y4 3009 VPOR Y4, Y8, Y4 3010 VPADDD Y3, Y4, Y3 3011 VPADDD 416(SP), Y3, Y3 3012 VPXOR Y14, Y3, Y14 3013 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3014 VPADDD Y9, Y14, Y9 3015 VPXOR Y4, Y9, Y4 3016 VPSRLD $0x07, Y4, Y8 3017 VPSLLD $0x19, Y4, Y4 3018 VPOR Y4, Y8, Y4 3019 VMOVDQU 512(SP), Y8 3020 3021 // Finalize CVs 3022 VMOVDQU Y8, 256(SP) 3023 VMOVDQU Y9, 288(SP) 3024 VMOVDQU Y10, 320(SP) 3025 VMOVDQU Y11, 352(SP) 3026 VMOVDQU Y12, 384(SP) 3027 VMOVDQU Y13, 416(SP) 3028 VMOVDQU Y14, 448(SP) 3029 VMOVDQU Y15, 480(SP) 3030 VPXOR Y0, Y8, Y0 3031 VPXOR Y1, Y9, Y1 3032 VPXOR Y2, Y10, Y2 3033 VPXOR Y3, Y11, Y3 3034 VPXOR Y4, Y12, Y4 3035 VPXOR Y5, Y13, Y5 3036 VPXOR Y6, Y14, Y6 3037 VPXOR Y7, Y15, Y7 3038 VPUNPCKLDQ Y1, Y0, Y8 3039 VPUNPCKHDQ Y1, Y0, Y9 3040 VPUNPCKLDQ Y3, Y2, Y10 3041 VPUNPCKHDQ Y3, Y2, Y11 3042 VPUNPCKLDQ Y5, Y4, Y12 3043 VPUNPCKHDQ Y5, Y4, Y13 3044 VPUNPCKLDQ Y7, Y6, Y14 3045 VPUNPCKHDQ Y7, Y6, Y15 3046 VPUNPCKLQDQ Y10, Y8, Y0 3047 VPUNPCKHQDQ Y10, Y8, Y1 3048 VPUNPCKLQDQ Y11, Y9, Y2 3049 VPUNPCKHQDQ Y11, Y9, Y3 3050 VPUNPCKLQDQ Y14, Y12, Y4 3051 VPUNPCKHQDQ Y14, Y12, Y5 3052 VPUNPCKLQDQ Y15, Y13, Y6 3053 VPUNPCKHQDQ Y15, Y13, Y7 3054 VPERM2I128 $0x20, Y4, Y0, Y8 3055 VPERM2I128 $0x31, Y4, Y0, Y12 3056 VPERM2I128 $0x20, Y5, Y1, Y9 3057 VPERM2I128 $0x31, Y5, Y1, Y13 3058 VPERM2I128 $0x20, Y6, Y2, Y10 3059 VPERM2I128 $0x31, Y6, Y2, Y14 3060 VPERM2I128 $0x20, Y7, Y3, Y11 3061 VPERM2I128 $0x31, Y7, Y3, Y15 3062 VMOVDQU Y8, (AX) 3063 VMOVDQU Y9, 64(AX) 3064 VMOVDQU Y10, 128(AX) 3065 VMOVDQU Y11, 192(AX) 3066 VMOVDQU Y12, 256(AX) 3067 VMOVDQU Y13, 320(AX) 3068 VMOVDQU Y14, 384(AX) 3069 VMOVDQU Y15, 448(AX) 3070 VMOVDQU 256(SP), Y8 3071 VMOVDQU 288(SP), Y9 3072 
VMOVDQU 320(SP), Y10 3073 VMOVDQU 352(SP), Y11 3074 VMOVDQU 384(SP), Y12 3075 VMOVDQU 416(SP), Y13 3076 VMOVDQU 448(SP), Y14 3077 VMOVDQU 480(SP), Y15 3078 VPBROADCASTD (DX), Y0 3079 VPXOR Y0, Y8, Y8 3080 VPBROADCASTD 4(DX), Y0 3081 VPXOR Y0, Y9, Y9 3082 VPBROADCASTD 8(DX), Y0 3083 VPXOR Y0, Y10, Y10 3084 VPBROADCASTD 12(DX), Y0 3085 VPXOR Y0, Y11, Y11 3086 VPBROADCASTD 16(DX), Y0 3087 VPXOR Y0, Y12, Y12 3088 VPBROADCASTD 20(DX), Y0 3089 VPXOR Y0, Y13, Y13 3090 VPBROADCASTD 24(DX), Y0 3091 VPXOR Y0, Y14, Y14 3092 VPBROADCASTD 28(DX), Y0 3093 VPXOR Y0, Y15, Y15 3094 VPUNPCKLDQ Y9, Y8, Y0 3095 VPUNPCKHDQ Y9, Y8, Y1 3096 VPUNPCKLDQ Y11, Y10, Y2 3097 VPUNPCKHDQ Y11, Y10, Y3 3098 VPUNPCKLDQ Y13, Y12, Y4 3099 VPUNPCKHDQ Y13, Y12, Y5 3100 VPUNPCKLDQ Y15, Y14, Y6 3101 VPUNPCKHDQ Y15, Y14, Y7 3102 VPUNPCKLQDQ Y2, Y0, Y8 3103 VPUNPCKHQDQ Y2, Y0, Y9 3104 VPUNPCKLQDQ Y3, Y1, Y10 3105 VPUNPCKHQDQ Y3, Y1, Y11 3106 VPUNPCKLQDQ Y6, Y4, Y12 3107 VPUNPCKHQDQ Y6, Y4, Y13 3108 VPUNPCKLQDQ Y7, Y5, Y14 3109 VPUNPCKHQDQ Y7, Y5, Y15 3110 VPERM2I128 $0x20, Y12, Y8, Y0 3111 VPERM2I128 $0x31, Y12, Y8, Y4 3112 VPERM2I128 $0x20, Y13, Y9, Y1 3113 VPERM2I128 $0x31, Y13, Y9, Y5 3114 VPERM2I128 $0x20, Y14, Y10, Y2 3115 VPERM2I128 $0x31, Y14, Y10, Y6 3116 VPERM2I128 $0x20, Y15, Y11, Y3 3117 VPERM2I128 $0x31, Y15, Y11, Y7 3118 VMOVDQU Y0, 32(AX) 3119 VMOVDQU Y1, 96(AX) 3120 VMOVDQU Y2, 160(AX) 3121 VMOVDQU Y3, 224(AX) 3122 VMOVDQU Y4, 288(AX) 3123 VMOVDQU Y5, 352(AX) 3124 VMOVDQU Y6, 416(AX) 3125 VMOVDQU Y7, 480(AX) 3126 RET 3127 3128 // func compressChunksAVX2(cvs *[8][8]uint32, buf *[8192]byte, key *[8]uint32, counter uint64, flags uint32) 3129 // Requires: AVX, AVX2 3130 TEXT ·compressChunksAVX2(SB), NOSPLIT, $672-36 3131 MOVQ cvs+0(FP), AX 3132 MOVQ buf+8(FP), CX 3133 MOVQ key+16(FP), DX 3134 3135 // Load key 3136 VPBROADCASTD (DX), Y0 3137 VPBROADCASTD 4(DX), Y1 3138 VPBROADCASTD 8(DX), Y2 3139 VPBROADCASTD 12(DX), Y3 3140 VPBROADCASTD 16(DX), Y4 3141 VPBROADCASTD 20(DX), Y5 3142 VPBROADCASTD 
24(DX), Y6 3143 VPBROADCASTD 28(DX), Y7 3144 3145 // Initialize counter 3146 VPBROADCASTQ counter+24(FP), Y12 3147 VPBROADCASTQ counter+24(FP), Y13 3148 VPADDQ seq64<>+0(SB), Y12, Y12 3149 VPADDQ seq64<>+32(SB), Y13, Y13 3150 VPUNPCKLDQ Y13, Y12, Y14 3151 VPUNPCKHDQ Y13, Y12, Y15 3152 VPUNPCKLDQ Y15, Y14, Y12 3153 VPUNPCKHDQ Y15, Y14, Y13 3154 VPERMQ $0xd8, Y12, Y12 3155 VPERMQ $0xd8, Y13, Y13 3156 VMOVDQU Y12, 512(SP) 3157 VMOVDQU Y13, 544(SP) 3158 3159 // Initialize flags 3160 VPBROADCASTD flags+32(FP), Y14 3161 VMOVDQU Y14, 576(SP) 3162 VMOVDQU Y14, 608(SP) 3163 ORL $0x01, 576(SP) 3164 ORL $0x02, 636(SP) 3165 3166 // Loop index 3167 XORQ DX, DX 3168 3169 loop: 3170 // Load transposed block 3171 VMOVDQU seq<>+0(SB), Y9 3172 VPSLLD $0x0a, Y9, Y9 3173 VPCMPEQD Y8, Y8, Y8 3174 VPGATHERDD Y8, (CX)(Y9*1), Y10 3175 VMOVDQU Y10, (SP) 3176 VPCMPEQD Y8, Y8, Y8 3177 VPGATHERDD Y8, 4(CX)(Y9*1), Y10 3178 VMOVDQU Y10, 32(SP) 3179 VPCMPEQD Y8, Y8, Y8 3180 VPGATHERDD Y8, 8(CX)(Y9*1), Y10 3181 VMOVDQU Y10, 64(SP) 3182 VPCMPEQD Y8, Y8, Y8 3183 VPGATHERDD Y8, 12(CX)(Y9*1), Y10 3184 VMOVDQU Y10, 96(SP) 3185 VPCMPEQD Y8, Y8, Y8 3186 VPGATHERDD Y8, 16(CX)(Y9*1), Y10 3187 VMOVDQU Y10, 128(SP) 3188 VPCMPEQD Y8, Y8, Y8 3189 VPGATHERDD Y8, 20(CX)(Y9*1), Y10 3190 VMOVDQU Y10, 160(SP) 3191 VPCMPEQD Y8, Y8, Y8 3192 VPGATHERDD Y8, 24(CX)(Y9*1), Y10 3193 VMOVDQU Y10, 192(SP) 3194 VPCMPEQD Y8, Y8, Y8 3195 VPGATHERDD Y8, 28(CX)(Y9*1), Y10 3196 VMOVDQU Y10, 224(SP) 3197 VPCMPEQD Y8, Y8, Y8 3198 VPGATHERDD Y8, 32(CX)(Y9*1), Y10 3199 VMOVDQU Y10, 256(SP) 3200 VPCMPEQD Y8, Y8, Y8 3201 VPGATHERDD Y8, 36(CX)(Y9*1), Y10 3202 VMOVDQU Y10, 288(SP) 3203 VPCMPEQD Y8, Y8, Y8 3204 VPGATHERDD Y8, 40(CX)(Y9*1), Y10 3205 VMOVDQU Y10, 320(SP) 3206 VPCMPEQD Y8, Y8, Y8 3207 VPGATHERDD Y8, 44(CX)(Y9*1), Y10 3208 VMOVDQU Y10, 352(SP) 3209 VPCMPEQD Y8, Y8, Y8 3210 VPGATHERDD Y8, 48(CX)(Y9*1), Y10 3211 VMOVDQU Y10, 384(SP) 3212 VPCMPEQD Y8, Y8, Y8 3213 VPGATHERDD Y8, 52(CX)(Y9*1), Y10 3214 VMOVDQU Y10, 416(SP) 3215 
VPCMPEQD Y8, Y8, Y8 3216 VPGATHERDD Y8, 56(CX)(Y9*1), Y10 3217 VMOVDQU Y10, 448(SP) 3218 VPCMPEQD Y8, Y8, Y8 3219 VPGATHERDD Y8, 60(CX)(Y9*1), Y10 3220 VMOVDQU Y10, 480(SP) 3221 ADDQ $0x40, CX 3222 3223 // Reload state vectors (other than CVs) 3224 VPBROADCASTD iv<>+0(SB), Y8 3225 VPBROADCASTD iv<>+4(SB), Y9 3226 VPBROADCASTD iv<>+8(SB), Y10 3227 VPBROADCASTD iv<>+12(SB), Y11 3228 VMOVDQU 512(SP), Y12 3229 VMOVDQU 544(SP), Y13 3230 VPBROADCASTD seq<>+4(SB), Y14 3231 VPSLLD $0x06, Y14, Y14 3232 VPBROADCASTD 576(SP)(DX*4), Y15 3233 VMOVDQU Y8, 640(SP) 3234 3235 // Round 1 3236 VPADDD Y0, Y4, Y0 3237 VPADDD (SP), Y0, Y0 3238 VPXOR Y12, Y0, Y12 3239 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3240 VMOVDQU 640(SP), Y8 3241 VPADDD Y8, Y12, Y8 3242 VPXOR Y4, Y8, Y4 3243 VMOVDQU Y8, 640(SP) 3244 VPSRLD $0x0c, Y4, Y8 3245 VPSLLD $0x14, Y4, Y4 3246 VPOR Y4, Y8, Y4 3247 VPADDD Y0, Y4, Y0 3248 VPADDD 32(SP), Y0, Y0 3249 VPXOR Y12, Y0, Y12 3250 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3251 VMOVDQU 640(SP), Y8 3252 VPADDD Y8, Y12, Y8 3253 VPXOR Y4, Y8, Y4 3254 VMOVDQU Y8, 640(SP) 3255 VPSRLD $0x07, Y4, Y8 3256 VPSLLD $0x19, Y4, Y4 3257 VPOR Y4, Y8, Y4 3258 VPADDD Y1, Y5, Y1 3259 VPADDD 64(SP), Y1, Y1 3260 VPXOR Y13, Y1, Y13 3261 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3262 VPADDD Y9, Y13, Y9 3263 VPXOR Y5, Y9, Y5 3264 VPSRLD $0x0c, Y5, Y8 3265 VPSLLD $0x14, Y5, Y5 3266 VPOR Y5, Y8, Y5 3267 VPADDD Y1, Y5, Y1 3268 VPADDD 96(SP), Y1, Y1 3269 VPXOR Y13, Y1, Y13 3270 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3271 VPADDD Y9, Y13, Y9 3272 VPXOR Y5, Y9, Y5 3273 VPSRLD $0x07, Y5, Y8 3274 VPSLLD $0x19, Y5, Y5 3275 VPOR Y5, Y8, Y5 3276 VPADDD Y2, Y6, Y2 3277 VPADDD 128(SP), Y2, Y2 3278 VPXOR Y14, Y2, Y14 3279 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3280 VPADDD Y10, Y14, Y10 3281 VPXOR Y6, Y10, Y6 3282 VPSRLD $0x0c, Y6, Y8 3283 VPSLLD $0x14, Y6, Y6 3284 VPOR Y6, Y8, Y6 3285 VPADDD Y2, Y6, Y2 3286 VPADDD 160(SP), Y2, Y2 3287 VPXOR Y14, Y2, Y14 3288 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3289 VPADDD 
Y10, Y14, Y10 3290 VPXOR Y6, Y10, Y6 3291 VPSRLD $0x07, Y6, Y8 3292 VPSLLD $0x19, Y6, Y6 3293 VPOR Y6, Y8, Y6 3294 VPADDD Y3, Y7, Y3 3295 VPADDD 192(SP), Y3, Y3 3296 VPXOR Y15, Y3, Y15 3297 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3298 VPADDD Y11, Y15, Y11 3299 VPXOR Y7, Y11, Y7 3300 VPSRLD $0x0c, Y7, Y8 3301 VPSLLD $0x14, Y7, Y7 3302 VPOR Y7, Y8, Y7 3303 VPADDD Y3, Y7, Y3 3304 VPADDD 224(SP), Y3, Y3 3305 VPXOR Y15, Y3, Y15 3306 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3307 VPADDD Y11, Y15, Y11 3308 VPXOR Y7, Y11, Y7 3309 VPSRLD $0x07, Y7, Y8 3310 VPSLLD $0x19, Y7, Y7 3311 VPOR Y7, Y8, Y7 3312 VPADDD Y0, Y5, Y0 3313 VPADDD 256(SP), Y0, Y0 3314 VPXOR Y15, Y0, Y15 3315 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3316 VPADDD Y10, Y15, Y10 3317 VPXOR Y5, Y10, Y5 3318 VPSRLD $0x0c, Y5, Y8 3319 VPSLLD $0x14, Y5, Y5 3320 VPOR Y5, Y8, Y5 3321 VPADDD Y0, Y5, Y0 3322 VPADDD 288(SP), Y0, Y0 3323 VPXOR Y15, Y0, Y15 3324 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3325 VPADDD Y10, Y15, Y10 3326 VPXOR Y5, Y10, Y5 3327 VPSRLD $0x07, Y5, Y8 3328 VPSLLD $0x19, Y5, Y5 3329 VPOR Y5, Y8, Y5 3330 VPADDD Y1, Y6, Y1 3331 VPADDD 320(SP), Y1, Y1 3332 VPXOR Y12, Y1, Y12 3333 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3334 VPADDD Y11, Y12, Y11 3335 VPXOR Y6, Y11, Y6 3336 VPSRLD $0x0c, Y6, Y8 3337 VPSLLD $0x14, Y6, Y6 3338 VPOR Y6, Y8, Y6 3339 VPADDD Y1, Y6, Y1 3340 VPADDD 352(SP), Y1, Y1 3341 VPXOR Y12, Y1, Y12 3342 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3343 VPADDD Y11, Y12, Y11 3344 VPXOR Y6, Y11, Y6 3345 VPSRLD $0x07, Y6, Y8 3346 VPSLLD $0x19, Y6, Y6 3347 VPOR Y6, Y8, Y6 3348 VPADDD Y2, Y7, Y2 3349 VPADDD 384(SP), Y2, Y2 3350 VPXOR Y13, Y2, Y13 3351 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3352 VMOVDQU 640(SP), Y8 3353 VPADDD Y8, Y13, Y8 3354 VPXOR Y7, Y8, Y7 3355 VMOVDQU Y8, 640(SP) 3356 VPSRLD $0x0c, Y7, Y8 3357 VPSLLD $0x14, Y7, Y7 3358 VPOR Y7, Y8, Y7 3359 VPADDD Y2, Y7, Y2 3360 VPADDD 416(SP), Y2, Y2 3361 VPXOR Y13, Y2, Y13 3362 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3363 VMOVDQU 640(SP), Y8 3364 
VPADDD Y8, Y13, Y8 3365 VPXOR Y7, Y8, Y7 3366 VMOVDQU Y8, 640(SP) 3367 VPSRLD $0x07, Y7, Y8 3368 VPSLLD $0x19, Y7, Y7 3369 VPOR Y7, Y8, Y7 3370 VPADDD Y3, Y4, Y3 3371 VPADDD 448(SP), Y3, Y3 3372 VPXOR Y14, Y3, Y14 3373 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3374 VPADDD Y9, Y14, Y9 3375 VPXOR Y4, Y9, Y4 3376 VPSRLD $0x0c, Y4, Y8 3377 VPSLLD $0x14, Y4, Y4 3378 VPOR Y4, Y8, Y4 3379 VPADDD Y3, Y4, Y3 3380 VPADDD 480(SP), Y3, Y3 3381 VPXOR Y14, Y3, Y14 3382 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3383 VPADDD Y9, Y14, Y9 3384 VPXOR Y4, Y9, Y4 3385 VPSRLD $0x07, Y4, Y8 3386 VPSLLD $0x19, Y4, Y4 3387 VPOR Y4, Y8, Y4 3388 3389 // Round 2 3390 VPADDD Y0, Y4, Y0 3391 VPADDD 64(SP), Y0, Y0 3392 VPXOR Y12, Y0, Y12 3393 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3394 VMOVDQU 640(SP), Y8 3395 VPADDD Y8, Y12, Y8 3396 VPXOR Y4, Y8, Y4 3397 VMOVDQU Y8, 640(SP) 3398 VPSRLD $0x0c, Y4, Y8 3399 VPSLLD $0x14, Y4, Y4 3400 VPOR Y4, Y8, Y4 3401 VPADDD Y0, Y4, Y0 3402 VPADDD 192(SP), Y0, Y0 3403 VPXOR Y12, Y0, Y12 3404 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3405 VMOVDQU 640(SP), Y8 3406 VPADDD Y8, Y12, Y8 3407 VPXOR Y4, Y8, Y4 3408 VMOVDQU Y8, 640(SP) 3409 VPSRLD $0x07, Y4, Y8 3410 VPSLLD $0x19, Y4, Y4 3411 VPOR Y4, Y8, Y4 3412 VPADDD Y1, Y5, Y1 3413 VPADDD 96(SP), Y1, Y1 3414 VPXOR Y13, Y1, Y13 3415 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3416 VPADDD Y9, Y13, Y9 3417 VPXOR Y5, Y9, Y5 3418 VPSRLD $0x0c, Y5, Y8 3419 VPSLLD $0x14, Y5, Y5 3420 VPOR Y5, Y8, Y5 3421 VPADDD Y1, Y5, Y1 3422 VPADDD 320(SP), Y1, Y1 3423 VPXOR Y13, Y1, Y13 3424 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3425 VPADDD Y9, Y13, Y9 3426 VPXOR Y5, Y9, Y5 3427 VPSRLD $0x07, Y5, Y8 3428 VPSLLD $0x19, Y5, Y5 3429 VPOR Y5, Y8, Y5 3430 VPADDD Y2, Y6, Y2 3431 VPADDD 224(SP), Y2, Y2 3432 VPXOR Y14, Y2, Y14 3433 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3434 VPADDD Y10, Y14, Y10 3435 VPXOR Y6, Y10, Y6 3436 VPSRLD $0x0c, Y6, Y8 3437 VPSLLD $0x14, Y6, Y6 3438 VPOR Y6, Y8, Y6 3439 VPADDD Y2, Y6, Y2 3440 VPADDD (SP), Y2, Y2 3441 VPXOR Y14, Y2, 
Y14 3442 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3443 VPADDD Y10, Y14, Y10 3444 VPXOR Y6, Y10, Y6 3445 VPSRLD $0x07, Y6, Y8 3446 VPSLLD $0x19, Y6, Y6 3447 VPOR Y6, Y8, Y6 3448 VPADDD Y3, Y7, Y3 3449 VPADDD 128(SP), Y3, Y3 3450 VPXOR Y15, Y3, Y15 3451 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3452 VPADDD Y11, Y15, Y11 3453 VPXOR Y7, Y11, Y7 3454 VPSRLD $0x0c, Y7, Y8 3455 VPSLLD $0x14, Y7, Y7 3456 VPOR Y7, Y8, Y7 3457 VPADDD Y3, Y7, Y3 3458 VPADDD 416(SP), Y3, Y3 3459 VPXOR Y15, Y3, Y15 3460 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3461 VPADDD Y11, Y15, Y11 3462 VPXOR Y7, Y11, Y7 3463 VPSRLD $0x07, Y7, Y8 3464 VPSLLD $0x19, Y7, Y7 3465 VPOR Y7, Y8, Y7 3466 VPADDD Y0, Y5, Y0 3467 VPADDD 32(SP), Y0, Y0 3468 VPXOR Y15, Y0, Y15 3469 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3470 VPADDD Y10, Y15, Y10 3471 VPXOR Y5, Y10, Y5 3472 VPSRLD $0x0c, Y5, Y8 3473 VPSLLD $0x14, Y5, Y5 3474 VPOR Y5, Y8, Y5 3475 VPADDD Y0, Y5, Y0 3476 VPADDD 352(SP), Y0, Y0 3477 VPXOR Y15, Y0, Y15 3478 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3479 VPADDD Y10, Y15, Y10 3480 VPXOR Y5, Y10, Y5 3481 VPSRLD $0x07, Y5, Y8 3482 VPSLLD $0x19, Y5, Y5 3483 VPOR Y5, Y8, Y5 3484 VPADDD Y1, Y6, Y1 3485 VPADDD 384(SP), Y1, Y1 3486 VPXOR Y12, Y1, Y12 3487 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3488 VPADDD Y11, Y12, Y11 3489 VPXOR Y6, Y11, Y6 3490 VPSRLD $0x0c, Y6, Y8 3491 VPSLLD $0x14, Y6, Y6 3492 VPOR Y6, Y8, Y6 3493 VPADDD Y1, Y6, Y1 3494 VPADDD 160(SP), Y1, Y1 3495 VPXOR Y12, Y1, Y12 3496 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3497 VPADDD Y11, Y12, Y11 3498 VPXOR Y6, Y11, Y6 3499 VPSRLD $0x07, Y6, Y8 3500 VPSLLD $0x19, Y6, Y6 3501 VPOR Y6, Y8, Y6 3502 VPADDD Y2, Y7, Y2 3503 VPADDD 288(SP), Y2, Y2 3504 VPXOR Y13, Y2, Y13 3505 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3506 VMOVDQU 640(SP), Y8 3507 VPADDD Y8, Y13, Y8 3508 VPXOR Y7, Y8, Y7 3509 VMOVDQU Y8, 640(SP) 3510 VPSRLD $0x0c, Y7, Y8 3511 VPSLLD $0x14, Y7, Y7 3512 VPOR Y7, Y8, Y7 3513 VPADDD Y2, Y7, Y2 3514 VPADDD 448(SP), Y2, Y2 3515 VPXOR Y13, Y2, Y13 3516 VPSHUFB 
shuffle_rot8<>+0(SB), Y13, Y13 3517 VMOVDQU 640(SP), Y8 3518 VPADDD Y8, Y13, Y8 3519 VPXOR Y7, Y8, Y7 3520 VMOVDQU Y8, 640(SP) 3521 VPSRLD $0x07, Y7, Y8 3522 VPSLLD $0x19, Y7, Y7 3523 VPOR Y7, Y8, Y7 3524 VPADDD Y3, Y4, Y3 3525 VPADDD 480(SP), Y3, Y3 3526 VPXOR Y14, Y3, Y14 3527 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3528 VPADDD Y9, Y14, Y9 3529 VPXOR Y4, Y9, Y4 3530 VPSRLD $0x0c, Y4, Y8 3531 VPSLLD $0x14, Y4, Y4 3532 VPOR Y4, Y8, Y4 3533 VPADDD Y3, Y4, Y3 3534 VPADDD 256(SP), Y3, Y3 3535 VPXOR Y14, Y3, Y14 3536 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3537 VPADDD Y9, Y14, Y9 3538 VPXOR Y4, Y9, Y4 3539 VPSRLD $0x07, Y4, Y8 3540 VPSLLD $0x19, Y4, Y4 3541 VPOR Y4, Y8, Y4 3542 3543 // Round 3 3544 VPADDD Y0, Y4, Y0 3545 VPADDD 96(SP), Y0, Y0 3546 VPXOR Y12, Y0, Y12 3547 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3548 VMOVDQU 640(SP), Y8 3549 VPADDD Y8, Y12, Y8 3550 VPXOR Y4, Y8, Y4 3551 VMOVDQU Y8, 640(SP) 3552 VPSRLD $0x0c, Y4, Y8 3553 VPSLLD $0x14, Y4, Y4 3554 VPOR Y4, Y8, Y4 3555 VPADDD Y0, Y4, Y0 3556 VPADDD 128(SP), Y0, Y0 3557 VPXOR Y12, Y0, Y12 3558 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3559 VMOVDQU 640(SP), Y8 3560 VPADDD Y8, Y12, Y8 3561 VPXOR Y4, Y8, Y4 3562 VMOVDQU Y8, 640(SP) 3563 VPSRLD $0x07, Y4, Y8 3564 VPSLLD $0x19, Y4, Y4 3565 VPOR Y4, Y8, Y4 3566 VPADDD Y1, Y5, Y1 3567 VPADDD 320(SP), Y1, Y1 3568 VPXOR Y13, Y1, Y13 3569 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3570 VPADDD Y9, Y13, Y9 3571 VPXOR Y5, Y9, Y5 3572 VPSRLD $0x0c, Y5, Y8 3573 VPSLLD $0x14, Y5, Y5 3574 VPOR Y5, Y8, Y5 3575 VPADDD Y1, Y5, Y1 3576 VPADDD 384(SP), Y1, Y1 3577 VPXOR Y13, Y1, Y13 3578 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3579 VPADDD Y9, Y13, Y9 3580 VPXOR Y5, Y9, Y5 3581 VPSRLD $0x07, Y5, Y8 3582 VPSLLD $0x19, Y5, Y5 3583 VPOR Y5, Y8, Y5 3584 VPADDD Y2, Y6, Y2 3585 VPADDD 416(SP), Y2, Y2 3586 VPXOR Y14, Y2, Y14 3587 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3588 VPADDD Y10, Y14, Y10 3589 VPXOR Y6, Y10, Y6 3590 VPSRLD $0x0c, Y6, Y8 3591 VPSLLD $0x14, Y6, Y6 3592 VPOR Y6, Y8, Y6 3593 
VPADDD Y2, Y6, Y2 3594 VPADDD 64(SP), Y2, Y2 3595 VPXOR Y14, Y2, Y14 3596 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3597 VPADDD Y10, Y14, Y10 3598 VPXOR Y6, Y10, Y6 3599 VPSRLD $0x07, Y6, Y8 3600 VPSLLD $0x19, Y6, Y6 3601 VPOR Y6, Y8, Y6 3602 VPADDD Y3, Y7, Y3 3603 VPADDD 224(SP), Y3, Y3 3604 VPXOR Y15, Y3, Y15 3605 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3606 VPADDD Y11, Y15, Y11 3607 VPXOR Y7, Y11, Y7 3608 VPSRLD $0x0c, Y7, Y8 3609 VPSLLD $0x14, Y7, Y7 3610 VPOR Y7, Y8, Y7 3611 VPADDD Y3, Y7, Y3 3612 VPADDD 448(SP), Y3, Y3 3613 VPXOR Y15, Y3, Y15 3614 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3615 VPADDD Y11, Y15, Y11 3616 VPXOR Y7, Y11, Y7 3617 VPSRLD $0x07, Y7, Y8 3618 VPSLLD $0x19, Y7, Y7 3619 VPOR Y7, Y8, Y7 3620 VPADDD Y0, Y5, Y0 3621 VPADDD 192(SP), Y0, Y0 3622 VPXOR Y15, Y0, Y15 3623 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3624 VPADDD Y10, Y15, Y10 3625 VPXOR Y5, Y10, Y5 3626 VPSRLD $0x0c, Y5, Y8 3627 VPSLLD $0x14, Y5, Y5 3628 VPOR Y5, Y8, Y5 3629 VPADDD Y0, Y5, Y0 3630 VPADDD 160(SP), Y0, Y0 3631 VPXOR Y15, Y0, Y15 3632 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3633 VPADDD Y10, Y15, Y10 3634 VPXOR Y5, Y10, Y5 3635 VPSRLD $0x07, Y5, Y8 3636 VPSLLD $0x19, Y5, Y5 3637 VPOR Y5, Y8, Y5 3638 VPADDD Y1, Y6, Y1 3639 VPADDD 288(SP), Y1, Y1 3640 VPXOR Y12, Y1, Y12 3641 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3642 VPADDD Y11, Y12, Y11 3643 VPXOR Y6, Y11, Y6 3644 VPSRLD $0x0c, Y6, Y8 3645 VPSLLD $0x14, Y6, Y6 3646 VPOR Y6, Y8, Y6 3647 VPADDD Y1, Y6, Y1 3648 VPADDD (SP), Y1, Y1 3649 VPXOR Y12, Y1, Y12 3650 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3651 VPADDD Y11, Y12, Y11 3652 VPXOR Y6, Y11, Y6 3653 VPSRLD $0x07, Y6, Y8 3654 VPSLLD $0x19, Y6, Y6 3655 VPOR Y6, Y8, Y6 3656 VPADDD Y2, Y7, Y2 3657 VPADDD 352(SP), Y2, Y2 3658 VPXOR Y13, Y2, Y13 3659 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3660 VMOVDQU 640(SP), Y8 3661 VPADDD Y8, Y13, Y8 3662 VPXOR Y7, Y8, Y7 3663 VMOVDQU Y8, 640(SP) 3664 VPSRLD $0x0c, Y7, Y8 3665 VPSLLD $0x14, Y7, Y7 3666 VPOR Y7, Y8, Y7 3667 VPADDD Y2, Y7, Y2 3668 
VPADDD 480(SP), Y2, Y2 3669 VPXOR Y13, Y2, Y13 3670 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3671 VMOVDQU 640(SP), Y8 3672 VPADDD Y8, Y13, Y8 3673 VPXOR Y7, Y8, Y7 3674 VMOVDQU Y8, 640(SP) 3675 VPSRLD $0x07, Y7, Y8 3676 VPSLLD $0x19, Y7, Y7 3677 VPOR Y7, Y8, Y7 3678 VPADDD Y3, Y4, Y3 3679 VPADDD 256(SP), Y3, Y3 3680 VPXOR Y14, Y3, Y14 3681 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3682 VPADDD Y9, Y14, Y9 3683 VPXOR Y4, Y9, Y4 3684 VPSRLD $0x0c, Y4, Y8 3685 VPSLLD $0x14, Y4, Y4 3686 VPOR Y4, Y8, Y4 3687 VPADDD Y3, Y4, Y3 3688 VPADDD 32(SP), Y3, Y3 3689 VPXOR Y14, Y3, Y14 3690 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3691 VPADDD Y9, Y14, Y9 3692 VPXOR Y4, Y9, Y4 3693 VPSRLD $0x07, Y4, Y8 3694 VPSLLD $0x19, Y4, Y4 3695 VPOR Y4, Y8, Y4 3696 3697 // Round 4 3698 VPADDD Y0, Y4, Y0 3699 VPADDD 320(SP), Y0, Y0 3700 VPXOR Y12, Y0, Y12 3701 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3702 VMOVDQU 640(SP), Y8 3703 VPADDD Y8, Y12, Y8 3704 VPXOR Y4, Y8, Y4 3705 VMOVDQU Y8, 640(SP) 3706 VPSRLD $0x0c, Y4, Y8 3707 VPSLLD $0x14, Y4, Y4 3708 VPOR Y4, Y8, Y4 3709 VPADDD Y0, Y4, Y0 3710 VPADDD 224(SP), Y0, Y0 3711 VPXOR Y12, Y0, Y12 3712 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3713 VMOVDQU 640(SP), Y8 3714 VPADDD Y8, Y12, Y8 3715 VPXOR Y4, Y8, Y4 3716 VMOVDQU Y8, 640(SP) 3717 VPSRLD $0x07, Y4, Y8 3718 VPSLLD $0x19, Y4, Y4 3719 VPOR Y4, Y8, Y4 3720 VPADDD Y1, Y5, Y1 3721 VPADDD 384(SP), Y1, Y1 3722 VPXOR Y13, Y1, Y13 3723 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3724 VPADDD Y9, Y13, Y9 3725 VPXOR Y5, Y9, Y5 3726 VPSRLD $0x0c, Y5, Y8 3727 VPSLLD $0x14, Y5, Y5 3728 VPOR Y5, Y8, Y5 3729 VPADDD Y1, Y5, Y1 3730 VPADDD 288(SP), Y1, Y1 3731 VPXOR Y13, Y1, Y13 3732 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3733 VPADDD Y9, Y13, Y9 3734 VPXOR Y5, Y9, Y5 3735 VPSRLD $0x07, Y5, Y8 3736 VPSLLD $0x19, Y5, Y5 3737 VPOR Y5, Y8, Y5 3738 VPADDD Y2, Y6, Y2 3739 VPADDD 448(SP), Y2, Y2 3740 VPXOR Y14, Y2, Y14 3741 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3742 VPADDD Y10, Y14, Y10 3743 VPXOR Y6, Y10, Y6 3744 VPSRLD $0x0c, 
Y6, Y8 3745 VPSLLD $0x14, Y6, Y6 3746 VPOR Y6, Y8, Y6 3747 VPADDD Y2, Y6, Y2 3748 VPADDD 96(SP), Y2, Y2 3749 VPXOR Y14, Y2, Y14 3750 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3751 VPADDD Y10, Y14, Y10 3752 VPXOR Y6, Y10, Y6 3753 VPSRLD $0x07, Y6, Y8 3754 VPSLLD $0x19, Y6, Y6 3755 VPOR Y6, Y8, Y6 3756 VPADDD Y3, Y7, Y3 3757 VPADDD 416(SP), Y3, Y3 3758 VPXOR Y15, Y3, Y15 3759 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3760 VPADDD Y11, Y15, Y11 3761 VPXOR Y7, Y11, Y7 3762 VPSRLD $0x0c, Y7, Y8 3763 VPSLLD $0x14, Y7, Y7 3764 VPOR Y7, Y8, Y7 3765 VPADDD Y3, Y7, Y3 3766 VPADDD 480(SP), Y3, Y3 3767 VPXOR Y15, Y3, Y15 3768 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3769 VPADDD Y11, Y15, Y11 3770 VPXOR Y7, Y11, Y7 3771 VPSRLD $0x07, Y7, Y8 3772 VPSLLD $0x19, Y7, Y7 3773 VPOR Y7, Y8, Y7 3774 VPADDD Y0, Y5, Y0 3775 VPADDD 128(SP), Y0, Y0 3776 VPXOR Y15, Y0, Y15 3777 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3778 VPADDD Y10, Y15, Y10 3779 VPXOR Y5, Y10, Y5 3780 VPSRLD $0x0c, Y5, Y8 3781 VPSLLD $0x14, Y5, Y5 3782 VPOR Y5, Y8, Y5 3783 VPADDD Y0, Y5, Y0 3784 VPADDD (SP), Y0, Y0 3785 VPXOR Y15, Y0, Y15 3786 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3787 VPADDD Y10, Y15, Y10 3788 VPXOR Y5, Y10, Y5 3789 VPSRLD $0x07, Y5, Y8 3790 VPSLLD $0x19, Y5, Y5 3791 VPOR Y5, Y8, Y5 3792 VPADDD Y1, Y6, Y1 3793 VPADDD 352(SP), Y1, Y1 3794 VPXOR Y12, Y1, Y12 3795 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3796 VPADDD Y11, Y12, Y11 3797 VPXOR Y6, Y11, Y6 3798 VPSRLD $0x0c, Y6, Y8 3799 VPSLLD $0x14, Y6, Y6 3800 VPOR Y6, Y8, Y6 3801 VPADDD Y1, Y6, Y1 3802 VPADDD 64(SP), Y1, Y1 3803 VPXOR Y12, Y1, Y12 3804 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3805 VPADDD Y11, Y12, Y11 3806 VPXOR Y6, Y11, Y6 3807 VPSRLD $0x07, Y6, Y8 3808 VPSLLD $0x19, Y6, Y6 3809 VPOR Y6, Y8, Y6 3810 VPADDD Y2, Y7, Y2 3811 VPADDD 160(SP), Y2, Y2 3812 VPXOR Y13, Y2, Y13 3813 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3814 VMOVDQU 640(SP), Y8 3815 VPADDD Y8, Y13, Y8 3816 VPXOR Y7, Y8, Y7 3817 VMOVDQU Y8, 640(SP) 3818 VPSRLD $0x0c, Y7, Y8 3819 VPSLLD $0x14, 
Y7, Y7 3820 VPOR Y7, Y8, Y7 3821 VPADDD Y2, Y7, Y2 3822 VPADDD 256(SP), Y2, Y2 3823 VPXOR Y13, Y2, Y13 3824 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3825 VMOVDQU 640(SP), Y8 3826 VPADDD Y8, Y13, Y8 3827 VPXOR Y7, Y8, Y7 3828 VMOVDQU Y8, 640(SP) 3829 VPSRLD $0x07, Y7, Y8 3830 VPSLLD $0x19, Y7, Y7 3831 VPOR Y7, Y8, Y7 3832 VPADDD Y3, Y4, Y3 3833 VPADDD 32(SP), Y3, Y3 3834 VPXOR Y14, Y3, Y14 3835 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3836 VPADDD Y9, Y14, Y9 3837 VPXOR Y4, Y9, Y4 3838 VPSRLD $0x0c, Y4, Y8 3839 VPSLLD $0x14, Y4, Y4 3840 VPOR Y4, Y8, Y4 3841 VPADDD Y3, Y4, Y3 3842 VPADDD 192(SP), Y3, Y3 3843 VPXOR Y14, Y3, Y14 3844 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3845 VPADDD Y9, Y14, Y9 3846 VPXOR Y4, Y9, Y4 3847 VPSRLD $0x07, Y4, Y8 3848 VPSLLD $0x19, Y4, Y4 3849 VPOR Y4, Y8, Y4 3850 3851 // Round 5 3852 VPADDD Y0, Y4, Y0 3853 VPADDD 384(SP), Y0, Y0 3854 VPXOR Y12, Y0, Y12 3855 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3856 VMOVDQU 640(SP), Y8 3857 VPADDD Y8, Y12, Y8 3858 VPXOR Y4, Y8, Y4 3859 VMOVDQU Y8, 640(SP) 3860 VPSRLD $0x0c, Y4, Y8 3861 VPSLLD $0x14, Y4, Y4 3862 VPOR Y4, Y8, Y4 3863 VPADDD Y0, Y4, Y0 3864 VPADDD 416(SP), Y0, Y0 3865 VPXOR Y12, Y0, Y12 3866 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3867 VMOVDQU 640(SP), Y8 3868 VPADDD Y8, Y12, Y8 3869 VPXOR Y4, Y8, Y4 3870 VMOVDQU Y8, 640(SP) 3871 VPSRLD $0x07, Y4, Y8 3872 VPSLLD $0x19, Y4, Y4 3873 VPOR Y4, Y8, Y4 3874 VPADDD Y1, Y5, Y1 3875 VPADDD 288(SP), Y1, Y1 3876 VPXOR Y13, Y1, Y13 3877 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3878 VPADDD Y9, Y13, Y9 3879 VPXOR Y5, Y9, Y5 3880 VPSRLD $0x0c, Y5, Y8 3881 VPSLLD $0x14, Y5, Y5 3882 VPOR Y5, Y8, Y5 3883 VPADDD Y1, Y5, Y1 3884 VPADDD 352(SP), Y1, Y1 3885 VPXOR Y13, Y1, Y13 3886 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3887 VPADDD Y9, Y13, Y9 3888 VPXOR Y5, Y9, Y5 3889 VPSRLD $0x07, Y5, Y8 3890 VPSLLD $0x19, Y5, Y5 3891 VPOR Y5, Y8, Y5 3892 VPADDD Y2, Y6, Y2 3893 VPADDD 480(SP), Y2, Y2 3894 VPXOR Y14, Y2, Y14 3895 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3896 VPADDD 
Y10, Y14, Y10 3897 VPXOR Y6, Y10, Y6 3898 VPSRLD $0x0c, Y6, Y8 3899 VPSLLD $0x14, Y6, Y6 3900 VPOR Y6, Y8, Y6 3901 VPADDD Y2, Y6, Y2 3902 VPADDD 320(SP), Y2, Y2 3903 VPXOR Y14, Y2, Y14 3904 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3905 VPADDD Y10, Y14, Y10 3906 VPXOR Y6, Y10, Y6 3907 VPSRLD $0x07, Y6, Y8 3908 VPSLLD $0x19, Y6, Y6 3909 VPOR Y6, Y8, Y6 3910 VPADDD Y3, Y7, Y3 3911 VPADDD 448(SP), Y3, Y3 3912 VPXOR Y15, Y3, Y15 3913 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3914 VPADDD Y11, Y15, Y11 3915 VPXOR Y7, Y11, Y7 3916 VPSRLD $0x0c, Y7, Y8 3917 VPSLLD $0x14, Y7, Y7 3918 VPOR Y7, Y8, Y7 3919 VPADDD Y3, Y7, Y3 3920 VPADDD 256(SP), Y3, Y3 3921 VPXOR Y15, Y3, Y15 3922 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3923 VPADDD Y11, Y15, Y11 3924 VPXOR Y7, Y11, Y7 3925 VPSRLD $0x07, Y7, Y8 3926 VPSLLD $0x19, Y7, Y7 3927 VPOR Y7, Y8, Y7 3928 VPADDD Y0, Y5, Y0 3929 VPADDD 224(SP), Y0, Y0 3930 VPXOR Y15, Y0, Y15 3931 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 3932 VPADDD Y10, Y15, Y10 3933 VPXOR Y5, Y10, Y5 3934 VPSRLD $0x0c, Y5, Y8 3935 VPSLLD $0x14, Y5, Y5 3936 VPOR Y5, Y8, Y5 3937 VPADDD Y0, Y5, Y0 3938 VPADDD 64(SP), Y0, Y0 3939 VPXOR Y15, Y0, Y15 3940 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 3941 VPADDD Y10, Y15, Y10 3942 VPXOR Y5, Y10, Y5 3943 VPSRLD $0x07, Y5, Y8 3944 VPSLLD $0x19, Y5, Y5 3945 VPOR Y5, Y8, Y5 3946 VPADDD Y1, Y6, Y1 3947 VPADDD 160(SP), Y1, Y1 3948 VPXOR Y12, Y1, Y12 3949 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 3950 VPADDD Y11, Y12, Y11 3951 VPXOR Y6, Y11, Y6 3952 VPSRLD $0x0c, Y6, Y8 3953 VPSLLD $0x14, Y6, Y6 3954 VPOR Y6, Y8, Y6 3955 VPADDD Y1, Y6, Y1 3956 VPADDD 96(SP), Y1, Y1 3957 VPXOR Y12, Y1, Y12 3958 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 3959 VPADDD Y11, Y12, Y11 3960 VPXOR Y6, Y11, Y6 3961 VPSRLD $0x07, Y6, Y8 3962 VPSLLD $0x19, Y6, Y6 3963 VPOR Y6, Y8, Y6 3964 VPADDD Y2, Y7, Y2 3965 VPADDD (SP), Y2, Y2 3966 VPXOR Y13, Y2, Y13 3967 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 3968 VMOVDQU 640(SP), Y8 3969 VPADDD Y8, Y13, Y8 3970 VPXOR Y7, Y8, Y7 3971 VMOVDQU 
Y8, 640(SP) 3972 VPSRLD $0x0c, Y7, Y8 3973 VPSLLD $0x14, Y7, Y7 3974 VPOR Y7, Y8, Y7 3975 VPADDD Y2, Y7, Y2 3976 VPADDD 32(SP), Y2, Y2 3977 VPXOR Y13, Y2, Y13 3978 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 3979 VMOVDQU 640(SP), Y8 3980 VPADDD Y8, Y13, Y8 3981 VPXOR Y7, Y8, Y7 3982 VMOVDQU Y8, 640(SP) 3983 VPSRLD $0x07, Y7, Y8 3984 VPSLLD $0x19, Y7, Y7 3985 VPOR Y7, Y8, Y7 3986 VPADDD Y3, Y4, Y3 3987 VPADDD 192(SP), Y3, Y3 3988 VPXOR Y14, Y3, Y14 3989 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 3990 VPADDD Y9, Y14, Y9 3991 VPXOR Y4, Y9, Y4 3992 VPSRLD $0x0c, Y4, Y8 3993 VPSLLD $0x14, Y4, Y4 3994 VPOR Y4, Y8, Y4 3995 VPADDD Y3, Y4, Y3 3996 VPADDD 128(SP), Y3, Y3 3997 VPXOR Y14, Y3, Y14 3998 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 3999 VPADDD Y9, Y14, Y9 4000 VPXOR Y4, Y9, Y4 4001 VPSRLD $0x07, Y4, Y8 4002 VPSLLD $0x19, Y4, Y4 4003 VPOR Y4, Y8, Y4 4004 4005 // Round 6 4006 VPADDD Y0, Y4, Y0 4007 VPADDD 288(SP), Y0, Y0 4008 VPXOR Y12, Y0, Y12 4009 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4010 VMOVDQU 640(SP), Y8 4011 VPADDD Y8, Y12, Y8 4012 VPXOR Y4, Y8, Y4 4013 VMOVDQU Y8, 640(SP) 4014 VPSRLD $0x0c, Y4, Y8 4015 VPSLLD $0x14, Y4, Y4 4016 VPOR Y4, Y8, Y4 4017 VPADDD Y0, Y4, Y0 4018 VPADDD 448(SP), Y0, Y0 4019 VPXOR Y12, Y0, Y12 4020 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4021 VMOVDQU 640(SP), Y8 4022 VPADDD Y8, Y12, Y8 4023 VPXOR Y4, Y8, Y4 4024 VMOVDQU Y8, 640(SP) 4025 VPSRLD $0x07, Y4, Y8 4026 VPSLLD $0x19, Y4, Y4 4027 VPOR Y4, Y8, Y4 4028 VPADDD Y1, Y5, Y1 4029 VPADDD 352(SP), Y1, Y1 4030 VPXOR Y13, Y1, Y13 4031 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4032 VPADDD Y9, Y13, Y9 4033 VPXOR Y5, Y9, Y5 4034 VPSRLD $0x0c, Y5, Y8 4035 VPSLLD $0x14, Y5, Y5 4036 VPOR Y5, Y8, Y5 4037 VPADDD Y1, Y5, Y1 4038 VPADDD 160(SP), Y1, Y1 4039 VPXOR Y13, Y1, Y13 4040 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4041 VPADDD Y9, Y13, Y9 4042 VPXOR Y5, Y9, Y5 4043 VPSRLD $0x07, Y5, Y8 4044 VPSLLD $0x19, Y5, Y5 4045 VPOR Y5, Y8, Y5 4046 VPADDD Y2, Y6, Y2 4047 VPADDD 256(SP), Y2, Y2 4048 VPXOR Y14, Y2, Y14 
4049 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4050 VPADDD Y10, Y14, Y10 4051 VPXOR Y6, Y10, Y6 4052 VPSRLD $0x0c, Y6, Y8 4053 VPSLLD $0x14, Y6, Y6 4054 VPOR Y6, Y8, Y6 4055 VPADDD Y2, Y6, Y2 4056 VPADDD 384(SP), Y2, Y2 4057 VPXOR Y14, Y2, Y14 4058 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4059 VPADDD Y10, Y14, Y10 4060 VPXOR Y6, Y10, Y6 4061 VPSRLD $0x07, Y6, Y8 4062 VPSLLD $0x19, Y6, Y6 4063 VPOR Y6, Y8, Y6 4064 VPADDD Y3, Y7, Y3 4065 VPADDD 480(SP), Y3, Y3 4066 VPXOR Y15, Y3, Y15 4067 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4068 VPADDD Y11, Y15, Y11 4069 VPXOR Y7, Y11, Y7 4070 VPSRLD $0x0c, Y7, Y8 4071 VPSLLD $0x14, Y7, Y7 4072 VPOR Y7, Y8, Y7 4073 VPADDD Y3, Y7, Y3 4074 VPADDD 32(SP), Y3, Y3 4075 VPXOR Y15, Y3, Y15 4076 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4077 VPADDD Y11, Y15, Y11 4078 VPXOR Y7, Y11, Y7 4079 VPSRLD $0x07, Y7, Y8 4080 VPSLLD $0x19, Y7, Y7 4081 VPOR Y7, Y8, Y7 4082 VPADDD Y0, Y5, Y0 4083 VPADDD 416(SP), Y0, Y0 4084 VPXOR Y15, Y0, Y15 4085 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4086 VPADDD Y10, Y15, Y10 4087 VPXOR Y5, Y10, Y5 4088 VPSRLD $0x0c, Y5, Y8 4089 VPSLLD $0x14, Y5, Y5 4090 VPOR Y5, Y8, Y5 4091 VPADDD Y0, Y5, Y0 4092 VPADDD 96(SP), Y0, Y0 4093 VPXOR Y15, Y0, Y15 4094 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4095 VPADDD Y10, Y15, Y10 4096 VPXOR Y5, Y10, Y5 4097 VPSRLD $0x07, Y5, Y8 4098 VPSLLD $0x19, Y5, Y5 4099 VPOR Y5, Y8, Y5 4100 VPADDD Y1, Y6, Y1 4101 VPADDD (SP), Y1, Y1 4102 VPXOR Y12, Y1, Y12 4103 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4104 VPADDD Y11, Y12, Y11 4105 VPXOR Y6, Y11, Y6 4106 VPSRLD $0x0c, Y6, Y8 4107 VPSLLD $0x14, Y6, Y6 4108 VPOR Y6, Y8, Y6 4109 VPADDD Y1, Y6, Y1 4110 VPADDD 320(SP), Y1, Y1 4111 VPXOR Y12, Y1, Y12 4112 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4113 VPADDD Y11, Y12, Y11 4114 VPXOR Y6, Y11, Y6 4115 VPSRLD $0x07, Y6, Y8 4116 VPSLLD $0x19, Y6, Y6 4117 VPOR Y6, Y8, Y6 4118 VPADDD Y2, Y7, Y2 4119 VPADDD 64(SP), Y2, Y2 4120 VPXOR Y13, Y2, Y13 4121 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4122 VMOVDQU 640(SP), Y8 
4123 VPADDD Y8, Y13, Y8 4124 VPXOR Y7, Y8, Y7 4125 VMOVDQU Y8, 640(SP) 4126 VPSRLD $0x0c, Y7, Y8 4127 VPSLLD $0x14, Y7, Y7 4128 VPOR Y7, Y8, Y7 4129 VPADDD Y2, Y7, Y2 4130 VPADDD 192(SP), Y2, Y2 4131 VPXOR Y13, Y2, Y13 4132 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4133 VMOVDQU 640(SP), Y8 4134 VPADDD Y8, Y13, Y8 4135 VPXOR Y7, Y8, Y7 4136 VMOVDQU Y8, 640(SP) 4137 VPSRLD $0x07, Y7, Y8 4138 VPSLLD $0x19, Y7, Y7 4139 VPOR Y7, Y8, Y7 4140 VPADDD Y3, Y4, Y3 4141 VPADDD 128(SP), Y3, Y3 4142 VPXOR Y14, Y3, Y14 4143 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4144 VPADDD Y9, Y14, Y9 4145 VPXOR Y4, Y9, Y4 4146 VPSRLD $0x0c, Y4, Y8 4147 VPSLLD $0x14, Y4, Y4 4148 VPOR Y4, Y8, Y4 4149 VPADDD Y3, Y4, Y3 4150 VPADDD 224(SP), Y3, Y3 4151 VPXOR Y14, Y3, Y14 4152 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4153 VPADDD Y9, Y14, Y9 4154 VPXOR Y4, Y9, Y4 4155 VPSRLD $0x07, Y4, Y8 4156 VPSLLD $0x19, Y4, Y4 4157 VPOR Y4, Y8, Y4 4158 4159 // Round 7 4160 VPADDD Y0, Y4, Y0 4161 VPADDD 352(SP), Y0, Y0 4162 VPXOR Y12, Y0, Y12 4163 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4164 VMOVDQU 640(SP), Y8 4165 VPADDD Y8, Y12, Y8 4166 VPXOR Y4, Y8, Y4 4167 VMOVDQU Y8, 640(SP) 4168 VPSRLD $0x0c, Y4, Y8 4169 VPSLLD $0x14, Y4, Y4 4170 VPOR Y4, Y8, Y4 4171 VPADDD Y0, Y4, Y0 4172 VPADDD 480(SP), Y0, Y0 4173 VPXOR Y12, Y0, Y12 4174 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4175 VMOVDQU 640(SP), Y8 4176 VPADDD Y8, Y12, Y8 4177 VPXOR Y4, Y8, Y4 4178 VMOVDQU Y8, 640(SP) 4179 VPSRLD $0x07, Y4, Y8 4180 VPSLLD $0x19, Y4, Y4 4181 VPOR Y4, Y8, Y4 4182 VPADDD Y1, Y5, Y1 4183 VPADDD 160(SP), Y1, Y1 4184 VPXOR Y13, Y1, Y13 4185 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4186 VPADDD Y9, Y13, Y9 4187 VPXOR Y5, Y9, Y5 4188 VPSRLD $0x0c, Y5, Y8 4189 VPSLLD $0x14, Y5, Y5 4190 VPOR Y5, Y8, Y5 4191 VPADDD Y1, Y5, Y1 4192 VPADDD (SP), Y1, Y1 4193 VPXOR Y13, Y1, Y13 4194 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4195 VPADDD Y9, Y13, Y9 4196 VPXOR Y5, Y9, Y5 4197 VPSRLD $0x07, Y5, Y8 4198 VPSLLD $0x19, Y5, Y5 4199 VPOR Y5, Y8, Y5 4200 VPADDD Y2, 
Y6, Y2 4201 VPADDD 32(SP), Y2, Y2 4202 VPXOR Y14, Y2, Y14 4203 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4204 VPADDD Y10, Y14, Y10 4205 VPXOR Y6, Y10, Y6 4206 VPSRLD $0x0c, Y6, Y8 4207 VPSLLD $0x14, Y6, Y6 4208 VPOR Y6, Y8, Y6 4209 VPADDD Y2, Y6, Y2 4210 VPADDD 288(SP), Y2, Y2 4211 VPXOR Y14, Y2, Y14 4212 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4213 VPADDD Y10, Y14, Y10 4214 VPXOR Y6, Y10, Y6 4215 VPSRLD $0x07, Y6, Y8 4216 VPSLLD $0x19, Y6, Y6 4217 VPOR Y6, Y8, Y6 4218 VPADDD Y3, Y7, Y3 4219 VPADDD 256(SP), Y3, Y3 4220 VPXOR Y15, Y3, Y15 4221 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4222 VPADDD Y11, Y15, Y11 4223 VPXOR Y7, Y11, Y7 4224 VPSRLD $0x0c, Y7, Y8 4225 VPSLLD $0x14, Y7, Y7 4226 VPOR Y7, Y8, Y7 4227 VPADDD Y3, Y7, Y3 4228 VPADDD 192(SP), Y3, Y3 4229 VPXOR Y15, Y3, Y15 4230 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4231 VPADDD Y11, Y15, Y11 4232 VPXOR Y7, Y11, Y7 4233 VPSRLD $0x07, Y7, Y8 4234 VPSLLD $0x19, Y7, Y7 4235 VPOR Y7, Y8, Y7 4236 VPADDD Y0, Y5, Y0 4237 VPADDD 448(SP), Y0, Y0 4238 VPXOR Y15, Y0, Y15 4239 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4240 VPADDD Y10, Y15, Y10 4241 VPXOR Y5, Y10, Y5 4242 VPSRLD $0x0c, Y5, Y8 4243 VPSLLD $0x14, Y5, Y5 4244 VPOR Y5, Y8, Y5 4245 VPADDD Y0, Y5, Y0 4246 VPADDD 320(SP), Y0, Y0 4247 VPXOR Y15, Y0, Y15 4248 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4249 VPADDD Y10, Y15, Y10 4250 VPXOR Y5, Y10, Y5 4251 VPSRLD $0x07, Y5, Y8 4252 VPSLLD $0x19, Y5, Y5 4253 VPOR Y5, Y8, Y5 4254 VPADDD Y1, Y6, Y1 4255 VPADDD 64(SP), Y1, Y1 4256 VPXOR Y12, Y1, Y12 4257 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4258 VPADDD Y11, Y12, Y11 4259 VPXOR Y6, Y11, Y6 4260 VPSRLD $0x0c, Y6, Y8 4261 VPSLLD $0x14, Y6, Y6 4262 VPOR Y6, Y8, Y6 4263 VPADDD Y1, Y6, Y1 4264 VPADDD 384(SP), Y1, Y1 4265 VPXOR Y12, Y1, Y12 4266 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4267 VPADDD Y11, Y12, Y11 4268 VPXOR Y6, Y11, Y6 4269 VPSRLD $0x07, Y6, Y8 4270 VPSLLD $0x19, Y6, Y6 4271 VPOR Y6, Y8, Y6 4272 VPADDD Y2, Y7, Y2 4273 VPADDD 96(SP), Y2, Y2 4274 VPXOR Y13, Y2, Y13 4275 
VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4276 VMOVDQU 640(SP), Y8 4277 VPADDD Y8, Y13, Y8 4278 VPXOR Y7, Y8, Y7 4279 VMOVDQU Y8, 640(SP) 4280 VPSRLD $0x0c, Y7, Y8 4281 VPSLLD $0x14, Y7, Y7 4282 VPOR Y7, Y8, Y7 4283 VPADDD Y2, Y7, Y2 4284 VPADDD 128(SP), Y2, Y2 4285 VPXOR Y13, Y2, Y13 4286 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4287 VMOVDQU 640(SP), Y8 4288 VPADDD Y8, Y13, Y8 4289 VPXOR Y7, Y8, Y7 4290 VMOVDQU Y8, 640(SP) 4291 VPSRLD $0x07, Y7, Y8 4292 VPSLLD $0x19, Y7, Y7 4293 VPOR Y7, Y8, Y7 4294 VPADDD Y3, Y4, Y3 4295 VPADDD 224(SP), Y3, Y3 4296 VPXOR Y14, Y3, Y14 4297 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4298 VPADDD Y9, Y14, Y9 4299 VPXOR Y4, Y9, Y4 4300 VPSRLD $0x0c, Y4, Y8 4301 VPSLLD $0x14, Y4, Y4 4302 VPOR Y4, Y8, Y4 4303 VPADDD Y3, Y4, Y3 4304 VPADDD 416(SP), Y3, Y3 4305 VPXOR Y14, Y3, Y14 4306 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4307 VPADDD Y9, Y14, Y9 4308 VPXOR Y4, Y9, Y4 4309 VPSRLD $0x07, Y4, Y8 4310 VPSLLD $0x19, Y4, Y4 4311 VPOR Y4, Y8, Y4 4312 VMOVDQU 640(SP), Y8 4313 4314 // Finalize CVs 4315 VPXOR Y0, Y8, Y0 4316 VPXOR Y1, Y9, Y1 4317 VPXOR Y2, Y10, Y2 4318 VPXOR Y3, Y11, Y3 4319 VPXOR Y4, Y12, Y4 4320 VPXOR Y5, Y13, Y5 4321 VPXOR Y6, Y14, Y6 4322 VPXOR Y7, Y15, Y7 4323 4324 // Loop 4325 INCQ DX 4326 CMPQ DX, $0x00000010 4327 JNE loop 4328 4329 // Finished; transpose CVs 4330 VPUNPCKLDQ Y1, Y0, Y8 4331 VPUNPCKHDQ Y1, Y0, Y9 4332 VPUNPCKLDQ Y3, Y2, Y10 4333 VPUNPCKHDQ Y3, Y2, Y11 4334 VPUNPCKLDQ Y5, Y4, Y12 4335 VPUNPCKHDQ Y5, Y4, Y13 4336 VPUNPCKLDQ Y7, Y6, Y14 4337 VPUNPCKHDQ Y7, Y6, Y15 4338 VPUNPCKLQDQ Y10, Y8, Y0 4339 VPUNPCKHQDQ Y10, Y8, Y1 4340 VPUNPCKLQDQ Y11, Y9, Y2 4341 VPUNPCKHQDQ Y11, Y9, Y3 4342 VPUNPCKLQDQ Y14, Y12, Y4 4343 VPUNPCKHQDQ Y14, Y12, Y5 4344 VPUNPCKLQDQ Y15, Y13, Y6 4345 VPUNPCKHQDQ Y15, Y13, Y7 4346 VPERM2I128 $0x20, Y4, Y0, Y8 4347 VPERM2I128 $0x31, Y4, Y0, Y12 4348 VPERM2I128 $0x20, Y5, Y1, Y9 4349 VPERM2I128 $0x31, Y5, Y1, Y13 4350 VPERM2I128 $0x20, Y6, Y2, Y10 4351 VPERM2I128 $0x31, Y6, Y2, Y14 4352 VPERM2I128 
$0x20, Y7, Y3, Y11 4353 VPERM2I128 $0x31, Y7, Y3, Y15 4354 VMOVDQU Y8, (AX) 4355 VMOVDQU Y9, 32(AX) 4356 VMOVDQU Y10, 64(AX) 4357 VMOVDQU Y11, 96(AX) 4358 VMOVDQU Y12, 128(AX) 4359 VMOVDQU Y13, 160(AX) 4360 VMOVDQU Y14, 192(AX) 4361 VMOVDQU Y15, 224(AX) 4362 RET 4363 4364 // func compressParentsAVX2(parents *[8][8]uint32, cvs *[16][8]uint32, key *[8]uint32, flags uint32) 4365 // Requires: AVX, AVX2 4366 TEXT ·compressParentsAVX2(SB), NOSPLIT, $544-28 4367 MOVQ parents+0(FP), AX 4368 MOVQ cvs+8(FP), CX 4369 MOVQ key+16(FP), DX 4370 4371 // Load transposed block 4372 VMOVDQU seq<>+0(SB), Y9 4373 VPSLLD $0x06, Y9, Y9 4374 VPCMPEQD Y8, Y8, Y8 4375 VPGATHERDD Y8, (CX)(Y9*1), Y10 4376 VMOVDQU Y10, (SP) 4377 VPCMPEQD Y8, Y8, Y8 4378 VPGATHERDD Y8, 4(CX)(Y9*1), Y10 4379 VMOVDQU Y10, 32(SP) 4380 VPCMPEQD Y8, Y8, Y8 4381 VPGATHERDD Y8, 8(CX)(Y9*1), Y10 4382 VMOVDQU Y10, 64(SP) 4383 VPCMPEQD Y8, Y8, Y8 4384 VPGATHERDD Y8, 12(CX)(Y9*1), Y10 4385 VMOVDQU Y10, 96(SP) 4386 VPCMPEQD Y8, Y8, Y8 4387 VPGATHERDD Y8, 16(CX)(Y9*1), Y10 4388 VMOVDQU Y10, 128(SP) 4389 VPCMPEQD Y8, Y8, Y8 4390 VPGATHERDD Y8, 20(CX)(Y9*1), Y10 4391 VMOVDQU Y10, 160(SP) 4392 VPCMPEQD Y8, Y8, Y8 4393 VPGATHERDD Y8, 24(CX)(Y9*1), Y10 4394 VMOVDQU Y10, 192(SP) 4395 VPCMPEQD Y8, Y8, Y8 4396 VPGATHERDD Y8, 28(CX)(Y9*1), Y10 4397 VMOVDQU Y10, 224(SP) 4398 VPCMPEQD Y8, Y8, Y8 4399 VPGATHERDD Y8, 32(CX)(Y9*1), Y10 4400 VMOVDQU Y10, 256(SP) 4401 VPCMPEQD Y8, Y8, Y8 4402 VPGATHERDD Y8, 36(CX)(Y9*1), Y10 4403 VMOVDQU Y10, 288(SP) 4404 VPCMPEQD Y8, Y8, Y8 4405 VPGATHERDD Y8, 40(CX)(Y9*1), Y10 4406 VMOVDQU Y10, 320(SP) 4407 VPCMPEQD Y8, Y8, Y8 4408 VPGATHERDD Y8, 44(CX)(Y9*1), Y10 4409 VMOVDQU Y10, 352(SP) 4410 VPCMPEQD Y8, Y8, Y8 4411 VPGATHERDD Y8, 48(CX)(Y9*1), Y10 4412 VMOVDQU Y10, 384(SP) 4413 VPCMPEQD Y8, Y8, Y8 4414 VPGATHERDD Y8, 52(CX)(Y9*1), Y10 4415 VMOVDQU Y10, 416(SP) 4416 VPCMPEQD Y8, Y8, Y8 4417 VPGATHERDD Y8, 56(CX)(Y9*1), Y10 4418 VMOVDQU Y10, 448(SP) 4419 VPCMPEQD Y8, Y8, Y8 4420 VPGATHERDD Y8, 
60(CX)(Y9*1), Y10 4421 VMOVDQU Y10, 480(SP) 4422 4423 // Initialize state vectors 4424 VPBROADCASTD (DX), Y0 4425 VPBROADCASTD 4(DX), Y1 4426 VPBROADCASTD 8(DX), Y2 4427 VPBROADCASTD 12(DX), Y3 4428 VPBROADCASTD 16(DX), Y4 4429 VPBROADCASTD 20(DX), Y5 4430 VPBROADCASTD 24(DX), Y6 4431 VPBROADCASTD 28(DX), Y7 4432 VPBROADCASTD iv<>+0(SB), Y8 4433 VPBROADCASTD iv<>+4(SB), Y9 4434 VPBROADCASTD iv<>+8(SB), Y10 4435 VPBROADCASTD iv<>+12(SB), Y11 4436 VPXOR Y12, Y12, Y12 4437 VPXOR Y13, Y13, Y13 4438 VPBROADCASTD seq<>+4(SB), Y14 4439 VPSLLD $0x06, Y14, Y14 4440 ORL $0x04, flags+24(FP) 4441 VPBROADCASTD flags+24(FP), Y15 4442 VMOVDQU Y8, 512(SP) 4443 4444 // Round 1 4445 VPADDD Y0, Y4, Y0 4446 VPADDD (SP), Y0, Y0 4447 VPXOR Y12, Y0, Y12 4448 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4449 VMOVDQU 512(SP), Y8 4450 VPADDD Y8, Y12, Y8 4451 VPXOR Y4, Y8, Y4 4452 VMOVDQU Y8, 512(SP) 4453 VPSRLD $0x0c, Y4, Y8 4454 VPSLLD $0x14, Y4, Y4 4455 VPOR Y4, Y8, Y4 4456 VPADDD Y0, Y4, Y0 4457 VPADDD 32(SP), Y0, Y0 4458 VPXOR Y12, Y0, Y12 4459 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4460 VMOVDQU 512(SP), Y8 4461 VPADDD Y8, Y12, Y8 4462 VPXOR Y4, Y8, Y4 4463 VMOVDQU Y8, 512(SP) 4464 VPSRLD $0x07, Y4, Y8 4465 VPSLLD $0x19, Y4, Y4 4466 VPOR Y4, Y8, Y4 4467 VPADDD Y1, Y5, Y1 4468 VPADDD 64(SP), Y1, Y1 4469 VPXOR Y13, Y1, Y13 4470 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4471 VPADDD Y9, Y13, Y9 4472 VPXOR Y5, Y9, Y5 4473 VPSRLD $0x0c, Y5, Y8 4474 VPSLLD $0x14, Y5, Y5 4475 VPOR Y5, Y8, Y5 4476 VPADDD Y1, Y5, Y1 4477 VPADDD 96(SP), Y1, Y1 4478 VPXOR Y13, Y1, Y13 4479 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4480 VPADDD Y9, Y13, Y9 4481 VPXOR Y5, Y9, Y5 4482 VPSRLD $0x07, Y5, Y8 4483 VPSLLD $0x19, Y5, Y5 4484 VPOR Y5, Y8, Y5 4485 VPADDD Y2, Y6, Y2 4486 VPADDD 128(SP), Y2, Y2 4487 VPXOR Y14, Y2, Y14 4488 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4489 VPADDD Y10, Y14, Y10 4490 VPXOR Y6, Y10, Y6 4491 VPSRLD $0x0c, Y6, Y8 4492 VPSLLD $0x14, Y6, Y6 4493 VPOR Y6, Y8, Y6 4494 VPADDD Y2, Y6, Y2 4495 VPADDD 
160(SP), Y2, Y2 4496 VPXOR Y14, Y2, Y14 4497 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4498 VPADDD Y10, Y14, Y10 4499 VPXOR Y6, Y10, Y6 4500 VPSRLD $0x07, Y6, Y8 4501 VPSLLD $0x19, Y6, Y6 4502 VPOR Y6, Y8, Y6 4503 VPADDD Y3, Y7, Y3 4504 VPADDD 192(SP), Y3, Y3 4505 VPXOR Y15, Y3, Y15 4506 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4507 VPADDD Y11, Y15, Y11 4508 VPXOR Y7, Y11, Y7 4509 VPSRLD $0x0c, Y7, Y8 4510 VPSLLD $0x14, Y7, Y7 4511 VPOR Y7, Y8, Y7 4512 VPADDD Y3, Y7, Y3 4513 VPADDD 224(SP), Y3, Y3 4514 VPXOR Y15, Y3, Y15 4515 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4516 VPADDD Y11, Y15, Y11 4517 VPXOR Y7, Y11, Y7 4518 VPSRLD $0x07, Y7, Y8 4519 VPSLLD $0x19, Y7, Y7 4520 VPOR Y7, Y8, Y7 4521 VPADDD Y0, Y5, Y0 4522 VPADDD 256(SP), Y0, Y0 4523 VPXOR Y15, Y0, Y15 4524 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4525 VPADDD Y10, Y15, Y10 4526 VPXOR Y5, Y10, Y5 4527 VPSRLD $0x0c, Y5, Y8 4528 VPSLLD $0x14, Y5, Y5 4529 VPOR Y5, Y8, Y5 4530 VPADDD Y0, Y5, Y0 4531 VPADDD 288(SP), Y0, Y0 4532 VPXOR Y15, Y0, Y15 4533 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4534 VPADDD Y10, Y15, Y10 4535 VPXOR Y5, Y10, Y5 4536 VPSRLD $0x07, Y5, Y8 4537 VPSLLD $0x19, Y5, Y5 4538 VPOR Y5, Y8, Y5 4539 VPADDD Y1, Y6, Y1 4540 VPADDD 320(SP), Y1, Y1 4541 VPXOR Y12, Y1, Y12 4542 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4543 VPADDD Y11, Y12, Y11 4544 VPXOR Y6, Y11, Y6 4545 VPSRLD $0x0c, Y6, Y8 4546 VPSLLD $0x14, Y6, Y6 4547 VPOR Y6, Y8, Y6 4548 VPADDD Y1, Y6, Y1 4549 VPADDD 352(SP), Y1, Y1 4550 VPXOR Y12, Y1, Y12 4551 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4552 VPADDD Y11, Y12, Y11 4553 VPXOR Y6, Y11, Y6 4554 VPSRLD $0x07, Y6, Y8 4555 VPSLLD $0x19, Y6, Y6 4556 VPOR Y6, Y8, Y6 4557 VPADDD Y2, Y7, Y2 4558 VPADDD 384(SP), Y2, Y2 4559 VPXOR Y13, Y2, Y13 4560 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4561 VMOVDQU 512(SP), Y8 4562 VPADDD Y8, Y13, Y8 4563 VPXOR Y7, Y8, Y7 4564 VMOVDQU Y8, 512(SP) 4565 VPSRLD $0x0c, Y7, Y8 4566 VPSLLD $0x14, Y7, Y7 4567 VPOR Y7, Y8, Y7 4568 VPADDD Y2, Y7, Y2 4569 VPADDD 416(SP), Y2, Y2 4570 
VPXOR Y13, Y2, Y13 4571 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4572 VMOVDQU 512(SP), Y8 4573 VPADDD Y8, Y13, Y8 4574 VPXOR Y7, Y8, Y7 4575 VMOVDQU Y8, 512(SP) 4576 VPSRLD $0x07, Y7, Y8 4577 VPSLLD $0x19, Y7, Y7 4578 VPOR Y7, Y8, Y7 4579 VPADDD Y3, Y4, Y3 4580 VPADDD 448(SP), Y3, Y3 4581 VPXOR Y14, Y3, Y14 4582 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4583 VPADDD Y9, Y14, Y9 4584 VPXOR Y4, Y9, Y4 4585 VPSRLD $0x0c, Y4, Y8 4586 VPSLLD $0x14, Y4, Y4 4587 VPOR Y4, Y8, Y4 4588 VPADDD Y3, Y4, Y3 4589 VPADDD 480(SP), Y3, Y3 4590 VPXOR Y14, Y3, Y14 4591 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4592 VPADDD Y9, Y14, Y9 4593 VPXOR Y4, Y9, Y4 4594 VPSRLD $0x07, Y4, Y8 4595 VPSLLD $0x19, Y4, Y4 4596 VPOR Y4, Y8, Y4 4597 4598 // Round 2 4599 VPADDD Y0, Y4, Y0 4600 VPADDD 64(SP), Y0, Y0 4601 VPXOR Y12, Y0, Y12 4602 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4603 VMOVDQU 512(SP), Y8 4604 VPADDD Y8, Y12, Y8 4605 VPXOR Y4, Y8, Y4 4606 VMOVDQU Y8, 512(SP) 4607 VPSRLD $0x0c, Y4, Y8 4608 VPSLLD $0x14, Y4, Y4 4609 VPOR Y4, Y8, Y4 4610 VPADDD Y0, Y4, Y0 4611 VPADDD 192(SP), Y0, Y0 4612 VPXOR Y12, Y0, Y12 4613 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4614 VMOVDQU 512(SP), Y8 4615 VPADDD Y8, Y12, Y8 4616 VPXOR Y4, Y8, Y4 4617 VMOVDQU Y8, 512(SP) 4618 VPSRLD $0x07, Y4, Y8 4619 VPSLLD $0x19, Y4, Y4 4620 VPOR Y4, Y8, Y4 4621 VPADDD Y1, Y5, Y1 4622 VPADDD 96(SP), Y1, Y1 4623 VPXOR Y13, Y1, Y13 4624 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4625 VPADDD Y9, Y13, Y9 4626 VPXOR Y5, Y9, Y5 4627 VPSRLD $0x0c, Y5, Y8 4628 VPSLLD $0x14, Y5, Y5 4629 VPOR Y5, Y8, Y5 4630 VPADDD Y1, Y5, Y1 4631 VPADDD 320(SP), Y1, Y1 4632 VPXOR Y13, Y1, Y13 4633 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4634 VPADDD Y9, Y13, Y9 4635 VPXOR Y5, Y9, Y5 4636 VPSRLD $0x07, Y5, Y8 4637 VPSLLD $0x19, Y5, Y5 4638 VPOR Y5, Y8, Y5 4639 VPADDD Y2, Y6, Y2 4640 VPADDD 224(SP), Y2, Y2 4641 VPXOR Y14, Y2, Y14 4642 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4643 VPADDD Y10, Y14, Y10 4644 VPXOR Y6, Y10, Y6 4645 VPSRLD $0x0c, Y6, Y8 4646 VPSLLD $0x14, Y6, 
Y6 4647 VPOR Y6, Y8, Y6 4648 VPADDD Y2, Y6, Y2 4649 VPADDD (SP), Y2, Y2 4650 VPXOR Y14, Y2, Y14 4651 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4652 VPADDD Y10, Y14, Y10 4653 VPXOR Y6, Y10, Y6 4654 VPSRLD $0x07, Y6, Y8 4655 VPSLLD $0x19, Y6, Y6 4656 VPOR Y6, Y8, Y6 4657 VPADDD Y3, Y7, Y3 4658 VPADDD 128(SP), Y3, Y3 4659 VPXOR Y15, Y3, Y15 4660 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4661 VPADDD Y11, Y15, Y11 4662 VPXOR Y7, Y11, Y7 4663 VPSRLD $0x0c, Y7, Y8 4664 VPSLLD $0x14, Y7, Y7 4665 VPOR Y7, Y8, Y7 4666 VPADDD Y3, Y7, Y3 4667 VPADDD 416(SP), Y3, Y3 4668 VPXOR Y15, Y3, Y15 4669 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4670 VPADDD Y11, Y15, Y11 4671 VPXOR Y7, Y11, Y7 4672 VPSRLD $0x07, Y7, Y8 4673 VPSLLD $0x19, Y7, Y7 4674 VPOR Y7, Y8, Y7 4675 VPADDD Y0, Y5, Y0 4676 VPADDD 32(SP), Y0, Y0 4677 VPXOR Y15, Y0, Y15 4678 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4679 VPADDD Y10, Y15, Y10 4680 VPXOR Y5, Y10, Y5 4681 VPSRLD $0x0c, Y5, Y8 4682 VPSLLD $0x14, Y5, Y5 4683 VPOR Y5, Y8, Y5 4684 VPADDD Y0, Y5, Y0 4685 VPADDD 352(SP), Y0, Y0 4686 VPXOR Y15, Y0, Y15 4687 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4688 VPADDD Y10, Y15, Y10 4689 VPXOR Y5, Y10, Y5 4690 VPSRLD $0x07, Y5, Y8 4691 VPSLLD $0x19, Y5, Y5 4692 VPOR Y5, Y8, Y5 4693 VPADDD Y1, Y6, Y1 4694 VPADDD 384(SP), Y1, Y1 4695 VPXOR Y12, Y1, Y12 4696 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4697 VPADDD Y11, Y12, Y11 4698 VPXOR Y6, Y11, Y6 4699 VPSRLD $0x0c, Y6, Y8 4700 VPSLLD $0x14, Y6, Y6 4701 VPOR Y6, Y8, Y6 4702 VPADDD Y1, Y6, Y1 4703 VPADDD 160(SP), Y1, Y1 4704 VPXOR Y12, Y1, Y12 4705 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4706 VPADDD Y11, Y12, Y11 4707 VPXOR Y6, Y11, Y6 4708 VPSRLD $0x07, Y6, Y8 4709 VPSLLD $0x19, Y6, Y6 4710 VPOR Y6, Y8, Y6 4711 VPADDD Y2, Y7, Y2 4712 VPADDD 288(SP), Y2, Y2 4713 VPXOR Y13, Y2, Y13 4714 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4715 VMOVDQU 512(SP), Y8 4716 VPADDD Y8, Y13, Y8 4717 VPXOR Y7, Y8, Y7 4718 VMOVDQU Y8, 512(SP) 4719 VPSRLD $0x0c, Y7, Y8 4720 VPSLLD $0x14, Y7, Y7 4721 VPOR Y7, Y8, Y7 
4722 VPADDD Y2, Y7, Y2 4723 VPADDD 448(SP), Y2, Y2 4724 VPXOR Y13, Y2, Y13 4725 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4726 VMOVDQU 512(SP), Y8 4727 VPADDD Y8, Y13, Y8 4728 VPXOR Y7, Y8, Y7 4729 VMOVDQU Y8, 512(SP) 4730 VPSRLD $0x07, Y7, Y8 4731 VPSLLD $0x19, Y7, Y7 4732 VPOR Y7, Y8, Y7 4733 VPADDD Y3, Y4, Y3 4734 VPADDD 480(SP), Y3, Y3 4735 VPXOR Y14, Y3, Y14 4736 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4737 VPADDD Y9, Y14, Y9 4738 VPXOR Y4, Y9, Y4 4739 VPSRLD $0x0c, Y4, Y8 4740 VPSLLD $0x14, Y4, Y4 4741 VPOR Y4, Y8, Y4 4742 VPADDD Y3, Y4, Y3 4743 VPADDD 256(SP), Y3, Y3 4744 VPXOR Y14, Y3, Y14 4745 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4746 VPADDD Y9, Y14, Y9 4747 VPXOR Y4, Y9, Y4 4748 VPSRLD $0x07, Y4, Y8 4749 VPSLLD $0x19, Y4, Y4 4750 VPOR Y4, Y8, Y4 4751 4752 // Round 3 4753 VPADDD Y0, Y4, Y0 4754 VPADDD 96(SP), Y0, Y0 4755 VPXOR Y12, Y0, Y12 4756 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4757 VMOVDQU 512(SP), Y8 4758 VPADDD Y8, Y12, Y8 4759 VPXOR Y4, Y8, Y4 4760 VMOVDQU Y8, 512(SP) 4761 VPSRLD $0x0c, Y4, Y8 4762 VPSLLD $0x14, Y4, Y4 4763 VPOR Y4, Y8, Y4 4764 VPADDD Y0, Y4, Y0 4765 VPADDD 128(SP), Y0, Y0 4766 VPXOR Y12, Y0, Y12 4767 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4768 VMOVDQU 512(SP), Y8 4769 VPADDD Y8, Y12, Y8 4770 VPXOR Y4, Y8, Y4 4771 VMOVDQU Y8, 512(SP) 4772 VPSRLD $0x07, Y4, Y8 4773 VPSLLD $0x19, Y4, Y4 4774 VPOR Y4, Y8, Y4 4775 VPADDD Y1, Y5, Y1 4776 VPADDD 320(SP), Y1, Y1 4777 VPXOR Y13, Y1, Y13 4778 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4779 VPADDD Y9, Y13, Y9 4780 VPXOR Y5, Y9, Y5 4781 VPSRLD $0x0c, Y5, Y8 4782 VPSLLD $0x14, Y5, Y5 4783 VPOR Y5, Y8, Y5 4784 VPADDD Y1, Y5, Y1 4785 VPADDD 384(SP), Y1, Y1 4786 VPXOR Y13, Y1, Y13 4787 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4788 VPADDD Y9, Y13, Y9 4789 VPXOR Y5, Y9, Y5 4790 VPSRLD $0x07, Y5, Y8 4791 VPSLLD $0x19, Y5, Y5 4792 VPOR Y5, Y8, Y5 4793 VPADDD Y2, Y6, Y2 4794 VPADDD 416(SP), Y2, Y2 4795 VPXOR Y14, Y2, Y14 4796 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4797 VPADDD Y10, Y14, Y10 4798 VPXOR Y6, 
Y10, Y6 4799 VPSRLD $0x0c, Y6, Y8 4800 VPSLLD $0x14, Y6, Y6 4801 VPOR Y6, Y8, Y6 4802 VPADDD Y2, Y6, Y2 4803 VPADDD 64(SP), Y2, Y2 4804 VPXOR Y14, Y2, Y14 4805 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4806 VPADDD Y10, Y14, Y10 4807 VPXOR Y6, Y10, Y6 4808 VPSRLD $0x07, Y6, Y8 4809 VPSLLD $0x19, Y6, Y6 4810 VPOR Y6, Y8, Y6 4811 VPADDD Y3, Y7, Y3 4812 VPADDD 224(SP), Y3, Y3 4813 VPXOR Y15, Y3, Y15 4814 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4815 VPADDD Y11, Y15, Y11 4816 VPXOR Y7, Y11, Y7 4817 VPSRLD $0x0c, Y7, Y8 4818 VPSLLD $0x14, Y7, Y7 4819 VPOR Y7, Y8, Y7 4820 VPADDD Y3, Y7, Y3 4821 VPADDD 448(SP), Y3, Y3 4822 VPXOR Y15, Y3, Y15 4823 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4824 VPADDD Y11, Y15, Y11 4825 VPXOR Y7, Y11, Y7 4826 VPSRLD $0x07, Y7, Y8 4827 VPSLLD $0x19, Y7, Y7 4828 VPOR Y7, Y8, Y7 4829 VPADDD Y0, Y5, Y0 4830 VPADDD 192(SP), Y0, Y0 4831 VPXOR Y15, Y0, Y15 4832 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4833 VPADDD Y10, Y15, Y10 4834 VPXOR Y5, Y10, Y5 4835 VPSRLD $0x0c, Y5, Y8 4836 VPSLLD $0x14, Y5, Y5 4837 VPOR Y5, Y8, Y5 4838 VPADDD Y0, Y5, Y0 4839 VPADDD 160(SP), Y0, Y0 4840 VPXOR Y15, Y0, Y15 4841 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4842 VPADDD Y10, Y15, Y10 4843 VPXOR Y5, Y10, Y5 4844 VPSRLD $0x07, Y5, Y8 4845 VPSLLD $0x19, Y5, Y5 4846 VPOR Y5, Y8, Y5 4847 VPADDD Y1, Y6, Y1 4848 VPADDD 288(SP), Y1, Y1 4849 VPXOR Y12, Y1, Y12 4850 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4851 VPADDD Y11, Y12, Y11 4852 VPXOR Y6, Y11, Y6 4853 VPSRLD $0x0c, Y6, Y8 4854 VPSLLD $0x14, Y6, Y6 4855 VPOR Y6, Y8, Y6 4856 VPADDD Y1, Y6, Y1 4857 VPADDD (SP), Y1, Y1 4858 VPXOR Y12, Y1, Y12 4859 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4860 VPADDD Y11, Y12, Y11 4861 VPXOR Y6, Y11, Y6 4862 VPSRLD $0x07, Y6, Y8 4863 VPSLLD $0x19, Y6, Y6 4864 VPOR Y6, Y8, Y6 4865 VPADDD Y2, Y7, Y2 4866 VPADDD 352(SP), Y2, Y2 4867 VPXOR Y13, Y2, Y13 4868 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4869 VMOVDQU 512(SP), Y8 4870 VPADDD Y8, Y13, Y8 4871 VPXOR Y7, Y8, Y7 4872 VMOVDQU Y8, 512(SP) 4873 VPSRLD 
$0x0c, Y7, Y8 4874 VPSLLD $0x14, Y7, Y7 4875 VPOR Y7, Y8, Y7 4876 VPADDD Y2, Y7, Y2 4877 VPADDD 480(SP), Y2, Y2 4878 VPXOR Y13, Y2, Y13 4879 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4880 VMOVDQU 512(SP), Y8 4881 VPADDD Y8, Y13, Y8 4882 VPXOR Y7, Y8, Y7 4883 VMOVDQU Y8, 512(SP) 4884 VPSRLD $0x07, Y7, Y8 4885 VPSLLD $0x19, Y7, Y7 4886 VPOR Y7, Y8, Y7 4887 VPADDD Y3, Y4, Y3 4888 VPADDD 256(SP), Y3, Y3 4889 VPXOR Y14, Y3, Y14 4890 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 4891 VPADDD Y9, Y14, Y9 4892 VPXOR Y4, Y9, Y4 4893 VPSRLD $0x0c, Y4, Y8 4894 VPSLLD $0x14, Y4, Y4 4895 VPOR Y4, Y8, Y4 4896 VPADDD Y3, Y4, Y3 4897 VPADDD 32(SP), Y3, Y3 4898 VPXOR Y14, Y3, Y14 4899 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4900 VPADDD Y9, Y14, Y9 4901 VPXOR Y4, Y9, Y4 4902 VPSRLD $0x07, Y4, Y8 4903 VPSLLD $0x19, Y4, Y4 4904 VPOR Y4, Y8, Y4 4905 4906 // Round 4 4907 VPADDD Y0, Y4, Y0 4908 VPADDD 320(SP), Y0, Y0 4909 VPXOR Y12, Y0, Y12 4910 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 4911 VMOVDQU 512(SP), Y8 4912 VPADDD Y8, Y12, Y8 4913 VPXOR Y4, Y8, Y4 4914 VMOVDQU Y8, 512(SP) 4915 VPSRLD $0x0c, Y4, Y8 4916 VPSLLD $0x14, Y4, Y4 4917 VPOR Y4, Y8, Y4 4918 VPADDD Y0, Y4, Y0 4919 VPADDD 224(SP), Y0, Y0 4920 VPXOR Y12, Y0, Y12 4921 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 4922 VMOVDQU 512(SP), Y8 4923 VPADDD Y8, Y12, Y8 4924 VPXOR Y4, Y8, Y4 4925 VMOVDQU Y8, 512(SP) 4926 VPSRLD $0x07, Y4, Y8 4927 VPSLLD $0x19, Y4, Y4 4928 VPOR Y4, Y8, Y4 4929 VPADDD Y1, Y5, Y1 4930 VPADDD 384(SP), Y1, Y1 4931 VPXOR Y13, Y1, Y13 4932 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 4933 VPADDD Y9, Y13, Y9 4934 VPXOR Y5, Y9, Y5 4935 VPSRLD $0x0c, Y5, Y8 4936 VPSLLD $0x14, Y5, Y5 4937 VPOR Y5, Y8, Y5 4938 VPADDD Y1, Y5, Y1 4939 VPADDD 288(SP), Y1, Y1 4940 VPXOR Y13, Y1, Y13 4941 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 4942 VPADDD Y9, Y13, Y9 4943 VPXOR Y5, Y9, Y5 4944 VPSRLD $0x07, Y5, Y8 4945 VPSLLD $0x19, Y5, Y5 4946 VPOR Y5, Y8, Y5 4947 VPADDD Y2, Y6, Y2 4948 VPADDD 448(SP), Y2, Y2 4949 VPXOR Y14, Y2, Y14 4950 VPSHUFB 
shuffle_rot16<>+0(SB), Y14, Y14 4951 VPADDD Y10, Y14, Y10 4952 VPXOR Y6, Y10, Y6 4953 VPSRLD $0x0c, Y6, Y8 4954 VPSLLD $0x14, Y6, Y6 4955 VPOR Y6, Y8, Y6 4956 VPADDD Y2, Y6, Y2 4957 VPADDD 96(SP), Y2, Y2 4958 VPXOR Y14, Y2, Y14 4959 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 4960 VPADDD Y10, Y14, Y10 4961 VPXOR Y6, Y10, Y6 4962 VPSRLD $0x07, Y6, Y8 4963 VPSLLD $0x19, Y6, Y6 4964 VPOR Y6, Y8, Y6 4965 VPADDD Y3, Y7, Y3 4966 VPADDD 416(SP), Y3, Y3 4967 VPXOR Y15, Y3, Y15 4968 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4969 VPADDD Y11, Y15, Y11 4970 VPXOR Y7, Y11, Y7 4971 VPSRLD $0x0c, Y7, Y8 4972 VPSLLD $0x14, Y7, Y7 4973 VPOR Y7, Y8, Y7 4974 VPADDD Y3, Y7, Y3 4975 VPADDD 480(SP), Y3, Y3 4976 VPXOR Y15, Y3, Y15 4977 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4978 VPADDD Y11, Y15, Y11 4979 VPXOR Y7, Y11, Y7 4980 VPSRLD $0x07, Y7, Y8 4981 VPSLLD $0x19, Y7, Y7 4982 VPOR Y7, Y8, Y7 4983 VPADDD Y0, Y5, Y0 4984 VPADDD 128(SP), Y0, Y0 4985 VPXOR Y15, Y0, Y15 4986 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 4987 VPADDD Y10, Y15, Y10 4988 VPXOR Y5, Y10, Y5 4989 VPSRLD $0x0c, Y5, Y8 4990 VPSLLD $0x14, Y5, Y5 4991 VPOR Y5, Y8, Y5 4992 VPADDD Y0, Y5, Y0 4993 VPADDD (SP), Y0, Y0 4994 VPXOR Y15, Y0, Y15 4995 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 4996 VPADDD Y10, Y15, Y10 4997 VPXOR Y5, Y10, Y5 4998 VPSRLD $0x07, Y5, Y8 4999 VPSLLD $0x19, Y5, Y5 5000 VPOR Y5, Y8, Y5 5001 VPADDD Y1, Y6, Y1 5002 VPADDD 352(SP), Y1, Y1 5003 VPXOR Y12, Y1, Y12 5004 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5005 VPADDD Y11, Y12, Y11 5006 VPXOR Y6, Y11, Y6 5007 VPSRLD $0x0c, Y6, Y8 5008 VPSLLD $0x14, Y6, Y6 5009 VPOR Y6, Y8, Y6 5010 VPADDD Y1, Y6, Y1 5011 VPADDD 64(SP), Y1, Y1 5012 VPXOR Y12, Y1, Y12 5013 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5014 VPADDD Y11, Y12, Y11 5015 VPXOR Y6, Y11, Y6 5016 VPSRLD $0x07, Y6, Y8 5017 VPSLLD $0x19, Y6, Y6 5018 VPOR Y6, Y8, Y6 5019 VPADDD Y2, Y7, Y2 5020 VPADDD 160(SP), Y2, Y2 5021 VPXOR Y13, Y2, Y13 5022 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 5023 VMOVDQU 512(SP), Y8 5024 VPADDD Y8, 
Y13, Y8 5025 VPXOR Y7, Y8, Y7 5026 VMOVDQU Y8, 512(SP) 5027 VPSRLD $0x0c, Y7, Y8 5028 VPSLLD $0x14, Y7, Y7 5029 VPOR Y7, Y8, Y7 5030 VPADDD Y2, Y7, Y2 5031 VPADDD 256(SP), Y2, Y2 5032 VPXOR Y13, Y2, Y13 5033 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5034 VMOVDQU 512(SP), Y8 5035 VPADDD Y8, Y13, Y8 5036 VPXOR Y7, Y8, Y7 5037 VMOVDQU Y8, 512(SP) 5038 VPSRLD $0x07, Y7, Y8 5039 VPSLLD $0x19, Y7, Y7 5040 VPOR Y7, Y8, Y7 5041 VPADDD Y3, Y4, Y3 5042 VPADDD 32(SP), Y3, Y3 5043 VPXOR Y14, Y3, Y14 5044 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5045 VPADDD Y9, Y14, Y9 5046 VPXOR Y4, Y9, Y4 5047 VPSRLD $0x0c, Y4, Y8 5048 VPSLLD $0x14, Y4, Y4 5049 VPOR Y4, Y8, Y4 5050 VPADDD Y3, Y4, Y3 5051 VPADDD 192(SP), Y3, Y3 5052 VPXOR Y14, Y3, Y14 5053 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5054 VPADDD Y9, Y14, Y9 5055 VPXOR Y4, Y9, Y4 5056 VPSRLD $0x07, Y4, Y8 5057 VPSLLD $0x19, Y4, Y4 5058 VPOR Y4, Y8, Y4 5059 5060 // Round 5 5061 VPADDD Y0, Y4, Y0 5062 VPADDD 384(SP), Y0, Y0 5063 VPXOR Y12, Y0, Y12 5064 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5065 VMOVDQU 512(SP), Y8 5066 VPADDD Y8, Y12, Y8 5067 VPXOR Y4, Y8, Y4 5068 VMOVDQU Y8, 512(SP) 5069 VPSRLD $0x0c, Y4, Y8 5070 VPSLLD $0x14, Y4, Y4 5071 VPOR Y4, Y8, Y4 5072 VPADDD Y0, Y4, Y0 5073 VPADDD 416(SP), Y0, Y0 5074 VPXOR Y12, Y0, Y12 5075 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5076 VMOVDQU 512(SP), Y8 5077 VPADDD Y8, Y12, Y8 5078 VPXOR Y4, Y8, Y4 5079 VMOVDQU Y8, 512(SP) 5080 VPSRLD $0x07, Y4, Y8 5081 VPSLLD $0x19, Y4, Y4 5082 VPOR Y4, Y8, Y4 5083 VPADDD Y1, Y5, Y1 5084 VPADDD 288(SP), Y1, Y1 5085 VPXOR Y13, Y1, Y13 5086 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 5087 VPADDD Y9, Y13, Y9 5088 VPXOR Y5, Y9, Y5 5089 VPSRLD $0x0c, Y5, Y8 5090 VPSLLD $0x14, Y5, Y5 5091 VPOR Y5, Y8, Y5 5092 VPADDD Y1, Y5, Y1 5093 VPADDD 352(SP), Y1, Y1 5094 VPXOR Y13, Y1, Y13 5095 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5096 VPADDD Y9, Y13, Y9 5097 VPXOR Y5, Y9, Y5 5098 VPSRLD $0x07, Y5, Y8 5099 VPSLLD $0x19, Y5, Y5 5100 VPOR Y5, Y8, Y5 5101 VPADDD Y2, Y6, Y2 5102 
VPADDD 480(SP), Y2, Y2 5103 VPXOR Y14, Y2, Y14 5104 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5105 VPADDD Y10, Y14, Y10 5106 VPXOR Y6, Y10, Y6 5107 VPSRLD $0x0c, Y6, Y8 5108 VPSLLD $0x14, Y6, Y6 5109 VPOR Y6, Y8, Y6 5110 VPADDD Y2, Y6, Y2 5111 VPADDD 320(SP), Y2, Y2 5112 VPXOR Y14, Y2, Y14 5113 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5114 VPADDD Y10, Y14, Y10 5115 VPXOR Y6, Y10, Y6 5116 VPSRLD $0x07, Y6, Y8 5117 VPSLLD $0x19, Y6, Y6 5118 VPOR Y6, Y8, Y6 5119 VPADDD Y3, Y7, Y3 5120 VPADDD 448(SP), Y3, Y3 5121 VPXOR Y15, Y3, Y15 5122 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 5123 VPADDD Y11, Y15, Y11 5124 VPXOR Y7, Y11, Y7 5125 VPSRLD $0x0c, Y7, Y8 5126 VPSLLD $0x14, Y7, Y7 5127 VPOR Y7, Y8, Y7 5128 VPADDD Y3, Y7, Y3 5129 VPADDD 256(SP), Y3, Y3 5130 VPXOR Y15, Y3, Y15 5131 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 5132 VPADDD Y11, Y15, Y11 5133 VPXOR Y7, Y11, Y7 5134 VPSRLD $0x07, Y7, Y8 5135 VPSLLD $0x19, Y7, Y7 5136 VPOR Y7, Y8, Y7 5137 VPADDD Y0, Y5, Y0 5138 VPADDD 224(SP), Y0, Y0 5139 VPXOR Y15, Y0, Y15 5140 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 5141 VPADDD Y10, Y15, Y10 5142 VPXOR Y5, Y10, Y5 5143 VPSRLD $0x0c, Y5, Y8 5144 VPSLLD $0x14, Y5, Y5 5145 VPOR Y5, Y8, Y5 5146 VPADDD Y0, Y5, Y0 5147 VPADDD 64(SP), Y0, Y0 5148 VPXOR Y15, Y0, Y15 5149 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 5150 VPADDD Y10, Y15, Y10 5151 VPXOR Y5, Y10, Y5 5152 VPSRLD $0x07, Y5, Y8 5153 VPSLLD $0x19, Y5, Y5 5154 VPOR Y5, Y8, Y5 5155 VPADDD Y1, Y6, Y1 5156 VPADDD 160(SP), Y1, Y1 5157 VPXOR Y12, Y1, Y12 5158 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5159 VPADDD Y11, Y12, Y11 5160 VPXOR Y6, Y11, Y6 5161 VPSRLD $0x0c, Y6, Y8 5162 VPSLLD $0x14, Y6, Y6 5163 VPOR Y6, Y8, Y6 5164 VPADDD Y1, Y6, Y1 5165 VPADDD 96(SP), Y1, Y1 5166 VPXOR Y12, Y1, Y12 5167 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5168 VPADDD Y11, Y12, Y11 5169 VPXOR Y6, Y11, Y6 5170 VPSRLD $0x07, Y6, Y8 5171 VPSLLD $0x19, Y6, Y6 5172 VPOR Y6, Y8, Y6 5173 VPADDD Y2, Y7, Y2 5174 VPADDD (SP), Y2, Y2 5175 VPXOR Y13, Y2, Y13 5176 VPSHUFB 
shuffle_rot16<>+0(SB), Y13, Y13 5177 VMOVDQU 512(SP), Y8 5178 VPADDD Y8, Y13, Y8 5179 VPXOR Y7, Y8, Y7 5180 VMOVDQU Y8, 512(SP) 5181 VPSRLD $0x0c, Y7, Y8 5182 VPSLLD $0x14, Y7, Y7 5183 VPOR Y7, Y8, Y7 5184 VPADDD Y2, Y7, Y2 5185 VPADDD 32(SP), Y2, Y2 5186 VPXOR Y13, Y2, Y13 5187 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5188 VMOVDQU 512(SP), Y8 5189 VPADDD Y8, Y13, Y8 5190 VPXOR Y7, Y8, Y7 5191 VMOVDQU Y8, 512(SP) 5192 VPSRLD $0x07, Y7, Y8 5193 VPSLLD $0x19, Y7, Y7 5194 VPOR Y7, Y8, Y7 5195 VPADDD Y3, Y4, Y3 5196 VPADDD 192(SP), Y3, Y3 5197 VPXOR Y14, Y3, Y14 5198 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5199 VPADDD Y9, Y14, Y9 5200 VPXOR Y4, Y9, Y4 5201 VPSRLD $0x0c, Y4, Y8 5202 VPSLLD $0x14, Y4, Y4 5203 VPOR Y4, Y8, Y4 5204 VPADDD Y3, Y4, Y3 5205 VPADDD 128(SP), Y3, Y3 5206 VPXOR Y14, Y3, Y14 5207 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5208 VPADDD Y9, Y14, Y9 5209 VPXOR Y4, Y9, Y4 5210 VPSRLD $0x07, Y4, Y8 5211 VPSLLD $0x19, Y4, Y4 5212 VPOR Y4, Y8, Y4 5213 5214 // Round 6 5215 VPADDD Y0, Y4, Y0 5216 VPADDD 288(SP), Y0, Y0 5217 VPXOR Y12, Y0, Y12 5218 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5219 VMOVDQU 512(SP), Y8 5220 VPADDD Y8, Y12, Y8 5221 VPXOR Y4, Y8, Y4 5222 VMOVDQU Y8, 512(SP) 5223 VPSRLD $0x0c, Y4, Y8 5224 VPSLLD $0x14, Y4, Y4 5225 VPOR Y4, Y8, Y4 5226 VPADDD Y0, Y4, Y0 5227 VPADDD 448(SP), Y0, Y0 5228 VPXOR Y12, Y0, Y12 5229 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5230 VMOVDQU 512(SP), Y8 5231 VPADDD Y8, Y12, Y8 5232 VPXOR Y4, Y8, Y4 5233 VMOVDQU Y8, 512(SP) 5234 VPSRLD $0x07, Y4, Y8 5235 VPSLLD $0x19, Y4, Y4 5236 VPOR Y4, Y8, Y4 5237 VPADDD Y1, Y5, Y1 5238 VPADDD 352(SP), Y1, Y1 5239 VPXOR Y13, Y1, Y13 5240 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 5241 VPADDD Y9, Y13, Y9 5242 VPXOR Y5, Y9, Y5 5243 VPSRLD $0x0c, Y5, Y8 5244 VPSLLD $0x14, Y5, Y5 5245 VPOR Y5, Y8, Y5 5246 VPADDD Y1, Y5, Y1 5247 VPADDD 160(SP), Y1, Y1 5248 VPXOR Y13, Y1, Y13 5249 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5250 VPADDD Y9, Y13, Y9 5251 VPXOR Y5, Y9, Y5 5252 VPSRLD $0x07, Y5, Y8 5253 
VPSLLD $0x19, Y5, Y5 5254 VPOR Y5, Y8, Y5 5255 VPADDD Y2, Y6, Y2 5256 VPADDD 256(SP), Y2, Y2 5257 VPXOR Y14, Y2, Y14 5258 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5259 VPADDD Y10, Y14, Y10 5260 VPXOR Y6, Y10, Y6 5261 VPSRLD $0x0c, Y6, Y8 5262 VPSLLD $0x14, Y6, Y6 5263 VPOR Y6, Y8, Y6 5264 VPADDD Y2, Y6, Y2 5265 VPADDD 384(SP), Y2, Y2 5266 VPXOR Y14, Y2, Y14 5267 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5268 VPADDD Y10, Y14, Y10 5269 VPXOR Y6, Y10, Y6 5270 VPSRLD $0x07, Y6, Y8 5271 VPSLLD $0x19, Y6, Y6 5272 VPOR Y6, Y8, Y6 5273 VPADDD Y3, Y7, Y3 5274 VPADDD 480(SP), Y3, Y3 5275 VPXOR Y15, Y3, Y15 5276 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 5277 VPADDD Y11, Y15, Y11 5278 VPXOR Y7, Y11, Y7 5279 VPSRLD $0x0c, Y7, Y8 5280 VPSLLD $0x14, Y7, Y7 5281 VPOR Y7, Y8, Y7 5282 VPADDD Y3, Y7, Y3 5283 VPADDD 32(SP), Y3, Y3 5284 VPXOR Y15, Y3, Y15 5285 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 5286 VPADDD Y11, Y15, Y11 5287 VPXOR Y7, Y11, Y7 5288 VPSRLD $0x07, Y7, Y8 5289 VPSLLD $0x19, Y7, Y7 5290 VPOR Y7, Y8, Y7 5291 VPADDD Y0, Y5, Y0 5292 VPADDD 416(SP), Y0, Y0 5293 VPXOR Y15, Y0, Y15 5294 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 5295 VPADDD Y10, Y15, Y10 5296 VPXOR Y5, Y10, Y5 5297 VPSRLD $0x0c, Y5, Y8 5298 VPSLLD $0x14, Y5, Y5 5299 VPOR Y5, Y8, Y5 5300 VPADDD Y0, Y5, Y0 5301 VPADDD 96(SP), Y0, Y0 5302 VPXOR Y15, Y0, Y15 5303 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 5304 VPADDD Y10, Y15, Y10 5305 VPXOR Y5, Y10, Y5 5306 VPSRLD $0x07, Y5, Y8 5307 VPSLLD $0x19, Y5, Y5 5308 VPOR Y5, Y8, Y5 5309 VPADDD Y1, Y6, Y1 5310 VPADDD (SP), Y1, Y1 5311 VPXOR Y12, Y1, Y12 5312 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5313 VPADDD Y11, Y12, Y11 5314 VPXOR Y6, Y11, Y6 5315 VPSRLD $0x0c, Y6, Y8 5316 VPSLLD $0x14, Y6, Y6 5317 VPOR Y6, Y8, Y6 5318 VPADDD Y1, Y6, Y1 5319 VPADDD 320(SP), Y1, Y1 5320 VPXOR Y12, Y1, Y12 5321 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5322 VPADDD Y11, Y12, Y11 5323 VPXOR Y6, Y11, Y6 5324 VPSRLD $0x07, Y6, Y8 5325 VPSLLD $0x19, Y6, Y6 5326 VPOR Y6, Y8, Y6 5327 VPADDD Y2, Y7, Y2 5328 
VPADDD 64(SP), Y2, Y2 5329 VPXOR Y13, Y2, Y13 5330 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 5331 VMOVDQU 512(SP), Y8 5332 VPADDD Y8, Y13, Y8 5333 VPXOR Y7, Y8, Y7 5334 VMOVDQU Y8, 512(SP) 5335 VPSRLD $0x0c, Y7, Y8 5336 VPSLLD $0x14, Y7, Y7 5337 VPOR Y7, Y8, Y7 5338 VPADDD Y2, Y7, Y2 5339 VPADDD 192(SP), Y2, Y2 5340 VPXOR Y13, Y2, Y13 5341 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5342 VMOVDQU 512(SP), Y8 5343 VPADDD Y8, Y13, Y8 5344 VPXOR Y7, Y8, Y7 5345 VMOVDQU Y8, 512(SP) 5346 VPSRLD $0x07, Y7, Y8 5347 VPSLLD $0x19, Y7, Y7 5348 VPOR Y7, Y8, Y7 5349 VPADDD Y3, Y4, Y3 5350 VPADDD 128(SP), Y3, Y3 5351 VPXOR Y14, Y3, Y14 5352 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5353 VPADDD Y9, Y14, Y9 5354 VPXOR Y4, Y9, Y4 5355 VPSRLD $0x0c, Y4, Y8 5356 VPSLLD $0x14, Y4, Y4 5357 VPOR Y4, Y8, Y4 5358 VPADDD Y3, Y4, Y3 5359 VPADDD 224(SP), Y3, Y3 5360 VPXOR Y14, Y3, Y14 5361 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5362 VPADDD Y9, Y14, Y9 5363 VPXOR Y4, Y9, Y4 5364 VPSRLD $0x07, Y4, Y8 5365 VPSLLD $0x19, Y4, Y4 5366 VPOR Y4, Y8, Y4 5367 5368 // Round 7 5369 VPADDD Y0, Y4, Y0 5370 VPADDD 352(SP), Y0, Y0 5371 VPXOR Y12, Y0, Y12 5372 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5373 VMOVDQU 512(SP), Y8 5374 VPADDD Y8, Y12, Y8 5375 VPXOR Y4, Y8, Y4 5376 VMOVDQU Y8, 512(SP) 5377 VPSRLD $0x0c, Y4, Y8 5378 VPSLLD $0x14, Y4, Y4 5379 VPOR Y4, Y8, Y4 5380 VPADDD Y0, Y4, Y0 5381 VPADDD 480(SP), Y0, Y0 5382 VPXOR Y12, Y0, Y12 5383 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5384 VMOVDQU 512(SP), Y8 5385 VPADDD Y8, Y12, Y8 5386 VPXOR Y4, Y8, Y4 5387 VMOVDQU Y8, 512(SP) 5388 VPSRLD $0x07, Y4, Y8 5389 VPSLLD $0x19, Y4, Y4 5390 VPOR Y4, Y8, Y4 5391 VPADDD Y1, Y5, Y1 5392 VPADDD 160(SP), Y1, Y1 5393 VPXOR Y13, Y1, Y13 5394 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 5395 VPADDD Y9, Y13, Y9 5396 VPXOR Y5, Y9, Y5 5397 VPSRLD $0x0c, Y5, Y8 5398 VPSLLD $0x14, Y5, Y5 5399 VPOR Y5, Y8, Y5 5400 VPADDD Y1, Y5, Y1 5401 VPADDD (SP), Y1, Y1 5402 VPXOR Y13, Y1, Y13 5403 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5404 VPADDD Y9, Y13, 
Y9 5405 VPXOR Y5, Y9, Y5 5406 VPSRLD $0x07, Y5, Y8 5407 VPSLLD $0x19, Y5, Y5 5408 VPOR Y5, Y8, Y5 5409 VPADDD Y2, Y6, Y2 5410 VPADDD 32(SP), Y2, Y2 5411 VPXOR Y14, Y2, Y14 5412 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5413 VPADDD Y10, Y14, Y10 5414 VPXOR Y6, Y10, Y6 5415 VPSRLD $0x0c, Y6, Y8 5416 VPSLLD $0x14, Y6, Y6 5417 VPOR Y6, Y8, Y6 5418 VPADDD Y2, Y6, Y2 5419 VPADDD 288(SP), Y2, Y2 5420 VPXOR Y14, Y2, Y14 5421 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5422 VPADDD Y10, Y14, Y10 5423 VPXOR Y6, Y10, Y6 5424 VPSRLD $0x07, Y6, Y8 5425 VPSLLD $0x19, Y6, Y6 5426 VPOR Y6, Y8, Y6 5427 VPADDD Y3, Y7, Y3 5428 VPADDD 256(SP), Y3, Y3 5429 VPXOR Y15, Y3, Y15 5430 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 5431 VPADDD Y11, Y15, Y11 5432 VPXOR Y7, Y11, Y7 5433 VPSRLD $0x0c, Y7, Y8 5434 VPSLLD $0x14, Y7, Y7 5435 VPOR Y7, Y8, Y7 5436 VPADDD Y3, Y7, Y3 5437 VPADDD 192(SP), Y3, Y3 5438 VPXOR Y15, Y3, Y15 5439 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 5440 VPADDD Y11, Y15, Y11 5441 VPXOR Y7, Y11, Y7 5442 VPSRLD $0x07, Y7, Y8 5443 VPSLLD $0x19, Y7, Y7 5444 VPOR Y7, Y8, Y7 5445 VPADDD Y0, Y5, Y0 5446 VPADDD 448(SP), Y0, Y0 5447 VPXOR Y15, Y0, Y15 5448 VPSHUFB shuffle_rot16<>+0(SB), Y15, Y15 5449 VPADDD Y10, Y15, Y10 5450 VPXOR Y5, Y10, Y5 5451 VPSRLD $0x0c, Y5, Y8 5452 VPSLLD $0x14, Y5, Y5 5453 VPOR Y5, Y8, Y5 5454 VPADDD Y0, Y5, Y0 5455 VPADDD 320(SP), Y0, Y0 5456 VPXOR Y15, Y0, Y15 5457 VPSHUFB shuffle_rot8<>+0(SB), Y15, Y15 5458 VPADDD Y10, Y15, Y10 5459 VPXOR Y5, Y10, Y5 5460 VPSRLD $0x07, Y5, Y8 5461 VPSLLD $0x19, Y5, Y5 5462 VPOR Y5, Y8, Y5 5463 VPADDD Y1, Y6, Y1 5464 VPADDD 64(SP), Y1, Y1 5465 VPXOR Y12, Y1, Y12 5466 VPSHUFB shuffle_rot16<>+0(SB), Y12, Y12 5467 VPADDD Y11, Y12, Y11 5468 VPXOR Y6, Y11, Y6 5469 VPSRLD $0x0c, Y6, Y8 5470 VPSLLD $0x14, Y6, Y6 5471 VPOR Y6, Y8, Y6 5472 VPADDD Y1, Y6, Y1 5473 VPADDD 384(SP), Y1, Y1 5474 VPXOR Y12, Y1, Y12 5475 VPSHUFB shuffle_rot8<>+0(SB), Y12, Y12 5476 VPADDD Y11, Y12, Y11 5477 VPXOR Y6, Y11, Y6 5478 VPSRLD $0x07, Y6, Y8 5479 VPSLLD $0x19, 
Y6, Y6 5480 VPOR Y6, Y8, Y6 5481 VPADDD Y2, Y7, Y2 5482 VPADDD 96(SP), Y2, Y2 5483 VPXOR Y13, Y2, Y13 5484 VPSHUFB shuffle_rot16<>+0(SB), Y13, Y13 5485 VMOVDQU 512(SP), Y8 5486 VPADDD Y8, Y13, Y8 5487 VPXOR Y7, Y8, Y7 5488 VMOVDQU Y8, 512(SP) 5489 VPSRLD $0x0c, Y7, Y8 5490 VPSLLD $0x14, Y7, Y7 5491 VPOR Y7, Y8, Y7 5492 VPADDD Y2, Y7, Y2 5493 VPADDD 128(SP), Y2, Y2 5494 VPXOR Y13, Y2, Y13 5495 VPSHUFB shuffle_rot8<>+0(SB), Y13, Y13 5496 VMOVDQU 512(SP), Y8 5497 VPADDD Y8, Y13, Y8 5498 VPXOR Y7, Y8, Y7 5499 VMOVDQU Y8, 512(SP) 5500 VPSRLD $0x07, Y7, Y8 5501 VPSLLD $0x19, Y7, Y7 5502 VPOR Y7, Y8, Y7 5503 VPADDD Y3, Y4, Y3 5504 VPADDD 224(SP), Y3, Y3 5505 VPXOR Y14, Y3, Y14 5506 VPSHUFB shuffle_rot16<>+0(SB), Y14, Y14 5507 VPADDD Y9, Y14, Y9 5508 VPXOR Y4, Y9, Y4 5509 VPSRLD $0x0c, Y4, Y8 5510 VPSLLD $0x14, Y4, Y4 5511 VPOR Y4, Y8, Y4 5512 VPADDD Y3, Y4, Y3 5513 VPADDD 416(SP), Y3, Y3 5514 VPXOR Y14, Y3, Y14 5515 VPSHUFB shuffle_rot8<>+0(SB), Y14, Y14 5516 VPADDD Y9, Y14, Y9 5517 VPXOR Y4, Y9, Y4 5518 VPSRLD $0x07, Y4, Y8 5519 VPSLLD $0x19, Y4, Y4 5520 VPOR Y4, Y8, Y4 5521 VMOVDQU 512(SP), Y8 5522 5523 // Finalize CVs 5524 VPXOR Y0, Y8, Y0 5525 VPXOR Y1, Y9, Y1 5526 VPXOR Y2, Y10, Y2 5527 VPXOR Y3, Y11, Y3 5528 VPXOR Y4, Y12, Y4 5529 VPXOR Y5, Y13, Y5 5530 VPXOR Y6, Y14, Y6 5531 VPXOR Y7, Y15, Y7 5532 VPUNPCKLDQ Y1, Y0, Y8 5533 VPUNPCKHDQ Y1, Y0, Y9 5534 VPUNPCKLDQ Y3, Y2, Y10 5535 VPUNPCKHDQ Y3, Y2, Y11 5536 VPUNPCKLDQ Y5, Y4, Y12 5537 VPUNPCKHDQ Y5, Y4, Y13 5538 VPUNPCKLDQ Y7, Y6, Y14 5539 VPUNPCKHDQ Y7, Y6, Y15 5540 VPUNPCKLQDQ Y10, Y8, Y0 5541 VPUNPCKHQDQ Y10, Y8, Y1 5542 VPUNPCKLQDQ Y11, Y9, Y2 5543 VPUNPCKHQDQ Y11, Y9, Y3 5544 VPUNPCKLQDQ Y14, Y12, Y4 5545 VPUNPCKHQDQ Y14, Y12, Y5 5546 VPUNPCKLQDQ Y15, Y13, Y6 5547 VPUNPCKHQDQ Y15, Y13, Y7 5548 VPERM2I128 $0x20, Y4, Y0, Y8 5549 VPERM2I128 $0x31, Y4, Y0, Y12 5550 VPERM2I128 $0x20, Y5, Y1, Y9 5551 VPERM2I128 $0x31, Y5, Y1, Y13 5552 VPERM2I128 $0x20, Y6, Y2, Y10 5553 VPERM2I128 $0x31, Y6, Y2, Y14 5554 VPERM2I128 
$0x20, Y7, Y3, Y11 5555 VPERM2I128 $0x31, Y7, Y3, Y15 5556 VMOVDQU Y8, (AX) 5557 VMOVDQU Y9, 32(AX) 5558 VMOVDQU Y10, 64(AX) 5559 VMOVDQU Y11, 96(AX) 5560 VMOVDQU Y12, 128(AX) 5561 VMOVDQU Y13, 160(AX) 5562 VMOVDQU Y14, 192(AX) 5563 VMOVDQU Y15, 224(AX) 5564 RET