// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "textflag.h"

// func crc32Int64BatchHash(data *uint64, hashes *uint64, length int)
// Requires: SSE4.2, AVX
//
// Hashes `length` uint64 words. For each element i:
//   hashes[i] = (data[i] & 0xffffffff00000000) | crc32c(^0, data[i])
// i.e. the input word is first copied to the output slot, then its low
// 32 bits are overwritten with the CRC32C of the word (seed -1, no
// final inversion). The high 32 bits of the copied input are kept on
// purpose to widen the 32-bit CRC into a 64-bit hash value.
//
// Register roles:
//   SI = data cursor, DI = hashes cursor, CX = elements remaining
//   R8-R15 = per-lane CRC accumulators in the unrolled main loop
TEXT ·crc32Int64BatchHash(SB), NOSPLIT, $0-24
	MOVQ data+0(FP), SI
	MOVQ hashes+8(FP), DI
	MOVQ length+16(FP), CX

loop:
	// Main loop handles 8 words (64 bytes) per iteration.
	SUBQ $8, CX
	JL   tail

	// Bulk-copy the 8 input words to the output; the CRC stores below
	// then overwrite only the low 32 bits of each copied word.
	VMOVDQU (SI), Y0
	VMOVDQU Y0, (DI)
	VMOVDQU 0x20(SI), Y1
	VMOVDQU Y1, 0x20(DI)

	// Seed every CRC accumulator with -1 (all-ones), the conventional
	// CRC starting value. Note there is no final XOR with -1.
	MOVQ $-1, R8
	MOVQ $-1, R9
	MOVQ $-1, R10
	MOVQ $-1, R11
	MOVQ $-1, R12
	MOVQ $-1, R13
	MOVQ $-1, R14
	MOVQ $-1, R15

	// Eight independent CRC chains so the 3-cycle CRC32 latency overlaps.
	CRC32Q 0x00(SI), R8
	CRC32Q 0x08(SI), R9
	CRC32Q 0x10(SI), R10
	CRC32Q 0x18(SI), R11
	CRC32Q 0x20(SI), R12
	CRC32Q 0x28(SI), R13
	CRC32Q 0x30(SI), R14
	CRC32Q 0x38(SI), R15

	// 32-bit stores: replace only the low half of each output word.
	MOVL R8, 0x00(DI)
	MOVL R9, 0x08(DI)
	MOVL R10, 0x10(DI)
	MOVL R11, 0x18(DI)
	MOVL R12, 0x20(DI)
	MOVL R13, 0x28(DI)
	MOVL R14, 0x30(DI)
	MOVL R15, 0x38(DI)

	ADDQ $0x40, SI
	ADDQ $0x40, DI
	JMP  loop

tail:
	// CX went negative above; restore the 0..7 leftover count.
	ADDQ $8, CX
	JE   done

tailLoop:
	// Scalar path: same copy-then-patch scheme, one word at a time.
	MOVQ $-1, R8
	MOVQ (SI), R9
	MOVQ R9, (DI)
	CRC32Q (SI), R8
	MOVL R8, (DI)

	ADDQ $0x08, SI
	ADDQ $0x08, DI
	LOOP tailLoop

done:
	// The main loop dirties the YMM upper state (Y0/Y1); clear it
	// before returning to SSE-style Go code to avoid AVX->SSE
	// transition penalties. Harmless no-op on the short-length path.
	VZEROUPPER
	RET

////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////

// Pi<> holds the first 128 bytes of the binary expansion of pi — a
// "nothing up my sleeve" constant. It is read only by the offline
// generator genCryptedPi below; the hash routines use CryptedPi<>.
DATA Pi<>+0x00(SB)/8, $0x3243f6a8885a308d
DATA Pi<>+0x08(SB)/8, $0x313198a2e0370734
DATA Pi<>+0x10(SB)/8, $0x4a4093822299f31d
DATA Pi<>+0x18(SB)/8, $0x0082efa98ec4e6c8
DATA Pi<>+0x20(SB)/8, $0x9452821e638d0137
DATA Pi<>+0x28(SB)/8, $0x7be5466cf34e90c6
DATA Pi<>+0x30(SB)/8, $0xcc0ac29b7c97c50d
DATA Pi<>+0x38(SB)/8, $0xd3f84d5b5b547091
DATA Pi<>+0x40(SB)/8, $0x79216d5d98979fb1
DATA Pi<>+0x48(SB)/8, $0xbd1310ba698dfb5a
DATA Pi<>+0x50(SB)/8, $0xc2ffd72dbd01adfb
DATA Pi<>+0x58(SB)/8, $0x7b8e1afed6a267e9
DATA Pi<>+0x60(SB)/8, $0x6ba7c9045f12c7f9
DATA Pi<>+0x68(SB)/8, $0x924a19947b3916cf
DATA Pi<>+0x70(SB)/8, $0x70801f2e2858efc1
DATA Pi<>+0x78(SB)/8, $0x6636920d871574e6
GLOBL Pi<>(SB), (NOPTR+RODATA), $0x80

// CryptedPi<> is a 128-byte table of round-key constants used to seed
// the AES hash states below. Presumably it is the precomputed output of
// genCryptedPi applied to Pi<> — TODO confirm; it is baked in so the
// generator never runs at hash time.
DATA CryptedPi<>+0x00(SB)/8, $0x822233b93c11087c
DATA CryptedPi<>+0x08(SB)/8, $0xd2b32f4adde873da
DATA CryptedPi<>+0x10(SB)/8, $0xae9c2fc7dd17bcdb
DATA CryptedPi<>+0x18(SB)/8, $0x859110441a1569fc
DATA CryptedPi<>+0x20(SB)/8, $0x47087d794fffb5c9
DATA CryptedPi<>+0x28(SB)/8, $0xb7b6c8f565414445
DATA CryptedPi<>+0x30(SB)/8, $0xfd260edabb308f8d
DATA CryptedPi<>+0x38(SB)/8, $0x3ddefc67bc565a13
DATA CryptedPi<>+0x40(SB)/8, $0xe4c1d50223544f10
DATA CryptedPi<>+0x48(SB)/8, $0xaf40e05725c3192b
DATA CryptedPi<>+0x50(SB)/8, $0x281d8ab9a16382e9
DATA CryptedPi<>+0x58(SB)/8, $0xddc10c903b63a6cf
DATA CryptedPi<>+0x60(SB)/8, $0x852d3ad603e8df72
DATA CryptedPi<>+0x68(SB)/8, $0xa6642b57d1011deb
DATA CryptedPi<>+0x70(SB)/8, $0x5063d25a1cb7b6b9
DATA CryptedPi<>+0x78(SB)/8, $0xb2623e6241e8e46e
GLOBL CryptedPi<>(SB), (NOPTR+RODATA), $0x80

// func aesBytesBatchGenHashStates(data *[]byte, states *[3]uint64, length int)
// Requires: AES
//
// For each of `length` byte slices, absorbs the slice contents into two
// parallel 4-lane AES states (X8-X11 and X12-X15, both seeded from
// CryptedPi) and emits a 24-byte state:
//   states[i][0] = 64-bit fold of lanes X8..X11, XORed with len(data[i])
//   states[i][1..2] = 128-bit combine of lanes X12..X15
// Input stride is 24 bytes (one Go slice header); output stride is 24 bytes.
//
// NOTE(review): the final absorb reads the last 16 bytes of the slice
// (possibly overlapping earlier reads); this looks like it assumes
// len(data[i]) >= 16 — TODO confirm with callers.
//
// Register roles: SI = slice-header cursor, DI = states cursor,
// CX = slices remaining, AX = byte cursor, DX = limit, BX = saved length.
TEXT ·aesBytesBatchGenHashStates(SB), NOSPLIT, $0-24
	MOVQ data+0(FP), SI
	MOVQ states+8(FP), DI
	MOVQ length+16(FP), CX

	// Keep the seed table resident in X0-X7 for the whole batch.
	VMOVDQU CryptedPi<>+0x00(SB), X0
	VMOVDQU CryptedPi<>+0x10(SB), X1
	VMOVDQU CryptedPi<>+0x20(SB), X2
	VMOVDQU CryptedPi<>+0x30(SB), X3
	VMOVDQU CryptedPi<>+0x40(SB), X4
	VMOVDQU CryptedPi<>+0x50(SB), X5
	VMOVDQU CryptedPi<>+0x60(SB), X6
	VMOVDQU CryptedPi<>+0x70(SB), X7

loop:
	MOVQ (SI), AX       // AX = data pointer of this slice
	MOVQ 8(SI), DX      // DX = len of this slice
	MOVQ DX, BX         // BX = len, saved for the final XOR

	ADDQ AX, DX
	SUBQ $0x40, DX      // DX = end - 64: main-loop limit

	// Reset the working states from the seed for every slice.
	VMOVDQU X0, X8
	VMOVDQU X1, X9
	VMOVDQU X2, X10
	VMOVDQU X3, X11
	VMOVDQU X4, X12
	VMOVDQU X5, X13
	VMOVDQU X6, X14
	VMOVDQU X7, X15

innerLoop:
	// Absorb 64 bytes per iteration; each 16-byte chunk feeds one lane
	// of both state groups.
	CMPQ AX, DX
	JGE  tail

	VAESENC 0x00(AX), X8, X8
	VAESENC 0x00(AX), X12, X12
	VAESENC 0x10(AX), X9, X9
	VAESENC 0x10(AX), X13, X13
	VAESENC 0x20(AX), X10, X10
	VAESENC 0x20(AX), X14, X14
	VAESENC 0x30(AX), X11, X11
	VAESENC 0x30(AX), X15, X15

	ADDQ $0x40, AX
	JMP  innerLoop

tail:
	// DX becomes end-16: absorb whole 16-byte chunks until fewer than
	// 16 bytes remain before the final block.
	ADDQ $0x30, DX
	CMPQ AX, DX
	JGE  done

	VAESENC (AX), X8, X8
	VAESENC (AX), X12, X12

	ADDQ $0x10, AX
	CMPQ AX, DX
	JGE  done

	VAESENC (AX), X9, X9
	VAESENC (AX), X13, X13

	ADDQ $0x10, AX
	CMPQ AX, DX
	JGE  done

	VAESENC (AX), X10, X10
	VAESENC (AX), X14, X14

done:
	// Final block: the last 16 bytes of the slice, re-read at end-16 so
	// it may overlap bytes already absorbed above.
	VAESENC (DX), X11, X11
	VAESENC (DX), X15, X15

	// Combine lanes X8..X11 into X11.
	VAESENC X9, X8, X8
	VAESENC X10, X11, X11
	VAESENC X8, X11, X11

	// Extra diffusion rounds on the combined state.
	VAESENC X11, X11, X11
	VAESENC X11, X11, X11
	VAESENC X11, X11, X11

	// Combine lanes X12..X15 into X12.
	VAESENC X14, X13, X13
	VAESENC X15, X12, X12
	VAESENC X13, X12, X12

	// Fold X11's two 64-bit halves together, then mix in the length.
	VPSHUFD $0x4e, X11, X8
	VPXOR X8, X11, X11
	VMOVQ X11, R8
	XORQ BX, R8

	MOVQ R8, (DI)       // states[i][0]
	VMOVDQU X12, 8(DI)  // states[i][1..2]

	ADDQ $24, SI        // next slice header (ptr/len/cap = 24 bytes)
	ADDQ $24, DI        // next 24-byte state
	DECQ CX
	JNZ  loop

	RET

// func aesInt192BatchGenHashStates(data *[3]uint64, states *[3]uint64, length int)
// Requires: AES
//
// Fixed-width variant for 24-byte keys. Each key is absorbed via two
// overlapping 16-byte loads (offsets 0x00 and 0x08) into two seeded
// states, diffused, and written as a 24-byte state. Input and output
// stride are both 0x18. SI = data, DI = states, CX = count.
TEXT ·aesInt192BatchGenHashStates(SB), NOSPLIT, $0-24
	MOVQ data+0(FP), SI
	MOVQ states+8(FP), DI
	MOVQ length+16(FP), CX

	VMOVDQU CryptedPi<>+0x00(SB), X0
	VMOVDQU CryptedPi<>+0x10(SB), X1
	VMOVDQU CryptedPi<>+0x20(SB), X2
	VMOVDQU CryptedPi<>+0x30(SB), X3
	VMOVDQU CryptedPi<>+0x40(SB), X4
	VMOVDQU CryptedPi<>+0x50(SB), X5
	VMOVDQU CryptedPi<>+0x60(SB), X6
	VMOVDQU CryptedPi<>+0x70(SB), X7
	// Pre-mix two constant pairs once; X3/X6 act as fixed round inputs
	// inside the loop.
	VAESENC X2, X3, X3
	VAESENC X7, X6, X6

loop:
	// Absorb the 24-byte key (two overlapping 16-byte reads).
	VAESENC 0x00(SI), X0, X8
	VAESENC 0x00(SI), X4, X10
	VAESENC 0x08(SI), X1, X9
	VAESENC 0x08(SI), X5, X11
	// Combine and diffuse the first state group.
	VAESENC X8, X9, X9
	VAESENC X3, X9, X9
	VAESENC X9, X9, X9
	VAESENC X9, X9, X9
	// Fold to 64 bits: XOR the two qword halves.
	VPSHUFD $0x4e, X9, X8
	VPXOR X8, X9, X9
	// Combine the second state group.
	VAESENC X11, X10, X10
	VAESENC X6, X10, X10
	VMOVQ X9, 0x00(DI)      // states[i][0]
	VMOVDQU X10, 0x08(DI)   // states[i][1..2]

	ADDQ $0x18, SI
	ADDQ $0x18, DI
	LOOP loop

done:
	RET

// func aesInt256BatchGenHashStates(data *[4]uint64, states *[3]uint64, length int)
// Requires: AES
//
// Fixed-width variant for 32-byte keys: identical mixing network to the
// 192-bit version but with two non-overlapping 16-byte loads (0x00 and
// 0x10) and input stride 0x20; output stride stays 0x18.
TEXT ·aesInt256BatchGenHashStates(SB), NOSPLIT, $0-24
	MOVQ data+0(FP), SI
	MOVQ states+8(FP), DI
	MOVQ length+16(FP), CX

	VMOVDQU CryptedPi<>+0x00(SB), X0
	VMOVDQU CryptedPi<>+0x10(SB), X1
	VMOVDQU CryptedPi<>+0x20(SB), X2
	VMOVDQU CryptedPi<>+0x30(SB), X3
	VMOVDQU CryptedPi<>+0x40(SB), X4
	VMOVDQU CryptedPi<>+0x50(SB), X5
	VMOVDQU CryptedPi<>+0x60(SB), X6
	VMOVDQU CryptedPi<>+0x70(SB), X7
	// Pre-mixed constants, as in the 192-bit variant.
	VAESENC X2, X3, X3
	VAESENC X7, X6, X6

loop:
	// Absorb the 32-byte key (two disjoint 16-byte reads).
	VAESENC 0x00(SI), X0, X8
	VAESENC 0x00(SI), X4, X10
	VAESENC 0x10(SI), X1, X9
	VAESENC 0x10(SI), X5, X11
	VAESENC X8, X9, X9
	VAESENC X3, X9, X9
	VAESENC X9, X9, X9
	VAESENC X9, X9, X9
	VPSHUFD $0x4e, X9, X8
	VPXOR X8, X9, X9
	VAESENC X11, X10, X10
	VAESENC X6, X10, X10
	VMOVQ X9, 0x00(DI)      // states[i][0]
	VMOVDQU X10, 0x08(DI)   // states[i][1..2]

	ADDQ $0x20, SI
	ADDQ $0x18, DI
	LOOP loop

done:
	RET

// func aesInt320BatchGenHashStates(data *[5]uint64, states *[3]uint64, length int)
// Requires: AES
//
// Fixed-width variant for 40-byte keys: three 16-byte loads (0x00, 0x10
// and an overlapping one at 0x18) feed a three-lane mix per state group.
// Input stride 0x28; output stride 0x18.
TEXT ·aesInt320BatchGenHashStates(SB), NOSPLIT, $0-24
	MOVQ data+0(FP), SI
	MOVQ states+8(FP), DI
	MOVQ length+16(FP), CX

	VMOVDQU CryptedPi<>+0x00(SB), X0
	VMOVDQU CryptedPi<>+0x10(SB), X1
	VMOVDQU CryptedPi<>+0x20(SB), X2
	VMOVDQU CryptedPi<>+0x30(SB), X3
	VMOVDQU CryptedPi<>+0x40(SB), X4
	VMOVDQU CryptedPi<>+0x50(SB), X5
	VMOVDQU CryptedPi<>+0x60(SB), X6
	VMOVDQU CryptedPi<>+0x70(SB), X7

loop:
	// Absorb the 40-byte key; the 0x18 load overlaps the 0x10 load by
	// 8 bytes so all 40 bytes are covered.
	VAESENC 0x00(SI), X0, X8
	VAESENC 0x00(SI), X4, X11
	VAESENC 0x10(SI), X1, X9
	VAESENC 0x10(SI), X5, X12
	VAESENC 0x18(SI), X3, X10
	VAESENC 0x18(SI), X6, X13
	// Combine and diffuse the first state group (X8/X9/X10).
	VAESENC X10, X8, X8
	VAESENC X2, X9, X9
	VAESENC X9, X8, X8
	VAESENC X8, X8, X8
	VAESENC X8, X8, X8
	// Fold to 64 bits.
	VPSHUFD $0x4e, X8, X9
	VPXOR X9, X8, X8
	// Combine the second state group (X11/X12/X13).
	VAESENC X12, X11, X11
	VAESENC X7, X13, X13
	VAESENC X13, X11, X11
	VMOVQ X8, 0x00(DI)      // states[i][0]
	VMOVDQU X11, 0x08(DI)   // states[i][1..2]

	ADDQ $0x28, SI
	ADDQ $0x18, DI
	LOOP loop

done:
	RET

// genCryptedPi(dst *[16]uint64)
//
// Offline utility: repeatedly self-encrypts the Pi<> constants (two
// passes of 13 VAESENC rounds followed by one VAESENCLAST each) and
// writes the 128-byte result to dst. Presumably this is how the
// CryptedPi<> table above was produced — it has no Go declaration (no
// package dot in the symbol) and is not called at hash time.
TEXT genCryptedPi(SB), NOSPLIT, $0-8
	MOVQ dst+0(FP), DI

	VMOVDQU Pi<>+0x00(SB), X0
	VMOVDQU Pi<>+0x10(SB), X1
	VMOVDQU Pi<>+0x20(SB), X2
	VMOVDQU Pi<>+0x30(SB), X3
	VMOVDQU Pi<>+0x40(SB), X4
	VMOVDQU Pi<>+0x50(SB), X5
	VMOVDQU Pi<>+0x60(SB), X6
	VMOVDQU Pi<>+0x70(SB), X7

	// Pass 1, rounds 1-13: each register is AES-encrypted with itself
	// as the round key.
	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	// Pass 1, round 14: final round (no MixColumns).
	VAESENCLAST X0, X0, X0
	VAESENCLAST X1, X1, X1
	VAESENCLAST X2, X2, X2
	VAESENCLAST X3, X3, X3
	VAESENCLAST X4, X4, X4
	VAESENCLAST X5, X5, X5
	VAESENCLAST X6, X6, X6
	VAESENCLAST X7, X7, X7

	// Pass 2, rounds 1-13.
	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	VAESENC X0, X0, X0
	VAESENC X1, X1, X1
	VAESENC X2, X2, X2
	VAESENC X3, X3, X3
	VAESENC X4, X4, X4
	VAESENC X5, X5, X5
	VAESENC X6, X6, X6
	VAESENC X7, X7, X7

	// Pass 2, round 14: final round, then write out the table.
	VAESENCLAST X0, X0, X0
	VAESENCLAST X1, X1, X1
	VAESENCLAST X2, X2, X2
	VAESENCLAST X3, X3, X3
	VAESENCLAST X4, X4, X4
	VAESENCLAST X5, X5, X5
	VAESENCLAST X6, X6, X6
	VAESENCLAST X7, X7, X7

	VMOVDQU X0, 0x00(DI)
	VMOVDQU X1, 0x10(DI)
	VMOVDQU X2, 0x20(DI)
	VMOVDQU X3, 0x30(DI)
	VMOVDQU X4, 0x40(DI)
	VMOVDQU X5, 0x50(DI)
	VMOVDQU X6, 0x60(DI)
	VMOVDQU X7, 0x70(DI)

	RET