github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/order_amd64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
        // Result codes that the orderOf* routines in this file store in ret+24(FP).
        //   UNDEFINED  - neither test passes, or the slice has fewer than two elements
        //   ASCENDING  - the ascending (<=) test passes
        //   DESCENDING - the ascending test fails and the descending (>=) test passes
     5  #define UNDEFINED 0
     6  #define ASCENDING 1
     7  #define DESCENDING -1
     8  
        // shift1x32 holds sixteen 32-bit lane indices {1, 2, ..., 15, 15}.
        // The 32-bit orderOf* routines load it into Z2 and use a copy of it as the
        // index operand of VPERMI2D, so that lane i of the result receives element
        // i+1 of the loaded window. The last index is 15 again, which makes the
        // final lane select the window's own last element.
     9  DATA shift1x32<>+0(SB)/4, $1
    10  DATA shift1x32<>+4(SB)/4, $2
    11  DATA shift1x32<>+8(SB)/4, $3
    12  DATA shift1x32<>+12(SB)/4, $4
    13  DATA shift1x32<>+16(SB)/4, $5
    14  DATA shift1x32<>+20(SB)/4, $6
    15  DATA shift1x32<>+24(SB)/4, $7
    16  DATA shift1x32<>+28(SB)/4, $8
    17  DATA shift1x32<>+32(SB)/4, $9
    18  DATA shift1x32<>+36(SB)/4, $10
    19  DATA shift1x32<>+40(SB)/4, $11
    20  DATA shift1x32<>+44(SB)/4, $12
    21  DATA shift1x32<>+48(SB)/4, $13
    22  DATA shift1x32<>+52(SB)/4, $14
    23  DATA shift1x32<>+56(SB)/4, $15
    24  DATA shift1x32<>+60(SB)/4, $15
    25  GLOBL shift1x32<>(SB), RODATA|NOPTR, $64
    26  
        // shift1x64 holds eight 64-bit lane indices {1, 2, ..., 7, 7}.
        // The 64-bit orderOf* routines load it into Z2 and use a copy of it as the
        // index operand of VPERMI2Q, so that lane i of the result receives element
        // i+1 of the loaded window; the last lane selects the window's own last
        // element. Each index is written as a full little-endian quadword.
    27  DATA shift1x64<>+0(SB)/8, $1
    28  DATA shift1x64<>+8(SB)/8, $2
    29  DATA shift1x64<>+16(SB)/8, $3
    30  DATA shift1x64<>+24(SB)/8, $4
    31  DATA shift1x64<>+32(SB)/8, $5
    32  DATA shift1x64<>+40(SB)/8, $6
    33  DATA shift1x64<>+48(SB)/8, $7
    34  DATA shift1x64<>+56(SB)/8, $7
    35  GLOBL shift1x64<>(SB), RODATA|NOPTR, $64
    36  
    37  // func orderOfInt32(data []int32) int
        //
        // Stores in ret+24(FP) the order of data under signed 32-bit comparison:
        // ASCENDING when data[i] <= data[i+1] for every adjacent pair, otherwise
        // DESCENDING when data[i] >= data[i+1] for every pair, otherwise UNDEFINED.
        // Slices with fewer than two elements yield UNDEFINED.
        //
        // Registers: R8 = data pointer; R9 = len, later len-1; SI / DI = index of
        // the ascending / descending scan; AX = index where the vector loops stop;
        // Z2 = permutation indices; K2 = all-zero mask.
    38  TEXT ·orderOfInt32(SB), NOSPLIT, $-32
    39      MOVQ data_base+0(FP), R8
    40      MOVQ data_len+8(FP), R9
    41      XORQ SI, SI
    42      XORQ DI, DI
    43  
    44      CMPQ R9, $2
    45      JB undefined                 // fewer than two elements: nothing to compare
    46  
    47      CMPB ·hasAVX512VL(SB), $0
    48      JE test                      // flag clear: use the scalar loops only
    49  
    50      CMPQ R9, $16
    51      JB test                      // shorter than one 16-element vector load
    52  
            // AX = 15 * ((len-1) / 15). Each vector iteration loads data[SI..SI+15]
            // and advances by 15 pairs, so the last load ends at index AX <= len-1
            // and never reads past the end of the slice.
    53      XORQ DX, DX                  // IDIVQ divides DX:AX
    54      MOVQ R9, AX
    55      DECQ AX                      // AX = len-1 = number of adjacent pairs
    57      MOVQ $15, CX
    58      IDIVQ CX
    59      IMULQ $15, AX
    60      DECQ R9                      // R9 = len-1, bound for the scalar loops
    61  
    62      VMOVDQU32 shift1x32<>(SB), Z2
    63      KXORW K2, K2, K2             // K2 = 0, so KORTESTW below tests K1 alone
    64  testAscending15:
    65      VMOVDQU32 (R8)(SI*4), Z0     // Z0 = data[SI..SI+15]
    66      VMOVDQU32 Z2, Z1             // fresh copy: VPERMI2D overwrites its index register
    67      VPERMI2D Z0, Z0, Z1          // Z1[i] = Z0[i+1]; last lane keeps Z0[15]
    68      VPCMPD $2, Z1, Z0, K1        // K1[i] = Z0[i] <= Z1[i] (signed)
    69      KORTESTW K2, K1              // CF = 1 only if all 16 lanes passed
    70      JNC testDescending15         // some pair is out of ascending order
    71      ADDQ $15, SI
    72      CMPQ SI, AX
    73      JNE testAscending15
    74      VZEROUPPER
    75      JMP testAscending            // scalar loop checks the remaining pairs
    76  testDescending15:
    77      VMOVDQU32 (R8)(DI*4), Z0     // Z0 = data[DI..DI+15]
    78      VMOVDQU32 Z2, Z1
    79      VPERMI2D Z0, Z0, Z1
    80      VPCMPD $5, Z1, Z0, K1        // K1[i] = Z0[i] >= Z1[i] (predicate 5: not less than)
    81      KORTESTW K2, K1
    82      JNC undefined15              // neither ascending nor descending
    83      ADDQ $15, DI
    84      CMPQ DI, AX
    85      JNE testDescending15
    86      VZEROUPPER
    87      JMP testDescending           // scalar loop checks the remaining pairs
    88  
    89  test:
    90      DECQ R9                      // R9 = len-1
    91  testAscending:
    92      CMPQ SI, R9
    93      JAE ascending                // every pair has been checked
    94      MOVL (R8)(SI*4), BX
    95      MOVL 4(R8)(SI*4), DX
    96      INCQ SI
    97      CMPL BX, DX
    98      JLE testAscending            // signed data[i] <= data[i+1]
    99      JMP testDescending           // ascending failed: run the descending scan from DI
   100  ascending:
   101      MOVQ $ASCENDING, ret+24(FP)
   102      RET
   103  testDescending:
   104      CMPQ DI, R9
   105      JAE descending               // every pair has been checked
   106      MOVL (R8)(DI*4), BX
   107      MOVL 4(R8)(DI*4), DX
   108      INCQ DI
   109      CMPL BX, DX
   110      JGE testDescending           // signed data[i] >= data[i+1]
   111      JMP undefined
   112  descending:
   113      MOVQ $DESCENDING, ret+24(FP)
   114      RET
   115  undefined15:
   116      VZEROUPPER                   // leaving the vector path; falls through
   117  undefined:
   118      MOVQ $UNDEFINED, ret+24(FP)
   119      RET
   120  
   121  // func orderOfInt64(data []int64) int
        //
        // Stores in ret+24(FP) the order of data under signed 64-bit comparison:
        // ASCENDING when data[i] <= data[i+1] for every adjacent pair, otherwise
        // DESCENDING when data[i] >= data[i+1] for every pair, otherwise UNDEFINED.
        // Slices with fewer than two elements yield UNDEFINED.
        //
        // Registers: R8 = data pointer; R9 = len, later len-1; SI / DI = index of
        // the ascending / descending scan; AX = index where the vector loops stop;
        // Z2 = permutation indices; K2 = all-zero mask.
   122  TEXT ·orderOfInt64(SB), NOSPLIT, $-32
   123      MOVQ data_base+0(FP), R8
   124      MOVQ data_len+8(FP), R9
   125      XORQ SI, SI
   126      XORQ DI, DI
   127  
   128      CMPQ R9, $2
   129      JB undefined                 // fewer than two elements: nothing to compare
   130  
   131      CMPB ·hasAVX512VL(SB), $0
   132      JE test                      // flag clear: use the scalar loops only
   133  
   134      CMPQ R9, $8
   135      JB test                      // shorter than one 8-element vector load
   136  
            // AX = 7 * ((len-1) / 7). Each vector iteration loads data[SI..SI+7]
            // and advances by 7 pairs, so the last load ends at index AX <= len-1
            // and never reads past the end of the slice.
   137      XORQ DX, DX                  // IDIVQ divides DX:AX
   138      MOVQ R9, AX
   139      DECQ AX                      // AX = len-1 = number of adjacent pairs
   141      MOVQ $7, CX
   142      IDIVQ CX
   143      IMULQ $7, AX
   144      DECQ R9                      // R9 = len-1, bound for the scalar loops
   145  
   146      VMOVDQU64 shift1x64<>(SB), Z2
   147      KXORB K2, K2, K2             // K2 = 0, so KORTESTB below tests K1 alone
   148  testAscending7:
   149      VMOVDQU64 (R8)(SI*8), Z0     // Z0 = data[SI..SI+7]
   150      VMOVDQU64 Z2, Z1             // fresh copy: VPERMI2Q overwrites its index register
   151      VPERMI2Q Z0, Z0, Z1          // Z1[i] = Z0[i+1]; last lane keeps Z0[7]
   152      VPCMPQ $2, Z1, Z0, K1        // K1[i] = Z0[i] <= Z1[i] (signed)
   153      KORTESTB K2, K1              // CF = 1 only if all 8 lanes passed
   154      JNC testDescending7          // some pair is out of ascending order
   155      ADDQ $7, SI
   156      CMPQ SI, AX
   157      JNE testAscending7
   158      VZEROUPPER
   159      JMP testAscending            // scalar loop checks the remaining pairs
   160  testDescending7:
   161      VMOVDQU64 (R8)(DI*8), Z0     // Z0 = data[DI..DI+7]
   162      VMOVDQU64 Z2, Z1
   163      VPERMI2Q Z0, Z0, Z1
   164      VPCMPQ $5, Z1, Z0, K1        // K1[i] = Z0[i] >= Z1[i] (predicate 5: not less than)
   165      KORTESTB K2, K1
   166      JNC undefined7               // neither ascending nor descending
   167      ADDQ $7, DI
   168      CMPQ DI, AX
   169      JNE testDescending7
   170      VZEROUPPER
   171      JMP testDescending           // scalar loop checks the remaining pairs
   172  
   173  test:
   174      DECQ R9                      // R9 = len-1
   175  testAscending:
   176      CMPQ SI, R9
   177      JAE ascending                // every pair has been checked
   178      MOVQ (R8)(SI*8), BX
   179      MOVQ 8(R8)(SI*8), DX
   180      INCQ SI
   181      CMPQ BX, DX
   182      JLE testAscending            // signed data[i] <= data[i+1]
   183      JMP testDescending           // ascending failed: run the descending scan from DI
   184  ascending:
   185      MOVQ $ASCENDING, ret+24(FP)
   186      RET
   187  testDescending:
   188      CMPQ DI, R9
   189      JAE descending               // every pair has been checked
   190      MOVQ (R8)(DI*8), BX
   191      MOVQ 8(R8)(DI*8), DX
   192      INCQ DI
   193      CMPQ BX, DX
   194      JGE testDescending           // signed data[i] >= data[i+1]
   195      JMP undefined
   196  descending:
   197      MOVQ $DESCENDING, ret+24(FP)
   198      RET
   199  undefined7:
   200      VZEROUPPER                   // leaving the vector path; falls through
   201  undefined:
   202      MOVQ $UNDEFINED, ret+24(FP)
   203      RET
   204  
   205  // func orderOfUint32(data []uint32) int
        //
        // Stores in ret+24(FP) the order of data under unsigned 32-bit comparison:
        // ASCENDING when data[i] <= data[i+1] for every adjacent pair, otherwise
        // DESCENDING when data[i] >= data[i+1] for every pair, otherwise UNDEFINED.
        // Slices with fewer than two elements yield UNDEFINED.
        //
        // Registers: R8 = data pointer; R9 = len, later len-1; SI / DI = index of
        // the ascending / descending scan; AX = index where the vector loops stop;
        // Z2 = permutation indices; K2 = all-zero mask.
   206  TEXT ·orderOfUint32(SB), NOSPLIT, $-32
   207      MOVQ data_base+0(FP), R8
   208      MOVQ data_len+8(FP), R9
   209      XORQ SI, SI
   210      XORQ DI, DI
   211  
   212      CMPQ R9, $2
   213      JB undefined                 // fewer than two elements: nothing to compare
   214  
   215      CMPB ·hasAVX512VL(SB), $0
   216      JE test                      // flag clear: use the scalar loops only
   217  
   218      CMPQ R9, $16
   219      JB test                      // shorter than one 16-element vector load
   220  
            // AX = 15 * ((len-1) / 15). Each vector iteration loads data[SI..SI+15]
            // and advances by 15 pairs, so the last load ends at index AX <= len-1
            // and never reads past the end of the slice.
   221      XORQ DX, DX                  // IDIVQ divides DX:AX
   222      MOVQ R9, AX
   223      DECQ AX                      // AX = len-1 = number of adjacent pairs
   225      MOVQ $15, CX
   226      IDIVQ CX
   227      IMULQ $15, AX
   228      DECQ R9                      // R9 = len-1, bound for the scalar loops
   229  
   230      VMOVDQU32 shift1x32<>(SB), Z2
   231      KXORW K2, K2, K2             // K2 = 0, so KORTESTW below tests K1 alone
   232  testAscending15:
   233      VMOVDQU32 (R8)(SI*4), Z0     // Z0 = data[SI..SI+15]
   234      VMOVDQU32 Z2, Z1             // fresh copy: VPERMI2D overwrites its index register
   235      VPERMI2D Z0, Z0, Z1          // Z1[i] = Z0[i+1]; last lane keeps Z0[15]
   236      VPCMPUD $2, Z1, Z0, K1       // K1[i] = Z0[i] <= Z1[i] (unsigned)
   237      KORTESTW K2, K1              // CF = 1 only if all 16 lanes passed
   238      JNC testDescending15         // some pair is out of ascending order
   239      ADDQ $15, SI
   240      CMPQ SI, AX
   241      JNE testAscending15
   242      VZEROUPPER
   243      JMP testAscending            // scalar loop checks the remaining pairs
   244  testDescending15:
   245      VMOVDQU32 (R8)(DI*4), Z0     // Z0 = data[DI..DI+15]
   246      VMOVDQU32 Z2, Z1
   247      VPERMI2D Z0, Z0, Z1
   248      VPCMPUD $5, Z1, Z0, K1       // K1[i] = Z0[i] >= Z1[i] (predicate 5: not less than)
   249      KORTESTW K2, K1
   250      JNC undefined15              // neither ascending nor descending
   251      ADDQ $15, DI
   252      CMPQ DI, AX
   253      JNE testDescending15
   254      VZEROUPPER
   255      JMP testDescending           // scalar loop checks the remaining pairs
   256  
   257  test:
   258      DECQ R9                      // R9 = len-1
   259  testAscending:
   260      CMPQ SI, R9
   261      JAE ascending                // every pair has been checked
   262      MOVL (R8)(SI*4), BX
   263      MOVL 4(R8)(SI*4), DX
   264      INCQ SI
   265      CMPL BX, DX
   266      JBE testAscending            // unsigned data[i] <= data[i+1]
   267      JMP testDescending           // ascending failed: run the descending scan from DI
   268  ascending:
   269      MOVQ $ASCENDING, ret+24(FP)
   270      RET
   271  testDescending:
   272      CMPQ DI, R9
   273      JAE descending               // every pair has been checked
   274      MOVL (R8)(DI*4), BX
   275      MOVL 4(R8)(DI*4), DX
   276      INCQ DI
   277      CMPL BX, DX
   278      JAE testDescending           // unsigned data[i] >= data[i+1]
   279      JMP undefined
   280  descending:
   281      MOVQ $DESCENDING, ret+24(FP)
   282      RET
   283  undefined15:
   284      VZEROUPPER                   // leaving the vector path; falls through
   285  undefined:
   286      MOVQ $UNDEFINED, ret+24(FP)
   287      RET
   288  
   289  // func orderOfUint64(data []uint64) int
        //
        // Stores in ret+24(FP) the order of data under unsigned 64-bit comparison:
        // ASCENDING when data[i] <= data[i+1] for every adjacent pair, otherwise
        // DESCENDING when data[i] >= data[i+1] for every pair, otherwise UNDEFINED.
        // Slices with fewer than two elements yield UNDEFINED.
        //
        // Registers: R8 = data pointer; R9 = len, later len-1; SI / DI = index of
        // the ascending / descending scan; AX = index where the vector loops stop;
        // Z2 = permutation indices; K2 = all-zero mask.
   290  TEXT ·orderOfUint64(SB), NOSPLIT, $-32
   291      MOVQ data_base+0(FP), R8
   292      MOVQ data_len+8(FP), R9
   293      XORQ SI, SI
   294      XORQ DI, DI
   295  
   296      CMPQ R9, $2
   297      JB undefined                 // fewer than two elements: nothing to compare
   298  
   299      CMPB ·hasAVX512VL(SB), $0
   300      JE test                      // flag clear: use the scalar loops only
   301  
   302      CMPQ R9, $8
   303      JB test                      // shorter than one 8-element vector load
   304  
            // AX = 7 * ((len-1) / 7). Each vector iteration loads data[SI..SI+7]
            // and advances by 7 pairs, so the last load ends at index AX <= len-1
            // and never reads past the end of the slice.
   305      XORQ DX, DX                  // IDIVQ divides DX:AX
   306      MOVQ R9, AX
   307      DECQ AX                      // AX = len-1 = number of adjacent pairs
   309      MOVQ $7, CX
   310      IDIVQ CX
   311      IMULQ $7, AX
   312      DECQ R9                      // R9 = len-1, bound for the scalar loops
   313  
   314      VMOVDQU64 shift1x64<>(SB), Z2
   315      KXORB K2, K2, K2             // K2 = 0, so KORTESTB below tests K1 alone
   316  testAscending7:
   317      VMOVDQU64 (R8)(SI*8), Z0     // Z0 = data[SI..SI+7]
   318      VMOVDQU64 Z2, Z1             // fresh copy: VPERMI2Q overwrites its index register
   319      VPERMI2Q Z0, Z0, Z1          // Z1[i] = Z0[i+1]; last lane keeps Z0[7]
   320      VPCMPUQ $2, Z1, Z0, K1       // K1[i] = Z0[i] <= Z1[i] (unsigned)
   321      KORTESTB K2, K1              // CF = 1 only if all 8 lanes passed
   322      JNC testDescending7          // some pair is out of ascending order
   323      ADDQ $7, SI
   324      CMPQ SI, AX
   325      JNE testAscending7
   326      VZEROUPPER
   327      JMP testAscending            // scalar loop checks the remaining pairs
   328  testDescending7:
   329      VMOVDQU64 (R8)(DI*8), Z0     // Z0 = data[DI..DI+7]
   330      VMOVDQU64 Z2, Z1
   331      VPERMI2Q Z0, Z0, Z1
   332      VPCMPUQ $5, Z1, Z0, K1       // K1[i] = Z0[i] >= Z1[i] (predicate 5: not less than)
   333      KORTESTB K2, K1
   334      JNC undefined7               // neither ascending nor descending
   335      ADDQ $7, DI
   336      CMPQ DI, AX
   337      JNE testDescending7
   338      VZEROUPPER
   339      JMP testDescending           // scalar loop checks the remaining pairs
   340  
   341  test:
   342      DECQ R9                      // R9 = len-1
   343  testAscending:
   344      CMPQ SI, R9
   345      JAE ascending                // every pair has been checked
   346      MOVQ (R8)(SI*8), BX
   347      MOVQ 8(R8)(SI*8), DX
   348      INCQ SI
   349      CMPQ BX, DX
   350      JBE testAscending            // unsigned data[i] <= data[i+1]
   351      JMP testDescending           // ascending failed: run the descending scan from DI
   352  ascending:
   353      MOVQ $ASCENDING, ret+24(FP)
   354      RET
   355  testDescending:
   356      CMPQ DI, R9
   357      JAE descending               // every pair has been checked
   358      MOVQ (R8)(DI*8), BX
   359      MOVQ 8(R8)(DI*8), DX
   360      INCQ DI
   361      CMPQ BX, DX
   362      JAE testDescending           // unsigned data[i] >= data[i+1]
   363      JMP undefined
   364  descending:
   365      MOVQ $DESCENDING, ret+24(FP)
   366      RET
   367  undefined7:
   368      VZEROUPPER                   // leaving the vector path; falls through
   369  undefined:
   370      MOVQ $UNDEFINED, ret+24(FP)
   371      RET
   372  
   373  // func orderOfFloat32(data []float32) int
        //
        // Stores in ret+24(FP) the order of data under single-precision comparison:
        // ASCENDING when data[i] <= data[i+1] for every adjacent pair, otherwise
        // DESCENDING when data[i] >= data[i+1] for every pair, otherwise UNDEFINED.
        // Slices with fewer than two elements yield UNDEFINED.
        //
        // NOTE(review): NaN handling appears to differ between the two paths. The
        // vector compares (predicate 2 is false, predicate 5 is true on unordered
        // operands) fail the ascending test and pass the descending one, while the
        // scalar UCOMISS + JBE/JAE branches do the opposite. Confirm the intended
        // ordering for NaN against the callers.
        //
        // Registers: R8 = data pointer; R9 = len, later len-1; SI / DI = index of
        // the ascending / descending scan; AX = index where the vector loops stop;
        // Z2 = permutation indices; K2 = all-zero mask.
   374  TEXT ·orderOfFloat32(SB), NOSPLIT, $-32
   375      MOVQ data_base+0(FP), R8
   376      MOVQ data_len+8(FP), R9
   377      XORQ SI, SI
   378      XORQ DI, DI
   379  
   380      CMPQ R9, $2
   381      JB undefined                 // fewer than two elements: nothing to compare
   382  
   383      CMPB ·hasAVX512VL(SB), $0
   384      JE test                      // flag clear: use the scalar loops only
   385  
   386      CMPQ R9, $16
   387      JB test                      // shorter than one 16-element vector load
   388  
            // AX = 15 * ((len-1) / 15). Each vector iteration loads data[SI..SI+15]
            // and advances by 15 pairs, so the last load ends at index AX <= len-1
            // and never reads past the end of the slice.
   389      XORQ DX, DX                  // IDIVQ divides DX:AX
   390      MOVQ R9, AX
   391      DECQ AX                      // AX = len-1 = number of adjacent pairs
   393      MOVQ $15, CX
   394      IDIVQ CX
   395      IMULQ $15, AX
   396      DECQ R9                      // R9 = len-1, bound for the scalar loops
   397  
   398      VMOVDQU32 shift1x32<>(SB), Z2
   399      KXORW K2, K2, K2             // K2 = 0, so KORTESTW below tests K1 alone
   400  testAscending15:
   401      VMOVDQU32 (R8)(SI*4), Z0     // Z0 = data[SI..SI+15]
   402      VMOVDQU32 Z2, Z1             // fresh copy: VPERMI2D overwrites its index register
   403      VPERMI2D Z0, Z0, Z1          // Z1[i] = Z0[i+1]; last lane keeps Z0[15]
   404      VCMPPS $2, Z1, Z0, K1        // K1[i] = Z0[i] <= Z1[i]
   405      KORTESTW K2, K1              // CF = 1 only if all 16 lanes passed
   406      JNC testDescending15         // some pair is out of ascending order
   407      ADDQ $15, SI
   408      CMPQ SI, AX
   409      JNE testAscending15
   410      VZEROUPPER
   411      JMP testAscending            // scalar loop checks the remaining pairs
   412  testDescending15:
   413      VMOVDQU32 (R8)(DI*4), Z0     // Z0 = data[DI..DI+15]
   414      VMOVDQU32 Z2, Z1
   415      VPERMI2D Z0, Z0, Z1
   416      VCMPPS $5, Z1, Z0, K1        // K1[i] = !(Z0[i] < Z1[i]) (predicate 5: not less than)
   417      KORTESTW K2, K1
   418      JNC undefined15              // neither ascending nor descending
   419      ADDQ $15, DI
   420      CMPQ DI, AX
   421      JNE testDescending15
   422      VZEROUPPER
   423      JMP testDescending           // scalar loop checks the remaining pairs
   424  
   425  test:
   426      DECQ R9                      // R9 = len-1
   427  testAscending:
   428      CMPQ SI, R9
   429      JAE ascending                // every pair has been checked
   430      MOVLQZX (R8)(SI*4), BX
   431      MOVLQZX 4(R8)(SI*4), DX
   432      INCQ SI
   433      MOVQ BX, X0                  // X0 = data[i] (float bits in the low 32 bits)
   434      MOVQ DX, X1                  // X1 = data[i+1]
   435      UCOMISS X1, X0
   436      JBE testAscending            // data[i] <= data[i+1]; also taken when unordered
   437      JMP testDescending           // ascending failed: run the descending scan from DI
   438  ascending:
   439      MOVQ $ASCENDING, ret+24(FP)
   440      RET
   441  testDescending:
   442      CMPQ DI, R9
   443      JAE descending               // every pair has been checked
   444      MOVLQZX (R8)(DI*4), BX
   445      MOVLQZX 4(R8)(DI*4), DX
   446      INCQ DI
   447      MOVQ BX, X0
   448      MOVQ DX, X1
   449      UCOMISS X1, X0
   450      JAE testDescending           // data[i] >= data[i+1]; not taken when unordered
   451      JMP undefined
   452  descending:
   453      MOVQ $DESCENDING, ret+24(FP)
   454      RET
   455  undefined15:
   456      VZEROUPPER                   // leaving the vector path; falls through
   457  undefined:
   458      MOVQ $UNDEFINED, ret+24(FP)
   459      RET
   460  
   461  // func orderOfFloat64(data []float64) int
        //
        // Stores in ret+24(FP) the order of data under double-precision comparison:
        // ASCENDING when data[i] <= data[i+1] for every adjacent pair, otherwise
        // DESCENDING when data[i] >= data[i+1] for every pair, otherwise UNDEFINED.
        // Slices with fewer than two elements yield UNDEFINED.
        //
        // NOTE(review): NaN handling appears to differ between the two paths. The
        // vector compares (predicate 2 is false, predicate 5 is true on unordered
        // operands) fail the ascending test and pass the descending one, while the
        // scalar UCOMISD + JBE/JAE branches do the opposite. Confirm the intended
        // ordering for NaN against the callers.
        //
        // Registers: R8 = data pointer; R9 = len, later len-1; SI / DI = index of
        // the ascending / descending scan; AX = index where the vector loops stop;
        // Z2 = permutation indices; K2 = all-zero mask.
   462  TEXT ·orderOfFloat64(SB), NOSPLIT, $-32
   463      MOVQ data_base+0(FP), R8
   464      MOVQ data_len+8(FP), R9
   465      XORQ SI, SI
   466      XORQ DI, DI
   467  
   468      CMPQ R9, $2
   469      JB undefined                 // fewer than two elements: nothing to compare
   470  
   471      CMPB ·hasAVX512VL(SB), $0
   472      JE test                      // flag clear: use the scalar loops only
   473  
   474      CMPQ R9, $8
   475      JB test                      // shorter than one 8-element vector load
   476  
            // AX = 7 * ((len-1) / 7). Each vector iteration loads data[SI..SI+7]
            // and advances by 7 pairs, so the last load ends at index AX <= len-1
            // and never reads past the end of the slice.
   477      XORQ DX, DX                  // IDIVQ divides DX:AX
   478      MOVQ R9, AX
   479      DECQ AX                      // AX = len-1 = number of adjacent pairs
   481      MOVQ $7, CX
   482      IDIVQ CX
   483      IMULQ $7, AX
   484      DECQ R9                      // R9 = len-1, bound for the scalar loops
   485  
   486      VMOVDQU64 shift1x64<>(SB), Z2
   487      KXORB K2, K2, K2             // K2 = 0, so KORTESTB below tests K1 alone
   488  testAscending7:
   489      VMOVDQU64 (R8)(SI*8), Z0     // Z0 = data[SI..SI+7]
   490      VMOVDQU64 Z2, Z1             // fresh copy: VPERMI2Q overwrites its index register
   491      VPERMI2Q Z0, Z0, Z1          // Z1[i] = Z0[i+1]; last lane keeps Z0[7]
   492      VCMPPD $2, Z1, Z0, K1        // K1[i] = Z0[i] <= Z1[i]
   493      KORTESTB K2, K1              // CF = 1 only if all 8 lanes passed
   494      JNC testDescending7          // some pair is out of ascending order
   495      ADDQ $7, SI
   496      CMPQ SI, AX
   497      JNE testAscending7
   498      VZEROUPPER
   499      JMP testAscending            // scalar loop checks the remaining pairs
   500  testDescending7:
   501      VMOVDQU64 (R8)(DI*8), Z0     // Z0 = data[DI..DI+7]
   502      VMOVDQU64 Z2, Z1
   503      VPERMI2Q Z0, Z0, Z1
   504      VCMPPD $5, Z1, Z0, K1        // K1[i] = !(Z0[i] < Z1[i]) (predicate 5: not less than)
   505      KORTESTB K2, K1
   506      JNC undefined7               // neither ascending nor descending
   507      ADDQ $7, DI
   508      CMPQ DI, AX
   509      JNE testDescending7
   510      VZEROUPPER
   511      JMP testDescending           // scalar loop checks the remaining pairs
   512  
   513  test:
   514      DECQ R9                      // R9 = len-1
   515  testAscending:
   516      CMPQ SI, R9
   517      JAE ascending                // every pair has been checked
   518      MOVQ (R8)(SI*8), BX
   519      MOVQ 8(R8)(SI*8), DX
   520      INCQ SI
   521      MOVQ BX, X0                  // X0 = data[i]
   522      MOVQ DX, X1                  // X1 = data[i+1]
   523      UCOMISD X1, X0
   524      JBE testAscending            // data[i] <= data[i+1]; also taken when unordered
   525      JMP testDescending           // ascending failed: run the descending scan from DI
   526  ascending:
   527      MOVQ $ASCENDING, ret+24(FP)
   528      RET
   529  testDescending:
   530      CMPQ DI, R9
   531      JAE descending               // every pair has been checked
   532      MOVQ (R8)(DI*8), BX
   533      MOVQ 8(R8)(DI*8), DX
   534      INCQ DI
   535      MOVQ BX, X0
   536      MOVQ DX, X1
   537      UCOMISD X1, X0
   538      JAE testDescending           // data[i] >= data[i+1]; not taken when unordered
   539      JMP undefined
   540  descending:
   541      MOVQ $DESCENDING, ret+24(FP)
   542      RET
   543  undefined7:
   544      VZEROUPPER                   // leaving the vector path; falls through
   545  undefined:
   546      MOVQ $UNDEFINED, ret+24(FP)
   547      RET