github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/popcount.asm (about)

     1  dnl  Intel P5 mpn_popcount -- mpn bit population count.
     2  
     3  dnl  Copyright 2001, 2002, 2014, 2015 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 8.0 cycles/limb
    35  
    36  
    37  C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
    38  C
    39  C An arithmetic approach has been found to be slower than the table lookup,
    40  C due to needing too many instructions.
    41  
    42  C The slightly strange quoting here helps the renaming done by tune/many.pl.
    43  deflit(TABLE_NAME,
    44  m4_assert_defined(`GSYM_PREFIX')
    45  GSYM_PREFIX`'mpn_popcount``'_table')
    46  
    47  C FIXME: exporting the table to hamdist is incorrect as it hurts incremental
    48  C linking.
    49  C The table itself: one .byte m4_popcount(i) entry for each i from 0 to 255.
    50  	RODATA
    51  	ALIGN(8)
    52  	GLOBL	TABLE_NAME
    53  TABLE_NAME:
    54  forloop(i,0,255,
    55  `	.byte	m4_popcount(i)
    56  ')
    57  
    58  defframe(PARAM_SIZE,8)
    59  defframe(PARAM_SRC, 4)
    60  C src is the first stack argument (offset 4), size the second (offset 8).
    61  	TEXT
    62  	ALIGN(8)
    63  
    64  PROLOGUE(mpn_popcount)
    65  deflit(`FRAME',0)
    66  C NOTE(review): size is not tested for zero; presumably callers pass size >= 1.
    67  	movl	PARAM_SIZE, %ecx
    68  	pushl	%esi	FRAME_pushl()
    69  C PIC builds hold the table address in ebp; non-PIC builds use TABLE_NAME directly.
    70  ifdef(`PIC',`
    71  	pushl	%ebx	FRAME_pushl()
    72  	pushl	%ebp	FRAME_pushl()
    73  ifdef(`DARWIN',`
    74  	shll	%ecx		C size in byte pairs
    75  	LEA(	TABLE_NAME, %ebp)
    76  	movl	PARAM_SRC, %esi
    77  	xorl	%eax, %eax	C total
    78  	xorl	%ebx, %ebx	C byte
    79  	xorl	%edx, %edx	C byte
    80  ',`
    81  	call	L(here)
    82  L(here):
    83  	popl	%ebp
    84  	shll	%ecx		C size in byte pairs
    85  C ebp holds the address of L(here); the addl below turns it into the GOT address.
    86  	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
    87  	movl	PARAM_SRC, %esi
    88  
    89  	xorl	%eax, %eax	C total
    90  	xorl	%ebx, %ebx	C byte
    91  C Replace the GOT address in ebp with the table address read from its GOT entry.
    92  	movl	TABLE_NAME@GOT(%ebp), %ebp
    93  	xorl	%edx, %edx	C byte
    94  ')
    95  define(TABLE,`(%ebp,$1)')
    96  ',`
    97  dnl non-PIC
    98  	shll	%ecx		C size in byte pairs
    99  	movl	PARAM_SRC, %esi
   100  
   101  	pushl	%ebx	FRAME_pushl()
   102  	xorl	%eax, %eax	C total
   103  
   104  	xorl	%ebx, %ebx	C byte
   105  	xorl	%edx, %edx	C byte
   106  
   107  define(TABLE,`TABLE_NAME`'($1)')
   108  ')
   109  
   110  
   111  	ALIGN(8)	C necessary on P55 for claimed speed
   112  L(top):
   113  	C eax	total
   114  	C ebx	byte
   115  	C ecx	counter, 2*size to 1
   116  	C edx	byte
   117  	C esi	src
   118  	C edi
   119  	C ebp	[PIC] table
   120  C Only bl and dl are written in the loop, so ebx and edx stay zero-extended.
   121  	addl	%ebx, %eax		C add table entry from previous pass (0 on first)
   122  	movb	-1(%esi,%ecx,2), %bl	C bl = source byte at offset 2*ecx-1
   123  
   124  	addl	%edx, %eax		C add table entry from previous pass (0 on first)
   125  	movb	-2(%esi,%ecx,2), %dl	C dl = source byte at offset 2*ecx-2
   126  
   127  	movb	TABLE(%ebx), %bl	C bl = table entry for that byte
   128  	decl	%ecx
   129  
   130  	movb	TABLE(%edx), %dl	C dl = table entry for that byte
   131  	jnz	L(top)
   132  
   133  
   134  ifdef(`PIC',`
   135  	popl	%ebp
   136  ')
   137  	addl	%ebx, %eax	C add the table entries left pending by the final pass
   138  	popl	%ebx
   139  
   140  	addl	%edx, %eax
   141  	popl	%esi
   142  
   143  	ret		C result in eax (total)
   144  
   145  EPILOGUE()
   146  ASM_END()
   146  ASM_END()