gitee.com/quant1x/num@v0.3.2/math32/exp_amd64.s (about)

     1  // Copyright 2014 Xuanyi Chew. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  //
     5  // The original code is lifted from the Go standard library which is governed by
     6  // a BSD-style licence which can be found here: https://golang.org/LICENSE
     7  
     8  #include "textflag.h"
     9  
    10  // The method is based on a paper by Naoki Shibata: "Efficient evaluation
    11  // methods of elementary functions suitable for SIMD computation", Proc.
    12  // of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
    13  // (May 2010). The paper is available at
    14  // http://www.springerlink.com/content/340228x165742104/
    15  //
    16  // The original code and the constants below are from the author's
    17  // implementation available at http://freshmeat.net/projects/sleef.
    18  // The README file says, "The software is in public domain.
    19  // You can use the software without any obligation."
    20  //
    21  // This code is a simplified version of the original.
    22  // The magic numbers for the float32 are lifted from the same project
    23  
    24  	
    25  #define LN2 0.693147182464599609375 // log_e(2)
    26  #define LOG2E 1.44269502162933349609375 // 1/LN2
    27  #define LN2U 0.693145751953125 // upper half LN2
    28  #define LN2L 1.428606765330187045e-06 // lower half LN2
    29  #define T0 1.0
    30  #define T1 0.5
    31  #define T2 0.166665524244308471679688
    32  #define T3 0.0416710823774337768554688
    33  #define T4 0.00836596917361021041870117
    34  #define PosInf 0x7F800000
    35  #define NegInf 0xFF800000
    36  
    37  // func Exp(x float32) float32
    38  TEXT ·Exp(SB),NOSPLIT,$0
    39  // test bits for not-finite
    40  	MOVL    x+0(FP), BX
    41  	MOVQ    $~(1<<31), AX // sign bit mask
    42  	MOVL    BX, DX
    43  	ANDL    AX, DX
    44  	MOVL    $PosInf, AX
    45  	CMPL    AX, DX
    46  	JLE     notFinite
    47  	MOVL    BX, X0
    48  	MOVSS   $LOG2E, X1
    49  	MULSS   X0, X1
    50  	CVTSS2SL X1, BX // BX = exponent
    51  	CVTSL2SS BX, X1
    52  	MOVSS   $LN2U, X2
    53  	MULSS   X1, X2
    54  	SUBSS   X2, X0
    55  	MOVSS   $LN2L, X2
    56  	MULSS   X1, X2
    57  	SUBSS   X2, X0
    58  	// reduce argument
    59  	MULSS   $0.0625, X0
    60  	// Taylor series evaluation
    61  	ADDSS   $T4, X1
    62  	MULSS   X0, X1
    63  	ADDSS   $T3, X1
    64  	MULSS   X0, X1
    65  	ADDSS   $T2, X1
    66  	MULSS   X0, X1
    67  	ADDSS   $T1, X1
    68  	MULSS   X0, X1
    69  	ADDSS   $T0, X1
    70  	MULSS   X1, X0
    71  	MOVSS   $2.0, X1
    72  	ADDSS   X0, X1
    73  	MULSS   X1, X0
    74  	MOVSS   $2.0, X1
    75  	ADDSS   X0, X1
    76  	MULSS   X1, X0
    77  	MOVSS   $2.0, X1
    78  	ADDSS   X0, X1
    79  	MULSS   X1, X0
    80  	MOVSS   $2.0, X1
    81  	ADDSS   X0, X1
    82  	MULSS   X1, X0
    83  	ADDSS   $1.0, X0
    84  	// return fr * 2**exponent
    85  	MOVL    $0x7F, AX // bias
    86  	ADDL    AX, BX
    87  	JLE     underflow
    88  	CMPL    BX, $0xFF
    89  	JGE     overflow
    90  	MOVL    $23, CX
    91  	SHLQ    CX, BX
    92  	MOVL    BX, X1
    93  	MULSS   X1, X0
    94  	MOVSS   X0, ret+8(FP)
    95  	RET
    96  notFinite:
    97  	// test bits for -Inf
    98  	MOVL    $NegInf, AX
    99  	CMPQ    AX, BX
   100  	JNE     notNegInf
   101  	// -Inf, return 0
   102  underflow: // return 0
   103  	MOVL    $0, AX
   104  	MOVL    AX, ret+8(FP)
   105  	RET
   106  overflow: // return +Inf
   107  	MOVL    $PosInf, BX
   108  notNegInf: // NaN or +Inf, return x
   109  	MOVL    BX, ret+8(FP)
   110  	RET
   111  
   112  TEXT ·Exp2(SB),NOSPLIT,$0
   113  	JMP  ·exp2(SB)