// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants
//
// cbrtAsm addresses this table through R9. Each entry is one 8-byte
// floating-point value:
//   offsets 0..40  coefficients loaded one by one during the final
//                  polynomial evaluation
//   offset 48      1.000244140625 (1 + 2**-12), loaded into F5
//   offset 56      1/3, loaded into F1
//   offset 64      2**96, the factor applied to inputs whose exponent
//                  field is zero before the table lookups
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72

// Index tables
//
// The three tables below are combined by cbrtAsm into the high 32 bits of
// its starting estimate. The DATA directives pack several entries into each
// 8-byte word; the entry width is fixed by the load instruction used on the
// table, not by the directive.

// cbrttab32069: 80 bytes, read one byte at a time (llc) by cbrtAsm.
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80

// cbrttab22068: 128 bytes, read as halfwords and added (ah) to the value
// fetched from cbrttab12067.
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128

// cbrttab12067: 128 bytes, read as halfwords (lh) by cbrtAsm. The second
// half of the table (offsets 64..127) repeats the first half with the top
// bit of every halfword set.
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128

// Cbrt returns the cube root of the argument.
//
// Special cases are:
//	Cbrt(±0) = ±0
//	Cbrt(±Inf) = ±Inf
//	Cbrt(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
// cbrtAsm reads the 8-byte floating-point argument at x+0(FP) and stores the
// 8-byte result at ret+8(FP). It declares no local frame ($0-16) and is
// NOSPLIT.
//
// Outline of the instruction stream:
//   1. Classify x by the high 32 bits of its bit pattern with the sign bit
//      cleared:
//        <= 0x000FFFFF  L2: a zero is returned unchanged; any other value
//                       is multiplied by the constant at offset 64 of
//                       cbrtrodataL9 and R4 is set to 0x200.
//        <= 0x7FEFFFFF  L8: R4 is set to 0.
//        otherwise      L1: x is returned unchanged (exponent field all
//                       ones, i.e. Inf or NaN).
//   2. L4: assemble a starting value in F2 from three table lookups that
//      are indexed by bit fields of the high word, plus the R4 adjustment.
//   3. Refine that value with multiply / fused multiply-add steps using the
//      coefficients in cbrtrodataL9 and store the result.
//
// Several instructions are emitted as raw WORD/BYTE encodings; the comment
// on the first word of each gives the z/Architecture mnemonic, and the
// BYTE directives that follow complete the same instruction.
//
// NOTE(review): the per-line notes on floating-point instructions assume
// Go's s390x operand order (destination last; FMADD/FMSUB accumulate into
// the last operand) and that Fn and Vn name the same register — confirm
// against the assembler documentation before relying on them.
TEXT ·cbrtAsm(SB), NOSPLIT, $0-16
	FMOVD	x+0(FP), F0                 // F0 = x
	MOVD	$·cbrtrodataL9<>+0(SB), R9  // R9 = base address of the constant table
	WORD	$0xB3CD0020	//lgdr %r2, %f0      (R2 = bit pattern of x)
	WORD	$0xC039000F	//iilf %r3,1048575   (R3 = 0x000FFFFF)
	BYTE	$0xFF
	BYTE	$0xFF
	SRAD	$32, R2                     // R2 = high 32 bits of x
	WORD	$0xB9170012	//llgtr %r1,%r2      (R1 = R2 with the sign bit cleared)
	MOVW	R1, R6
	MOVW	R3, R7
	CMPBLE	R6, R7, L2                  // exponent field is zero: zero or subnormal
	WORD	$0xC0397FEF	//iilf %r3,2146435071 (R3 = 0x7FEFFFFF)
	BYTE	$0xFF
	BYTE	$0xFF
	MOVW	R3, R7
	CMPBLE	R6, R7, L8                  // finite input with non-zero exponent field
	// Fall through: exponent field is all ones, so x is returned as is.
L1:
	FMOVD	F0, ret+8(FP)
	RET
L3:	// not referenced by any branch in this function
L2:
	WORD	$0xB3120000	//ltdbr %f0,%f0      (test F0 against zero)
	BEQ	L1                          // x is ±0: return it unchanged
	FMOVD	F0, F2
	WORD	$0xED209040	//mdb %f2,.L10-.L9(%r9)  (F2 *= constant at 64(R9), i.e. 2**96)
	BYTE	$0x00
	BYTE	$0x1C
	MOVH	$0x200, R4                  // adjustment added to the estimate at L4 for rescaled inputs
	WORD	$0xB3CD0022	//lgdr %r2, %f2      (R2 = bit pattern of the rescaled value)
	SRAD	$32, R2                     // R2 = its high 32 bits
L4:
	// R2 holds the high word of the value being looked up; F0 still holds x.
	// NOTE(review): by the rotate amounts, the three extracted fields look
	// like high-word bits 26..31, 20..25 and 16..19 — confirm against the
	// instruction definitions.
	WORD	$0xEC3239BE	//risbg %r3,%r2,57,128+62,64-25  (R3 = 6-bit field of R2 as an even offset)
	BYTE	$0x27
	BYTE	$0x55
	MOVD	$·cbrttab12067<>+0(SB), R1
	WORD	$0x48131000	//lh %r1,0(%r3,%r1)  (R1 = halfword from cbrttab12067 at offset R3)
	WORD	$0xEC3239BE	//risbg %r3,%r2,57,128+62,64-19  (R3 = next 6-bit field as an even offset)
	BYTE	$0x2D
	BYTE	$0x55
	MOVD	$·cbrttab22068<>+0(SB), R5
	WORD	$0xEC223CBF	//risbgn %r2,%r2,64-4,128+63,64+44+4  (R2 = 4-bit field of R2)
	BYTE	$0x70
	BYTE	$0x59
	WORD	$0x4A135000	//ah %r1,0(%r3,%r5)  (R1 += halfword from cbrttab22068 at offset R3)
	BYTE	$0x18	//lr %r3,%r1         (R3 = R1, the sum of the two halfwords)
	BYTE	$0x31
	MOVD	$·cbrttab32069<>+0(SB), R1
	FMOVD	56(R9), F1                  // F1 = 0.33333333333333333333
	FMOVD	48(R9), F5                  // F5 = 1.000244140625
	WORD	$0xEC23393B	//rosbg %r2,%r3,57,59,4  (OR three bits taken from R3 into R2 above the 4-bit field)
	BYTE	$0x04
	BYTE	$0x56
	WORD	$0xE3121000	//llc %r1,0(%r2,%r1) (R1 = byte from cbrttab32069 at offset R2)
	BYTE	$0x00
	BYTE	$0x94
	ADDW	R3, R1                      // add the halfword sum
	ADDW	R4, R1                      // add 0x200 (rescaled input) or 0
	SLW	$16, R1, R1
	SLD	$32, R1, R1                 // move the assembled value into the high 32 bits
	WORD	$0xB3C10021	//ldgr %f2,%r1       (F2 = starting estimate built from those bits)
	// Refinement; see the NOTE(review) in the header about operand order.
	WFMDB	V2, V2, V4                  // V4 = V2*V2
	WFMDB	V4, V0, V6                  // V6 = V4*V0
	WFMSDB	V4, V6, V2, V4              // V4 = V4*V6 - V2
	FMOVD	40(R9), F6                  // F6 = 0.43099892837778637816
	FMSUB	F1, F4, F2                  // F2 = F1*F4 - F2
	FMOVD	32(R9), F4                  // F4 = 0.45208160036325611486
	WFMDB	V2, V2, V3                  // V3 = V2*V2
	FMOVD	24(R9), F1                  // F1 = 0.49338566048766782004
	FMUL	F3, F0                      // F0 = F0*F3
	FMOVD	16(R9), F3                  // F3 = 0.55519402697349815993
	WFMADB	V2, V0, V5, V2              // V2 = V2*V0 + V5
	FMOVD	8(R9), F5                   // F5 = 0.66639548758285293179
	FMADD	F6, F2, F4                  // F4 = F6*F2 + F4
	WFMADB	V2, V1, V3, V1              // V1 = V2*V1 + V3
	WFMDB	V2, V2, V6                  // V6 = V2*V2
	FMOVD	0(R9), F3                   // F3 = -.00016272731015974436
	WFMADB	V4, V6, V1, V4              // V4 = V4*V6 + V1
	WFMADB	V2, V5, V3, V2              // V2 = V2*V5 + V3
	FMADD	F4, F6, F2                  // F2 = F4*F6 + F2
	FMADD	F2, F0, F0                  // F0 = F2*F0 + F0
	FMOVD	F0, ret+8(FP)
	RET
L8:
	MOVH	$0x0, R4                    // no adjustment when the input was not rescaled
	BR	L4