Linux-2.6.12-rc2
[linux-flexiantxendom0-natty.git] / arch / m68k / fpsp040 / stwotox.S
1 |
2 |       stwotox.sa 3.1 12/10/90
3 |
4 |       stwotox  --- 2**X
5 |       stwotoxd --- 2**X for denormalized X
6 |       stentox  --- 10**X
7 |       stentoxd --- 10**X for denormalized X
8 |
9 |       Input: Double-extended number X in location pointed to
10 |               by address register a0.
11 |
12 |       Output: The function values are returned in Fp0.
13 |
14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16 |               result is subsequently rounded to double precision. The
17 |               result is provably monotonic in double precision.
18 |
19 |       Speed: The program stwotox takes approximately 190 cycles and the
20 |               program stentox takes approximately 200 cycles.
21 |
22 |       Algorithm:
23 |
24 |       twotox
25 |       1. If |X| > 16480, go to ExpBig.
26 |
27 |       2. If |X| < 2**(-70), go to ExpSm.
28 |
29 |       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30 |               decompose N as
31 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
32 |
33 |       4. Overwrite r := r * log2. Then
34 |               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35 |               Go to expr to compute that expression.
36 |
37 |       tentox
38 |       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39 |
40 |       2. If |X| < 2**(-70), go to ExpSm.
41 |
42 |       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43 |               N := round-to-int(y). Decompose N as
44 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
45 |
46 |       4. Define r as
47 |               r := ((X - N*L1)-N*L2) * L10
48 |               where L1, L2 are the leading and trailing parts of log_10(2)/64
49 |               and L10 is the natural log of 10. Then
50 |               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51 |               Go to expr to compute that expression.
52 |
53 |       expr
54 |       1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55 |
56 |       2. Overwrite Fact1 and Fact2 by
57 |               Fact1 := 2**(M) * Fact1
58 |               Fact2 := 2**(M) * Fact2
59 |               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60 |
61 |       3. Calculate P where 1 + P approximates exp(r):
62 |               P = r + r*r*(A1+r*(A2+...+r*A5)).
63 |
64 |       4. Let AdjFact := 2**(M'). Return
65 |               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66 |               Exit.
67 |
68 |       ExpBig
69 |       1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70 |               underflow by Tiny * Tiny.
71 |
72 |       ExpSm
73 |       1. Return 1 + X.
74 |
75
76 |               Copyright (C) Motorola, Inc. 1990
77 |                       All Rights Reserved
78 |
79 |       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
80 |       The copyright notice above does not evidence any
81 |       actual or intended publication of such source code.
82
83 |STWOTOX        idnt    2,1 | Motorola 040 Floating Point Software Package
84
85         |section        8
86
87 #include "fpsp.h"
88
89 BOUNDS1:        .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
90 BOUNDS2:        .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
91
92 L2TEN64:        .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
93 L10TWO1:        .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
94
95 L10TWO2:        .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
96
97 LOG10:  .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
98
99 LOG2:   .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
100
101 EXPA5:  .long 0x3F56C16D,0x6F7BD0B2
102 EXPA4:  .long 0x3F811112,0x302C712C
103 EXPA3:  .long 0x3FA55555,0x55554CC1
104 EXPA2:  .long 0x3FC55555,0x55554A54
105 EXPA1:  .long 0x3FE00000,0x00000000,0x00000000,0x00000000
106
107 HUGE:   .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108 TINY:   .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
109
110 EXPTBL:
111         .long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
112         .long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
113         .long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
114         .long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
115         .long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
116         .long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
117         .long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
118         .long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
119         .long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
120         .long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
121         .long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
122         .long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
123         .long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
124         .long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
125         .long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
126         .long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
127         .long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
128         .long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
129         .long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
130         .long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
131         .long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
132         .long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
133         .long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
134         .long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
135         .long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
136         .long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
137         .long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
138         .long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
139         .long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
140         .long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
141         .long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
142         .long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
143         .long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
144         .long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
145         .long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
146         .long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
147         .long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
148         .long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
149         .long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
150         .long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
151         .long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
152         .long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
153         .long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
154         .long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
155         .long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
156         .long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
157         .long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
158         .long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
159         .long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
160         .long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
161         .long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
162         .long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
163         .long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
164         .long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
165         .long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
166         .long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
167         .long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
168         .long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
169         .long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
170         .long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
171         .long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
172         .long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
173         .long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
174         .long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
175
176         .set    N,L_SCR1
177
178         .set    X,FP_SCR1
179         .set    XDCARE,X+2
180         .set    XFRAC,X+4
181
182         .set    ADJFACT,FP_SCR2
183
184         .set    FACT1,FP_SCR3
185         .set    FACT1HI,FACT1+4
186         .set    FACT1LOW,FACT1+8
187
188         .set    FACT2,FP_SCR4
189         .set    FACT2HI,FACT2+4
190         .set    FACT2LOW,FACT2+8
191
192         | xref  t_unfl
193         |xref   t_ovfl
194         |xref   t_frcinx
195
196         .global stwotoxd
197 stwotoxd:
198 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
199
200         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
201         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
202         movel           (%a0),%d0
203         orl             #0x00800001,%d0
204         fadds           %d0,%fp0
205         bra             t_frcinx
206
207         .global stwotox
208 stwotox:
209 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
210         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
211
212         movel           (%a0),%d0
213         movew           4(%a0),%d0
214         fmovex          %fp0,X(%a6)
215         andil           #0x7FFFFFFF,%d0
216
217         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
218         bges            TWOOK1
219         bra             EXPBORS
220
221 TWOOK1:
222         cmpil           #0x400D80C0,%d0         | ...|X| > 16480?
223         bles            TWOMAIN
224         bra             EXPBORS
225
226
227 TWOMAIN:
228 |--USUAL CASE, 2^(-70) <= |X| <= 16480
229
230         fmovex          %fp0,%fp1
231         fmuls           #0x42800000,%fp1  | ...64 * X
232
233         fmovel          %fp1,N(%a6)             | ...N = ROUND-TO-INT(64 X)
234         movel           %d2,-(%sp)
235         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
236         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
237         movel           N(%a6),%d0
238         movel           %d0,%d2
239         andil           #0x3F,%d0               | ...D0 IS J
240         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
241         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
242         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
243         movel           %d2,%d0
244         asrl            #1,%d0          | ...D0 IS M
245         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
246         addil           #0x3FFF,%d2
247         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
248         movel           (%sp)+,%d2
249 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
250 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
251 |--ADJFACT = 2^(M').
252 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
253
254         fmuls           #0x3C800000,%fp1  | ...(1/64)*N
255         movel           (%a1)+,FACT1(%a6)
256         movel           (%a1)+,FACT1HI(%a6)
257         movel           (%a1)+,FACT1LOW(%a6)
258         movew           (%a1)+,FACT2(%a6)
259         clrw            FACT2+2(%a6)
260
261         fsubx           %fp1,%fp0               | ...X - (1/64)*INT(64 X)
262
263         movew           (%a1)+,FACT2HI(%a6)
264         clrw            FACT2HI+2(%a6)
265         clrl            FACT2LOW(%a6)
266         addw            %d0,FACT1(%a6)
267
268         fmulx           LOG2,%fp0       | ...FP0 IS R
269         addw            %d0,FACT2(%a6)
270
271         bra             expr
272
273 EXPBORS:
274 |--FPCR, D0 SAVED
275         cmpil           #0x3FFF8000,%d0
276         bgts            EXPBIG
277
278 EXPSM:
279 |--|X| IS SMALL, RETURN 1 + X
280
281         fmovel          %d1,%FPCR               |restore users exceptions
282         fadds           #0x3F800000,%fp0  | ...RETURN 1 + X
283
284         bra             t_frcinx
285
286 EXPBIG:
287 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
288 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
289         movel           X(%a6),%d0
290         cmpil           #0,%d0
291         blts            EXPNEG
292
293         bclrb           #7,(%a0)                |t_ovfl expects positive value
294         bra             t_ovfl
295
296 EXPNEG:
297         bclrb           #7,(%a0)                |t_unfl expects positive value
298         bra             t_unfl
299
300         .global stentoxd
301 stentoxd:
302 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
303
304         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
305         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
306         movel           (%a0),%d0
307         orl             #0x00800001,%d0
308         fadds           %d0,%fp0
309         bra             t_frcinx
310
311         .global stentox
312 stentox:
313 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
314         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
315
316         movel           (%a0),%d0
317         movew           4(%a0),%d0
318         fmovex          %fp0,X(%a6)
319         andil           #0x7FFFFFFF,%d0
320
321         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
322         bges            TENOK1
323         bra             EXPBORS
324
325 TENOK1:
326         cmpil           #0x400B9B07,%d0         | ...|X| <= 16480*log2/log10 ?
327         bles            TENMAIN
328         bra             EXPBORS
329
330 TENMAIN:
331 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
332
333         fmovex          %fp0,%fp1
334         fmuld           L2TEN64,%fp1    | ...X*64*LOG10/LOG2
335
336         fmovel          %fp1,N(%a6)             | ...N=INT(X*64*LOG10/LOG2)
337         movel           %d2,-(%sp)
338         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
339         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
340         movel           N(%a6),%d0
341         movel           %d0,%d2
342         andil           #0x3F,%d0               | ...D0 IS J
343         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
344         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
345         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
346         movel           %d2,%d0
347         asrl            #1,%d0          | ...D0 IS M
348         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
349         addil           #0x3FFF,%d2
350         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
351         movel           (%sp)+,%d2
352
353 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
354 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
355 |--ADJFACT = 2^(M').
356 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
357
358         fmovex          %fp1,%fp2
359
360         fmuld           L10TWO1,%fp1    | ...N*(LOG2/64LOG10)_LEAD
361         movel           (%a1)+,FACT1(%a6)
362
363         fmulx           L10TWO2,%fp2    | ...N*(LOG2/64LOG10)_TRAIL
364
365         movel           (%a1)+,FACT1HI(%a6)
366         movel           (%a1)+,FACT1LOW(%a6)
367         fsubx           %fp1,%fp0               | ...X - N L_LEAD
368         movew           (%a1)+,FACT2(%a6)
369
370         fsubx           %fp2,%fp0               | ...X - N L_TRAIL
371
372         clrw            FACT2+2(%a6)
373         movew           (%a1)+,FACT2HI(%a6)
374         clrw            FACT2HI+2(%a6)
375         clrl            FACT2LOW(%a6)
376
377         fmulx           LOG10,%fp0      | ...FP0 IS R
378
379         addw            %d0,FACT1(%a6)
380         addw            %d0,FACT2(%a6)
381
382 expr:
383 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
384 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
385 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
386 |--     2**(M'+M) * 2**(J/64) * EXP(R)
387
388         fmovex          %fp0,%fp1
389         fmulx           %fp1,%fp1               | ...FP1 IS S = R*R
390
391         fmoved          EXPA5,%fp2      | ...FP2 IS A5
392         fmoved          EXPA4,%fp3      | ...FP3 IS A4
393
394         fmulx           %fp1,%fp2               | ...FP2 IS S*A5
395         fmulx           %fp1,%fp3               | ...FP3 IS S*A4
396
397         faddd           EXPA3,%fp2      | ...FP2 IS A3+S*A5
398         faddd           EXPA2,%fp3      | ...FP3 IS A2+S*A4
399
400         fmulx           %fp1,%fp2               | ...FP2 IS S*(A3+S*A5)
401         fmulx           %fp1,%fp3               | ...FP3 IS S*(A2+S*A4)
402
403         faddd           EXPA1,%fp2      | ...FP2 IS A1+S*(A3+S*A5)
404         fmulx           %fp0,%fp3               | ...FP3 IS R*S*(A2+S*A4)
405
406         fmulx           %fp1,%fp2               | ...FP2 IS S*(A1+S*(A3+S*A5))
407         faddx           %fp3,%fp0               | ...FP0 IS R+R*S*(A2+S*A4)
408
409         faddx           %fp2,%fp0               | ...FP0 IS EXP(R) - 1
410
411
412 |--FINAL RECONSTRUCTION PROCESS
413 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
414
415         fmulx           FACT1(%a6),%fp0
416         faddx           FACT2(%a6),%fp0
417         faddx           FACT1(%a6),%fp0
418
419         fmovel          %d1,%FPCR               |restore users exceptions
420         clrw            ADJFACT+2(%a6)
421         movel           #0x80000000,ADJFACT+4(%a6)
422         clrl            ADJFACT+8(%a6)
423         fmulx           ADJFACT(%a6),%fp0       | ...FINAL ADJUSTMENT
424
425         bra             t_frcinx
426
427         |end