Annotation of sys/arch/hppa/spmath/impys.S, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: impys.S,v 1.11 2005/01/23 18:01:30 mickey Exp $ */
2: /*
3: (c) Copyright 1986 HEWLETT-PACKARD COMPANY
4: To anyone who acknowledges that this file is provided "AS IS"
5: without any express or implied warranty:
6: permission to use, copy, modify, and distribute this file
7: for any purpose is hereby granted without fee, provided that
8: the above copyright notice and this notice appears in all
9: copies, and that the name of Hewlett-Packard Company not be
10: used in advertising or publicity pertaining to distribution
11: of the software without specific, written prior permission.
12: Hewlett-Packard Company makes no representations about the
13: suitability of this software for any purpose.
14: */
15: /* @(#)impys.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:28 */
16:
17: #include <machine/asm.h>
18: #define _LOCORE
19: #include <machine/frame.h>
20:
21: ;****************************************************************************
22: ;
23: ;Implement an integer multiply routine for 32-bit operands and 64-bit product
24: ;with operand values of zero (multiplicand only) and -2**31 treated specially.
25: ;The algorithm uses the absolute value of the multiplier, four bits at a time,
26: ;from right to left, to generate partial products. Execution speed is more
27: ;important than program size in this implementation.
28: ;
29: ;****************************************************************************
30: ;
31: ; Definitions - General registers (symbolic names used by s_xmpy below)
32: ;
33: gr0 .reg %r0 ; General register zero
34: pu .reg %r3 ; upper part of product
35: pl .reg %r4 ; lower part of product (same register as op2)
36: op2 .reg %r4 ; multiplier; shares %r4 with pl, used up as product shifts in
37: op1 .reg %r5 ; multiplicand
38: cnt .reg %r6 ; count in multiply loop (set to 8, 4 multiplier bits per pass)
39: brindex .reg %r7 ; index into the br. table (current 4 multiplier bits)
40: sign .reg %r8 ; sign of product, held in bit 0
41: pc .reg %r9 ; carry bit of product, = 00...01
42: pm .reg %r10 ; value of -1 (111...1) used in shifting with minus sign
43:
44: ;*****************************************************************************
45: .text
46:
;
; s_xmpy - signed 32-bit x 32-bit multiply producing a 64-bit product.
;   arg0: address of the multiplicand word (loaded into op1)
;   arg1: address of the multiplier word (loaded into op2)
;   arg2: address of the result; the high word is stored at 0(arg2)
;         and the low word at 4(arg2)
; pu, pl/op2, op1, cnt, brindex, sign, pc and pm (%r3-%r10) are saved on
; the stack on entry and reloaded before returning through rp.
; The multiplier is consumed four bits per pass (cnt starts at 8) through
; the two-instruction-per-entry branch table at mtable. Branches below
; generally carry a live instruction in their delay slot and several
; targets are label+4 / label-4, so instruction order must not change.
;
47: LEAF_ENTRY(s_xmpy)
48: stws,ma pu,4(sp) ; save pu on stack, sp advanced by 4
49: stws,ma pl,4(sp) ; save pl (also op2) on stack
50: stws,ma op1,4(sp) ; save op1 on stack
51: stws,ma cnt,4(sp) ; save cnt on stack
52: stws,ma brindex,4(sp) ; save brindex on stack
53: stws,ma sign,4(sp) ; save sign on stack
54: stws,ma pc,4(sp) ; save pc on stack
55: stws,ma pm,4(sp) ; save pm on stack
56: ;
57: ; Start multiply process: load operands, record the product sign and
58: ; reduce both operands to absolute values before entering the loop
59: ldws 0(arg1),op2 ; get multiplier
60: ldws 0(arg0),op1 ; get multiplicand
61: addi -1,gr0,pm ; initialize pm to 111...1
62: comb,< op2,gr0,mpyb ; br. if multiplier < 0
63: xor op2,op1,sign ; sign(0) = sign of product (bit 0 of op2 xor op1)
64: mpy1 comb,< op1,gr0,mpya ; br. if multiplicand < 0
65: addi 0,gr0,pu ; clear upper part of product
66: addib,= 0,op1,fini0 ; op1 = 0, product = 0
67: mpy2 addi 1,gr0,pc ; initialize pc to 00...01
68: movib,tr 8,cnt,mloop ; set count for mpy loop (8 passes of 4 bits)
69: extru op2,31,4,brindex ; lowest 4 multiplier bits as index into table
70: ;
71: .align 8
72: ; The next instruction is the target "sh4n-4" used by the 0011 table entry
73: b sh4c ; br. if sign overflow
74: sh4n shd pu,pl,4,pl ; shift product right 4 bits
75: addib,<= -1,cnt,mulend ; reduce count by 1, exit if
76: extru pu,27,28,pu ; <= zero; pu shifted right 4 bits, unsigned
77: ;
78: mloop blr brindex,gr0 ; br. into table
79: ; entries of 2 words
80: extru op2,27,4,brindex ; next 4 bits into index
81: ;
82: ;
83: ; branch table for the multiplication process with four multiplier bits
84: ;
85: mtable ; two words per entry
86: ;
87: ; ---- bits = 0000 ---- shift product 4 bits -------------------------------
88: ;
89: b sh4n+4 ; just shift partial
90: shd pu,pl,4,pl ; product right 4 bits
91: ;
92: ; ---- bits = 0001 ---- add op1, then shift 4 bits
93: ;
94: addb,tr op1,pu,sh4n+4 ; add op1 to product, to shift
95: shd pu,pl,4,pl ; product right 4 bits
96: ;
97: ; ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
98: ;
99: addb,tr op1,pu,sh4n ; add 2*op1, to shift
100: addb,uv op1,pu,sh4c ; product right 4 bits
101: ;
102: ; ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
103: ;
104: addb,tr op1,pu,sh4n-4 ; add op1 & 2*op1, shift
105: sh1add,nsv op1,pu,pu ; product right 4 bits
106: ;
107: ; ---- bits = 0100 ---- shift 2, add op1, shift 2
108: ;
109: b sh2sa ; continue at sh2sa after shifting pl
110: shd pu,pl,2,pl ; shift product 2 bits
111: ;
112: ; ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
113: ;
114: addb,tr op1,pu,sh2us ; add op1 to product
115: shd pu,pl,2,pl ; shift 2 bits
116: ;
117: ; ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
118: ;
119: addb,tr op1,pu,sh2c ; add 2*op1, to shift 2 bits
120: addb,nuv op1,pu,sh2us ; br. if not overflow
121: ;
122: ; ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
123: ;
124: b sh3s
125: sub pu,op1,pu ; subtract op1, br. to sh3s
126:
127: ;
128: ; ---- bits = 1000 ---- shift 3, add op1, shift 1
129: ;
130: b sh3sa ; continue at sh3sa after shifting pl
131: shd pu,pl,3,pl ; shift product right 3 bits
132: ;
133: ; ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
134: ;
135: addb,tr op1,pu,sh3us ; add op1, to shift 3, add op1,
136: shd pu,pl,3,pl ; and shift 1
137: ;
138: ; ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
139: ;
140: addb,tr op1,pu,sh3c ; add 2*op1, to shift 3 bits
141: addb,nuv op1,pu,sh3us ; br. if no overflow
142: ;
143: ; ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
144: ;
145: addib,tr 1,brindex,sh2s ; add 1 to index, subtract op1,
146: sub pu,op1,pu ; shift 2 with minus sign
147: ;
148: ; ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
149: ;
150: addib,tr 1,brindex,sh2sb ; add 1 to index, to shift
151: shd pu,pl,2,pl ; shift right 2 bits signed
152: ;
153: ; ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
154: ;
155: addb,tr op1,pu,sh2ns ; add op1, to shift 2
156: shd pu,pl,2,pl ; right 2 unsigned, etc.
157: ;
158: ; ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
159: ;
160: addib,tr 1,brindex,sh1sa ; add 1 to index, to shift
161: shd pu,pl,1,pl ; shift 1 bit
162: ;
163: ; ---- bits = 1111 ---- add -op1, shift 4 signed
164: ;
165: addib,tr 1,brindex,sh4s ; add 1 to index, subtract op1,
166: sub pu,op1,pu ; to shift 4 signed
167:
168: ;
169: ; ---- bits = 10000 ---- shift 4 signed (index 15 plus 1 carried from the previous pass)
170: ;
171: addib,tr 1,brindex,sh4s+4 ; add 1 to index
172: shd pu,pl,4,pl ; shift 4 signed
173: ;
174: ; ---- end of table ---------------------------------------------------------
175: ;
176: sh4s shd pu,pl,4,pl ; shift low part right 4 bits
177: addib,tr -1,cnt,mloop ; loop (count > 0 always here)
178: shd pm,pu,4,pu ; shift 4, minus signed
179: ;
180: sh4c addib,> -1,cnt,mloop ; decrement count, loop if > 0
181: shd pc,pu,4,pu ; shift 4 with overflow (carry bit from pc shifted in)
182: b signs ; end of multiply
183: bb,>=,n sign,0,fini ; test sign of product
184: ; multiplier < 0: take its absolute value unless it is -2**31
185: mpyb add,= op2,op2,gr0 ; if <> 0, back to main sect.
186: b mpy1 ; (skipped in the -2**31 special case)
187: sub 0,op2,op2 ; op2 = |multiplier|
188: add,>= op1,gr0,gr0 ; if op1 < 0, invert sign,
189: xor pm,sign,sign ; for correct result
190: ;
191: ; special case for multiplier = -2**31, op1 = signed multiplicand
192: ; or multiplicand = -2**31, op1 = signed multiplier
193: ;
194: shd op1,0,1,pl ; shift op1 left 31 bits
195: mmax extrs op1,30,31,pu ; pu = op1 shifted right 1 bit, signed
196: b signs ; negate product (if needed)
197: bb,>=,n sign,0,fini ; test sign of product
198: ; multiplicand < 0: take its absolute value unless it is -2**31
199: mpya add,= op1,op1,gr0 ; op1 = -2**31, special case
200: b mpy2 ; (skipped in the -2**31 special case)
201: sub 0,op1,op1 ; op1 = |multiplicand|
202: add,>= op2,gr0,gr0 ; if op2 < 0, invert sign,
203: xor pm,sign,sign ; for correct result
204: movb,tr op2,op1,mmax ; use op2 as multiplicand
205: shd op1,0,1,pl ; shift it left 31 bits
206: ; shift-3 variants, each followed by add op1 and a final shift of 1 at sh1
207: sh3c shd pu,pl,3,pl ; shift product 3 bits
208: shd pc,pu,3,pu ; shift 3 with carry bit from pc shifted in
209: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
210: shd pu,pl,1,pl
211: ;
212: sh3us extru pu,28,29,pu ; shift 3 unsigned
213: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
214: shd pu,pl,1,pl
215: ;
216: sh3sa extrs pu,28,29,pu ; shift 3 signed
217: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
218: shd pu,pl,1,pl
219: ;
220: sh3s shd pu,pl,3,pl ; shift 3 minus signed
221: shd pm,pu,3,pu
222: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
223: shd pu,pl,1,pl
224: ;
225: sh1 addib,> -1,cnt,mloop ; loop if count > 0
226: extru pu,30,31,pu ; pu shifted right 1 bit, unsigned
227: b signs ; end of multiply
228: bb,>=,n sign,0,fini ; test sign of product
229: ;
230: sh2ns addib,tr 1,brindex,sh2sb+4 ; increment index
231: extru pu,29,30,pu ; shift unsigned
232: ;
233: sh2s shd pu,pl,2,pl ; shift with minus sign
234: shd pm,pu,2,pu ;
235: sub pu,op1,pu ; subtract op1
236: shd pu,pl,2,pl ; shift with minus sign
237: addib,tr -1,cnt,mloop ; decrement count, loop
238: shd pm,pu,2,pu ; shift with minus sign
239: ; count never reaches 0 here
240: ;
241: sh2sb extrs pu,29,30,pu ; shift 2 signed
242: sub pu,op1,pu ; subtract op1 from product
243: shd pu,pl,2,pl ; shift with minus sign
244: addib,tr -1,cnt,mloop ; decrement count, loop
245: shd pm,pu,2,pu ; shift with minus sign
246: ; count never reaches 0 here
247: ;
248: sh1sa extrs pu,30,31,pu ; signed
249: sub pu,op1,pu ; subtract op1 from product
250: shd pu,pl,3,pl ; shift 3 with minus sign
251: addib,tr -1,cnt,mloop ; dec. count, to loop
252: shd pm,pu,3,pu ; count never reaches 0 here
253: ;
254: fini0 movib,tr,n 0,pl,fini ; product = 0 as op1 = 0
255: ; shift-2 variants, each followed by add op1 and a final shift of 2 at sh2a
256: sh2us extru pu,29,30,pu ; shift 2 unsigned
257: addb,tr op1,pu,sh2a ; add op1
258: shd pu,pl,2,pl ; shift 2 bits
259: ;
260: sh2c shd pu,pl,2,pl
261: shd pc,pu,2,pu ; shift with carry
262: addb,tr op1,pu,sh2a ; add op1 to product
263: shd pu,pl,2,pl ; br. to sh2a to shift pu
264: ;
265: sh2sa extrs pu,29,30,pu ; shift with sign
266: addb,tr op1,pu,sh2a ; add op1 to product
267: shd pu,pl,2,pl ; br. to sh2a to shift pu
268: ;
269: sh2a addib,> -1,cnt,mloop ; loop if count > 0
270: extru pu,29,30,pu ; pu shifted right 2 bits, unsigned
271: ;
272: mulend bb,>=,n sign,0,fini ; test sign of product
273: signs sub 0,pl,pl ; negate product if sign
274: subb 0,pu,pu ; is negative (borrow from low word carried in)
275: ;
276: ; finish: store the 64-bit product, restore saved registers, return
277: ;
278: fini stws pu,0(arg2) ; save high part of result
279: stws pl,4(arg2) ; save low part of result
280:
281: ldws,mb -4(sp),pm ; restore pm (reverse order of the saves)
282: ldws,mb -4(sp),pc ; restore pc
283: ldws,mb -4(sp),sign ; restore sign
284: ldws,mb -4(sp),brindex ; restore brindex
285: ldws,mb -4(sp),cnt ; restore cnt
286: ldws,mb -4(sp),op1 ; restore op1
287: ldws,mb -4(sp),pl ; restore pl (also op2)
288: bv 0(rp) ; return
289: ldws,mb -4(sp),pu ; restore pu, placed after the return branch
290: EXIT(s_xmpy)
291:
292: .end
CVSweb