Annotation of sys/arch/hppa/spmath/impyu.S, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: impyu.S,v 1.11 2005/01/23 18:01:30 mickey Exp $ */
2: /*
3: (c) Copyright 1986 HEWLETT-PACKARD COMPANY
4: To anyone who acknowledges that this file is provided "AS IS"
5: without any express or implied warranty:
6: permission to use, copy, modify, and distribute this file
7: for any purpose is hereby granted without fee, provided that
8: the above copyright notice and this notice appears in all
9: copies, and that the name of Hewlett-Packard Company not be
10: used in advertising or publicity pertaining to distribution
11: of the software without specific, written prior permission.
12: Hewlett-Packard Company makes no representations about the
13: suitability of this software for any purpose.
14: */
15: /* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */
16:
17: #include <machine/asm.h>
18: #define _LOCORE
19: #include <machine/frame.h>
20:
21: ;****************************************************************************
22: ;
23: ;Implement an integer multiply routine for 32-bit operands and 64-bit product
24: ; with operand values of zero (multiplicand only) and 2**31 treated specially.
25: ; The algorithm uses the multiplier, four bits at a time, from right to left,
26: ; to generate partial product. Execution speed is more important than program
27: ; size in this implementation.
28: ;
29: ;******************************************************************************
30: ;
31: ; Definitions - General registers
32: ;
33: gr0 .reg %r0 ; General register zero
34: pu .reg %r3 ; upper word of the 64-bit product
35: pl .reg %r4 ; lower word of the product (same register as op2)
36: op2 .reg %r4 ; multiplier (same register as pl)
37: op1 .reg %r5 ; multiplicand
38: cnt .reg %r6 ; pass count in multiply loop (same register as temp)
39: brindex .reg %r7 ; index into the br. table
40: saveop2 .reg %r8 ; save op2 if high bit of multiplicand
41: ; is set
42: pc .reg %r9 ; carry bit of product, = 00...01
43: pm .reg %r10 ; value of -1 used in shifting
44: temp .reg %r6 ; scratch for the final add at lastadd (same register as cnt)
45:
46: ;****************************************************************************
47: .text
; u_xmpy - unsigned 32 x 32 -> 64 bit integer multiply.
;
; Interface, as used by the code below:
;   arg0  address of the multiplicand word (loaded into op1)
;   arg1  address of the multiplier word (loaded into op2)
;   arg2  address of the two-word result; the upper product word (pu) is
;         stored at offset 0 and the lower word (pl) at offset 4
;
; pu, pl, op1, cnt, brindex, saveop2, pc and pm are pushed on the stack at
; entry and popped in the reverse order before returning through rp.
;
; Special cases: a zero multiplicand goes straight to fini0 (product = 0);
; if the msb of the multiplicand is set it is cleared and the multiplier is
; kept in saveop2 so that op2 shifted left 31 can be added in at lastadd.
; Otherwise the loop at mloop runs 8 passes, each dispatching on 4 multiplier
; bits through the two-word-per-entry table at mtable.
;
; NOTE(review): several branches use label arithmetic (sh4n-4, sh4n+4,
; sh4s+4, sh2sb+4) and the table relies on fixed entry size, so instructions
; must not be inserted, removed or reordered here.
48: LEAF_ENTRY(u_xmpy)
49: stws,ma pu,4(sp) ; save registers on stack
50: stws,ma pl,4(sp) ; save registers on stack
51: stws,ma op1,4(sp) ; save registers on stack
52: stws,ma cnt,4(sp) ; save registers on stack
53: stws,ma brindex,4(sp) ; save registers on stack
54: stws,ma saveop2,4(sp) ; save registers on stack
55: stws,ma pc,4(sp) ; save registers on stack
56: stws,ma pm,4(sp) ; save registers on stack
57: ;
58: ; Start multiply process: fetch both operands through the argument pointers
59: ;
60: ldws 0(arg0),op1 ; get multiplicand
61: ldws 0(arg1),op2 ; get multiplier
62: addib,= 0,op1,fini0 ; op1 = 0, product = 0
63: addi 0,gr0,pu ; clear upper word of product
64: bb,>= op1,0,mpy1 ; test msb of multiplicand
65: addi 0,gr0,saveop2 ; clear saveop2
66: ;
67: ; msb of multiplicand is set so will save multiplier for a final
68: ; addition into the result
69: ;
70: extru,= op1,31,31,op1 ; clear msb of multiplicand
71: b mpy1 ; start multiply unless the low 31 bits are all zero
72: add op2,gr0,saveop2 ; save op2 in saveop2
73: shd gr0,op2,1,pu ; op1 was 2**31: shift op2 left 31 for result (upper word)
74: b fini ; go to finish
75: shd op2,gr0,1,pl ; lower word of op2 shifted left 31
76: ;
77: mpy1 addi -1,gr0,pm ; initialize pm to 111...1
78: addi 1,gr0,pc ; initialize pc to 00...01
79: movib,tr 8,cnt,mloop ; set count for mpy loop: 8 passes of 4 bits
80: extru op2,31,4,brindex ; low 4 multiplier bits as index into table
81: ;
82: .align 8
83: ;
84: b sh4c ; br. if sign overflow (this is the sh4n-4 target)
85: sh4n shd pu,pl,4,pl ; shift product right 4 bits
86: addib,<= -1,cnt,mulend ; reduce count by 1, exit if
87: extru pu,27,28,pu ; <= zero
88: ;
89: mloop blr brindex,gr0 ; br. into table
90: ; entries of 2 words
91: extru op2,27,4,brindex ; next 4 bits into index
92: ;
93: ;
94: ; branch table for the multiplication process with four multiplier bits
95: ;
96: mtable ; two words per entry
97: ;
98: ; ---- bits = 0000 ---- shift product 4 bits -------------------------------
99: ;
100: b sh4n+4 ; just shift partial
101: shd pu,pl,4,pl ; product right 4 bits
102: ;
103: ; ---- bits = 0001 ---- add op1, then shift 4 bits
104: ;
105: addb,tr op1,pu,sh4n+4 ; add op1 to product, to shift
106: shd pu,pl,4,pl ; product right 4 bits
107: ;
108: ; ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
109: ;
110: addb,tr op1,pu,sh4n ; add 2*op1, to shift
111: addb,uv op1,pu,sh4c ; product right 4 bits
112: ;
113: ; ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
114: ;
115: addb,tr op1,pu,sh4n-4 ; add op1 & 2*op1, shift
116: sh1add,nuv op1,pu,pu ; product right 4 bits
117: ;
118: ; ---- bits = 0100 ---- shift 2, add op1, shift 2
119: ;
120: b sh2sa ; to shift 2 signed, add op1
121: shd pu,pl,2,pl ; shift product 2 bits
122: ;
123: ; ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
124: ;
125: addb,tr op1,pu,sh2us ; add op1 to product
126: shd pu,pl,2,pl ; shift 2 bits
127: ;
128: ; ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
129: ;
130: addb,tr op1,pu,sh2c ; add 2*op1, to shift 2 bits
131: addb,nuv op1,pu,sh2us ; br. if not overflow
132: ;
133: ; ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
134: ;
135: b sh3s ; to shift 3 minus signed
136: sub pu,op1,pu ; subtract op1, br. to sh3s
137:
138: ;
139: ; ---- bits = 1000 ---- shift 3, add op1, shift 1
140: ;
141: b sh3sa ; to shift 3 signed, add op1
142: shd pu,pl,3,pl ; shift product right 3 bits
143: ;
144: ; ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
145: ;
146: addb,tr op1,pu,sh3us ; add op1, to shift 3, add op1,
147: shd pu,pl,3,pl ; and shift 1
148: ;
149: ; ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
150: ;
151: addb,tr op1,pu,sh3c ; add 2*op1, to shift 3 bits
152: addb,nuv op1,pu,sh3us ; br. if no overflow
153: ;
154: ; ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
155: ;
156: addib,tr 1,brindex,sh2s ; add 1 to index, subtract op1,
157: sub pu,op1,pu ; shift 2 with minus sign
158: ;
159: ; ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
160: ;
161: addib,tr 1,brindex,sh2sb ; add 1 to index, to shift
162: shd pu,pl,2,pl ; shift right 2 bits signed
163: ;
164: ; ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
165: ;
166: addb,tr op1,pu,sh2ns ; add op1, to shift 2
167: shd pu,pl,2,pl ; right 2 unsigned, etc.
168: ;
169: ; ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
170: ;
171: addib,tr 1,brindex,sh1sa ; add 1 to index, to shift
172: shd pu,pl,1,pl ; shift 1 bit
173: ;
174: ; ---- bits = 1111 ---- add -op1, shift 4 signed
175: ;
176: addib,tr 1,brindex,sh4s ; add 1 to index, subtract op1,
177: sub pu,op1,pu ; to shift 4 signed
178:
179: ;
180: ; ---- bits = 10000 ---- shift 4 signed
181: ; (reached when the previous entry's "add 1 to index" bumps an index of 1111)
182: addib,tr 1,brindex,sh4s+4 ; add 1 to index
183: shd pu,pl,4,pl ; shift 4 signed
184: ;
185: ; ---- end of table ---------------------------------------------------------
186: ;
187: sh4s shd pu,pl,4,pl ; shift product right 4 bits
188: addib,> -1,cnt,mloop ; decrement count, loop if > 0
189: shd pm,pu,4,pu ; shift 4, minus signed
190: addb,tr op1,pu,lastadd ; do one more add, then finish
191: addb,=,n saveop2,gr0,fini ; check saveop2
192: ;
193: sh4c addib,> -1,cnt,mloop ; decrement count, loop if > 0
194: shd pc,pu,4,pu ; shift 4 with overflow
195: b lastadd ; end of multiply
196: addb,=,n saveop2,gr0,fini ; check saveop2
197: ;
198: sh3c shd pu,pl,3,pl ; shift product 3 bits
199: shd pc,pu,3,pu ; shift 3 with carry bit from pc
200: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
201: shd pu,pl,1,pl ; shift 1 bit
202: ;
203: sh3us extru pu,28,29,pu ; shift 3 unsigned
204: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
205: shd pu,pl,1,pl ; shift 1 bit
206: ;
207: sh3sa extrs pu,28,29,pu ; shift 3 signed
208: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
209: shd pu,pl,1,pl ; shift 1 bit
210: ;
211: sh3s shd pu,pl,3,pl ; shift 3 minus signed
212: shd pm,pu,3,pu ; upper word filled from pm (-1)
213: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
214: shd pu,pl,1,pl ; shift 1 bit
215: ;
216: sh1 addib,> -1,cnt,mloop ; loop if count > 0
217: extru pu,30,31,pu ; shift 1 unsigned
218: b lastadd ; end of multiply
219: addb,=,n saveop2,gr0,fini ; check saveop2
220: ;
221: sh2ns addib,tr 1,brindex,sh2sb+4 ; increment index
222: extru pu,29,30,pu ; shift unsigned
223: ;
224: sh2s shd pu,pl,2,pl ; shift with minus sign
225: shd pm,pu,2,pu ;
226: sub pu,op1,pu ; subtract op1
227: shd pu,pl,2,pl ; shift with minus sign
228: addib,> -1,cnt,mloop ; decrement count, loop if > 0
229: shd pm,pu,2,pu ; shift with minus sign
230: addb,tr op1,pu,lastadd ; do one more add, then finish
231: addb,=,n saveop2,gr0,fini ; check saveop2
232: ;
233: sh2sb extrs pu,29,30,pu ; shift 2 signed
234: sub pu,op1,pu ; subtract op1 from product
235: shd pu,pl,2,pl ; shift with minus sign
236: addib,> -1,cnt,mloop ; decrement count, loop if > 0
237: shd pm,pu,2,pu ; shift with minus sign
238: addb,tr op1,pu,lastadd ; do one more add, then finish
239: addb,=,n saveop2,gr0,fini ; check saveop2
240: ;
241: sh1sa extrs pu,30,31,pu ; signed
242: sub pu,op1,pu ; subtract op1 from product
243: shd pu,pl,3,pl ; shift 3 with minus sign
244: addib,> -1,cnt,mloop ; decrement count, loop if >0
245: shd pm,pu,3,pu ; shift 3 with minus sign
246: addb,tr op1,pu,lastadd ; do one more add, then finish
247: addb,=,n saveop2,gr0,fini ; check saveop2
248: ;
249: fini0 movib,tr 0,pl,fini ; product = 0 as op1 = 0
250: stws pu,0(arg2) ; save high part of result
251: ;
252: sh2us extru pu,29,30,pu ; shift 2 unsigned
253: addb,tr op1,pu,sh2a ; add op1
254: shd pu,pl,2,pl ; shift 2 bits
255: ;
256: sh2c shd pu,pl,2,pl ; shift product 2 bits
257: shd pc,pu,2,pu ; shift with carry
258: addb,tr op1,pu,sh2a ; add op1 to product
259: shd pu,pl,2,pl ; br. to sh2 to shift pu
260: ;
261: sh2sa extrs pu,29,30,pu ; shift with sign
262: addb,tr op1,pu,sh2a ; add op1 to product
263: shd pu,pl,2,pl ; br. to sh2 to shift pu
264: ;
265: sh2a addib,> -1,cnt,mloop ; loop if count > 0
266: extru pu,29,30,pu ; shift 2 unsigned
267: ;
268: mulend addb,=,n saveop2,gr0,fini ; check saveop2
269: lastadd shd saveop2,gr0,1,temp ; if saveop2 <> 0, shift it
270: shd gr0,saveop2,1,saveop2 ; left 31 and add to result
271: add pl,temp,pl ; add the low words
272: addc pu,saveop2,pu ; add the high words plus carry
273: ;
274: ; finish: store the product through arg2, restore registers and return
275: ;
276: fini stws pu,0(arg2) ; save high part of result
277: stws pl,4(arg2) ; save low part of result
278:
279: ldws,mb -4(sp),pm ; restore registers
280: ldws,mb -4(sp),pc ; restore registers
281: ldws,mb -4(sp),saveop2 ; restore registers
282: ldws,mb -4(sp),brindex ; restore registers
283: ldws,mb -4(sp),cnt ; restore registers
284: ldws,mb -4(sp),op1 ; restore registers
285: ldws,mb -4(sp),pl ; restore registers
286: bv 0(rp) ; return
287: ldws,mb -4(sp),pu ; restore registers (last one, placed after the return branch)
288: EXIT(u_xmpy)
289:
290: .end
CVSweb