Annotation of sys/arch/hppa/spmath/impyu.S, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: impyu.S,v 1.11 2005/01/23 18:01:30 mickey Exp $ */
! 2: /*
! 3: (c) Copyright 1986 HEWLETT-PACKARD COMPANY
! 4: To anyone who acknowledges that this file is provided "AS IS"
! 5: without any express or implied warranty:
! 6: permission to use, copy, modify, and distribute this file
! 7: for any purpose is hereby granted without fee, provided that
! 8: the above copyright notice and this notice appears in all
! 9: copies, and that the name of Hewlett-Packard Company not be
! 10: used in advertising or publicity pertaining to distribution
! 11: of the software without specific, written prior permission.
! 12: Hewlett-Packard Company makes no representations about the
! 13: suitability of this software for any purpose.
! 14: */
! 15: /* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */
! 16:
! 17: #include <machine/asm.h>
! 18: #define _LOCORE
! 19: #include <machine/frame.h>
! 20:
! 21: ;****************************************************************************
! 22: ;
! 23: ;Implement an integer multiply routine for 32-bit operands and 64-bit product
! 24: ; with operand values of zero (multiplicand only) and 2**31 treated specially.
! 25: ; The algorithm uses the multiplier, four bits at a time, from right to left,
! 26: ; to generate partial product. Execution speed is more important than program
! 27: ; size in this implementation.
! 28: ;
! 29: ;******************************************************************************
! 30: ;
! 31: ; Definitions - General registers
! 32: ;
! 33: gr0 .reg %r0 ; General register zero
! 34: pu .reg %r3 ; upper part of product
! 35: pl .reg %r4 ; lower part of product (same register as op2)
! 36: op2 .reg %r4 ; multiplier (same register as pl)
! 37: op1 .reg %r5 ; multiplicand
! 38: cnt .reg %r6 ; count in multiply (same register as temp)
! 39: brindex .reg %r7 ; index into the br. table
! 40: saveop2 .reg %r8 ; save op2 if high bit of multiplicand
! 41: ; is set
! 42: pc .reg %r9 ; carry bit of product, = 00...01
! 43: pm .reg %r10 ; value of -1 used in shifting
! 44: temp .reg %r6 ; scratch for the final add at lastadd (same register as cnt)
! 45:
! 46: ;****************************************************************************
! 47: .text
;
; u_xmpy: unsigned 32-bit by 32-bit multiply giving a 64-bit product.
;
;   arg0   address of the multiplicand word (loaded into op1)
;   arg1   address of the multiplier word (loaded into op2)
;   arg2   address of the result: the high word is stored at 0(arg2)
;          and the low word at 4(arg2)
;
; pu, pl, op1, cnt, brindex, saveop2, pc and pm are pushed on the stack
; on entry and popped in reverse order before returning through rp.
;
; The multiplier lives in the same register as pl. Each pass of mloop
; uses four multiplier bits as the index for blr into mtable. The
; two-instruction table entries add op1 to pu or subtract it from pu and
; shift the pu:pl pair right by a total of four bits. cnt starts at 8, so
; at most eight passes are made. The entries for 1011 and above also add
; 1 to the index that was already extracted for the next pass, which is
; why the table has a 17th entry (10000).
;
; Cases handled before the loop:
;   op1 = 0          the product is 0 (fini0)
;   msb of op1 set   the msb is cleared and op2 is kept in saveop2; at
;                    lastadd, saveop2 shifted left 31 is added to pu:pl
;   op1 = 2**31      the product is op2 shifted left 31, no loop is run
;
; NOTE(review): many branches here sit in delay slots or target label+/-4;
; instruction order and the .align 8 before sh4n must not be changed.
;
! 48: LEAF_ENTRY(u_xmpy)
! 49: stws,ma pu,4(sp) ; save registers on stack
! 50: stws,ma pl,4(sp) ; save registers on stack
! 51: stws,ma op1,4(sp) ; save registers on stack
! 52: stws,ma cnt,4(sp) ; save registers on stack
! 53: stws,ma brindex,4(sp) ; save registers on stack
! 54: stws,ma saveop2,4(sp) ; save registers on stack
! 55: stws,ma pc,4(sp) ; save registers on stack
! 56: stws,ma pm,4(sp) ; save registers on stack
! 57: ;
! 58: ; Start multiply process
! 59: ;
! 60: ldws 0(arg0),op1 ; get multiplicand
! 61: ldws 0(arg1),op2 ; get multiplier
! 62: addib,= 0,op1,fini0 ; op1 = 0, product = 0
! 63: addi 0,gr0,pu ; clear product
! 64: bb,>= op1,0,mpy1 ; test msb of multiplicand
! 65: addi 0,gr0,saveop2 ; clear saveop2
! 66: ;
! 67: ; msb of multiplicand is set so will save multiplier for a final
! 68: ; addition into the result
! 69: ;
! 70: extru,= op1,31,31,op1 ; clear msb of multiplicand
! 71: b mpy1 ; if rest of op1 <> 0, start multiply
! 72: add op2,gr0,saveop2 ; save op2 in saveop2
! 73: shd gr0,op2,1,pu ; shift op2 left 31 for result
! 74: b fini ; go to finish
! 75: shd op2,gr0,1,pl ; low word of op2 shifted left 31
! 76: ;
! 77: mpy1 addi -1,gr0,pm ; initialize pm to 111...1
! 78: addi 1,gr0,pc ; initialize pc to 00...01
! 79: movib,tr 8,cnt,mloop ; set count for mpy loop
! 80: extru op2,31,4,brindex ; 4 bits as index into table
! 81: ;
! 82: .align 8
! 83: ;
! 84: b sh4c ; at sh4n-4: br. if sign overflow
! 85: sh4n shd pu,pl,4,pl ; shift product right 4 bits
! 86: addib,<= -1,cnt,mulend ; reduce count by 1, exit if
! 87: extru pu,27,28,pu ; <= zero
! 88: ;
! 89: mloop blr brindex,gr0 ; br. into table
! 90: ; entries of 2 words
! 91: extru op2,27,4,brindex ; next 4 bits into index
! 92: ;
! 93: ;
! 94: ; branch table for the multiplication process with four multiplier bits
! 95: ;
! 96: mtable ; two words per entry
! 97: ;
! 98: ; ---- bits = 0000 ---- shift product 4 bits -------------------------------
! 99: ;
! 100: b sh4n+4 ; just shift partial
! 101: shd pu,pl,4,pl ; product right 4 bits
! 102: ;
! 103: ; ---- bits = 0001 ---- add op1, then shift 4 bits
! 104: ;
! 105: addb,tr op1,pu,sh4n+4 ; add op1 to product, to shift
! 106: shd pu,pl,4,pl ; product right 4 bits
! 107: ;
! 108: ; ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
! 109: ;
! 110: addb,tr op1,pu,sh4n ; add 2*op1, to shift
! 111: addb,uv op1,pu,sh4c ; product right 4 bits
! 112: ;
! 113: ; ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
! 114: ;
! 115: addb,tr op1,pu,sh4n-4 ; add op1 & 2*op1, shift
! 116: sh1add,nuv op1,pu,pu ; product right 4 bits
! 117: ;
! 118: ; ---- bits = 0100 ---- shift 2, add op1, shift 2
! 119: ;
! 120: b sh2sa
! 121: shd pu,pl,2,pl ; shift product 2 bits
! 122: ;
! 123: ; ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
! 124: ;
! 125: addb,tr op1,pu,sh2us ; add op1 to product
! 126: shd pu,pl,2,pl ; shift 2 bits
! 127: ;
! 128: ; ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
! 129: ;
! 130: addb,tr op1,pu,sh2c ; add 2*op1, to shift 2 bits
! 131: addb,nuv op1,pu,sh2us ; br. if not overflow
! 132: ;
! 133: ; ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
! 134: ;
! 135: b sh3s
! 136: sub pu,op1,pu ; subtract op1, br. to sh3s
! 137:
! 138: ;
! 139: ; ---- bits = 1000 ---- shift 3, add op1, shift 1
! 140: ;
! 141: b sh3sa
! 142: shd pu,pl,3,pl ; shift product right 3 bits
! 143: ;
! 144: ; ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
! 145: ;
! 146: addb,tr op1,pu,sh3us ; add op1, to shift 3, add op1,
! 147: shd pu,pl,3,pl ; and shift 1
! 148: ;
! 149: ; ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
! 150: ;
! 151: addb,tr op1,pu,sh3c ; add 2*op1, to shift 3 bits
! 152: addb,nuv op1,pu,sh3us ; br. if no overflow
! 153: ;
! 154: ; ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
! 155: ;
! 156: addib,tr 1,brindex,sh2s ; add 1 to index, subtract op1,
! 157: sub pu,op1,pu ; shift 2 with minus sign
! 158: ;
! 159: ; ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
! 160: ;
! 161: addib,tr 1,brindex,sh2sb ; add 1 to index, to shift
! 162: shd pu,pl,2,pl ; shift right 2 bits signed
! 163: ;
! 164: ; ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
! 165: ;
! 166: addb,tr op1,pu,sh2ns ; add op1, to shift 2
! 167: shd pu,pl,2,pl ; right 2 unsigned, etc.
! 168: ;
! 169: ; ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
! 170: ;
! 171: addib,tr 1,brindex,sh1sa ; add 1 to index, to shift
! 172: shd pu,pl,1,pl ; shift 1 bit
! 173: ;
! 174: ; ---- bits = 1111 ---- add -op1, shift 4 signed
! 175: ;
! 176: addib,tr 1,brindex,sh4s ; add 1 to index, subtract op1,
! 177: sub pu,op1,pu ; to shift 4 signed
! 178:
! 179: ;
! 180: ; ---- bits = 10000 ---- shift 4 signed
! 181: ;
! 182: addib,tr 1,brindex,sh4s+4 ; add 1 to index
! 183: shd pu,pl,4,pl ; shift 4 signed
! 184: ;
! 185: ; ---- end of table ---------------------------------------------------------
! 186: ;
! 187: sh4s shd pu,pl,4,pl
! 188: addib,> -1,cnt,mloop ; decrement count, loop if > 0
! 189: shd pm,pu,4,pu ; shift 4, minus signed
! 190: addb,tr op1,pu,lastadd ; do one more add, then finish
! 191: addb,=,n saveop2,gr0,fini ; check saveop2
! 192: ;
! 193: sh4c addib,> -1,cnt,mloop ; decrement count, loop if > 0
! 194: shd pc,pu,4,pu ; shift 4 with overflow
! 195: b lastadd ; end of multiply
! 196: addb,=,n saveop2,gr0,fini ; check saveop2
! 197: ;
! 198: sh3c shd pu,pl,3,pl ; shift product 3 bits
! 199: shd pc,pu,3,pu ; shift 3 with carry
! 200: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 201: shd pu,pl,1,pl
! 202: ;
! 203: sh3us extru pu,28,29,pu ; shift 3 unsigned
! 204: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 205: shd pu,pl,1,pl
! 206: ;
! 207: sh3sa extrs pu,28,29,pu ; shift 3 signed
! 208: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 209: shd pu,pl,1,pl
! 210: ;
! 211: sh3s shd pu,pl,3,pl ; shift 3 minus signed
! 212: shd pm,pu,3,pu ; top 3 bits come from pm (all ones)
! 213: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 214: shd pu,pl,1,pl
! 215: ;
! 216: sh1 addib,> -1,cnt,mloop ; loop if count > 0
! 217: extru pu,30,31,pu ; shift pu right 1 unsigned
! 218: b lastadd ; end of multiply
! 219: addb,=,n saveop2,gr0,fini ; check saveop2
! 220: ;
! 221: sh2ns addib,tr 1,brindex,sh2sb+4 ; increment index
! 222: extru pu,29,30,pu ; shift unsigned
! 223: ;
! 224: sh2s shd pu,pl,2,pl ; shift with minus sign
! 225: shd pm,pu,2,pu ;
! 226: sub pu,op1,pu ; subtract op1
! 227: shd pu,pl,2,pl ; shift with minus sign
! 228: addib,> -1,cnt,mloop ; decrement count, loop if > 0
! 229: shd pm,pu,2,pu ; shift with minus sign
! 230: addb,tr op1,pu,lastadd ; do one more add, then finish
! 231: addb,=,n saveop2,gr0,fini ; check saveop2
! 232: ;
! 233: sh2sb extrs pu,29,30,pu ; shift 2 signed
! 234: sub pu,op1,pu ; subtract op1 from product
! 235: shd pu,pl,2,pl ; shift with minus sign
! 236: addib,> -1,cnt,mloop ; decrement count, loop if > 0
! 237: shd pm,pu,2,pu ; shift with minus sign
! 238: addb,tr op1,pu,lastadd ; do one more add, then finish
! 239: addb,=,n saveop2,gr0,fini ; check saveop2
! 240: ;
! 241: sh1sa extrs pu,30,31,pu ; signed
! 242: sub pu,op1,pu ; subtract op1 from product
! 243: shd pu,pl,3,pl ; shift 3 with minus sign
! 244: addib,> -1,cnt,mloop ; decrement count, loop if >0
! 245: shd pm,pu,3,pu ; shift 3 with minus sign
! 246: addb,tr op1,pu,lastadd ; do one more add, then finish
! 247: addb,=,n saveop2,gr0,fini ; check saveop2
! 248: ;
! 249: fini0 movib,tr 0,pl,fini ; product = 0 as op1 = 0
! 250: stws pu,0(arg2) ; save high part of result
! 251: ;
! 252: sh2us extru pu,29,30,pu ; shift 2 unsigned
! 253: addb,tr op1,pu,sh2a ; add op1
! 254: shd pu,pl,2,pl ; shift 2 bits
! 255: ;
! 256: sh2c shd pu,pl,2,pl ; shift product 2 bits
! 257: shd pc,pu,2,pu ; shift with carry
! 258: addb,tr op1,pu,sh2a ; add op1 to product
! 259: shd pu,pl,2,pl ; br. to sh2a to shift pu
! 260: ;
! 261: sh2sa extrs pu,29,30,pu ; shift with sign
! 262: addb,tr op1,pu,sh2a ; add op1 to product
! 263: shd pu,pl,2,pl ; br. to sh2a to shift pu
! 264: ;
! 265: sh2a addib,> -1,cnt,mloop ; loop if count > 0
! 266: extru pu,29,30,pu ; shift pu right 2 unsigned
! 267: ;
! 268: mulend addb,=,n saveop2,gr0,fini ; check saveop2
! 269: lastadd shd saveop2,gr0,1,temp ; if saveop2 <> 0, shift it
! 270: shd gr0,saveop2,1,saveop2 ; left 31 and add to result
! 271: add pl,temp,pl ; add low words
! 272: addc pu,saveop2,pu ; add high words with carry
! 273: ;
! 274: ; finish
! 275: ;
! 276: fini stws pu,0(arg2) ; save high part of result
! 277: stws pl,4(arg2) ; save low part of result
! 278:
! 279: ldws,mb -4(sp),pm ; restore registers
! 280: ldws,mb -4(sp),pc ; restore registers
! 281: ldws,mb -4(sp),saveop2 ; restore registers
! 282: ldws,mb -4(sp),brindex ; restore registers
! 283: ldws,mb -4(sp),cnt ; restore registers
! 284: ldws,mb -4(sp),op1 ; restore registers
! 285: ldws,mb -4(sp),pl ; restore registers
! 286: bv 0(rp) ; return
! 287: ldws,mb -4(sp),pu ; restore registers (in the delay slot of bv)
! 288: EXIT(u_xmpy)
! 289:
! 290: .end
CVSweb