sys/arch/hppa/spmath/impys.S - annotate

Return to impys.S CVS log
Up to [local] / sys / arch / hppa / spmath
Annotation of sys/arch/hppa/spmath/impys.S, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: impys.S,v 1.11 2005/01/23 18:01:30 mickey Exp $       */
                      2: /*
                      3:   (c) Copyright 1986 HEWLETT-PACKARD COMPANY
                      4:   To anyone who acknowledges that this file is provided "AS IS"
                      5:   without any express or implied warranty:
                      6:       permission to use, copy, modify, and distribute this file
                      7:   for any purpose is hereby granted without fee, provided that
                      8:   the above copyright notice and this notice appears in all
                      9:   copies, and that the name of Hewlett-Packard Company not be
                     10:   used in advertising or publicity pertaining to distribution
                     11:   of the software without specific, written prior permission.
                     12:   Hewlett-Packard Company makes no representations about the
                     13:   suitability of this software for any purpose.
                     14: */
                     15: /* @(#)impys.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:28 */
                     16:
                     17: #include <machine/asm.h>
                     18: #define _LOCORE
                     19: #include <machine/frame.h>
                     20:
                     21: ;****************************************************************************
                     22: ;
                     23: ;Implement an integer multiply routine for 32-bit operands and 64-bit product
                     24: ;with operand values of zero (multiplicand only) and -2**31 treated specially.
                     25: ;The algorithm uses the absolute value of the multiplier, four bits at a time,
                     26: ;from right to left, to generate partial products.  Execution speed is more
                     27: ;important than program size in this implementation.
                     28: ;
                     29: ;****************************************************************************
                     30: ;
                     31: ; Definitions - General registers
                     32: ;
                     33: gr0    .reg            %r0             ; General register zero
                     34: pu     .reg            %r3             ; upper part of product
                     35: pl     .reg            %r4             ; lower part of product (same register as op2)
                     36: op2    .reg            %r4             ; multiplier (same register as pl)
                     37: op1    .reg            %r5             ; multiplicand
                     38: cnt    .reg            %r6             ; count in multiply
                     39: brindex        .reg            %r7             ; index into the br. table
                     40: sign   .reg            %r8             ; sign of product, tested in bit 0
                     41: pc     .reg            %r9             ; carry bit of product, = 00...01
                     42: pm     .reg            %r10            ; value of -1 used in shifting
                     43:
                     44: ;*****************************************************************************
                     45:        .text
                     46:
                     47: LEAF_ENTRY(s_xmpy)
                     48:        stws,ma         pu,4(sp)                ; push working registers: store at sp,
                     49:        stws,ma         pl,4(sp)                ;   then advance sp by 4 (stws,ma)
                     50:        stws,ma         op1,4(sp)               ; save op1
                     51:        stws,ma         cnt,4(sp)               ; save cnt
                     52:        stws,ma         brindex,4(sp)           ; save brindex
                     53:        stws,ma         sign,4(sp)              ; save sign
                     54:        stws,ma         pc,4(sp)                ; save pc
                     55:        stws,ma         pm,4(sp)                ; save pm (last pushed, first restored)
                     56: ;   Interface as used below: arg0 -> multiplicand word, arg1 -> multiplier
                     57: ;   word, arg2 -> two-word result (high word at offset 0, low word at 4).
                     58: ;   Start multiply process
                     59:        ldws            0(arg1),op2             ; get multiplier
                     60:        ldws            0(arg0),op1             ; get multiplicand
                     61:        addi            -1,gr0,pm               ; initialize pm to 111...1
                     62:        comb,<          op2,gr0,mpyb            ; br. if multiplier < 0
                     63:        xor             op2,op1,sign            ; delay slot: sign(0) = sign of product
                     64: mpy1   comb,<          op1,gr0,mpya            ; br. if multiplicand < 0
                     65:        addi            0,gr0,pu                ; delay slot: clear upper part of product
                     66:        addib,=         0,op1,fini0             ; op1 = 0, product = 0
                     67: mpy2   addi            1,gr0,pc                ; initialize pc to 00...01
                     68:        movib,tr        8,cnt,mloop             ; count = 8 four-bit steps, to mloop
                     69:        extru           op2,31,4,brindex        ; delay slot: low 4 bits as table index
                     70: ;
                     71:        .align          8
                     72: ; sh4n-4 (next word): reached from the 0011 entry, nullified unless sh1add overflowed
                     73:        b               sh4c                    ; br. if sign overflow
                     74: sh4n   shd             pu,pl,4,pl              ; shift product right 4 bits
                     75:        addib,<=        -1,cnt,mulend           ; (sh4n+4) reduce count by 1, exit if
                     76:        extru           pu,27,28,pu             ;   <= zero; delay slot: pu >> 4 unsigned
                     77: ;
                     78: mloop  blr             brindex,gr0             ; br. into table
                     79:                                                ;   entries of 2 words
                     80:        extru           op2,27,4,brindex        ; delay slot: next 4 bits into index
                     81: ;
                     82: ;
                     83: ;      branch table for the multiplication process with four multiplier bits
                     84: ;
                     85: mtable                                         ; two words per entry
                     86: ;
                     87: ; ---- bits = 0000 ---- shift product 4 bits -------------------------------
                     88: ;
                     89:        b               sh4n+4                  ; just shift partial
                     90:        shd             pu,pl,4,pl              ;   product right 4 bits
                     91: ;
                     92: ;  ----        bits = 0001 ---- add op1, then shift 4 bits
                     93: ;
                     94:        addb,tr         op1,pu,sh4n+4           ; add op1 to product, to shift
                     95:        shd             pu,pl,4,pl              ;   product right 4 bits
                     96: ;
                     97: ;  ----        bits = 0010 ---- add op1, add op1, then shift 4 bits
                     98: ;
                     99:        addb,tr         op1,pu,sh4n             ; add 2*op1, to shift
                    100:        addb,uv         op1,pu,sh4c             ;   2nd add; to sh4c on unsigned overflow
                    101: ;
                    102: ;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
                    103: ;
                    104:        addb,tr         op1,pu,sh4n-4           ; add op1 & 2*op1, shift
                    105:        sh1add,nsv      op1,pu,pu                       ;   right 4; no overflow nullifies b at sh4n-4
                    106: ;
                    107: ;  ----        bits = 0100 ---- shift 2, add op1, shift 2
                    108: ;
                    109:        b               sh2sa
                    110:        shd             pu,pl,2,pl              ; shift product 2 bits
                    111: ;
                    112: ;  ----        bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
                    113: ;
                    114:        addb,tr         op1,pu,sh2us            ; add op1 to product
                    115:        shd             pu,pl,2,pl              ; shift 2 bits
                    116: ;
                    117: ;  ----        bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
                    118: ;
                    119:        addb,tr         op1,pu,sh2c             ; add 2*op1, to shift 2 bits
                    120:        addb,nuv        op1,pu,sh2us            ; 2nd add; br. if no unsigned overflow
                    121: ;
                    122: ;  ----        bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
                    123: ;
                    124:        b               sh3s
                    125:        sub             pu,op1,pu               ; subtract op1, br. to sh3s
                    126:
                    127: ;
                    128: ;  ----        bits = 1000 ---- shift 3, add op1, shift 1
                    129: ;
                    130:        b               sh3sa
                    131:        shd             pu,pl,3,pl              ; shift product right 3 bits
                    132: ;
                    133: ;  ----        bits = 1001 ---- add op1, shift 3, add op1, shift 1
                    134: ;
                    135:        addb,tr         op1,pu,sh3us            ; add op1, to shift 3, add op1,
                    136:        shd             pu,pl,3,pl              ;   and shift 1
                    137: ;
                    138: ;  ----        bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
                    139: ;
                    140:        addb,tr         op1,pu,sh3c             ; add 2*op1, to shift 3 bits
                    141:        addb,nuv        op1,pu,sh3us            ;   2nd add; br. if no unsigned overflow
                    142: ;
                    143: ;  ----        bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
                    144: ;
                    145:        addib,tr        1,brindex,sh2s          ; add 1 to index, subtract op1,
                    146:        sub             pu,op1,pu               ;   shift 2 with minus sign
                    147: ;
                    148: ;  ----        bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
                    149: ;
                    150:        addib,tr        1,brindex,sh2sb         ; add 1 to index, to shift
                    151:        shd             pu,pl,2,pl              ; shift right 2 bits signed
                    152: ;
                    153: ;  ----        bits = 1101 ---- add op1, shift 2, add -op1, shift 2
                    154: ;
                    155:        addb,tr         op1,pu,sh2ns            ; add op1, to shift 2
                    156:        shd             pu,pl,2,pl              ;   right 2 unsigned, etc.
                    157: ;
                    158: ;  ----        bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
                    159: ;
                    160:        addib,tr        1,brindex,sh1sa         ; add 1 to index, to shift
                    161:        shd             pu,pl,1,pl              ; shift 1 bit
                    162: ;
                    163: ;  ----        bits = 1111 ---- add -op1, shift 4 signed
                    164: ;
                    165:        addib,tr        1,brindex,sh4s          ; add 1 to index, subtract op1,
                    166:        sub             pu,op1,pu               ;   to shift 4 signed
                    167:
                    168: ;
                    169: ;  ----        bits = 10000 (index 1111 plus the 1 added by the previous step) ---- shift 4 signed
                    170: ;
                    171:        addib,tr        1,brindex,sh4s+4        ; add 1 to index
                    172:        shd             pu,pl,4,pl              ; shift 4 signed
                    173: ;
                    174: ;  ---- end of table ---------------------------------------------------------
                    175: ;
                    176: sh4s   shd             pu,pl,4,pl
                    177:        addib,tr        -1,cnt,mloop            ; loop (count > 0 always here)
                    178:        shd             pm,pu,4,pu              ; shift 4, minus signed
                    179: ;
                    180: sh4c   addib,>         -1,cnt,mloop            ; decrement count, loop if > 0
                    181:        shd             pc,pu,4,pu              ; shift 4 with overflow (bit from pc shifted in)
                    182:        b               signs                   ; end of multiply
                    183:        bb,>=,n         sign,0,fini             ; test sign of product
                    184: ;
                    185: mpyb   add,=           op2,op2,gr0             ; if op2+op2 <> 0, back to main sect.
                    186:        b               mpy1
                    187:        sub             0,op2,op2               ; op2 = |multiplier|
                    188:        add,>=          op1,gr0,gr0             ; if op1 < 0, invert sign,
                    189:        xor             pm,sign,sign            ;   for correct result
                    190: ;
                    191: ;      special case for multiplier = -2**31, op1 = signed multiplicand
                    192: ;              or multiplicand = -2**31, op1 = signed multiplier
                    193: ;
                    194:        shd             op1,0,1,pl              ; pl = op1 shifted left 31 bits
                    195: mmax   extrs           op1,30,31,pu
                    196:        b               signs                   ; negate product (if needed)
                    197:        bb,>=,n         sign,0,fini             ; test sign of product
                    198: ;
                    199: mpya   add,=           op1,op1,gr0             ; op1 = -2**31, special case
                    200:        b               mpy2
                    201:        sub             0,op1,op1               ; op1 = |multiplicand|
                    202:        add,>=          op2,gr0,gr0             ; if op2 < 0, invert sign,
                    203:        xor             pm,sign,sign            ;   for correct result
                    204:        movb,tr         op2,op1,mmax            ; use op2 as multiplicand
                    205:        shd             op1,0,1,pl              ; delay slot: shift it left 31 bits
                    206: ;
                    207: sh3c   shd             pu,pl,3,pl              ; shift product 3 bits
                    208:        shd             pc,pu,3,pu              ; shift 3 with the carry bit from pc
                    209:        addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
                    210:        shd             pu,pl,1,pl
                    211: ;
                    212: sh3us  extru           pu,28,29,pu             ; shift 3 unsigned
                    213:        addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
                    214:        shd             pu,pl,1,pl
                    215: ;
                    216: sh3sa  extrs           pu,28,29,pu             ; shift 3 signed
                    217:        addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
                    218:        shd             pu,pl,1,pl
                    219: ;
                    220: sh3s   shd             pu,pl,3,pl              ; shift 3 minus signed
                    221:        shd             pm,pu,3,pu
                    222:        addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
                    223:        shd             pu,pl,1,pl
                    224: ;
                    225: sh1    addib,>         -1,cnt,mloop            ; loop if count > 0
                    226:        extru           pu,30,31,pu
                    227:        b               signs                   ; end of multiply
                    228:        bb,>=,n         sign,0,fini             ; test sign of product
                    229: ;
                    230: sh2ns  addib,tr        1,brindex,sh2sb+4       ; increment index, skip sh2sb's extrs
                    231:        extru           pu,29,30,pu             ; delay slot: shift 2 unsigned
                    232: ;
                    233: sh2s   shd             pu,pl,2,pl              ; shift with minus sign
                    234:        shd             pm,pu,2,pu              ;
                    235:        sub             pu,op1,pu               ; subtract op1
                    236:        shd             pu,pl,2,pl              ; shift with minus sign
                    237:        addib,tr        -1,cnt,mloop            ; decrement count, loop
                    238:        shd             pm,pu,2,pu              ; shift with minus sign
                    239:                                                ; count never reaches 0 here
                    240: ;
                    241: sh2sb  extrs           pu,29,30,pu             ; shift 2 signed
                    242:        sub             pu,op1,pu               ; subtract op1 from product
                    243:        shd             pu,pl,2,pl              ; shift with minus sign
                    244:        addib,tr        -1,cnt,mloop            ; decrement count, loop
                    245:        shd             pm,pu,2,pu              ; shift with minus sign
                    246:                                                ; count never reaches 0 here
                    247: ;
                    248: sh1sa  extrs           pu,30,31,pu             ; shift 1 signed
                    249:        sub             pu,op1,pu               ; subtract op1 from product
                    250:        shd             pu,pl,3,pl              ; shift 3 with minus sign
                    251:        addib,tr        -1,cnt,mloop            ; dec. count, to loop
                    252:        shd             pm,pu,3,pu              ; count never reaches 0 here
                    253: ;
                    254: fini0  movib,tr,n      0,pl,fini               ; product = 0 as op1 = 0 (pu cleared at mpy1)
                    255: ;
                    256: sh2us  extru           pu,29,30,pu             ; shift 2 unsigned
                    257:        addb,tr         op1,pu,sh2a             ; add op1
                    258:        shd             pu,pl,2,pl              ; shift 2 bits
                    259: ;
                    260: sh2c   shd             pu,pl,2,pl
                    261:        shd             pc,pu,2,pu              ; shift with carry
                    262:        addb,tr         op1,pu,sh2a             ; add op1 to product
                    263:        shd             pu,pl,2,pl              ; br. to sh2a to shift pu
                    264: ;
                    265: sh2sa  extrs           pu,29,30,pu             ; shift with sign
                    266:        addb,tr         op1,pu,sh2a             ; add op1 to product
                    267:        shd             pu,pl,2,pl              ; br. to sh2a to shift pu
                    268: ;
                    269: sh2a   addib,>         -1,cnt,mloop            ; loop if count > 0
                    270:        extru           pu,29,30,pu
                    271: ;
                    272: mulend bb,>=,n         sign,0,fini             ; sign bit 0 clear: skip negation
                    273: signs  sub             0,pl,pl                 ; negate product if sign
                    274:        subb            0,pu,pu                 ;   is negative (borrow into upper word)
                    275: ;
                    276: ;      finish: store the product through arg2, restore registers, return
                    277: ;
                    278: fini   stws            pu,0(arg2)              ; save high part of result
                    279:        stws            pl,4(arg2)              ; save low part of result
                    280:
                    281:        ldws,mb         -4(sp),pm               ; pop in reverse order of the saves:
                    282:        ldws,mb         -4(sp),pc               ;   sp -= 4, then load (ldws,mb)
                    283:        ldws,mb         -4(sp),sign             ; restore sign
                    284:        ldws,mb         -4(sp),brindex          ; restore brindex
                    285:        ldws,mb         -4(sp),cnt              ; restore cnt
                    286:        ldws,mb         -4(sp),op1              ; restore op1
                    287:        ldws,mb         -4(sp),pl               ; restore pl
                    288:        bv              0(rp)                   ; return
                    289:        ldws,mb         -4(sp),pu               ; delay slot: restore pu
                    290: EXIT(s_xmpy)
                    291:
                    292:        .end
CVSweb