sys/lib/libkern/arch/sparc/divrem.m4 - annotate

Return to divrem.m4 CVS log
Up to [local] / sys / lib / libkern / arch / sparc
Annotation of sys/lib/libkern/arch/sparc/divrem.m4, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $     */
        !             2: /*     $NetBSD: divrem.m4,v 1.3 1995/04/22 09:37:39 pk Exp $   */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1992, 1993
        !             6:  *     The Regents of the University of California.  All rights reserved.
        !             7:  *
        !             8:  * This software was developed by the Computer Systems Engineering group
        !             9:  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
        !            10:  * contributed to Berkeley.
        !            11:  *
        !            12:  * Redistribution and use in source and binary forms, with or without
        !            13:  * modification, are permitted provided that the following conditions
        !            14:  * are met:
        !            15:  * 1. Redistributions of source code must retain the above copyright
        !            16:  *    notice, this list of conditions and the following disclaimer.
        !            17:  * 2. Redistributions in binary form must reproduce the above copyright
        !            18:  *    notice, this list of conditions and the following disclaimer in the
        !            19:  *    documentation and/or other materials provided with the distribution.
        !            20:  * 3. Neither the name of the University nor the names of its contributors
        !            21:  *    may be used to endorse or promote products derived from this software
        !            22:  *    without specific prior written permission.
        !            23:  *
        !            24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            34:  * SUCH DAMAGE.
        !            35:  *
        !            36:  * Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp
        !            37:  */
        !            38:
        !            39: /*
        !            40:  * Division and remainder, from Appendix E of the Sparc Version 8
        !            41:  * Architecture Manual, with fixes from Gordon Irlam.
        !            42:  */
        !            43:
        !            44: #if defined(LIBC_SCCS) && !defined(lint)
        !            45: #ifdef notdef
        !            46:        .asciz "@(#)divrem.m4   8.1 (Berkeley) 6/4/93"
        !            47: #endif
        !            48:        .asciz "$OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $"
        !            49: #endif /* LIBC_SCCS and not lint */
        !            50:
        !            51: /*
        !            52:  * Input: dividend and divisor in %o0 and %o1 respectively.
        !            53:  *
        !            54:  * m4 parameters:
        !            55:  *  NAME       name of function to generate
        !            56:  *  NAME2      secondary name of function to generate
        !            57:  *  OP         OP=div => %o0 / %o1; OP=rem => %o0 % %o1
        !            58:  *  S          S=true => signed; S=false => unsigned
        !            59:  *
        !            60:  * Algorithm parameters:
        !            61:  *  N          how many bits per iteration we try to get (4)
        !            62:  *  WORDSIZE   total number of bits (32)
        !            63:  *
        !            64:  * Derived constants:
        !            65:  *  TWOSUPN    2^N, for label generation (m4 exponentiation currently broken)
        !            66:  *  TOPBITS    number of bits in the top `decade' of a number
        !            67:  *
        !            68:  * Important variables:
        !            69:  *  Q          the partial quotient under development (initially 0)
        !            70:  *  R          the remainder so far, initially the dividend
        !            71:  *  ITER       number of main division loop iterations required;
        !            72:  *             equal to ceil(log2(quotient) / N).  Note that this
        !            73:  *             is the log base (2^N) of the quotient.
        !            74:  *  V          the current comparand, initially divisor*2^(ITER*N-1)
        !            75:  *
        !            76:  * Cost:
        !            77:  *  Current estimate for non-large dividend is
        !            78:  *     ceil(log2(quotient) / N) * (10 + 7N/2) + C
        !            79:  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
        !            80:  *  different path, as the upper bits of the quotient must be developed
        !            81:  *  one bit at a time.
        !            82:  */
        !            83:
        !            84: define(N, `4')
        !            85: define(TWOSUPN, `16')
        !            86: define(WORDSIZE, `32')
        !            87: define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
        !            88:
        !            89: define(dividend, `%o0')
        !            90: define(divisor, `%o1')
        !            91: define(Q, `%o2')
        !            92: define(R, `%o3')
        !            93: define(ITER, `%o4')
        !            94: define(V, `%o5')
        !            95:
        !            96: /* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
        !            97: define(T, `%g1')
        !            98: define(SC, `%g7')
        !            99: ifelse(S, `true', `define(SIGN, `%g6')')
        !           100:
        !           101: /*
        !           102:  * This is the recursive definition for developing quotient digits.
        !           103:  *
        !           104:  * Parameters:
        !           105:  *  $1 the current depth, 1 <= $1 <= N
        !           106:  *  $2 the current accumulation of quotient bits
        !           107:  *  N  max depth
        !           108:  *
        !           109:  * We add a new bit to $2 and either recurse or insert the bits in
        !           110:  * the quotient.  R, Q, and V are inputs and outputs as defined above;
        !           111:  * the condition codes are expected to reflect the input R, and are
        !           112:  * modified to reflect the output R.
        !           113:  */
        !           114: define(DEVELOP_QUOTIENT_BITS,
        !           115: `      ! depth $1, accumulated bits $2
        !           116:        bl      L.$1.eval(TWOSUPN+$2)
        !           117:        srl     V,1,V
        !           118:        ! remainder is positive
        !           119:        subcc   R,V,R
        !           120:        ifelse($1, N,
        !           121:        `       b       9f
        !           122:                add     Q, ($2*2+1), Q
        !           123:        ', `    DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
        !           124: L.$1.eval(TWOSUPN+$2):
        !           125:        ! remainder is negative
        !           126:        addcc   R,V,R
        !           127:        ifelse($1, N,
        !           128:        `       b       9f
        !           129:                add     Q, ($2*2-1), Q
        !           130:        ', `    DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
        !           131:        ifelse($1, 1, `9:')')
        !           132:
        !           133: #include "DEFS.h"
        !           134: #include <machine/trap.h>
        !           135: #include <machine/asm.h>
        !           136:
        !           137:        .globl NAME2
        !           138: NAME2:
        !           139: FUNC(NAME)
        !           140: ifelse(S, `true',
        !           141: `      ! compute sign of result; if neither is negative, no problem
        !           142:        orcc    divisor, dividend, %g0  ! either negative?
        !           143:        bge     2f                      ! no, go do the divide
        !           144:        ifelse(OP, `div',
        !           145:                `xor    divisor, dividend, SIGN',
        !           146:                `mov    dividend, SIGN')        ! compute sign in any case
        !           147:        tst     divisor
        !           148:        bge     1f
        !           149:        tst     dividend
        !           150:        ! divisor is definitely negative; dividend might also be negative
        !           151:        bge     2f                      ! if dividend not negative...
        !           152:        neg     divisor                 ! in any case, make divisor nonneg
        !           153: 1:     ! dividend is negative, divisor is nonnegative
        !           154:        neg     dividend                ! make dividend nonnegative
        !           155: 2:
        !           156: ')
        !           157:        ! Ready to divide.  Compute size of quotient; scale comparand.
        !           158:        orcc    divisor, %g0, V
        !           159:        bnz     1f
        !           160:        mov     dividend, R
        !           161:
        !           162:                ! Divide by zero trap.  If it returns, return 0 (about as
        !           163:                ! wrong as possible, but that is what SunOS does...).
        !           164:                t       ST_DIV0
        !           165:                retl
        !           166:                clr     %o0
        !           167:
        !           168: 1:
        !           169:        cmp     R, V                    ! if divisor exceeds dividend, done
        !           170:        blu     Lgot_result             ! (and algorithm fails otherwise)
        !           171:        clr     Q
        !           172:        sethi   %hi(1 << (WORDSIZE - TOPBITS - 1)), T
        !           173:        cmp     R, T
        !           174:        blu     Lnot_really_big
        !           175:        clr     ITER
        !           176:
        !           177:        ! `Here the dividend is >= 2^(31-N) or so.  We must be careful here,
        !           178:        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
        !           179:        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
        !           180:        ! Compute ITER in an unorthodox manner: know we need to shift V into
        !           181:        ! the top decade: so do not even bother to compare to R.'
        !           182:        1:
        !           183:                cmp     V, T
        !           184:                bgeu    3f
        !           185:                mov     1, SC
        !           186:                sll     V, N, V
        !           187:                b       1b
        !           188:                inc     ITER
        !           189:
        !           190:        ! Now compute SC.
        !           191:        2:      addcc   V, V, V
        !           192:                bcc     Lnot_too_big
        !           193:                inc     SC
        !           194:
        !           195:                ! We get here if the divisor overflowed while shifting.
        !           196:                ! This means that R has the high-order bit set.
        !           197:                ! Restore V and subtract from R.
        !           198:                sll     T, TOPBITS, T   ! high order bit
        !           199:                srl     V, 1, V         ! rest of V
        !           200:                add     V, T, V
        !           201:                b       Ldo_single_div
        !           202:                dec     SC
        !           203:
        !           204:        Lnot_too_big:
        !           205:        3:      cmp     V, R
        !           206:                blu     2b
        !           207:                nop
        !           208:                be      Ldo_single_div
        !           209:                nop
        !           210:        /* NB: these are commented out in the V8-Sparc manual as well */
        !           211:        /* (I do not understand this) */
        !           212:        ! V > R: went too far: back up 1 step
        !           213:        !       srl     V, 1, V
        !           214:        !       dec     SC
        !           215:        ! do single-bit divide steps
        !           216:        !
        !           217:        ! We have to be careful here.  We know that R >= V, so we can do the
        !           218:        ! first divide step without thinking.  BUT, the others are conditional,
        !           219:        ! and are only done if R >= 0.  Because both R and V may have the high-
        !           220:        ! order bit set in the first step, just falling into the regular
        !           221:        ! division loop will mess up the first time around.
        !           222:        ! So we unroll slightly...
        !           223:        Ldo_single_div:
        !           224:                deccc   SC
        !           225:                bl      Lend_regular_divide
        !           226:                nop
        !           227:                sub     R, V, R
        !           228:                mov     1, Q
        !           229:                b       Lend_single_divloop
        !           230:                nop
        !           231:        Lsingle_divloop:
        !           232:                sll     Q, 1, Q
        !           233:                bl      1f
        !           234:                srl     V, 1, V
        !           235:                ! R >= 0
        !           236:                sub     R, V, R
        !           237:                b       2f
        !           238:                inc     Q
        !           239:        1:      ! R < 0
        !           240:                add     R, V, R
        !           241:                dec     Q
        !           242:        2:
        !           243:        Lend_single_divloop:
        !           244:                deccc   SC
        !           245:                bge     Lsingle_divloop
        !           246:                tst     R
        !           247:                b,a     Lend_regular_divide
        !           248:
        !           249: Lnot_really_big:
        !           250: 1:
        !           251:        sll     V, N, V
        !           252:        cmp     V, R
        !           253:        bleu    1b
        !           254:        inccc   ITER
        !           255:        be      Lgot_result
        !           256:        dec     ITER
        !           257:
        !           258:        tst     R       ! set up for initial iteration
        !           259: Ldivloop:
        !           260:        sll     Q, N, Q
        !           261:        DEVELOP_QUOTIENT_BITS(1, 0)
        !           262: Lend_regular_divide:
        !           263:        deccc   ITER
        !           264:        bge     Ldivloop
        !           265:        tst     R
        !           266:        bl,a    Lgot_result
        !           267:        ! non-restoring fixup here (one instruction only!)
        !           268: ifelse(OP, `div',
        !           269: `      dec     Q
        !           270: ', `   add     R, divisor, R
        !           271: ')
        !           272:
        !           273: Lgot_result:
        !           274: ifelse(S, `true',
        !           275: `      ! check to see if answer should be < 0
        !           276:        tst     SIGN
        !           277:        bl,a    1f
        !           278:        ifelse(OP, `div', `neg Q', `neg R')
        !           279: 1:')
        !           280:        retl
        !           281:        ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
CVSweb