Annotation of sys/lib/libkern/arch/sparc/divrem.m4, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $ */
! 2: /* $NetBSD: divrem.m4,v 1.3 1995/04/22 09:37:39 pk Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1992, 1993
! 6: * The Regents of the University of California. All rights reserved.
! 7: *
! 8: * This software was developed by the Computer Systems Engineering group
! 9: * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
! 10: * contributed to Berkeley.
! 11: *
! 12: * Redistribution and use in source and binary forms, with or without
! 13: * modification, are permitted provided that the following conditions
! 14: * are met:
! 15: * 1. Redistributions of source code must retain the above copyright
! 16: * notice, this list of conditions and the following disclaimer.
! 17: * 2. Redistributions in binary form must reproduce the above copyright
! 18: * notice, this list of conditions and the following disclaimer in the
! 19: * documentation and/or other materials provided with the distribution.
! 20: * 3. Neither the name of the University nor the names of its contributors
! 21: * may be used to endorse or promote products derived from this software
! 22: * without specific prior written permission.
! 23: *
! 24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 34: * SUCH DAMAGE.
! 35: *
! 36: * Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp
! 37: */
! 38:
! 39: /*
! 40: * Division and remainder, from Appendix E of the Sparc Version 8
! 41: * Architecture Manual, with fixes from Gordon Irlam.
! 42: */
! 43:
! 44: #if defined(LIBC_SCCS) && !defined(lint)
! 45: #ifdef notdef
! 46: .asciz "@(#)divrem.m4 8.1 (Berkeley) 6/4/93"
! 47: #endif
! 48: .asciz "$OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $"
! 49: #endif /* LIBC_SCCS and not lint */
! 50:
! 51: /*
! 52: * Input: dividend and divisor in %o0 and %o1 respectively.
! 53: *
! 54: * m4 parameters:
! 55: * NAME name of function to generate
! 56: * NAME2 secondary name of function to generate
! 57: * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
! 58: * S S=true => signed; S=false => unsigned
! 59: *
! 60: * Algorithm parameters:
! 61: * N how many bits per iteration we try to get (4)
! 62: * WORDSIZE total number of bits (32)
! 63: *
! 64: * Derived constants:
! 65: * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken)
! 66: * TOPBITS number of bits in the top `decade' of a number
! 67: *
! 68: * Important variables:
! 69: * Q the partial quotient under development (initially 0)
! 70: * R the remainder so far, initially the dividend
! 71: * ITER number of main division loop iterations required;
! 72: * equal to ceil(log2(quotient) / N). Note that this
! 73: * is the log base (2^N) of the quotient.
! 74: * V the current comparand, initially divisor*2^(ITER*N-1)
! 75: *
! 76: * Cost:
! 77: * Current estimate for non-large dividend is
! 78: * ceil(log2(quotient) / N) * (10 + 7N/2) + C
! 79: * A large dividend is one greater than 2^(31-TOPBITS) and takes a
! 80: * different path, as the upper bits of the quotient must be developed
! 81: * one bit at a time.
! 82: */
! 83:
! 84: define(N, `4')
! 85: define(TWOSUPN, `16')
! 86: define(WORDSIZE, `32')
! 87: define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
! 88:
! 89: define(dividend, `%o0')
! 90: define(divisor, `%o1')
! 91: define(Q, `%o2')
! 92: define(R, `%o3')
! 93: define(ITER, `%o4')
! 94: define(V, `%o5')
! 95:
! 96: /* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
! 97: define(T, `%g1')
! 98: define(SC, `%g7')
! 99: ifelse(S, `true', `define(SIGN, `%g6')')
! 100:
! 101: /*
! 102: * This is the recursive definition for developing quotient digits.
! 103: *
! 104: * Parameters:
! 105: * $1 the current depth, 1 <= $1 <= N
! 106: * $2 the current accumulation of quotient bits
! 107: * N max depth
! 108: *
! 109: * We add a new bit to $2 and either recurse or insert the bits in
! 110: * the quotient. R, Q, and V are inputs and outputs as defined above;
! 111: * the condition codes are expected to reflect the input R, and are
! 112: * modified to reflect the output R.
! 113: */
! 114: define(DEVELOP_QUOTIENT_BITS,
! 115: ` ! depth $1, accumulated bits $2
! 116: bl L.$1.eval(TWOSUPN+$2)
! 117: srl V,1,V
! 118: ! remainder is positive
! 119: subcc R,V,R
! 120: ifelse($1, N,
! 121: ` b 9f
! 122: add Q, ($2*2+1), Q
! 123: ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
! 124: L.$1.eval(TWOSUPN+$2):
! 125: ! remainder is negative
! 126: addcc R,V,R
! 127: ifelse($1, N,
! 128: ` b 9f
! 129: add Q, ($2*2-1), Q
! 130: ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
! 131: ifelse($1, 1, `9:')')
! 132:
! 133: #include "DEFS.h"
! 134: #include <machine/trap.h>
! 135: #include <machine/asm.h>
! 136:
! 137: .globl NAME2
! 138: NAME2:
! 139: FUNC(NAME)
! 140: ifelse(S, `true',
! 141: ` ! compute sign of result; if neither is negative, no problem
! 142: orcc divisor, dividend, %g0 ! either negative?
! 143: bge 2f ! no, go do the divide
! 144: ifelse(OP, `div',
! 145: `xor divisor, dividend, SIGN',
! 146: `mov dividend, SIGN') ! compute sign in any case
! 147: tst divisor
! 148: bge 1f
! 149: tst dividend
! 150: ! divisor is definitely negative; dividend might also be negative
! 151: bge 2f ! if dividend not negative...
! 152: neg divisor ! in any case, make divisor nonneg
! 153: 1: ! dividend is negative, divisor is nonnegative
! 154: neg dividend ! make dividend nonnegative
! 155: 2:
! 156: ')
! 157: ! Ready to divide. Compute size of quotient; scale comparand.
! 158: orcc divisor, %g0, V
! 159: bnz 1f
! 160: mov dividend, R
! 161:
! 162: ! Divide by zero trap. If it returns, return 0 (about as
! 163: ! wrong as possible, but that is what SunOS does...).
! 164: t ST_DIV0
! 165: retl
! 166: clr %o0
! 167:
! 168: 1:
! 169: cmp R, V ! if divisor exceeds dividend, done
! 170: blu Lgot_result ! (and algorithm fails otherwise)
! 171: clr Q
! 172: sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
! 173: cmp R, T
! 174: blu Lnot_really_big
! 175: clr ITER
! 176:
! 177: ! `Here the dividend is >= 2^(31-N) or so. We must be careful here,
! 178: ! as our usual N-at-a-shot divide step will cause overflow and havoc.
! 179: ! The number of bits in the result here is N*ITER+SC, where SC <= N.
! 180: ! Compute ITER in an unorthodox manner: know we need to shift V into
! 181: ! the top decade: so do not even bother to compare to R.'
! 182: 1:
! 183: cmp V, T
! 184: bgeu 3f
! 185: mov 1, SC
! 186: sll V, N, V
! 187: b 1b
! 188: inc ITER
! 189:
! 190: ! Now compute SC.
! 191: 2: addcc V, V, V
! 192: bcc Lnot_too_big
! 193: inc SC
! 194:
! 195: ! We get here if the divisor overflowed while shifting.
! 196: ! This means that R has the high-order bit set.
! 197: ! Restore V and subtract from R.
! 198: sll T, TOPBITS, T ! high order bit
! 199: srl V, 1, V ! rest of V
! 200: add V, T, V
! 201: b Ldo_single_div
! 202: dec SC
! 203:
! 204: Lnot_too_big:
! 205: 3: cmp V, R
! 206: blu 2b
! 207: nop
! 208: be Ldo_single_div
! 209: nop
! 210: /* NB: these are commented out in the V8-Sparc manual as well */
! 211: /* (I do not understand this) */
! 212: ! V > R: went too far: back up 1 step
! 213: ! srl V, 1, V
! 214: ! dec SC
! 215: ! do single-bit divide steps
! 216: !
! 217: ! We have to be careful here. We know that R >= V, so we can do the
! 218: ! first divide step without thinking. BUT, the others are conditional,
! 219: ! and are only done if R >= 0. Because both R and V may have the high-
! 220: ! order bit set in the first step, just falling into the regular
! 221: ! division loop will mess up the first time around.
! 222: ! So we unroll slightly...
! 223: Ldo_single_div:
! 224: deccc SC
! 225: bl Lend_regular_divide
! 226: nop
! 227: sub R, V, R
! 228: mov 1, Q
! 229: b Lend_single_divloop
! 230: nop
! 231: Lsingle_divloop:
! 232: sll Q, 1, Q
! 233: bl 1f
! 234: srl V, 1, V
! 235: ! R >= 0
! 236: sub R, V, R
! 237: b 2f
! 238: inc Q
! 239: 1: ! R < 0
! 240: add R, V, R
! 241: dec Q
! 242: 2:
! 243: Lend_single_divloop:
! 244: deccc SC
! 245: bge Lsingle_divloop
! 246: tst R
! 247: b,a Lend_regular_divide
! 248:
! 249: Lnot_really_big:
! 250: 1:
! 251: sll V, N, V
! 252: cmp V, R
! 253: bleu 1b
! 254: inccc ITER
! 255: be Lgot_result
! 256: dec ITER
! 257:
! 258: tst R ! set up for initial iteration
! 259: Ldivloop:
! 260: sll Q, N, Q
! 261: DEVELOP_QUOTIENT_BITS(1, 0)
! 262: Lend_regular_divide:
! 263: deccc ITER
! 264: bge Ldivloop
! 265: tst R
! 266: bl,a Lgot_result
! 267: ! non-restoring fixup here (one instruction only!)
! 268: ifelse(OP, `div',
! 269: ` dec Q
! 270: ', ` add R, divisor, R
! 271: ')
! 272:
! 273: Lgot_result:
! 274: ifelse(S, `true',
! 275: ` ! check to see if answer should be < 0
! 276: tst SIGN
! 277: bl,a 1f
! 278: ifelse(OP, `div', `neg Q', `neg R')
! 279: 1:')
! 280: retl
! 281: ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
CVSweb