sys/arch/sparc64/fpu/fpu_mul.c - annotate

Return to fpu_mul.c CVS log
Up to [local] / sys / arch / sparc64 / fpu
Annotation of sys/arch/sparc64/fpu/fpu_mul.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: fpu_mul.c,v 1.2 2003/06/02 23:27:55 millert Exp $     */
        !             2: /*     $NetBSD: fpu_mul.c,v 1.2 1994/11/20 20:52:44 deraadt Exp $ */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1992, 1993
        !             6:  *     The Regents of the University of California.  All rights reserved.
        !             7:  *
        !             8:  * This software was developed by the Computer Systems Engineering group
        !             9:  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
        !            10:  * contributed to Berkeley.
        !            11:  *
        !            12:  * All advertising materials mentioning features or use of this software
        !            13:  * must display the following acknowledgement:
        !            14:  *     This product includes software developed by the University of
        !            15:  *     California, Lawrence Berkeley Laboratory.
        !            16:  *
        !            17:  * Redistribution and use in source and binary forms, with or without
        !            18:  * modification, are permitted provided that the following conditions
        !            19:  * are met:
        !            20:  * 1. Redistributions of source code must retain the above copyright
        !            21:  *    notice, this list of conditions and the following disclaimer.
        !            22:  * 2. Redistributions in binary form must reproduce the above copyright
        !            23:  *    notice, this list of conditions and the following disclaimer in the
        !            24:  *    documentation and/or other materials provided with the distribution.
        !            25:  * 3. Neither the name of the University nor the names of its contributors
        !            26:  *    may be used to endorse or promote products derived from this software
        !            27:  *    without specific prior written permission.
        !            28:  *
        !            29:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            30:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            31:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            32:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            33:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            34:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            35:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            36:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            37:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            38:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            39:  * SUCH DAMAGE.
        !            40:  *
        !            41:  *     @(#)fpu_mul.c   8.1 (Berkeley) 6/11/93
        !            42:  */
        !            43:
        !            44: /*
        !            45:  * Perform an FPU multiply (return x * y).
        !            46:  */
        !            47:
        !            48: #include <sys/types.h>
        !            49:
        !            50: #include <machine/reg.h>
        !            51:
        !            52: #include <sparc64/fpu/fpu_arith.h>
        !            53: #include <sparc64/fpu/fpu_emu.h>
        !            54:
        !            55: /*
        !            56:  * The multiplication algorithm for normal numbers is as follows:
        !            57:  *
        !            58:  * The fraction of the product is built in the usual stepwise fashion.
        !            59:  * Each step consists of shifting the accumulator right one bit
        !            60:  * (maintaining any guard bits) and, if the next bit in y is set,
        !            61:  * adding the multiplicand (x) to the accumulator.  Then, in any case,
        !            62:  * we advance one bit leftward in y.  Algorithmically:
        !            63:  *
        !            64:  *     A = 0;
        !            65:  *     for (bit = 0; bit < FP_NMANT; bit++) {
        !            66:  *             sticky |= A & 1, A >>= 1;
        !            67:  *             if (Y & (1 << bit))
        !            68:  *                     A += X;
        !            69:  *     }
        !            70:  *
        !            71:  * (X and Y here represent the mantissas of x and y respectively.)
        !            72:  * The resultant accumulator (A) is the product's mantissa.  It may
        !            73:  * be as large as 11.11111... in binary and hence may need to be
        !            74:  * shifted right, but at most one bit.
        !            75:  *
        !            76:  * Since we do not have efficient multiword arithmetic, we code the
        !            77:  * accumulator as four separate words, just like any other mantissa.
        !            78:  * We use local `register' variables in the hope that this is faster
        !            79:  * than memory.  We keep x->fp_mant in locals for the same reason.
        !            80:  *
        !            81:  * In the algorithm above, the bits in y are inspected one at a time.
        !            82:  * We will pick them up 32 at a time and then deal with those 32, one
        !            83:  * at a time.  Note, however, that we know several things about y:
        !            84:  *
        !            85:  *    - the guard and round bits at the bottom are sure to be zero;
        !            86:  *
        !            87:  *    - often many low bits are zero (y is often from a single or double
        !            88:  *     precision source);
        !            89:  *
        !            90:  *    - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
        !            91:  *
        !            92:  * We can also test for 32-zero-bits swiftly.  In this case, the center
        !            93:  * part of the loop---setting sticky, shifting A, and not adding---will
        !            94:  * run 32 times without adding X to A.  We can do a 32-bit shift faster
        !            95:  * by simply moving words.  Since zeros are common, we optimize this case.
        !            96:  * Furthermore, since A is initially zero, we can omit the shift as well
        !            97:  * until we reach a nonzero word.
        !            98:  */
        !            99: struct fpn *
        !           100: fpu_mul(fe)
        !           101:        register struct fpemu *fe;
        !           102: {
        !           103:        register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
        !           104:        register u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;
        !           105:        register int sticky;
        !           106:        FPU_DECL_CARRY
        !           107:
        !           108:        /*
        !           109:         * Put the `heavier' operand on the right (see fpu_emu.h).
        !           110:         * Then we will have one of the following cases, taken in the
        !           111:         * following order:
        !           112:         *
        !           113:         *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
        !           114:         *      The result is y.
        !           115:         *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
        !           116:         *    case was taken care of earlier).
        !           117:         *      If x = 0, the result is NaN.  Otherwise the result
        !           118:         *      is y, with its sign reversed if x is negative.
        !           119:         *  - x = 0.  Implied: y is 0 or number.
        !           120:         *      The result is 0 (with XORed sign as usual).
        !           121:         *  - other.  Implied: both x and y are numbers.
        !           122:         *      The result is x * y (XOR sign, multiply bits, add exponents).
        !           123:         */
        !           124:        ORDER(x, y);
        !           125:        if (ISNAN(y)) {
        !           126:                y->fp_sign ^= x->fp_sign;
        !           127:                return (y);
        !           128:        }
        !           129:        if (ISINF(y)) {
        !           130:                if (ISZERO(x))
        !           131:                        return (fpu_newnan(fe));
        !           132:                y->fp_sign ^= x->fp_sign;
        !           133:                return (y);
        !           134:        }
        !           135:        if (ISZERO(x)) {
        !           136:                x->fp_sign ^= y->fp_sign;
        !           137:                return (x);
        !           138:        }
        !           139:
        !           140:        /*
        !           141:         * Setup.  In the code below, the mask `m' will hold the current
        !           142:         * mantissa byte from y.  The variable `bit' denotes the bit
        !           143:         * within m.  We also define some macros to deal with everything.
        !           144:         */
        !           145:        x3 = x->fp_mant[3];
        !           146:        x2 = x->fp_mant[2];
        !           147:        x1 = x->fp_mant[1];
        !           148:        x0 = x->fp_mant[0];
        !           149:        sticky = a3 = a2 = a1 = a0 = 0;
        !           150:
        !           151: #define        ADD     /* A += X */ \
        !           152:        FPU_ADDS(a3, a3, x3); \
        !           153:        FPU_ADDCS(a2, a2, x2); \
        !           154:        FPU_ADDCS(a1, a1, x1); \
        !           155:        FPU_ADDC(a0, a0, x0)
        !           156:
        !           157: #define        SHR1    /* A >>= 1, with sticky */ \
        !           158:        sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \
        !           159:        a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1
        !           160:
        !           161: #define        SHR32   /* A >>= 32, with sticky */ \
        !           162:        sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0
        !           163:
        !           164: #define        STEP    /* each 1-bit step of the multiplication */ \
        !           165:        SHR1; if (bit & m) { ADD; }; bit <<= 1
        !           166:
        !           167:        /*
        !           168:         * We are ready to begin.  The multiply loop runs once for each
        !           169:         * of the four 32-bit words.  Some words, however, are special.
        !           170:         * As noted above, the low order bits of Y are often zero.  Even
        !           171:         * if not, the first loop can certainly skip the guard bits.
        !           172:         * The last word of y has its highest 1-bit in position FP_NMANT-1,
        !           173:         * so we stop the loop when we move past that bit.
        !           174:         */
        !           175:        if ((m = y->fp_mant[3]) == 0) {
        !           176:                /* SHR32; */                    /* unneeded since A==0 */
        !           177:        } else {
        !           178:                bit = 1 << FP_NG;
        !           179:                do {
        !           180:                        STEP;
        !           181:                } while (bit != 0);
        !           182:        }
        !           183:        if ((m = y->fp_mant[2]) == 0) {
        !           184:                SHR32;
        !           185:        } else {
        !           186:                bit = 1;
        !           187:                do {
        !           188:                        STEP;
        !           189:                } while (bit != 0);
        !           190:        }
        !           191:        if ((m = y->fp_mant[1]) == 0) {
        !           192:                SHR32;
        !           193:        } else {
        !           194:                bit = 1;
        !           195:                do {
        !           196:                        STEP;
        !           197:                } while (bit != 0);
        !           198:        }
        !           199:        m = y->fp_mant[0];              /* definitely != 0 */
        !           200:        bit = 1;
        !           201:        do {
        !           202:                STEP;
        !           203:        } while (bit <= m);
        !           204:
        !           205:        /*
        !           206:         * Done with mantissa calculation.  Get exponent and handle
        !           207:         * 11.111...1 case, then put result in place.  We reuse x since
        !           208:         * it already has the right class (FP_NUM).
        !           209:         */
        !           210:        m = x->fp_exp + y->fp_exp;
        !           211:        if (a0 >= FP_2) {
        !           212:                SHR1;
        !           213:                m++;
        !           214:        }
        !           215:        x->fp_sign ^= y->fp_sign;
        !           216:        x->fp_exp = m;
        !           217:        x->fp_sticky = sticky;
        !           218:        x->fp_mant[3] = a3;
        !           219:        x->fp_mant[2] = a2;
        !           220:        x->fp_mant[1] = a1;
        !           221:        x->fp_mant[0] = a0;
        !           222:        return (x);
        !           223: }
CVSweb