sys/lib/libkern/arch/sparc/umul.S - annotate

Return to umul.S CVS log
Up to [local] / sys / lib / libkern / arch / sparc
Annotation of sys/lib/libkern/arch/sparc/umul.S, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: umul.S,v 1.5 2003/06/02 23:28:09 millert Exp $        */
        !             2: /*     $NetBSD: umul.S,v 1.2 1994/10/26 06:40:10 cgd Exp $     */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1992, 1993
        !             6:  *     The Regents of the University of California.  All rights reserved.
        !             7:  *
        !             8:  * This software was developed by the Computer Systems Engineering group
        !             9:  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
        !            10:  * contributed to Berkeley.
        !            11:  *
        !            12:  * Redistribution and use in source and binary forms, with or without
        !            13:  * modification, are permitted provided that the following conditions
        !            14:  * are met:
        !            15:  * 1. Redistributions of source code must retain the above copyright
        !            16:  *    notice, this list of conditions and the following disclaimer.
        !            17:  * 2. Redistributions in binary form must reproduce the above copyright
        !            18:  *    notice, this list of conditions and the following disclaimer in the
        !            19:  *    documentation and/or other materials provided with the distribution.
        !            20:  * 3. Neither the name of the University nor the names of its contributors
        !            21:  *    may be used to endorse or promote products derived from this software
        !            22:  *    without specific prior written permission.
        !            23:  *
        !            24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            34:  * SUCH DAMAGE.
        !            35:  *
        !            36:  * Header: umul.s,v 1.4 92/06/25 13:24:05 torek Exp
        !            37:  */
        !            38:
        !            39: #if defined(LIBC_SCCS) && !defined(lint)
        !            40: #ifdef notdef
        !            41:        .asciz "@(#)umul.s      8.1 (Berkeley) 6/4/93"
        !            42: #endif
        !            43:        .asciz "$OpenBSD: umul.S,v 1.5 2003/06/02 23:28:09 millert Exp $"
        !            44: #endif /* LIBC_SCCS and not lint */
        !            45:
        !            46: /*
        !            47:  * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
        !            48:  * upper 32 bits of the 64-bit product).
        !            49:  *
        !            50:  * This code optimizes short multiplies (both operands fit in 12
        !            51:  * bits).  Short multiplies require 25 instruction cycles, and long
        !            52:  * ones require 45 instruction cycles.
        !            53:  *
        !            54:  * On return, overflow has occurred (%o1 is not zero) if and only if
        !            55:  * the Z condition code is clear, allowing, e.g., the following:
        !            56:  *
        !            57:  *     call    .umul
        !            58:  *     nop
        !            59:  *     bnz     overflow        (or tnz)
        !            60:  */
        !            61:
        !            62: #include "DEFS.h"
        !            63: FUNC(.umul)
        !            64:        or      %o0, %o1, %o4   ! %o4 = bits set in either arg
        !            65:        mov     %o0, %y         ! multiplier -> Y
        !            66:        andncc  %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
        !            67:        be      Lmul_shortway   ! if zero, can do it the short way
        !            68:        andcc   %g0, %g0, %o4   ! delay slot: zero the partial product and clear N and V
        !            69:
        !            70:        /*
        !            71:         * Long multiply.  32 steps, followed by a final shift step.
        !            72:         */
        !            73:        mulscc  %o4, %o1, %o4   ! 1  (%o4 = partial product, %o1 = multiplicand)
        !            74:        mulscc  %o4, %o1, %o4   ! 2
        !            75:        mulscc  %o4, %o1, %o4   ! 3
        !            76:        mulscc  %o4, %o1, %o4   ! 4
        !            77:        mulscc  %o4, %o1, %o4   ! 5
        !            78:        mulscc  %o4, %o1, %o4   ! 6
        !            79:        mulscc  %o4, %o1, %o4   ! 7
        !            80:        mulscc  %o4, %o1, %o4   ! 8
        !            81:        mulscc  %o4, %o1, %o4   ! 9
        !            82:        mulscc  %o4, %o1, %o4   ! 10
        !            83:        mulscc  %o4, %o1, %o4   ! 11
        !            84:        mulscc  %o4, %o1, %o4   ! 12
        !            85:        mulscc  %o4, %o1, %o4   ! 13
        !            86:        mulscc  %o4, %o1, %o4   ! 14
        !            87:        mulscc  %o4, %o1, %o4   ! 15
        !            88:        mulscc  %o4, %o1, %o4   ! 16
        !            89:        mulscc  %o4, %o1, %o4   ! 17
        !            90:        mulscc  %o4, %o1, %o4   ! 18
        !            91:        mulscc  %o4, %o1, %o4   ! 19
        !            92:        mulscc  %o4, %o1, %o4   ! 20
        !            93:        mulscc  %o4, %o1, %o4   ! 21
        !            94:        mulscc  %o4, %o1, %o4   ! 22
        !            95:        mulscc  %o4, %o1, %o4   ! 23
        !            96:        mulscc  %o4, %o1, %o4   ! 24
        !            97:        mulscc  %o4, %o1, %o4   ! 25
        !            98:        mulscc  %o4, %o1, %o4   ! 26
        !            99:        mulscc  %o4, %o1, %o4   ! 27
        !           100:        mulscc  %o4, %o1, %o4   ! 28
        !           101:        mulscc  %o4, %o1, %o4   ! 29
        !           102:        mulscc  %o4, %o1, %o4   ! 30
        !           103:        mulscc  %o4, %o1, %o4   ! 31
        !           104:        mulscc  %o4, %o1, %o4   ! 32
        !           105:        mulscc  %o4, %g0, %o4   ! final shift
        !           106:
        !           107:
        !           108:        /*
        !           109:         * Normally, with the shift-and-add approach, if both numbers are
        !           110:         * positive you get the correct result.  With 32-bit two's-complement
        !           111:         * numbers, -x is represented as
        !           112:         *
        !           113:         *                x                 32
        !           114:         *      ( 2  -  ------ ) mod 2  *  2
        !           115:         *                 32
        !           116:         *                2
        !           117:         *
        !           118:         * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
        !           119:         * we can treat this as if the radix point were just to the left
        !           120:         * of the sign bit (multiply by 2^32), and get
        !           121:         *
        !           122:         *      -x  =  (2 - x) mod 2
        !           123:         *
        !           124:         * Then, ignoring the `mod 2's for convenience:
        !           125:         *
        !           126:         *   x *  y     = xy
        !           127:         *  -x *  y     = 2y - xy
        !           128:         *   x * -y     = 2x - xy
        !           129:         *  -x * -y     = 4 - 2x - 2y + xy
        !           130:         *
        !           131:         * For signed multiplies, we subtract (x << 32) from the partial
        !           132:         * product to fix this problem for negative multipliers (see mul.s).
        !           133:         * Because of the way the shift into the partial product is calculated
        !           134:         * (N xor V), this term is automatically removed for the multiplicand,
        !           135:         * so we don't have to adjust.
        !           136:         *
        !           137:         * But for unsigned multiplies, the high order bit wasn't a sign bit,
        !           138:         * and the correction is wrong.  So for unsigned multiplies where the
        !           139:         * high order bit is one, we end up with xy - (y << 32).  To fix it
        !           140:         * we add y << 32.
        !           141:         */
        !           142:        tst     %o1             ! set N from the high order bit of %o1
        !           143:        bl,a    1f              ! if %o1 < 0 (high order bit = 1),
        !           144:        add     %o4, %o0, %o4   ! %o4 += %o0 (add y to upper half)
        !           145: 1:     rd      %y, %o0         ! get lower half of product
        !           146:        retl                    ! return; the addcc below is in the delay slot
        !           147:        addcc   %o4, %g0, %o1   ! put upper half in place and set Z for %o1==0
        !           148:
        !           149: Lmul_shortway:
        !           150:        /*
        !           151:         * Short multiply.  12 steps, followed by a final shift step.
        !           152:         * The resulting bits are off by 12 and (32-12) = 20 bit positions,
        !           153:         * but neither argument can be negative here (unlike %o1 above),
        !           154:         * and overflow is impossible (the answer is at most 24 bits long).
        !           155:         */
        !           156:        mulscc  %o4, %o1, %o4   ! 1  (on entry %y = %o0 and %o4 = 0, set up in .umul)
        !           157:        mulscc  %o4, %o1, %o4   ! 2
        !           158:        mulscc  %o4, %o1, %o4   ! 3
        !           159:        mulscc  %o4, %o1, %o4   ! 4
        !           160:        mulscc  %o4, %o1, %o4   ! 5
        !           161:        mulscc  %o4, %o1, %o4   ! 6
        !           162:        mulscc  %o4, %o1, %o4   ! 7
        !           163:        mulscc  %o4, %o1, %o4   ! 8
        !           164:        mulscc  %o4, %o1, %o4   ! 9
        !           165:        mulscc  %o4, %o1, %o4   ! 10
        !           166:        mulscc  %o4, %o1, %o4   ! 11
        !           167:        mulscc  %o4, %o1, %o4   ! 12
        !           168:        mulscc  %o4, %g0, %o4   ! final shift
        !           169:
        !           170:        /*
        !           171:         * %o4 has 20 of the bits that should be in the result; %y has
        !           172:         * the bottom 12 (as %y's top 12).  That is:
        !           173:         *
        !           174:         *        %o4               %y
        !           175:         * +----------------+----------------+
        !           176:         * | -12- |   -20-  | -12- |   -20-  |
        !           177:         * +------(---------+------)---------+
        !           178:         *         -----result-----
        !           179:         *
        !           180:         * The 12 bits of %o4 left of the `result' area are all zero;
        !           181:         * in fact, all top 20 bits of %o4 are zero.
        !           182:         */
        !           183:
        !           184:        rd      %y, %o5         ! %o5 = %y (bottom 12 result bits, in its top 12)
        !           185:        sll     %o4, 12, %o0    ! shift middle bits left 12
        !           186:        srl     %o5, 20, %o5    ! shift low bits right 20
        !           187:        or      %o5, %o0, %o0   ! %o0 = merged 32-bit product
        !           188:        retl                    ! return; the addcc below is in the delay slot
        !           189:        addcc   %g0, %g0, %o1   ! %o1 = zero, and set Z
CVSweb