sys/lib/libkern/softfloat-macros.h - annotate

Return to softfloat-macros.h CVS log
Up to [local] / sys / lib / libkern
Annotation of sys/lib/libkern/softfloat-macros.h, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: softfloat-macros.h,v 1.1 2002/04/28 20:55:14 pvalchev Exp $   */
                      2: /*     $NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $        */
                      3:
                      4: /*
                      5: ===============================================================================
                      6:
                      7: This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
                      8: Arithmetic Package, Release 2a.
                      9:
                     10: Written by John R. Hauser.  This work was made possible in part by the
                     11: International Computer Science Institute, located at Suite 600, 1947 Center
                     12: Street, Berkeley, California 94704.  Funding was partially provided by the
                     13: National Science Foundation under grant MIP-9311980.  The original version
                     14: of this code was written as part of a project to build a fixed-point vector
                     15: processor in collaboration with the University of California at Berkeley,
                     16: overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
                     17: is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
                     18: arithmetic/SoftFloat.html'.
                     19:
                     20: THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable
                     21: effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
                     22: WILL AT TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS
                     23: RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
                     24: RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
                     25: THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
                     26: (possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
                     27: COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
                     28: ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
                     29:
                     30: Derivative works are acceptable, even for commercial purposes, so long as
                     31: (1) they include prominent notice that the work is derivative, and (2) they
                     32: include prominent notice akin to these four paragraphs for those parts of
                     33: this code that are retained.
                     34:
                     35: ===============================================================================
                     36: */
                     37:
                     38: #ifndef NO_IEEE
                     39:
                     40: /*
                     41: -------------------------------------------------------------------------------
                     42: Shifts `a' right by the number of bits given in `count'.  If any nonzero
                     43: bits are shifted off, they are ``jammed'' into the least significant bit of
                     44: the result by setting the least significant bit to 1.  The value of `count'
                     45: can be arbitrarily large; in particular, if `count' is greater than 32, the
                     46: result will be either 0 or 1, depending on whether `a' is zero or nonzero.
                     47: The result is stored in the location pointed to by `zPtr'.
                     48: -------------------------------------------------------------------------------
                     49: */
                     50: INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
                     51: {
                     52:     bits32 z;
                     53:
                     54:     if ( count == 0 ) {
                     55:         z = a;
                     56:     }
                     57:     else if ( count < 32 ) {
                     58:         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
                     59:     }
                     60:     else {
                     61:         z = ( a != 0 );
                     62:     }
                     63:     *zPtr = z;
                     64:
                     65: }
                     66:
                     67: /*
                     68: -------------------------------------------------------------------------------
                     69: Shifts `a' right by the number of bits given in `count'.  If any nonzero
                     70: bits are shifted off, they are ``jammed'' into the least significant bit of
                     71: the result by setting the least significant bit to 1.  The value of `count'
                     72: can be arbitrarily large; in particular, if `count' is greater than 64, the
                     73: result will be either 0 or 1, depending on whether `a' is zero or nonzero.
                     74: The result is stored in the location pointed to by `zPtr'.
                     75: -------------------------------------------------------------------------------
                     76: */
                     77: INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
                     78: {
                     79:     bits64 z;
                     80:
                     81:     if ( count == 0 ) {
                     82:         z = a;
                     83:     }
                     84:     else if ( count < 64 ) {
                     85:         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
                     86:     }
                     87:     else {
                     88:         z = ( a != 0 );
                     89:     }
                     90:     *zPtr = z;
                     91:
                     92: }
                     93:
                     94: /*
                     95: -------------------------------------------------------------------------------
                     96: Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
                     97: _plus_ the number of bits given in `count'.  The shifted result is at most
                     98: 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
                     99: bits shifted off form a second 64-bit result as follows:  The _last_ bit
                    100: shifted off is the most-significant bit of the extra result, and the other
                    101: 63 bits of the extra result are all zero if and only if _all_but_the_last_
                    102: bits shifted off were all zero.  This extra result is stored in the location
                    103: pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
                    104:     (This routine makes more sense if `a0' and `a1' are considered to form a
                    105: fixed-point value with binary point between `a0' and `a1'.  This fixed-point
                    106: value is shifted right by the number of bits given in `count', and the
                    107: integer part of the result is returned at the location pointed to by
                    108: `z0Ptr'.  The fractional part of the result may be slightly corrupted as
                    109: described above, and is returned at the location pointed to by `z1Ptr'.)
                    110: -------------------------------------------------------------------------------
                    111: */
                    112: INLINE void
                    113:  shift64ExtraRightJamming(
                    114:      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
                    115: {
                    116:     bits64 z0, z1;
                    117:     int8 negCount = ( - count ) & 63;
                    118:
                    119:     if ( count == 0 ) {
                    120:         z1 = a1;
                    121:         z0 = a0;
                    122:     }
                    123:     else if ( count < 64 ) {
                    124:         z1 = ( a0<<negCount ) | ( a1 != 0 );
                    125:         z0 = a0>>count;
                    126:     }
                    127:     else {
                    128:         if ( count == 64 ) {
                    129:             z1 = a0 | ( a1 != 0 );
                    130:         }
                    131:         else {
                    132:             z1 = ( ( a0 | a1 ) != 0 );
                    133:         }
                    134:         z0 = 0;
                    135:     }
                    136:     *z1Ptr = z1;
                    137:     *z0Ptr = z0;
                    138:
                    139: }
                    140:
                    141: /*
                    142: -------------------------------------------------------------------------------
                    143: Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
                    144: number of bits given in `count'.  Any bits shifted off are lost.  The value
                    145: of `count' can be arbitrarily large; in particular, if `count' is greater
                    146: than 128, the result will be 0.  The result is broken into two 64-bit pieces
                    147: which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
                    148: -------------------------------------------------------------------------------
                    149: */
                    150: INLINE void
                    151:  shift128Right(
                    152:      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
                    153: {
                    154:     bits64 z0, z1;
                    155:     int8 negCount = ( - count ) & 63;
                    156:
                    157:     if ( count == 0 ) {
                    158:         z1 = a1;
                    159:         z0 = a0;
                    160:     }
                    161:     else if ( count < 64 ) {
                    162:         z1 = ( a0<<negCount ) | ( a1>>count );
                    163:         z0 = a0>>count;
                    164:     }
                    165:     else {
                    166:         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
                    167:         z0 = 0;
                    168:     }
                    169:     *z1Ptr = z1;
                    170:     *z0Ptr = z0;
                    171:
                    172: }
                    173:
                    174: /*
                    175: -------------------------------------------------------------------------------
                    176: Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
                    177: number of bits given in `count'.  If any nonzero bits are shifted off, they
                    178: are ``jammed'' into the least significant bit of the result by setting the
                    179: least significant bit to 1.  The value of `count' can be arbitrarily large;
                    180: in particular, if `count' is greater than 128, the result will be either
                    181: 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
                    182: nonzero.  The result is broken into two 64-bit pieces which are stored at
                    183: the locations pointed to by `z0Ptr' and `z1Ptr'.
                    184: -------------------------------------------------------------------------------
                    185: */
                    186: INLINE void
                    187:  shift128RightJamming(
                    188:      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
                    189: {
                    190:     bits64 z0, z1;
                    191:     int8 negCount = ( - count ) & 63;
                    192:
                    193:     if ( count == 0 ) {
                    194:         z1 = a1;
                    195:         z0 = a0;
                    196:     }
                    197:     else if ( count < 64 ) {
                    198:         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
                    199:         z0 = a0>>count;
                    200:     }
                    201:     else {
                    202:         if ( count == 64 ) {
                    203:             z1 = a0 | ( a1 != 0 );
                    204:         }
                    205:         else if ( count < 128 ) {
                    206:             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
                    207:         }
                    208:         else {
                    209:             z1 = ( ( a0 | a1 ) != 0 );
                    210:         }
                    211:         z0 = 0;
                    212:     }
                    213:     *z1Ptr = z1;
                    214:     *z0Ptr = z0;
                    215:
                    216: }
                    217:
                    218: /*
                    219: -------------------------------------------------------------------------------
                    220: Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
                    221: by 64 _plus_ the number of bits given in `count'.  The shifted result is
                    222: at most 128 nonzero bits; these are broken into two 64-bit pieces which are
                    223: stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
                    224: off form a third 64-bit result as follows:  The _last_ bit shifted off is
                    225: the most-significant bit of the extra result, and the other 63 bits of the
                    226: extra result are all zero if and only if _all_but_the_last_ bits shifted off
                    227: were all zero.  This extra result is stored in the location pointed to by
                    228: `z2Ptr'.  The value of `count' can be arbitrarily large.
                    229:     (This routine makes more sense if `a0', `a1', and `a2' are considered
                    230: to form a fixed-point value with binary point between `a1' and `a2'.  This
                    231: fixed-point value is shifted right by the number of bits given in `count',
                    232: and the integer part of the result is returned at the locations pointed to
                    233: by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
                    234: corrupted as described above, and is returned at the location pointed to by
                    235: `z2Ptr'.)
                    236: -------------------------------------------------------------------------------
                    237: */
                    238: INLINE void
                    239:  shift128ExtraRightJamming(
                    240:      bits64 a0,
                    241:      bits64 a1,
                    242:      bits64 a2,
                    243:      int16 count,
                    244:      bits64 *z0Ptr,
                    245:      bits64 *z1Ptr,
                    246:      bits64 *z2Ptr
                    247:  )
                    248: {
                    249:     bits64 z0, z1, z2;
                    250:     int8 negCount = ( - count ) & 63;
                    251:
                    252:     if ( count == 0 ) {
                    253:         z2 = a2;
                    254:         z1 = a1;
                    255:         z0 = a0;
                    256:     }
                    257:     else {
                    258:         if ( count < 64 ) {
                    259:             z2 = a1<<negCount;
                    260:             z1 = ( a0<<negCount ) | ( a1>>count );
                    261:             z0 = a0>>count;
                    262:         }
                    263:         else {
                    264:             if ( count == 64 ) {
                    265:                 z2 = a1;
                    266:                 z1 = a0;
                    267:             }
                    268:             else {
                    269:                 a2 |= a1;
                    270:                 if ( count < 128 ) {
                    271:                     z2 = a0<<negCount;
                    272:                     z1 = a0>>( count & 63 );
                    273:                 }
                    274:                 else {
                    275:                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
                    276:                     z1 = 0;
                    277:                 }
                    278:             }
                    279:             z0 = 0;
                    280:         }
                    281:         z2 |= ( a2 != 0 );
                    282:     }
                    283:     *z2Ptr = z2;
                    284:     *z1Ptr = z1;
                    285:     *z0Ptr = z0;
                    286:
                    287: }
                    288:
                    289: /*
                    290: -------------------------------------------------------------------------------
                    291: Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
                    292: number of bits given in `count'.  Any bits shifted off are lost.  The value
                    293: of `count' must be less than 64.  The result is broken into two 64-bit
                    294: pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
                    295: -------------------------------------------------------------------------------
                    296: */
                    297: INLINE void
                    298:  shortShift128Left(
                    299:      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
                    300: {
                    301:
                    302:     *z1Ptr = a1<<count;
                    303:     *z0Ptr =
                    304:         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
                    305:
                    306: }
                    307:
                    308: /*
                    309: -------------------------------------------------------------------------------
                    310: Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
                    311: by the number of bits given in `count'.  Any bits shifted off are lost.
                    312: The value of `count' must be less than 64.  The result is broken into three
                    313: 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
                    314: `z1Ptr', and `z2Ptr'.
                    315: -------------------------------------------------------------------------------
                    316: */
                    317: INLINE void
                    318:  shortShift192Left(
                    319:      bits64 a0,
                    320:      bits64 a1,
                    321:      bits64 a2,
                    322:      int16 count,
                    323:      bits64 *z0Ptr,
                    324:      bits64 *z1Ptr,
                    325:      bits64 *z2Ptr
                    326:  )
                    327: {
                    328:     bits64 z0, z1, z2;
                    329:     int8 negCount;
                    330:
                    331:     z2 = a2<<count;
                    332:     z1 = a1<<count;
                    333:     z0 = a0<<count;
                    334:     if ( 0 < count ) {
                    335:         negCount = ( ( - count ) & 63 );
                    336:         z1 |= a2>>negCount;
                    337:         z0 |= a1>>negCount;
                    338:     }
                    339:     *z2Ptr = z2;
                    340:     *z1Ptr = z1;
                    341:     *z0Ptr = z0;
                    342:
                    343: }
                    344:
                    345: /*
                    346: -------------------------------------------------------------------------------
                    347: Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
                    348: value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
                    349: any carry out is lost.  The result is broken into two 64-bit pieces which
                    350: are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
                    351: -------------------------------------------------------------------------------
                    352: */
                    353: INLINE void
                    354:  add128(
                    355:      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
                    356: {
                    357:     bits64 z1;
                    358:
                    359:     z1 = a1 + b1;
                    360:     *z1Ptr = z1;
                    361:     *z0Ptr = a0 + b0 + ( z1 < a1 );
                    362:
                    363: }
                    364:
                    365: /*
                    366: -------------------------------------------------------------------------------
                    367: Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
                    368: 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
                    369: modulo 2^192, so any carry out is lost.  The result is broken into three
                    370: 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
                    371: `z1Ptr', and `z2Ptr'.
                    372: -------------------------------------------------------------------------------
                    373: */
                    374: INLINE void
                    375:  add192(
                    376:      bits64 a0,
                    377:      bits64 a1,
                    378:      bits64 a2,
                    379:      bits64 b0,
                    380:      bits64 b1,
                    381:      bits64 b2,
                    382:      bits64 *z0Ptr,
                    383:      bits64 *z1Ptr,
                    384:      bits64 *z2Ptr
                    385:  )
                    386: {
                    387:     bits64 z0, z1, z2;
                    388:     int8 carry0, carry1;
                    389:
                    390:     z2 = a2 + b2;
                    391:     carry1 = ( z2 < a2 );
                    392:     z1 = a1 + b1;
                    393:     carry0 = ( z1 < a1 );
                    394:     z0 = a0 + b0;
                    395:     z1 += carry1;
                    396:     z0 += ( z1 < carry1 );
                    397:     z0 += carry0;
                    398:     *z2Ptr = z2;
                    399:     *z1Ptr = z1;
                    400:     *z0Ptr = z0;
                    401:
                    402: }
                    403:
                    404: /*
                    405: -------------------------------------------------------------------------------
                    406: Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
                    407: 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
                    408: 2^128, so any borrow out (carry out) is lost.  The result is broken into two
                    409: 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
                    410: `z1Ptr'.
                    411: -------------------------------------------------------------------------------
                    412: */
                    413: INLINE void
                    414:  sub128(
                    415:      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
                    416: {
                    417:
                    418:     *z1Ptr = a1 - b1;
                    419:     *z0Ptr = a0 - b0 - ( a1 < b1 );
                    420:
                    421: }
                    422:
                    423: /*
                    424: -------------------------------------------------------------------------------
                    425: Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
                    426: from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
                    427: Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
                    428: result is broken into three 64-bit pieces which are stored at the locations
                    429: pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
                    430: -------------------------------------------------------------------------------
                    431: */
                    432: INLINE void
                    433:  sub192(
                    434:      bits64 a0,
                    435:      bits64 a1,
                    436:      bits64 a2,
                    437:      bits64 b0,
                    438:      bits64 b1,
                    439:      bits64 b2,
                    440:      bits64 *z0Ptr,
                    441:      bits64 *z1Ptr,
                    442:      bits64 *z2Ptr
                    443:  )
                    444: {
                    445:     bits64 z0, z1, z2;
                    446:     int8 borrow0, borrow1;
                    447:
                    448:     z2 = a2 - b2;
                    449:     borrow1 = ( a2 < b2 );
                    450:     z1 = a1 - b1;
                    451:     borrow0 = ( a1 < b1 );
                    452:     z0 = a0 - b0;
                    453:     z0 -= ( z1 < borrow1 );
                    454:     z1 -= borrow1;
                    455:     z0 -= borrow0;
                    456:     *z2Ptr = z2;
                    457:     *z1Ptr = z1;
                    458:     *z0Ptr = z0;
                    459:
                    460: }
                    461:
                    462: /*
                    463: -------------------------------------------------------------------------------
                    464: Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
                    465: into two 64-bit pieces which are stored at the locations pointed to by
                    466: `z0Ptr' and `z1Ptr'.
                    467: -------------------------------------------------------------------------------
                    468: */
                    469: INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
                    470: {
                    471:     bits32 aHigh, aLow, bHigh, bLow;
                    472:     bits64 z0, zMiddleA, zMiddleB, z1;
                    473:
                    474:     aLow = a;
                    475:     aHigh = a>>32;
                    476:     bLow = b;
                    477:     bHigh = b>>32;
                    478:     z1 = ( (bits64) aLow ) * bLow;
                    479:     zMiddleA = ( (bits64) aLow ) * bHigh;
                    480:     zMiddleB = ( (bits64) aHigh ) * bLow;
                    481:     z0 = ( (bits64) aHigh ) * bHigh;
                    482:     zMiddleA += zMiddleB;
                    483:     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
                    484:     zMiddleA <<= 32;
                    485:     z1 += zMiddleA;
                    486:     z0 += ( z1 < zMiddleA );
                    487:     *z1Ptr = z1;
                    488:     *z0Ptr = z0;
                    489:
                    490: }
                    491:
                    492: /*
                    493: -------------------------------------------------------------------------------
                    494: Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
                    495: `b' to obtain a 192-bit product.  The product is broken into three 64-bit
                    496: pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
                    497: `z2Ptr'.
                    498: -------------------------------------------------------------------------------
                    499: */
                    500: INLINE void
                    501:  mul128By64To192(
                    502:      bits64 a0,
                    503:      bits64 a1,
                    504:      bits64 b,
                    505:      bits64 *z0Ptr,
                    506:      bits64 *z1Ptr,
                    507:      bits64 *z2Ptr
                    508:  )
                    509: {
                    510:     bits64 z0, z1, z2, more1;
                    511:
                    512:     mul64To128( a1, b, &z1, &z2 );
                    513:     mul64To128( a0, b, &z0, &more1 );
                    514:     add128( z0, more1, 0, z1, &z0, &z1 );
                    515:     *z2Ptr = z2;
                    516:     *z1Ptr = z1;
                    517:     *z0Ptr = z0;
                    518:
                    519: }
                    520:
                    521: /*
                    522: -------------------------------------------------------------------------------
                    523: Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
                    524: 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
                    525: product.  The product is broken into four 64-bit pieces which are stored at
                    526: the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
                    527: -------------------------------------------------------------------------------
                    528: */
                    529: INLINE void
                    530:  mul128To256(
                    531:      bits64 a0,
                    532:      bits64 a1,
                    533:      bits64 b0,
                    534:      bits64 b1,
                    535:      bits64 *z0Ptr,
                    536:      bits64 *z1Ptr,
                    537:      bits64 *z2Ptr,
                    538:      bits64 *z3Ptr
                    539:  )
                    540: {
                    541:     bits64 z0, z1, z2, z3;
                    542:     bits64 more1, more2;
                    543:
                    544:     mul64To128( a1, b1, &z2, &z3 );
                    545:     mul64To128( a1, b0, &z1, &more2 );
                    546:     add128( z1, more2, 0, z2, &z1, &z2 );
                    547:     mul64To128( a0, b0, &z0, &more1 );
                    548:     add128( z0, more1, 0, z1, &z0, &z1 );
                    549:     mul64To128( a0, b1, &more1, &more2 );
                    550:     add128( more1, more2, 0, z2, &more1, &z2 );
                    551:     add128( z0, z1, 0, more1, &z0, &z1 );
                    552:     *z3Ptr = z3;
                    553:     *z2Ptr = z2;
                    554:     *z1Ptr = z1;
                    555:     *z0Ptr = z0;
                    556:
                    557: }
                    558:
                    559: /*
                    560: -------------------------------------------------------------------------------
                    561: Returns an approximation to the 64-bit integer quotient obtained by dividing
                    562: `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
                    563: divisor `b' must be at least 2^63.  If q is the exact quotient truncated
                    564: toward zero, the approximation returned lies between q and q + 2 inclusive.
                    565: If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
                    566: unsigned integer is returned.
                    567: -------------------------------------------------------------------------------
                    568: */
                    569: static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
                    570: {
                    571:     bits64 b0, b1;
                    572:     bits64 rem0, rem1, term0, term1;
                    573:     bits64 z;
                    574:
                    575:     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
                    576:     b0 = b>>32;
                    577:     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
                    578:     mul64To128( b, z, &term0, &term1 );
                    579:     sub128( a0, a1, term0, term1, &rem0, &rem1 );
                    580:     while ( ( (sbits64) rem0 ) < 0 ) {
                    581:         z -= LIT64( 0x100000000 );
                    582:         b1 = b<<32;
                    583:         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
                    584:     }
                    585:     rem0 = ( rem0<<32 ) | ( rem1>>32 );
                    586:     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
                    587:     return z;
                    588:
                    589: }
                    590:
                    591: #ifndef SOFTFLOAT_FOR_GCC /* Not used */
                    592: /*
                    593: -------------------------------------------------------------------------------
                    594: Returns an approximation to the square root of the 32-bit significand given
                    595: by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
                    596: `aExp' (the least significant bit) is 1, the integer returned approximates
                    597: 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
                    598: is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
                    599: case, the approximation returned lies strictly within +/-2 of the exact
                    600: value.
                    601: -------------------------------------------------------------------------------
                    602: */
                    603: static bits32 estimateSqrt32( int16 aExp, bits32 a )
                    604: {
                    605:     static const bits16 sqrtOddAdjustments[] = {
                    606:         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
                    607:         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
                    608:     };
                    609:     static const bits16 sqrtEvenAdjustments[] = {
                    610:         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
                    611:         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
                    612:     };
                    613:     int8 index;
                    614:     bits32 z;
                    615:
                    616:     index = ( a>>27 ) & 15;
                    617:     if ( aExp & 1 ) {
                    618:         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
                    619:         z = ( ( a / z )<<14 ) + ( z<<15 );
                    620:         a >>= 1;
                    621:     }
                    622:     else {
                    623:         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
                    624:         z = a / z + z;
                    625:         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
                    626:         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
                    627:     }
                    628:     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
                    629:
                    630: }
                    631: #endif
                    632:
                    633: /*
                    634: -------------------------------------------------------------------------------
                    635: Returns the number of leading 0 bits before the most-significant 1 bit of
                    636: `a'.  If `a' is zero, 32 is returned.
                    637: -------------------------------------------------------------------------------
                    638: */
                    639: static int8 countLeadingZeros32( bits32 a )
                    640: {
                    641:     static const int8 countLeadingZerosHigh[] = {
                    642:         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
                    643:         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
                    644:         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                    645:         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                    646:         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    647:         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    648:         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    649:         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    650:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    651:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    652:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    653:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    654:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    655:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    656:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    657:         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                    658:     };
                    659:     int8 shiftCount;
                    660:
                    661:     shiftCount = 0;
                    662:     if ( a < 0x10000 ) {
                    663:         shiftCount += 16;
                    664:         a <<= 16;
                    665:     }
                    666:     if ( a < 0x1000000 ) {
                    667:         shiftCount += 8;
                    668:         a <<= 8;
                    669:     }
                    670:     shiftCount += countLeadingZerosHigh[ a>>24 ];
                    671:     return shiftCount;
                    672:
                    673: }
                    674:
                    675: /*
                    676: -------------------------------------------------------------------------------
                    677: Returns the number of leading 0 bits before the most-significant 1 bit of
                    678: `a'.  If `a' is zero, 64 is returned.
                    679: -------------------------------------------------------------------------------
                    680: */
                    681: static int8 countLeadingZeros64( bits64 a )
                    682: {
                    683:     int8 shiftCount;
                    684:
                    685:     shiftCount = 0;
                    686:     if ( a < ( (bits64) 1 )<<32 ) {
                    687:         shiftCount += 32;
                    688:     }
                    689:     else {
                    690:         a >>= 32;
                    691:     }
                    692:     shiftCount += countLeadingZeros32( a );
                    693:     return shiftCount;
                    694:
                    695: }
                    696:
                    697: /*
                    698: -------------------------------------------------------------------------------
                    699: Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
                    700: is equal to the 128-bit value formed by concatenating `b0' and `b1'.
                    701: Otherwise, returns 0.
                    702: -------------------------------------------------------------------------------
                    703: */
                    704: INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
                    705: {
                    706:
                    707:     return ( a0 == b0 ) && ( a1 == b1 );
                    708:
                    709: }
                    710:
                    711: /*
                    712: -------------------------------------------------------------------------------
                    713: Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
                    714: than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
                    715: Otherwise, returns 0.
                    716: -------------------------------------------------------------------------------
                    717: */
                    718: INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
                    719: {
                    720:
                    721:     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
                    722:
                    723: }
                    724:
                    725: /*
                    726: -------------------------------------------------------------------------------
                    727: Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
                    728: than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
                    729: returns 0.
                    730: -------------------------------------------------------------------------------
                    731: */
                    732: INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
                    733: {
                    734:
                    735:     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
                    736:
                    737: }
                    738:
                    739: /*
                    740: -------------------------------------------------------------------------------
                    741: Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
                    742: not equal to the 128-bit value formed by concatenating `b0' and `b1'.
                    743: Otherwise, returns 0.
                    744: -------------------------------------------------------------------------------
                    745: */
                    746: INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
                    747: {
                    748:
                    749:     return ( a0 != b0 ) || ( a1 != b1 );
                    750:
                    751: }
                    752:
                    753: #endif /* !NO_IEEE */
CVSweb