diff -Nura klibc-1.1.16/include/arch/sparc/klibc/archsignal.h klibc-1.1.16.sparc/include/arch/sparc/klibc/archsignal.h --- klibc-1.1.16/include/arch/sparc/klibc/archsignal.h 2006-01-06 03:11:43.000000000 -0300 +++ klibc-1.1.16.sparc/include/arch/sparc/klibc/archsignal.h 2006-01-25 14:49:01.000000000 -0300 @@ -10,6 +10,7 @@ /* Hidden definitions */ +#ifndef _KLIBC_SIGACTION struct __new_sigaction { __sighandler_t sa_handler; unsigned long sa_flags; @@ -34,5 +35,6 @@ int ss_flags; size_t ss_size; } stack_t; +#endif #endif diff -Nura klibc-1.1.16/klibc/arch/sparc/divrem.m4 klibc-1.1.16.sparc/klibc/arch/sparc/divrem.m4 --- klibc-1.1.16/klibc/arch/sparc/divrem.m4 2006-01-06 03:11:43.000000000 -0300 +++ klibc-1.1.16.sparc/klibc/arch/sparc/divrem.m4 1969-12-31 21:00:00.000000000 -0300 @@ -1,276 +0,0 @@ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp - * $NetBSD: divrem.m4,v 1.4 1997/10/09 10:07:54 lukem Exp $ - */ - -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -#if defined(LIBC_SCCS) && !defined(lint) - .asciz "@(#)divrem.m4 8.1 (Berkeley) 6/4/93" -#endif /* LIBC_SCCS and not lint */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * NAME name of function to generate - * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 - * S S=true => signed; S=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken) - * TOPBITS number of bits in the top `decade' of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - -define(N, `4') -define(TWOSUPN, `16') -define(WORDSIZE, `32') -define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N))) - -define(dividend, `%o0') -define(divisor, `%o1') -define(Q, `%o2') -define(R, `%o3') -define(ITER, `%o4') -define(V, `%o5') - -/* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */ -define(T, `%g1') -define(SC, `%g7') -ifelse(S, `true', `define(SIGN, `%g6')') - -/* - * This is the recursive definition for developing quotient digits. - * - * Parameters: - * $1 the current depth, 1 <= $1 <= N - * $2 the current accumulation of quotient bits - * N max depth - * - * We add a new bit to $2 and either recurse or insert the bits in - * the quotient. R, Q, and V are inputs and outputs as defined above; - * the condition codes are expected to reflect the input R, and are - * modified to reflect the output R. - */ -define(DEVELOP_QUOTIENT_BITS, -` ! depth $1, accumulated bits $2 - bl L.$1.eval(TWOSUPN+$2) - srl V,1,V - ! remainder is positive - subcc R,V,R - ifelse($1, N, - ` b 9f - add Q, ($2*2+1), Q - ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') -L.$1.eval(TWOSUPN+$2): - ! remainder is negative - addcc R,V,R - ifelse($1, N, - ` b 9f - add Q, ($2*2-1), Q - ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') - ifelse($1, 1, `9:')') - -#include -#include - -FUNC(NAME) -ifelse(S, `true', -` ! compute sign of result; if neither is negative, no problem - orcc divisor, dividend, %g0 ! either negative? - bge 2f ! no, go do the divide - ifelse(OP, `div', - `xor divisor, dividend, SIGN', - `mov dividend, SIGN') ! compute sign in any case - tst divisor - bge 1f - tst dividend - ! divisor is definitely negative; dividend might also be negative - bge 2f ! if dividend not negative... - neg divisor ! in any case, make divisor nonneg -1: ! dividend is negative, divisor is nonnegative - neg dividend ! make dividend nonnegative -2: -') - ! Ready to divide. Compute size of quotient; scale comparand. - orcc divisor, %g0, V - bnz 1f - mov dividend, R - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - t ST_DIV0 - retl - clr %o0 - -1: - cmp R, V ! if divisor exceeds dividend, done - blu Lgot_result ! (and algorithm fails otherwise) - clr Q - sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T - cmp R, T - blu Lnot_really_big - clr ITER - - ! `Here the dividend is >= 2^(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R.' - 1: - cmp V, T - bgeu 3f - mov 1, SC - sll V, N, V - b 1b - inc ITER - - ! Now compute SC. - 2: addcc V, V, V - bcc Lnot_too_big - inc SC - - ! We get here if the divisor overflowed while shifting. - ! This means that R has the high-order bit set. - ! Restore V and subtract from R. - sll T, TOPBITS, T ! high order bit - srl V, 1, V ! rest of V - add V, T, V - b Ldo_single_div - dec SC - - Lnot_too_big: - 3: cmp V, R - blu 2b - nop - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! V > R: went too far: back up 1 step - ! srl V, 1, V - ! dec SC - ! do single-bit divide steps - ! - ! We have to be careful here. We know that R >= V, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if R >= 0. Because both R and V may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - deccc SC - bl Lend_regular_divide - nop - sub R, V, R - mov 1, Q - b Lend_single_divloop - nop - Lsingle_divloop: - sll Q, 1, Q - bl 1f - srl V, 1, V - ! R >= 0 - sub R, V, R - b 2f - inc Q - 1: ! R < 0 - add R, V, R - dec Q - 2: - Lend_single_divloop: - deccc SC - bge Lsingle_divloop - tst R - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll V, N, V - cmp V, R - bleu 1b - inccc ITER - be Lgot_result - dec ITER - - tst R ! set up for initial iteration -Ldivloop: - sll Q, N, Q - DEVELOP_QUOTIENT_BITS(1, 0) -Lend_regular_divide: - deccc ITER - bge Ldivloop - tst R - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) -ifelse(OP, `div', -` dec Q -', ` add R, divisor, R -') - -Lgot_result: -ifelse(S, `true', -` ! check to see if answer should be < 0 - tst SIGN - bl,a 1f - ifelse(OP, `div', `neg Q', `neg R') -1:') - retl - ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') diff -Nura klibc-1.1.16/klibc/arch/sparc/Makefile.inc klibc-1.1.16.sparc/klibc/arch/sparc/Makefile.inc --- klibc-1.1.16/klibc/arch/sparc/Makefile.inc 2006-01-06 03:11:43.000000000 -0300 +++ klibc-1.1.16.sparc/klibc/arch/sparc/Makefile.inc 2006-01-25 14:24:02.000000000 -0300 @@ -8,12 +8,6 @@ # ARCHOBJS = \ - arch/$(ARCH)/sdiv.o \ - arch/$(ARCH)/udiv.o \ - arch/$(ARCH)/srem.o \ - arch/$(ARCH)/urem.o \ - arch/$(ARCH)/smul.o \ - arch/$(ARCH)/umul.o \ arch/$(ARCH)/setjmp.o \ arch/$(ARCH)/syscall.o \ arch/$(ARCH)/sysfork.o \ @@ -23,29 +17,5 @@ libgcc/__umoddi3.o \ libgcc/__udivmoddi4.o -arch/$(ARCH)/sdiv.S: arch/$(ARCH)/divrem.m4 - @echo 'building $@ from $^' - @(echo "define(NAME,\`.div')define(OP,\`div')define(S,\`true')"; \ - cat $^) | m4 > $@ - @chmod 444 $@ - -arch/$(ARCH)/udiv.S: arch/$(ARCH)/divrem.m4 - @echo 'building $@ from $^' - @(echo "define(NAME,\`.udiv')define(OP,\`div')define(S,\`false')"; \ - cat $^) | m4 > $@ - @chmod 444 $@ - -arch/$(ARCH)/srem.S: arch/$(ARCH)/divrem.m4 - @echo 'building $@ from $^' - @(echo "define(NAME,\`.rem')define(OP,\`rem')define(S,\`true')"; \ - cat $^) | m4 > $@ - @chmod 444 $@ - -arch/$(ARCH)/urem.S: arch/$(ARCH)/divrem.m4 - @echo 'building $@ from $^' - @(echo "define(NAME,\`.urem')define(OP,\`rem')define(S,\`false')"; \ - cat $^) | m4 > $@ - @chmod 444 $@ - archclean: - rm -f arch/$(ARCH)/?div.S arch/$(ARCH)/?rem.S + diff -Nura klibc-1.1.16/klibc/arch/sparc/MCONFIG klibc-1.1.16.sparc/klibc/arch/sparc/MCONFIG --- klibc-1.1.16/klibc/arch/sparc/MCONFIG 2006-01-06 03:11:43.000000000 -0300 +++ klibc-1.1.16.sparc/klibc/arch/sparc/MCONFIG 2006-01-25 14:48:42.000000000 -0300 @@ -7,7 +7,7 @@ # accordingly. # -OPTFLAGS = -Os -m32 -mptr32 +OPTFLAGS = -Os -m32 -mptr32 -mcpu=v8 -mtune=v8 -D_KLIBC_SIGACTION BITSIZE = 32 # Extra linkflags when building the shared version of the library diff -Nura klibc-1.1.16/klibc/arch/sparc/smul.S klibc-1.1.16.sparc/klibc/arch/sparc/smul.S --- klibc-1.1.16/klibc/arch/sparc/smul.S 2006-01-06 03:11:43.000000000 -0300 +++ klibc-1.1.16.sparc/klibc/arch/sparc/smul.S 1969-12-31 21:00:00.000000000 -0300 @@ -1,160 +0,0 @@ -/* $NetBSD: mul.S,v 1.3 1997/07/16 14:37:42 christos Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp - */ - -#include -#if defined(LIBC_SCCS) && !defined(lint) -#if 0 - .asciz "@(#)mul.s 8.1 (Berkeley) 6/4/93" -#else - RCSID("$NetBSD: mul.S,v 1.3 1997/07/16 14:37:42 christos Exp $") -#endif -#endif /* LIBC_SCCS and not lint */ - -/* - * Signed multiply, from Appendix E of the Sparc Version 8 - * Architecture Manual. - * - * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of - * the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. - */ - -FUNC(.mul) - mov %o0, %y ! multiplier -> Y - andncc %o0, 0xfff, %g0 ! test bits 12..31 - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - ! If %o0 was negative, the result is - ! (%o0 * %o1) + (%o1 << 32)) - ! We fix that here. - - tst %o0 - bge 1f - rd %y, %o0 - - ! %o0 was indeed negative; fix upper 32 bits of result by subtracting - ! %o1 (i.e., return %o4 - %o1 in %o1). - retl - sub %o4, %o1, %o1 - -1: - retl - mov %o4, %o1 - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the low part of the - * result; %y has the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * --hi-- ----low-part---- - * - * The upper 12 bits of %o4 should be sign-extended to form the - * high part of the product (i.e., highpart = %o4 >> 20). - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left - or %o5, %o0, %o0 ! construct low part of result - retl - sra %o4, 20, %o1 ! ... and extract high part of result diff -Nura klibc-1.1.16/klibc/arch/sparc/umul.S klibc-1.1.16.sparc/klibc/arch/sparc/umul.S --- klibc-1.1.16/klibc/arch/sparc/umul.S 2006-01-06 03:11:43.000000000 -0300 +++ klibc-1.1.16.sparc/klibc/arch/sparc/umul.S 1969-12-31 21:00:00.000000000 -0300 @@ -1,193 +0,0 @@ -/* $NetBSD: umul.S,v 1.3 1997/07/16 14:37:44 christos Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: Header: umul.s,v 1.4 92/06/25 13:24:05 torek Exp - */ - -#include -#if defined(LIBC_SCCS) && !defined(lint) -#if 0 - .asciz "@(#)umul.s 8.1 (Berkeley) 6/4/93" -#else - RCSID("$NetBSD: umul.S,v 1.3 1997/07/16 14:37:44 christos Exp $") -#endif -#endif /* LIBC_SCCS and not lint */ - -/* - * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the - * upper 32 bits of the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. Short - * multiplies require 25 instruction cycles, and long ones require - * 45 instruction cycles. - * - * On return, overflow has occurred (%o1 is not zero) if and only if - * the Z condition code is clear, allowing, e.g., the following: - * - * call .umul - * nop - * bnz overflow (or tnz) - */ - -FUNC(.umul) - or %o0, %o1, %o4 - mov %o0, %y ! multiplier -> Y - andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - - /* - * Normally, with the shift-and-add approach, if both numbers are - * positive you get the correct result. WIth 32-bit two's-complement - * numbers, -x is represented as - * - * x 32 - * ( 2 - ------ ) mod 2 * 2 - * 32 - * 2 - * - * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, - * we can treat this as if the radix point were just to the left - * of the sign bit (multiply by 2^32), and get - * - * -x = (2 - x) mod 2 - * - * Then, ignoring the `mod 2's for convenience: - * - * x * y = xy - * -x * y = 2y - xy - * x * -y = 2x - xy - * -x * -y = 4 - 2x - 2y + xy - * - * For signed multiplies, we subtract (x << 32) from the partial - * product to fix this problem for negative multipliers (see mul.s). - * Because of the way the shift into the partial product is calculated - * (N xor V), this term is automatically removed for the multiplicand, - * so we don't have to adjust. - * - * But for unsigned multiplies, the high order bit wasn't a sign bit, - * and the correction is wrong. So for unsigned multiplies where the - * high order bit is one, we end up with xy - (y << 32). To fix it - * we add y << 32. - */ - tst %o1 - bl,a 1f ! if %o1 < 0 (high order bit = 1), - add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) -1: rd %y, %o0 ! get lower half of product - retl - addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above), - * and overflow is impossible (the answer is at most 24 bits long). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the result; %y has - * the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * -----result----- - * - * The 12 bits of %o4 left of the `result' area are all zero; - * in fact, all top 20 bits of %o4 are zero. - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20 - or %o5, %o0, %o0 - retl - addcc %g0, %g0, %o1 ! %o1 = zero, and set Z