/*filename: intpow.c revision date: 12-12-2013 */
/*"Computing x^n where n is an integer" by Gary Knott and Daniel Kerner */
/* This file contains the routine intpow(x,n) where x is a 64-bit
double floating-point value and n is a 16-bit integer. intpow
computes the best approximation of x^n and returns this 64-bit
floating-point value.
The code below depends on being linked with appropriate
overflow-exception handling code such as that found in MLAB.
Specifically, when a floating-point arithmetic operation produces
a result whose magnitude is too great to represent as a 64-bit
floating-point value (also known as a 64-bit 'double',)
we expect the overflow handler to "patch-in" the correctly-signed
largest-magnitude representable value, either MAXPOS or MAXNEG.
[MAXPOS = the positive largest magnitude 64-bit double
= (2^1023)*(1+(2^52-1)/2^52)
= approx 1.797...E+308
= (0 | 11111111110 | 1111...1111)
as (sign|biased-exponent|fraction)
MAXNEG = the negative largest magnitude 64-bit double
= -MAXPOS
= (1 | 11111111110 | 1111...1111)
]
If we have a "hard"-underflow, the floating-point chip acceptably
produces 0 "automatically" as the result, so the underflow exception
is to be "masked-off".
The detailed anatomy of an IEEE-standard 64-bit (double)
floating-point number is as follows.
A 64-bit floating-point value is of the form (g b f) where g is the
sign bit (0 for + and 1 for -,) b is the 11-bit biased-exponent, and f
is the 52-bit fraction-part. The actual exponent e represented by b
is e= b-1023, unless b= 11111111111 or unless b = 00000000000. (1023
is the "bias".) The biased-exponent b = 00000000000 corresponds to
e=-1022, the same as b = 00000000001. Also, the largest biased-
exponent is b = 11111111110, corresponding to e = 1023.
The 64-bit floating-point value (g b f) represents the real value
s*(2^(b-1023))*[1 +f/2^52] when 00000000001 <= b <= 11111111110, where
s = 1 when g=0 and s= -1 when g=1. (Note f is treated as an integer.)
These values are called the normalized floating-point numbers. The
largest positive normalized number is (2^1023)[2-1/2^52]. The smallest
positive normalized number is 2^-1022.
When b = 0000000000, the 64-bit floating-point value (g b f)
represents the real value s*(2^-1022))*[f/2^52], where s = 1 when g=0
and s= -1 when g=1. These values are called the denormalized
floating-point numbers. The largest positive denormalized number is
(2^-1022)[1-1/2^52]. The smallest positive denormalized number is
2^-1074. (Denormalized floating-point numbers have successively lower
precision as they decrease.)
The value 0 is represented with b = 00000000000 and f=0; g may be
either 0 or 1.
When b = 11111111111, the 64-bit floating-point value (g b f)
represents a NaN value (NaN stands for "not a number".) The NaNs
where f=0 are often called 'infinity' NaNs. The floating-point
arithmetic unit has special behavior associated with NaNs, none of
which we want to use.
------------------
We have the following constants of interest.
maxpos = the positive largest magnitude 64-bit double
= (2^1023)*(1+(2^52-1)/2^52)) = (2^1023)*(2-2^-52)
= approx 1.797...E+308.
= (0 | 11111111110 | 1111...1111)
maxneg = the negative largest magnitude 64-bit double
= -(2^1023)*(1+(2^52-1)/2^52))
= approx -1.797...E+308.
= (1 | 11111111110 | 1111...1111)
minpos = the positive smallest magnitude 64-bit normalized double
= 2^(-1022)
= approx 2.225073858507e-308.
= (0 | 00000000001 | 0000...0000)
minneg = the negative smallest magnitude 64-bit normalized double
= -2^(-1022)
= approx -2.225073858507e-308.
= (1 | 00000000001 | 0000...0000)
dminpos = the positive smallest magnitude 64-bit denormal double
= 2^(-1022)*2^(-52)
= approx 4.940656458412465e-324
= (0 | 00000000000 | 0000...0001)
dminneg = the negative smallest magnitude 64-bit denormal double
= -2^(-1022)*2^(-52)
= approx -4.940656458412465e-324
= (1 | 00000000000 | 0000...0001)
one = 1.0
= (2^0)*(1+0)
= (0 | 01111111111 | 0000...0000)
meps = "machine epsilon". This is the least positive value
such that 1+meps > 1.
= 2^-52
= (0 | 01111001011 | 0000...0000)
NaN = (g | 11111111111 | xxxx...xxxx) (NaN means "not-a-number".)
*/
/****************************************************************
This file contains the functions:
(private) basetwoexp() double's base-2 exponent
(private) pintpow() positive integer power of a double
(export) intpow() integer power of a double
*****************************************************************/
/* stdcsi.h contains the definitions of the macros EQ, AND, OR,
LITTLEENDIAN, int32, etc. used herein, as well as all other needed
definitions and declarations such as the function warning(). */
#include "stdcsi.h"
/* The parity macro defined here is used in intpow(). This macro uses
the fmod() C-library function: fmod(x,n) = x - iround(x/n)*n where
iround(a) = floor(a) when a>=0 and iround(a) = ceil(a) when a<0.
This definition holds for all real x and n. When n=2, then
fmod(x,2) = 0 only when x is an even integer.x */
#define parity(y) ((fmod(y,2.)==0)?0:1))
/*===================================================================*/
private int16 basetwoexp(double d)
/*--------------------------------------------------------------------
Compute the (unbiased) base-2 exponent of the 64-bit floating-point
value d. The result is the correct integer value (the bias has
been removed, except if d is a denormal, -1023 will be returned
instead of the correct smaller value.
*--------------------------------------------------------------------*/
{int32 x,*lptr,i;
double z;
/* Set lptr to the address of the first byte of the 8-byte argument.*/
lptr = (int32 *)&d;
/* Set x to the 32-bit field of the 64-bit double that contains the
biased exponent. This is either the first 4 bytes if we have a
big-endian machine, or the second 4 bytes if we have a little-endian
machine (e.g. an Intel 80x86).*/
#ifdef BIGENDIAN
/* This is for Motorola 68K and PowerPC chips, among others.*/
x = *lptr;
#endif /* BIGENDIAN */
#ifdef LITTLEENDIAN
/* This is for Intel 80x86-based chips, among others.*/
x = *(lptr+1);
#endif /* LITTLEENDIAN */
/* Zero all bits in x that are not biased exponent bits to extract
the 11-bit biased-exponent field.*/
x = x&0x7ff00000;
/* Right-justify (shift down) the exponent in the 32-bit field.*/
x = x>>20;
/* Subtract the exponent bias 1023 to obtain the true exponent.
Note, the biased-exponent 00000000000 really corresponds to -1022
(specifying 2^(-1022)) rather than -1023. (Such a zero-valued
biased-exponent specifies a "Denormal" value where the implicit
integer digit is 0!) Thus, to indicate the biased-exponent
00000000000 was seen, we return -1023 (!)
The exponent field 00000000001 represents -1022, 01111111110 =
represents -1, 01111111111 = represents 0, 10000000000 represents
1, 10000000001 represents 2, and 11111111110 represents
1023. (11111111111 represents a NaN.)
*/
return(x-1023);
}
/*====================================================================*/
private double pintpow(double x, int32 n)
/* double x; base
* int32 n; exponent
*/
/*----------------------------------------------------------------------
Compute x^n by shifting n and accumulating factors of x raised to the
powers of 2 corresponding to the one-bits in n. When this routine is
called, we will have: x != 0, x != 1, and n > 1 with n an integer.
Note, x^n may overflow if x > 1, or x^n may be a denormal or
underflow to 0 if x < 1. As discussed above, this code needs an
appropriate overflow-handler to work properly.
Note, the argument n will be the absolute value of an int16
in the range [-32768,32767], but an int32 is required to hold
abs(-32768).
----------------------------------------------------------------------*/
{double r;
/* Since this function is never called with x = 0, if we return 0, it
must be due to an underflow to 0. If we wish, we could instead
return (s^n)*DMINPOS which is the least-magnitude correctly-signed
representable non-zero floating-point value, where s = sign(x).
Returning 0 values accuracy more, but returning the denormal
(s^n)*DMINPOS considers returning a result of the correct sign
more important than minimizing the absolute error in the result.
Both points-of-view are valid.
*/
r = 1.0;
for (;;)
{if ((n&1) != 0) r *= x;
n >>= 1; /* n = n >> 1; i.e., n = floor(n/2) */
if (n EQ 0) return(r);
x *= x; /* Here x is replaced by one of x^2, x^4, x^8, .... */
}
}
/*=======================================================================*/
export double intpow(double x, int16 n)
/* double x; base
* int16 n; exponent
*/
/*-----------------------------------------------------------------------
n is an integer and x is a 64-bit double floating-point value. We
compute x^n and return its 64-bit double value. The magnitude of this
value is either 0, or lies in the range [DMINPOS, MAXPOS] where
DMINPOS is the least representable positive 64-bit double value
(DMINPOS = (2^(-1022))*(2^(-52)) = approx 4.940656458412465e-324,) and
MAXPOS = the largest representable positive 64-bit double value ( =
(2^1023)* (1+(2^52-1)/2^52) = approx 1.797...E+308.) (DMINPOS and
MAXPOS are not reciprocals because our floating-point number system
admits so-called "denormals".)
Except for various special cases, x^n is computed by shifting n and
accumulating a product of factors composed of x raised to the powers
of 2 corresponding to one-bits in n.
As discussed above, this code needs an appropriate overflow-handler
to work.
Let m = -n, a = abs(x), and let s = sign(x) (=1 or 0 or -1).
We have the following sequential cases:
x=0 and n>0: Return 0.
x=0 and n<=0: Return error-this is 0^0 or 1/(0^m).
x=1 or n=0: Return 1.
n=-1: Return 1./x
n>0: Compute x^n by accumulating products of powers of x. An
overflow in x^n results in (s^n)*MAXPOS. Note when abs(x)<1, x^n
might be a denormal or underflow to 0.
a>=1 and n<0: Compute 1/(x^m) where m = -n. We can only do this if
a^m < MAXPOS; otherwise we must compute (1/x)^m and accept 0 if
we underflow. We prefer to compute 1/(x^m) rather than (1/x)^m,
because the latter has more chance of greater round-off error in
the result, especially when a is a not-too-large positive
integer.
a<1 and n<0: Compute 1/(x^m). Note, x^m might be a denormal. If
so, 1/(x^m) will overflow to MAXPOS or MAXNEG, as appropriate.
If x^m underflows to 0, then we must generate the
correctly-signed result, MAXPOS or MAXNEG, explicitly.
--------------------------------------------------------------------*/
{int32 m;
int16 k;
double v,a;
#define LOG2MAXPOS 1023.99
if (x EQ 0.)
{if (n > 0) return(0.);
if (n EQ 0)
{warning("Trying to compute 0^0, 1 is returned.");
return(1.); /* This is a 0^0 error.*/
}
warning("Trying to compute 1/(0^m), MAXPOS is returned.");
return(MAXPOS); /* This is a 1/(0^m) error.*/
}
/* Here, x != 0.*/
if ((x EQ 1.) OR (n EQ 0)) return(1.);
if (n EQ -1) return(1./x);
if (n > 0) return(pintpow(x,(int32)n));
/* When x<1, pintpow(x,n) may be a denormal or underflow to 0. */
/* Here we have n < 0. Note, here -32768 <= n <= -2.*/
a = fabs(x); m = -(int32)n;
if (a >= 1.)
{/* If a^m <= MAXPOS return 1./pintpow(x,m). This is equivalent
to: if (m*log2(a) <= log2(MAXPOS)). We must err on the
conservative side: a^m <= MAXPOS must never be decided to be
true when it is false, but we may decide a^m > MAXPOS when
really a^m <= MAXPOS.
Let a = (2^k)*f where 0 < f < 2. Then log2(a) = k+log2(f).
Also log2(f) < 1. Thus log2(a) < k+1. Therefore, the test
m*(k+1) <= log2(MAXPOS) will be appropriate when log2(MAXPOS)
is greater than or equal to the true log2(MAXPOS) value.*/
k = basetwoexp(a); /* Here a is a normalized positive double. */
if ((m*(k+1)) <= LOG2MAXPOS) return(1./pintpow(x,m));
/* Here the value pintpow(a,m) will never be greater than
MAXPOS, so computing 1./pintpow(x,m) will never involve an
overflow. Below, x^m might overflow so we compute
(1/x)^m. Note pintpow(1./x,m) may be a denormal or
underflow to 0. See the remark in pintpow.*/
return(pintpow(1./x,m));
}
/* Here, a<1, the values a and x might be denormals, and pintpow(x,m)
might be a denormal or 0. See the remark in pintpow. */
v = pintpow(x,m);
/* pintpow(x,m) might be denormal, or even underflowed to 0.*/
if (v EQ 0.) {if (parity(n) EQ 1) return(MAXNEG); return(MAXPOS);}
return(1./v);
/* Note, if v is denormal, 1./v will overflow and MAXPOS or MAXNEG, as
appropriate, will be returned courtesy of our overflow handler.*/
}
/* end of file intpow.c */