basics
function from basics.nw:
/*
 * basics - print a guided tour of how doubles are encoded.
 *
 * Each value is shown either as computed by ordinary arithmetic or as
 * assembled field by field (sign, biased exponent, upper fraction bits,
 * lower fraction bits) through create_double()/show_created_double().
 * Takes no arguments and returns nothing; its only effect is the text
 * emitted by the show_* helpers and printf().
 */
void
basics(void)
{
    /* Labels for the power-of-two ladder; entry i has exponent i - 7. */
    static const char *const pow2_labels[] = {
        "1 / 2^7", "1 / 2^6", "1 / 2^5", "1 / 2^4",
        "1 / 2^3", "1 / 2^2", "1 / 2^1",
        "1 * 2^0",
        "1 * 2^1", "1 * 2^2", "1 * 2^3", "1 * 2^4",
        "1 * 2^5", "1 * 2^6", "1 * 2^7"
    };
    const int pow2_count = (int)(sizeof pow2_labels / sizeof pow2_labels[0]);

    double pos_one = 1;
    double neg_one = -1;
    double pos_zero = 0.0;
    double div_inf = pos_one / pos_zero;
    double nan_sqrt = sqrt(-1);
    double nan_inf_mod = fmod(INFINITY, pos_one);
    double nan_mod_zero = fmod(pos_one, pos_zero);
    double planck = 4.135667696e-15;
    double built_pos_zero = create_double(0, 0, 0, 0);
    double built_neg_zero = create_double(1, 0, 0, 0);
    double shown;
    int i;
    int exp2;

    /* Zero has every exponent and fraction bit clear; only the sign varies. */
    show_double("0", pos_zero);
    show_double("0", built_pos_zero);
    show_double("-0", built_neg_zero);

    /*
     * The Planck constant in eV, printed four ways: as a value, rebuilt
     * from its fields, as raw 64 bits and as bytes.
     * round(log(4e-15) / log(2)) = -48, hence the exponent used below.
     */
    show_double("Planck eV", planck);
    show_created_double("Planck eV", 0, DBLEXP(-48), 0x2a019, 0xa830a613);
    show_long_bits("Planck eV", planck);
    show_bytes("Planck eV", planck);

    /* +1 and -1: computed value followed by the hand-built equivalent. */
    show_double("1", pos_one);
    show_created_double("1", 0, DBLEXP(0), 0, 0);
    show_double("-1", neg_one);
    show_created_double("-1", 1, DBLEXP(0), 0, 0);

    /*
     * Infinities: biased exponent 2047 = bias(1024) = 0x7ff
     * = 0b11111111111 (all 11 exponent bits set) with a zero fraction.
     */
    show_created_double("-inf", 1, DBLEXP(1024), 0, 0);
    show_created_double("inf", 0, DBLEXP(1024), 0, 0);
    show_double("1 / 0", div_inf);

    /* NaNs as produced by invalid arithmetic. */
    show_double("sqrt(-1)", nan_sqrt);
    show_double("inf % 1", nan_inf_mod);
    show_double("1 % 0", nan_mod_zero);

    /* Signaling NaN range (the sign bit does not matter). */
    show_created_double("first +sNaN", 0, DBLEXP(1024), 0, 1);
    show_created_double("last +sNaN", 0, DBLEXP(1024), 0x7ffff, 0xffffffff);

    /* Quiet NaN range (the sign bit does not matter). */
    show_created_double("first +qNaN", 0, DBLEXP(1024), 0x80000, 0);
    show_created_double("last +qNaN", 0, DBLEXP(1024), 0xfffff, 0xffffffff);

    /* Powers of two from 2^-7 up to 2^7: only the exponent field moves. */
    for (i = 0; i < pow2_count; i++) {
        exp2 = i - 7;
        show_created_double(pow2_labels[i], 0, DBLEXP(exp2), 0, 0);
    }

    /* The two competing definitions of machine epsilon. */
    shown = create_double(0, DBLEXP(-53), 0, 0);
    printf("%15.15s -> %e (formal machine epsilon, lapack)\n",
           "2^-53", shown);
    shown = create_double(0, DBLEXP(-52), 0, 0);
    printf("%15.15s -> %e (machine epsilon, iso c std, mathematica, matlab)\n",
           "2^-52", shown);

    /* machine_epsilon() evaluated at +1 and -1. */
    printf("%15.15s -> %e (machine_epsilon(1.0))\n",
           "2^-52", machine_epsilon(1.0));
    printf("%15.15s -> %e (machine_epsilon(-1.0))\n",
           "2^-52", machine_epsilon(-1.0));

    /* machine_epsilon() near 2^53, where the spacing reaches whole units. */
    shown = create_double(0, DBLEXP(51), 0, 0);
    printf("%15.15s -> %.2f (machine_epsilon(2^51))\n",
           "0.5", machine_epsilon(shown));
    shown = create_double(0, DBLEXP(52), 0, 0);
    printf("%15.15s -> %.2f (machine_epsilon(2^52))\n",
           "1.0", machine_epsilon(shown));
    shown = create_double(0, DBLEXP(53), 0, 0);
    printf("%15.15s -> %.2f (machine_epsilon(2^53), greater than unit, will skip)\n",
           "2.0", machine_epsilon(shown));

    /* 2^53 itself and the very next representable double. */
    shown = create_double(0, DBLEXP(53), 0, 0);
    printf("%15.15s -> %.1f (maximum integer before skipping occurs)\n",
           "2^53", shown);
    shown = create_double(0, DBLEXP(53), 0, 1);
    printf("%15.15s -> %.1f (smallest number greater than 2^53)\n",
           "...", shown);
}
basics
output:
0 -> (special) 0.0 (+1.00000_00000000 * 2 ^ 0)
0 -> (special) 0.0 (+1.00000_00000000 * 2 ^ 0)
-0 -> (special) 0.0 (-1.00000_00000000 * 2 ^ 0)
Planck eV -> 4.13566769600e-15 (+1.2a019_a830a613 * 2 ^ unbias(3cf))
Planck eV -> 4.13566769600e-15 (+1.2a019_a830a613 * 2 ^ unbias(3cf))
Planck eV -> 0x3cf2a019a830a613 (+1.2a019_a830a613 * 2 ^ unbias(3cf))
Planck eV -> 13a630a819a0f23c (octets/bytes)
1 -> 1.00 (+1.00000_00000000 * 2 ^ unbias(3ff))
1 -> 1.00 (+1.00000_00000000 * 2 ^ unbias(3ff))
-1 -> -1.00 (-1.00000_00000000 * 2 ^ unbias(3ff))
-1 -> -1.00 (-1.00000_00000000 * 2 ^ unbias(3ff))
-inf -> -inf (-1.00000_00000000 * 2 ^ unbias(7ff))
inf -> inf (+1.00000_00000000 * 2 ^ unbias(7ff))
1 / 0 -> inf (+1.00000_00000000 * 2 ^ unbias(7ff))
sqrt(-1) -> -nan (-1.80000_00000000 * 2 ^ unbias(7ff))
inf % 1 -> -nan (-1.80000_00000000 * 2 ^ unbias(7ff))
1 % 0 -> -nan (-1.80000_00000000 * 2 ^ unbias(7ff))
first +sNaN -> nan (+1.00000_00000001 * 2 ^ unbias(7ff))
last +sNaN -> nan (+1.7ffff_ffffffff * 2 ^ unbias(7ff))
first +qNaN -> nan (+1.80000_00000000 * 2 ^ unbias(7ff))
last +qNaN -> nan (+1.fffff_ffffffff * 2 ^ unbias(7ff))
1 / 2^7 -> 0.0078125 (+1.00000_00000000 * 2 ^ unbias(3f8))
1 / 2^6 -> 0.0156250 (+1.00000_00000000 * 2 ^ unbias(3f9))
1 / 2^5 -> 0.0312500 (+1.00000_00000000 * 2 ^ unbias(3fa))
1 / 2^4 -> 0.0625000 (+1.00000_00000000 * 2 ^ unbias(3fb))
1 / 2^3 -> 0.1250000 (+1.00000_00000000 * 2 ^ unbias(3fc))
1 / 2^2 -> 0.2500000 (+1.00000_00000000 * 2 ^ unbias(3fd))
1 / 2^1 -> 0.5000000 (+1.00000_00000000 * 2 ^ unbias(3fe))
1 * 2^0 -> 1.00 (+1.00000_00000000 * 2 ^ unbias(3ff))
1 * 2^1 -> 2.00 (+1.00000_00000000 * 2 ^ unbias(400))
1 * 2^2 -> 4.00 (+1.00000_00000000 * 2 ^ unbias(401))
1 * 2^3 -> 8.00 (+1.00000_00000000 * 2 ^ unbias(402))
1 * 2^4 -> 16.00 (+1.00000_00000000 * 2 ^ unbias(403))
1 * 2^5 -> 32.00 (+1.00000_00000000 * 2 ^ unbias(404))
1 * 2^6 -> 64.00 (+1.00000_00000000 * 2 ^ unbias(405))
1 * 2^7 -> 128.00 (+1.00000_00000000 * 2 ^ unbias(406))
2^-53 -> 1.110223e-16 (formal machine epsilon, lapack)
2^-52 -> 2.220446e-16 (machine epsilon, iso c std, mathematica, matlab)
2^-52 -> 2.220446e-16 (machine_epsilon(1.0))
2^-52 -> 2.220446e-16 (machine_epsilon(-1.0))
0.5 -> 0.50 (machine_epsilon(2^51))
1.0 -> 1.00 (machine_epsilon(2^52))
2.0 -> 2.00 (machine_epsilon(2^53), greater than unit, will skip)
2^53 -> 9007199254740992.0 (maximum integer before skipping occurs)
... -> 9007199254740994.0 (smallest number greater than 2^53)
bits
function from bits.nw:
/*
 * bits - print the raw 64-bit pattern of landmark doubles.
 *
 * Every entry is assembled with create_double() from its sign, biased
 * exponent and the two fraction words, then handed to show_long_bits().
 * The table runs through the positive values in ascending bit-pattern
 * order and finishes with a handful of negative counterparts.
 * Takes no arguments and returns nothing.
 */
void
bits(void)
{
    struct landmark {
        const char *label;      /* text passed to show_long_bits() */
        int sign;               /* 0 = positive, 1 = negative */
        int biased_exp;         /* exponent field, already biased */
        unsigned long frac_hi;  /* upper fraction bits */
        unsigned long frac_lo;  /* lower fraction bits */
    };
    const struct landmark landmarks[] = {
        /* exponent field zero: zero and the subnormals */
        { "zero",                0, 0,            0,       0          },
        { "first subnormal",     0, 0,            0,       1          },
        { "last subnormal",      0, 0,            0xfffff, 0xffffffff },
        /* normal numbers */
        { "first normal",        0, 1,            0,       0          },
        { "2^(-53)",             0, DBLEXP(-53),  0,       0          },
        { "one",                 0, DBLEXP(0),    0,       0          },
        { "2^(+53)",             0, DBLEXP(+53),  0,       0          },
        { "last normal",         0, DBLEXP(1023), 0xfffff, 0xffffffff },
        /* exponent field all ones: infinity and the NaN ranges */
        { "infinite",            0, DBLEXP(1024), 0,       0          },
        { "first snan",          0, DBLEXP(1024), 0,       1          },
        { "last snan",           0, DBLEXP(1024), 0x7ffff, 0xffffffff },
        { "first qnan",          0, DBLEXP(1024), 0x80000, 0          },
        { "last qnan",           0, DBLEXP(1024), 0xfffff, 0xffffffff },
        /* same patterns with the sign bit set */
        { "(-) first subnormal", 1, 0,            0,       1          },
        { "(-) last subnormal",  1, 0,            0xfffff, 0xffffffff },
        { "(-) one",             1, DBLEXP(0),    0,       0          },
        { "(-) infinite",        1, DBLEXP(1024), 0,       0          },
        { "(-) last qnan",       1, DBLEXP(1024), 0xfffff, 0xffffffff }
    };
    const int count = (int)(sizeof landmarks / sizeof landmarks[0]);
    int i;

    for (i = 0; i < count; i++) {
        const struct landmark *m = &landmarks[i];

        show_long_bits(m->label,
                       create_double(m->sign, m->biased_exp,
                                     m->frac_hi, m->frac_lo));
    }
}
bits
output:
zero -> 0x0000000000000000 +1.00000_00000000 (special) 0.000000e+00
first subnormal -> 0x0000000000000001 +1.00000_00000001 (special) 4.940656e-324
last subnormal -> 0x000fffffffffffff +1.fffff_ffffffff (special) 2.225074e-308
first normal -> 0x0010000000000000 +1.00000_00000000 * 2 ^ unbias( 1) 2.225074e-308
2^(-53) -> 0x3ca0000000000000 +1.00000_00000000 * 2 ^ unbias(3ca) 1.110223e-16
one -> 0x3ff0000000000000 +1.00000_00000000 * 2 ^ unbias(3ff) 1.000000e+00
2^(+53) -> 0x4340000000000000 +1.00000_00000000 * 2 ^ unbias(434) 9.007199e+15
last normal -> 0x7fefffffffffffff +1.fffff_ffffffff * 2 ^ unbias(7fe) 1.797693e+308
infinite -> 0x7ff0000000000000 +1.00000_00000000 * 2 ^ unbias(7ff) inf
first snan -> 0x7ff0000000000001 +1.00000_00000001 * 2 ^ unbias(7ff) nan
last snan -> 0x7ff7ffffffffffff +1.7ffff_ffffffff * 2 ^ unbias(7ff) nan
first qnan -> 0x7ff8000000000000 +1.80000_00000000 * 2 ^ unbias(7ff) nan
last qnan -> 0x7fffffffffffffff +1.fffff_ffffffff * 2 ^ unbias(7ff) nan
(-) first subnor -> 0x8000000000000001 -1.00000_00000001 (special) -4.940656e-324
(-) last subnorm -> 0x800fffffffffffff -1.fffff_ffffffff (special) -2.225074e-308
(-) one -> 0xbff0000000000000 -1.00000_00000000 * 2 ^ unbias(3ff) -1.000000e+00
(-) infinite -> 0xfff0000000000000 -1.00000_00000000 * 2 ^ unbias(7ff) -inf
(-) last qnan -> 0xffffffffffffffff -1.fffff_ffffffff * 2 ^ unbias(7ff) -nan
The double
type of the C language is known as binary64
in the IEEE 754
Standard.
The same principles shown for binary64
also apply to binary32
(C's
float
type).
More details in the link below.
This page was last modified on March 18, 2024 at 14:56:59 UTC.