Below is the output from a recent analysis run (contributed by Michael Meissner)
for the following program:
/* Deterministic pseudo-random source for the test.

   Advances a private linear congruential state (multiplier 1103515245,
   increment 12345, starting from 47114711) using unsigned long arithmetic,
   which wraps at the width of unsigned long, and returns the new state with
   its low 8 bits shifted out.  The state lives in a function-local static,
   so successive calls continue one fixed sequence.  */
long
simple_rand ()
{
  static unsigned long state = 47114711;

  state = state * 1103515245 + 12345;

  /* Drop the low 8 bits of the freshly computed state.  */
  return state >> 8;
}
/* Build a pseudo-random unsigned long out of runs of identical bits.

   Each iteration takes one value from simple_rand ().  Bits 1..4 of that
   value give a run length of 0..15 and bit 0 selects whether the run is all
   ones or all zeros.  The run is shifted into the low end of the result.

   Returns the accumulated word as soon as a run length of 0 is drawn, or
   once more than 8 * sizeof (long) + 6 bits have been shifted in.  */
unsigned long int
random_bitstring ()
{
  unsigned long int x = 0;
  unsigned int tot_bits = 0;

  for (;;)
    {
      /* Keep the draw unsigned.  Narrowing it to int can produce a negative
         value where long is wider than int, which would make the run length
         negative and the shift below undefined.  */
      unsigned long ran = (unsigned long) simple_rand ();
      unsigned int n_bits = (unsigned int) ((ran >> 1) % 16);

      /* A zero-length run ends the bitstring.  */
      if (n_bits == 0)
        return x;

      tot_bits += n_bits;

      /* Make room for the new run; it starts out as zeros.  */
      x <<= n_bits;

      /* Odd draws turn the new run into ones.  */
      if (ran & 1)
        x |= (1UL << n_bits) - 1;

      if (tot_bits > 8 * sizeof (long) + 6)
        return x;
    }
}
/* Absolute value of X.  X appears more than once in the expansion, so the
   argument is evaluated more than once and must not have side effects.  */
#define ABS(x) ((x) >= 0 ? (x) : -(x))
/* Division / remainder self-check.

   Runs 50000 iterations.  Each iteration draws two words from
   random_bitstring () and, for long, int, short and char in both unsigned
   and signed flavours, verifies that the quotient r1 and remainder r2 of
   xx / yy satisfy

       |r2| < |yy|   and   r1 * yy + r2 == xx

   in the type under test.  Any violation calls abort (); if every iteration
   passes the program ends with exit (0).

   An iteration is abandoned (continue) as soon as a divisor is zero, or when
   a signed long/int division has divisor -1 and a dividend with no bits set
   below the sign bit.

   NOTE(review): no #include <stdlib.h> is visible in this listing; abort and
   exit need that declaration when compiled as C99 or later.  */
int
main (void)
{
  int i;

  for (i = 0; i < 50000; i++)
    {
      unsigned long x, y;

      x = random_bitstring ();
      y = random_bitstring ();

      /* When int and long have the same size, skip the long checks and go
         straight to the int ones.  */
      if (sizeof (int) == sizeof (long))
        goto save_time;

      { unsigned long xx = x, yy = y, r1, r2;
        if (yy == 0) continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      { signed long xx = x, yy = y, r1, r2;
        /* Skip divisor -1 when xx is zero or has only its top bit set.  */
        if ((unsigned long) xx << 1 == 0 && yy == -1)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        /* The divisor's magnitude is formed in unsigned arithmetic so that
           the most negative long does not overflow on negation.  */
        if (ABS (r2) >= (yy < 0 ? 0UL - (unsigned long) yy : (unsigned long) yy)
            || (signed long) (r1 * yy + r2) != xx)
          abort ();
      }
    save_time:
      { unsigned int xx = x, yy = y, r1, r2;
        if (yy == 0) continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      { signed int xx = x, yy = y, r1, r2;
        /* Skip divisor -1 when xx is zero or has only its top bit set.  */
        if ((unsigned int) xx << 1 == 0 && yy == -1)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        /* Unsigned magnitude again: avoids negating the most negative int.  */
        if (ABS (r2) >= (yy < 0 ? 0U - (unsigned int) yy : (unsigned int) yy)
            || (signed int) (r1 * yy + r2) != xx)
          abort ();
      }
      { unsigned short xx = x, yy = y, r1, r2;
        if (yy == 0) continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      /* No zero test is needed here: a zero divisor was already rejected by
         the unsigned short block, which truncates the same y.  */
      { signed short xx = x, yy = y, r1, r2;
        r1 = xx / yy;
        r2 = xx % yy;
        if (ABS (r2) >= (unsigned short) ABS (yy) || (signed short) (r1 * yy + r2) != xx)
          abort ();
      }
      { unsigned char xx = x, yy = y, r1, r2;
        if (yy == 0) continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      /* Likewise, the unsigned char block has already rejected a zero
         divisor for this width.  */
      { signed char xx = x, yy = y, r1, r2;
        r1 = xx / yy;
        r2 = xx % yy;
        if (ABS (r2) >= (unsigned char) ABS (yy) || (signed char) (r1 * yy + r2) != xx)
          abort ();
      }
    }
  exit (0);
}
Here is the current output generated with the -I switch on a 90 MHz Pentium (the compiler used is the development version of GCC with a new scheduler replacing the old one):
CPU #1 executed 41,994 AND instructions. CPU #1 executed 519,785 AND Immediate instructions. CPU #1 executed 680,058 Add instructions. CPU #1 executed 41,994 Add Extended instructions. CPU #1 executed 921,916 Add Immediate instructions. CPU #1 executed 221,199 Add Immediate Carrying instructions. CPU #1 executed 943,823 Add Immediate Shifted instructions. CPU #1 executed 471,909 Add to Zero Extended instructions. CPU #1 executed 571,915 Branch instructions. CPU #1 executed 1,992,403 Branch Conditional instructions. CPU #1 executed 571,910 Branch Conditional to Link Register instructions. CPU #1 executed 320,431 Compare instructions. CPU #1 executed 471,911 Compare Immediate instructions. CPU #1 executed 145,867 Compare Logical instructions. CPU #1 executed 442,414 Compare Logical Immediate instructions. CPU #1 executed 1 Condition Register XOR instruction. CPU #1 executed 103,873 Divide Word instructions. CPU #1 executed 104,275 Divide Word Unsigned instructions. CPU #1 executed 132,510 Extend Sign Byte instructions. CPU #1 executed 178,895 Extend Sign Half Word instructions. CPU #1 executed 871,920 Load Word and Zero instructions. CPU #1 executed 41,994 Move From Condition Register instructions. CPU #1 executed 100,005 Move from Special Purpose Register instructions. CPU #1 executed 100,002 Move to Special Purpose Register instructions. CPU #1 executed 804,619 Multiply Low Word instructions. CPU #1 executed 421,201 OR instructions. CPU #1 executed 471,910 OR Immediate instructions. CPU #1 executed 1,292,020 Rotate Left Word Immediate then AND with Mask instructions. CPU #1 executed 663,613 Shift Left Word instructions. CPU #1 executed 1,151,564 Shift Right Algebraic Word Immediate instructions. CPU #1 executed 871,922 Store Word instructions. CPU #1 executed 100,004 Store Word with Update instructions. CPU #1 executed 887,804 Subtract From instructions. CPU #1 executed 83,988 Subtract From Immediate Carrying instructions. CPU #1 executed 1 System Call instruction. 
CPU #1 executed 207,746 XOR instructions. CPU #1 executed 23,740,856 cycles. CPU #1 executed 10,242,780 stalls waiting for data. CPU #1 executed 1 stall waiting for a function unit. CPU #1 executed 1 stall waiting for serialization. CPU #1 executed 1,757,900 times a writeback slot was unavilable. CPU #1 executed 1,088,135 branches. CPU #1 executed 2,048,093 conditional branches fell through. CPU #1 executed 1,088,135 successful branch predictions. CPU #1 executed 904,268 unsuccessful branch predictions. CPU #1 executed 742,557 branch if the condition is FALSE conditional branches. CPU #1 executed 1,249,846 branch if the condition is TRUE conditional branches. CPU #1 executed 571,910 branch always conditional branches. CPU #1 executed 9,493,653 1st single cycle integer functional unit instructions. CPU #1 executed 1,220,900 2nd single cycle integer functional unit instructions. CPU #1 executed 1,254,768 multiple cycle integer functional unit instructions. CPU #1 executed 1,843,846 load/store functional unit instructions. CPU #1 executed 3,136,229 branch functional unit instructions. CPU #1 executed 16,949,396 instructions that were accounted for in timing info. CPU #1 executed 871,920 data reads. CPU #1 executed 971,926 data writes. CPU #1 executed 221 icache misses. CPU #1 executed 16,949,396 instructions in total. Simulator speed was 250,731 instructions/second