aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/tune/time.c
diff options
context:
space:
mode:
authorDuncan Wilkie <antigravityd@gmail.com>2023-11-18 06:11:09 -0600
committerDuncan Wilkie <antigravityd@gmail.com>2023-11-18 06:11:09 -0600
commit11da511c784eca003deb90c23570f0873954e0de (patch)
treee14fdd3d5d6345956d67e79ae771d0633d28362b /gmp-6.3.0/tune/time.c
Initial commit.
Diffstat (limited to 'gmp-6.3.0/tune/time.c')
-rw-r--r--gmp-6.3.0/tune/time.c1598
1 files changed, 1598 insertions, 0 deletions
diff --git a/gmp-6.3.0/tune/time.c b/gmp-6.3.0/tune/time.c
new file mode 100644
index 0000000..5ba482b
--- /dev/null
+++ b/gmp-6.3.0/tune/time.c
@@ -0,0 +1,1598 @@
+/* Time routines for speed measurements.
+
+Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* Usage:
+
+ The code in this file implements the lowest level of time measuring,
+ simple one-time measuring of time between two points.
+
+ void speed_starttime (void)
+ double speed_endtime (void)
+ Call speed_starttime to start measuring, and then call speed_endtime
+ when done.
+
+ speed_endtime returns the time taken, in seconds. Or if the timebase
+ is in CPU cycles and the CPU frequency is unknown then speed_endtime
+ returns cycles. Applications can identify the cycles return by
+ checking for speed_cycletime (described below) equal to 1.0.
+
+ If some sort of temporary glitch occurs then speed_endtime returns
+ 0.0. Currently this is for various cases where a negative time has
+ occurred. This unfortunately occurs with getrusage on some systems,
+ and with the hppa cycle counter on hpux.
+
+ double speed_cycletime
+ The time in seconds for each CPU cycle. For example on a 100 MHz CPU
+ this would be 1.0e-8.
+
+ If the CPU frequency is unknown, then speed_cycletime is either 0.0
+ or 1.0. It's 0.0 when speed_endtime is returning seconds, or it's
+ 1.0 when speed_endtime is returning cycles.
+
+ It may be noted that "speed_endtime() / speed_cycletime" gives a
+ measured time in cycles, irrespective of whether speed_endtime is
+ returning cycles or seconds. (Assuming cycles can be had, ie. it's
+ either cycles already or the cpu frequency is known. See also
+ speed_cycletime_need_cycles below.)
+
+ double speed_unittime
+ The unit of time measurement accuracy for the timing method in use.
+ This is in seconds or cycles, as per speed_endtime.
+
+ char speed_time_string[]
+ A null-terminated string describing the time method in use.
+
+ void speed_time_init (void)
+ Initialize time measuring. speed_starttime() does this
+ automatically, so it's only needed if an application wants to inspect
+ the above global variables before making a measurement.
+
+ int speed_precision
+ The intended accuracy of time measurements. speed_measure() in
+ common.c for instance runs target routines with enough repetitions so
+ it takes at least "speed_unittime * speed_precision" (this expression
+ works for both cycles or seconds from speed_endtime).
+
+ A program can provide an option so the user to set speed_precision.
+ If speed_precision is zero when speed_time_init or speed_starttime
+ first run then it gets a default based on the measuring method
+ chosen. (More precision for higher accuracy methods.)
+
+ void speed_cycletime_need_seconds (void)
+ Call this to demand that speed_endtime will return seconds, and not
+ cycles. If only cycles are available then an error is printed and
+ the program exits.
+
+ void speed_cycletime_need_cycles (void)
+ Call this to demand that speed_cycletime is non-zero, so that
+ "speed_endtime() / speed_cycletime" will give times in cycles.
+
+
+
+ Notes:
+
+ Various combinations of cycle counter, read_real_time(), getrusage(),
+ gettimeofday() and times() can arise, according to which are available
+ and their precision.
+
+
+ Allowing speed_endtime() to return either seconds or cycles is only a
+ slight complication and makes it possible for the speed program to do
+ some sensible things without demanding the CPU frequency. If seconds are
+ being measured then it can always print seconds, and if cycles are being
+ measured then it can always print them without needing to know how long
+ they are. Also the tune program doesn't care at all what the units are.
+
+ GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
+ fail. This will be needed if times in seconds are wanted but a cycle
+ counter is being used, or if times in cycles are wanted but getrusage or
+ another seconds based timer is in use.
+
+ If the measuring method uses a cycle counter but supplements it with
+ getrusage or the like, then knowing the CPU frequency is mandatory since
+ the code compares values from the two.
+
+
+ Not done:
+
+ Solaris gethrtime() seems no more than a slow way to access the Sparc V9
+ cycle counter. gethrvtime() seems to be relevant only to light weight
+ processes, it doesn't for instance give nanosecond virtual time. So
+ neither of these are used.
+
+
+ Bugs:
+
+ getrusage_microseconds_p is fundamentally flawed, getrusage and
+ gettimeofday can have resolutions other than clock ticks or microseconds,
+ for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
+
+
+ Enhancements:
+
+ The SGI hardware counter has 64 bits on some machines, which could be
+ used when available. But perhaps 32 bits is enough range, and then rely
+ on the getrusage supplement.
+
+ Maybe getrusage (or times) should be used as a supplement for any
+ wall-clock measuring method. Currently a wall clock with a good range
+ (eg. a 64-bit cycle counter) is used without a supplement.
+
+ On PowerPC the timebase registers could be used, but would have to do
+ something to find out the speed. On 6xx chips it's normally 1/4 bus
+ speed, on 4xx chips it's either that or an external clock. Measuring
+ against gettimeofday might be ok. */
+
+
+#include "config.h"
+
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h> /* for getenv() */
+
+#if HAVE_FCNTL_H
+#include <fcntl.h> /* for open() */
+#endif
+
+#if HAVE_STDINT_H
+#include <stdint.h> /* for uint64_t */
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for sysconf() */
+#endif
+
+#include <sys/types.h>
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h> /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h> /* for mmap() */
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h> /* for struct rusage */
+#endif
+
+#if HAVE_SYS_SYSSGI_H
+#include <sys/syssgi.h> /* for syssgi() */
+#endif
+
+#if HAVE_SYS_SYSTEMCFG_H
+#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
+#endif
+
+#if HAVE_SYS_TIMES_H
+#include <sys/times.h> /* for times() and struct tms */
+#endif
+
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+/* strerror is only used for some stuff on newish systems, no need to have a
+ proper replacement */
+#if ! HAVE_STRERROR
+#define strerror(n) "<strerror not available>"
+#endif
+
+
+char speed_time_string[256];
+int speed_precision = 0;
+double speed_unittime;
+double speed_cycletime = 0.0;
+
+
+/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
+ native cc */
+#define M_2POWU (((double) INT_MAX + 1.0) * 2.0)
+
+#define M_2POW32 4294967296.0
+#define M_2POW64 (M_2POW32 * M_2POW32)
+
+
+/* Conditionals for the time functions available are done with normal C
+ code, which is a lot easier than wildly nested preprocessor directives.
+
+ The choice of what to use is partly made at run-time, according to
+ whether the cycle counter works and the measured accuracy of getrusage
+ and gettimeofday.
+
+ A routine that's not available won't be getting called, but is an abort()
+ to be sure it isn't called mistakenly.
+
+ It can be assumed that if a function exists then its data type will, but
+ if the function doesn't then the data type might or might not exist, so
+ the type can't be used unconditionally. The "struct_rusage" etc macros
+ provide dummies when the respective function doesn't exist. */
+
+
+#if HAVE_SPEED_CYCLECOUNTER
+static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
+#else
+static const int have_cycles = 0;
+#define speed_cyclecounter(p) ASSERT_FAIL (speed_cyclecounter not available)
+#endif
+
+/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
+ microseconds. Same #ifdefs here as in longlong.h. */
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+ && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
+static const int have_stck = 1;
+static const int use_stck = 1; /* always use when available */
+typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */
+#define STCK(timestamp) \
+ do { \
+ asm ("stck %0" : "=Q" (timestamp)); \
+ } while (0)
+#else
+static const int have_stck = 0;
+static const int use_stck = 0;
+typedef unsigned long stck_t; /* dummy */
+#define STCK(timestamp) ASSERT_FAIL (stck instruction not available)
+#endif
+#define STCK_PERIOD (1.0 / 4096e6) /* 2^-12 microseconds */
+
+/* mftb
+ Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
+ and a loop (see powerpc64.asm). */
+#if HAVE_HOST_CPU_FAMILY_powerpc
+static const int have_mftb = 1;
+#if defined (__GNUC__) && ! defined (NO_ASM)
+#define MFTB(a) \
+ do { \
+ unsigned __h1, __l, __h2; \
+ do { \
+ asm volatile ("mftbu %0\n" \
+ "mftb %1\n" \
+ "mftbu %2" \
+ : "=r" (__h1), \
+ "=r" (__l), \
+ "=r" (__h2)); \
+ } while (__h1 != __h2); \
+ a[0] = __l; \
+ a[1] = __h1; \
+ } while (0)
+#else
+#define MFTB(a) mftb_function (a)
+#endif
+#else /* ! powerpc */
+static const int have_mftb = 0;
+#define MFTB(a) \
+ do { \
+ a[0] = 0; \
+ a[1] = 0; \
+ ASSERT_FAIL (mftb not available); \
+ } while (0)
+#endif
+
+/* Unicos 10.X has syssgi(), but not mmap(). */
+#if HAVE_SYSSGI && HAVE_MMAP
+static const int have_sgi = 1;
+#else
+static const int have_sgi = 0;
+#endif
+
+#if HAVE_READ_REAL_TIME
+static const int have_rrt = 1;
+#else
+static const int have_rrt = 0;
+#define read_real_time(t,s) ASSERT_FAIL (read_real_time not available)
+#define time_base_to_time(t,s) ASSERT_FAIL (time_base_to_time not available)
+#define RTC_POWER 1
+#define RTC_POWER_PC 2
+#define timebasestruct_t struct timebasestruct_dummy
+struct timebasestruct_dummy {
+ int flag;
+ unsigned int tb_high;
+ unsigned int tb_low;
+};
+#endif
+
+#if HAVE_CLOCK_GETTIME
+static const int have_cgt = 1;
+#define struct_timespec struct timespec
+#else
+static const int have_cgt = 0;
+#define struct_timespec struct timespec_dummy
+#define clock_gettime(id,ts) (ASSERT_FAIL (clock_gettime not available), -1)
+#define clock_getres(id,ts) (ASSERT_FAIL (clock_getres not available), -1)
+#endif
+
+#if HAVE_GETRUSAGE
+static const int have_grus = 1;
+#define struct_rusage struct rusage
+#else
+static const int have_grus = 0;
+#define getrusage(n,ru) ASSERT_FAIL (getrusage not available)
+#define struct_rusage struct rusage_dummy
+#endif
+
+#if HAVE_GETTIMEOFDAY
+static const int have_gtod = 1;
+#define struct_timeval struct timeval
+#else
+static const int have_gtod = 0;
+#define gettimeofday(tv,tz) ASSERT_FAIL (gettimeofday not available)
+#define struct_timeval struct timeval_dummy
+#endif
+
+#if HAVE_TIMES
+static const int have_times = 1;
+#define struct_tms struct tms
+#else
+static const int have_times = 0;
+#define times(tms) ASSERT_FAIL (times not available)
+#define struct_tms struct tms_dummy
+#endif
+
+struct tms_dummy {
+ long tms_utime;
+};
+struct timeval_dummy {
+ long tv_sec;
+ long tv_usec;
+};
+struct rusage_dummy {
+ struct_timeval ru_utime;
+};
+struct timespec_dummy {
+ long tv_sec;
+ long tv_nsec;
+};
+
+static int use_cycles;
+static int use_mftb;
+static int use_sgi;
+static int use_rrt;
+static int use_cgt;
+static int use_gtod;
+static int use_grus;
+static int use_times;
+static int use_tick_boundary;
+
+static unsigned start_cycles[2];
+static stck_t start_stck;
+static unsigned start_mftb[2];
+static unsigned start_sgi;
+static timebasestruct_t start_rrt;
+static struct_timespec start_cgt;
+static struct_rusage start_grus;
+static struct_timeval start_gtod;
+static struct_tms start_times;
+
+static double cycles_limit = 1e100;
+static double mftb_unittime;
+static double sgi_unittime;
+static double cgt_unittime;
+static double grus_unittime;
+static double gtod_unittime;
+static double times_unittime;
+
+/* for RTC_POWER format, ie. seconds and nanoseconds */
+#define TIMEBASESTRUCT_SECS(t) ((t)->tb_high + (t)->tb_low * 1e-9)
+
+
+/* Return a string representing a time in seconds, nicely formatted.
+ Eg. "10.25ms". */
+char *
+unittime_string (double t)
+{
+ static char buf[128];
+
+ const char *unit;
+ int prec;
+
+ /* choose units and scale */
+ if (t < 1e-6)
+ t *= 1e9, unit = "ns";
+ else if (t < 1e-3)
+ t *= 1e6, unit = "us";
+ else if (t < 1.0)
+ t *= 1e3, unit = "ms";
+ else
+ unit = "s";
+
+ /* want 4 significant figures */
+ if (t < 1.0)
+ prec = 4;
+ else if (t < 10.0)
+ prec = 3;
+ else if (t < 100.0)
+ prec = 2;
+ else
+ prec = 1;
+
+ sprintf (buf, "%.*f%s", prec, t, unit);
+ return buf;
+}
+
+
+static jmp_buf cycles_works_buf;
+
+static RETSIGTYPE
+cycles_works_handler (int sig)
+{
+ longjmp (cycles_works_buf, 1);
+}
+
+int
+cycles_works_p (void)
+{
+ static int result = -1;
+
+ if (result != -1)
+ goto done;
+
+ /* FIXME: On linux, the cycle counter is not saved and restored over
+ * context switches, making it almost useless for precise cputime
+ * measurements. When available, it's better to use clock_gettime,
+ * which seems to have reasonable accuracy (tested on x86_32,
+ * linux-2.6.26, glibc-2.7). However, there are also some linux
+ * systems where clock_gettime is broken in one way or the other,
+ * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
+ * kind-of implemented but broken (needs code to detect that), and
+ * on those systems a wall-clock cycle counter is the least bad
+ * fallback.
+ *
+ * So we need some code to disable the cycle counter on some but not
+ * all linux systems. */
+#ifdef SIGILL
+ {
+ RETSIGTYPE (*old_handler) (int);
+ unsigned cycles[2];
+
+ old_handler = signal (SIGILL, cycles_works_handler);
+ if (old_handler == SIG_ERR)
+ {
+ if (speed_option_verbose)
+ printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
+ goto yes;
+ }
+ if (setjmp (cycles_works_buf))
+ {
+ if (speed_option_verbose)
+ printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
+ result = 0;
+ goto done;
+ }
+ speed_cyclecounter (cycles);
+ signal (SIGILL, old_handler);
+ if (speed_option_verbose)
+ printf ("cycles_works_p(): speed_cyclecounter() works\n");
+ }
+#else
+
+ if (speed_option_verbose)
+ printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
+ goto yes;
+#endif
+
+ yes:
+ result = 1;
+
+ done:
+ return result;
+}
+
+
+/* The number of clock ticks per second, but looking at sysconf rather than
+ just CLK_TCK, where possible. */
+long
+clk_tck (void)
+{
+ static long result = -1L;
+ if (result != -1L)
+ return result;
+
+#if HAVE_SYSCONF
+ result = sysconf (_SC_CLK_TCK);
+ if (result != -1L)
+ {
+ if (speed_option_verbose)
+ printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", result);
+ return result;
+ }
+
+ fprintf (stderr,
+ "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
+#endif
+
+#ifdef CLK_TCK
+ result = CLK_TCK;
+ if (speed_option_verbose)
+ printf ("CLK_TCK is %ld per second\n", result);
+ return result;
+#else
+ fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
+ abort ();
+#endif
+}
+
+
+/* If two times can be observed less than half a clock tick apart, then
+ assume "get" is microsecond accurate.
+
+ Two times only 1 microsecond apart are not believed, since some kernels
+ take it upon themselves to ensure gettimeofday doesn't return the same
+ value twice, for the benefit of applications using it for a timestamp.
+ This is obviously very stupid given the speed of CPUs these days.
+
+ Making "reps" many calls to noop_1() is designed to waste some CPU, with
+ a view to getting measurements 2 microseconds (or more) apart. "reps" is
+ increased progressively until such a period is seen.
+
+ The outer loop "attempts" are just to allow for any random nonsense or
+ system load upsetting the measurements (ie. making two successive calls
+ to "get" come out as a longer interval than normal).
+
+ Bugs:
+
+ The assumption that any interval less than a half tick implies
+ microsecond resolution is obviously fairly rash, the true resolution
+ could be anything between a microsecond and that half tick. Perhaps
+ something special would have to be done on a system where this is the
+ case, since there's no obvious reliable way to detect it
+ automatically. */
+
+#define MICROSECONDS_P(name, type, get, sec, usec) \
+ { \
+ static int result = -1; \
+ type st, et; \
+ long dt, half_tick; \
+ unsigned attempt, reps, i, j; \
+ \
+ if (result != -1) \
+ return result; \
+ \
+ result = 0; \
+ half_tick = (1000000L / clk_tck ()) / 2; \
+ \
+ for (attempt = 0; attempt < 5; attempt++) \
+ { \
+ reps = 0; \
+ for (;;) \
+ { \
+ get (st); \
+ for (i = 0; i < reps; i++) \
+ for (j = 0; j < 100; j++) \
+ noop_1 (CNST_LIMB(0)); \
+ get (et); \
+ \
+ dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st); \
+ \
+ if (speed_option_verbose >= 2) \
+ printf ("%s attempt=%u, reps=%u, dt=%ld\n", \
+ name, attempt, reps, dt); \
+ \
+ if (dt >= 2) \
+ break; \
+ \
+ reps = (reps == 0 ? 1 : 2*reps); \
+ if (reps == 0) \
+ break; /* uint overflow, not normal */ \
+ } \
+ \
+ if (dt < half_tick) \
+ { \
+ result = 1; \
+ break; \
+ } \
+ } \
+ \
+ if (speed_option_verbose) \
+ { \
+ if (result) \
+ printf ("%s is microsecond accurate\n", name); \
+ else \
+ printf ("%s is only %s clock tick accurate\n", \
+ name, unittime_string (1.0/clk_tck())); \
+ } \
+ return result; \
+ }
+
+
+int
+gettimeofday_microseconds_p (void)
+{
+#define call_gettimeofday(t) gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t) ((t).tv_sec)
+#define timeval_tv_usec(t) ((t).tv_usec)
+ MICROSECONDS_P ("gettimeofday", struct_timeval,
+ call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
+}
+
+int
+getrusage_microseconds_p (void)
+{
+#define call_getrusage(t) getrusage (0, &(t))
+#define rusage_tv_sec(t) ((t).ru_utime.tv_sec)
+#define rusage_tv_usec(t) ((t).ru_utime.tv_usec)
+ MICROSECONDS_P ("getrusage", struct_rusage,
+ call_getrusage, rusage_tv_sec, rusage_tv_usec);
+}
+
+/* Test whether getrusage goes backwards, return non-zero if it does
+ (suggesting it's flawed).
+
+ On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
+ microsecond accurate, but has been seen remaining unchanged after many
+ microseconds have elapsed. It also regularly goes backwards by 1000 to
+ 5000 usecs, this has been seen after between 500 and 4000 attempts taking
+ perhaps 0.03 seconds. We consider this too broken for good measuring.
+ We used to have configure pretend getrusage didn't exist on this system,
+ but a runtime test should be more reliable, since we imagine the problem
+ is not confined to just this exact system tuple. */
+
+int
+getrusage_backwards_p (void)
+{
+ static int result = -1;
+ struct rusage start, prev, next;
+ long d;
+ int i;
+
+ if (result != -1)
+ return result;
+
+ getrusage (0, &start);
+ memcpy (&next, &start, sizeof (next));
+
+ result = 0;
+ i = 0;
+ for (;;)
+ {
+ memcpy (&prev, &next, sizeof (prev));
+ getrusage (0, &next);
+
+ if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
+ || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
+ && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
+ {
+ if (speed_option_verbose)
+ printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
+ i,
+ (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
+ (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
+ result = 1;
+ break;
+ }
+
+ /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
+ attempts, whichever comes first */
+ d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
+ + (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
+ i++;
+ if (i > 50000 || (i > 1000 && d > 100000))
+ break;
+ }
+
+ return result;
+}
+
+/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
+ of glibc (some time post 2.2).
+
+ CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
+ defined, but returning -1 for an error). */
+
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+# define CGT_ID CLOCK_PROCESS_CPUTIME_ID
+#else
+# ifdef CLOCK_VIRTUAL
+# define CGT_ID CLOCK_VIRTUAL
+# endif
+#endif
+#ifdef CGT_ID
+const int have_cgt_id = 1;
+#else
+const int have_cgt_id = 0;
+# define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1)
+#endif
+
+#define CGT_DELAY_COUNT 1000
+
+int
+cgt_works_p (void)
+{
+ static int result = -1;
+ struct_timespec unit;
+
+ if (! have_cgt)
+ return 0;
+
+ if (! have_cgt_id)
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime don't know what ID to use\n");
+ result = 0;
+ return result;
+ }
+
+ if (result != -1)
+ return result;
+
+ /* trial run to see if it works */
+ if (clock_gettime (CGT_ID, &unit) != 0)
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+ result = 0;
+ return result;
+ }
+
+ /* get the resolution */
+ if (clock_getres (CGT_ID, &unit) != 0)
+ {
+ if (speed_option_verbose)
+ printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
+ result = 0;
+ return result;
+ }
+
+ cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
+ if (speed_option_verbose)
+ printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
+
+ if (cgt_unittime < 10e-9)
+ {
+ /* Do we believe this? */
+ struct timespec start, end;
+ static volatile int counter;
+ double duration;
+ if (clock_gettime (CGT_ID, &start))
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+ result = 0;
+ return result;
+ }
+ /* Loop of at least 1000 memory accesses, ought to take at
+ least 100 ns*/
+ for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
+ ;
+ if (clock_gettime (CGT_ID, &end))
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+ result = 0;
+ return result;
+ }
+ duration = (end.tv_sec + end.tv_nsec * 1e-9
+ - start.tv_sec - start.tv_nsec * 1e-9);
+ if (speed_option_verbose)
+ printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
+ CGT_DELAY_COUNT, unittime_string (duration));
+ if (duration < 100e-9)
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d not believable\n", CGT_ID);
+ result = 0;
+ return result;
+ }
+ }
+ result = 1;
+ return result;
+}
+
+
+static double
+freq_measure_mftb_one (void)
+{
+#define call_gettimeofday(t) gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t) ((t).tv_sec)
+#define timeval_tv_usec(t) ((t).tv_usec)
+ FREQ_MEASURE_ONE ("mftb", struct_timeval,
+ call_gettimeofday, MFTB,
+ timeval_tv_sec, timeval_tv_usec);
+}
+
+
+static jmp_buf mftb_works_buf;
+
+static RETSIGTYPE
+mftb_works_handler (int sig)
+{
+ longjmp (mftb_works_buf, 1);
+}
+
+int
+mftb_works_p (void)
+{
+ unsigned a[2];
+ RETSIGTYPE (*old_handler) (int);
+ double cycletime;
+
+ /* suppress a warning about a[] unused */
+ a[0] = 0;
+
+ if (! have_mftb)
+ return 0;
+
+#ifdef SIGILL
+ old_handler = signal (SIGILL, mftb_works_handler);
+ if (old_handler == SIG_ERR)
+ {
+ if (speed_option_verbose)
+ printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
+ return 1;
+ }
+ if (setjmp (mftb_works_buf))
+ {
+ if (speed_option_verbose)
+ printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
+ return 0;
+ }
+ MFTB (a);
+ signal (SIGILL, old_handler);
+ if (speed_option_verbose)
+ printf ("mftb_works_p(): mftb works\n");
+#else
+
+ if (speed_option_verbose)
+ printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
+#endif
+
+#if ! HAVE_GETTIMEOFDAY
+ if (speed_option_verbose)
+ printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
+ return 0;
+#endif
+
+ /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
+ other chips it can be driven from an external clock. */
+ cycletime = freq_measure ("mftb", freq_measure_mftb_one);
+ if (cycletime == -1.0)
+ {
+ if (speed_option_verbose)
+ printf ("mftb_works_p(): cannot measure mftb period\n");
+ return 0;
+ }
+
+ mftb_unittime = cycletime;
+ return 1;
+}
+
+
+volatile unsigned *sgi_addr;
+
+int
+sgi_works_p (void)
+{
+#if HAVE_SYSSGI && HAVE_MMAP
+ static int result = -1;
+
+ size_t pagesize, offset;
+ __psunsigned_t phys, physpage;
+ void *virtpage;
+ unsigned period_picoseconds;
+ int size, fd;
+
+ if (result != -1)
+ return result;
+
+ phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
+ if (phys == (__psunsigned_t) -1)
+ {
+ /* ENODEV is the error when a counter is not available */
+ if (speed_option_verbose)
+ printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
+ result = 0;
+ return result;
+ }
+ sgi_unittime = period_picoseconds * 1e-12;
+
+ /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
+ Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
+ obvious way to identify that without SGI_CYCLECNTR_SIZE. */
+#ifdef SGI_CYCLECNTR_SIZE
+ size = syssgi (SGI_CYCLECNTR_SIZE);
+ if (size == -1)
+ {
+ if (speed_option_verbose)
+ {
+ printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
+ printf (" will assume size==4\n");
+ }
+ size = 32;
+ }
+#else
+ size = 32;
+#endif
+
+ if (size < 32)
+ {
+ printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
+ result = 0;
+ return result;
+ }
+
+ pagesize = getpagesize();
+ offset = (size_t) phys & (pagesize-1);
+ physpage = phys - offset;
+
+ /* shouldn't cross over a page boundary */
+ ASSERT_ALWAYS (offset + size/8 <= pagesize);
+
+ fd = open("/dev/mmem", O_RDONLY);
+ if (fd == -1)
+ {
+ if (speed_option_verbose)
+ printf ("open /dev/mmem: %s\n", strerror (errno));
+ result = 0;
+ return result;
+ }
+
+ virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
+ if (virtpage == (void *) -1)
+ {
+ if (speed_option_verbose)
+ printf ("mmap /dev/mmem: %s\n", strerror (errno));
+ result = 0;
+ return result;
+ }
+
+ /* address of least significant 4 bytes, knowing mips is big endian */
+ sgi_addr = (unsigned *) ((char *) virtpage + offset
+ + size/8 - sizeof(unsigned));
+ result = 1;
+ return result;
+
+#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
+ return 0;
+#endif
+}
+
+
+#define DEFAULT(var,n) \
+ do { \
+ if (! (var)) \
+ (var) = (n); \
+ } while (0)
+
+void
+speed_time_init (void)
+{
+ double supplement_unittime = 0.0;
+
+ static int speed_time_initialized = 0;
+ if (speed_time_initialized)
+ return;
+ speed_time_initialized = 1;
+
+ speed_cycletime_init ();
+
+ if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
+ {
+ use_cycles = 1;
+ DEFAULT (speed_cycletime, 1.0);
+ speed_unittime = speed_cycletime;
+ DEFAULT (speed_precision, 10000);
+ strcpy (speed_time_string, "CPU cycle counter");
+
+ /* only used if a supplementary method is chosen below */
+ cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
+ * speed_cycletime;
+
+ if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
+ {
+ /* this is a good combination */
+ use_grus = 1;
+ supplement_unittime = grus_unittime = 1.0e-6;
+ strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
+ }
+ else if (have_cycles == 1)
+ {
+ /* When speed_cyclecounter has a limited range, look for something
+ to supplement it. */
+ if (have_gtod && gettimeofday_microseconds_p())
+ {
+ use_gtod = 1;
+ supplement_unittime = gtod_unittime = 1.0e-6;
+ strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
+ }
+ else if (have_grus)
+ {
+ use_grus = 1;
+ supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
+ sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
+ }
+ else if (have_times)
+ {
+ use_times = 1;
+ supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
+ sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
+ }
+ else if (have_gtod)
+ {
+ use_gtod = 1;
+ supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
+ sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
+ }
+ else
+ {
+ fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
+ fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n");
+ }
+ }
+
+ if (use_grus || use_times || use_gtod)
+ {
+ /* must know cycle period to compare cycles to other measuring
+ (via cycles_limit) */
+ speed_cycletime_need_seconds ();
+
+ if (speed_precision * supplement_unittime > cycles_limit)
+ {
+ fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
+ fprintf (stderr, " cycle counter and limited precision supplemental method\n");
+ fprintf (stderr, " (%s)\n", speed_time_string);
+ }
+ }
+ }
+ else if (have_stck)
+ {
+ strcpy (speed_time_string, "STCK timestamp");
+ /* stck is in units of 2^-12 microseconds, which is very likely higher
+ resolution than a cpu cycle */
+ if (speed_cycletime == 0.0)
+ speed_cycletime_fail
+ ("Need to know CPU frequency for effective stck unit");
+ speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
+ DEFAULT (speed_precision, 10000);
+ }
+ else if (have_mftb && mftb_works_p ())
+ {
+ use_mftb = 1;
+ DEFAULT (speed_precision, 10000);
+ speed_unittime = mftb_unittime;
+ sprintf (speed_time_string, "mftb counter (%s)",
+ unittime_string (speed_unittime));
+ }
+ else if (have_sgi && sgi_works_p ())
+ {
+ use_sgi = 1;
+ DEFAULT (speed_precision, 10000);
+ speed_unittime = sgi_unittime;
+ sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
+ unittime_string (speed_unittime));
+ /* supplemented with getrusage, which we assume to have 1ms resolution */
+ use_grus = 1;
+ supplement_unittime = 1e-3;
+ }
+ else if (have_rrt)
+ {
+ timebasestruct_t t;
+ use_rrt = 1;
+ DEFAULT (speed_precision, 10000);
+ read_real_time (&t, sizeof(t));
+ switch (t.flag) {
+ case RTC_POWER:
+ /* FIXME: What's the actual RTC resolution? */
+ speed_unittime = 1e-7;
+ strcpy (speed_time_string, "read_real_time() power nanoseconds");
+ break;
+ case RTC_POWER_PC:
+ t.tb_high = 1;
+ t.tb_low = 0;
+ time_base_to_time (&t, sizeof(t));
+ speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
+ sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
+ unittime_string (speed_unittime));
+ break;
+ default:
+ fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
+ t.flag);
+ abort ();
+ }
+ }
+ else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
+ {
+ /* use clock_gettime if microsecond or better resolution */
+ choose_cgt:
+ use_cgt = 1;
+ speed_unittime = cgt_unittime;
+ DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
+ strcpy (speed_time_string, "microsecond accurate clock_gettime()");
+ }
+ else if (have_times && clk_tck() > 1000000)
+ {
+ /* Cray vector systems have times() which is clock cycle resolution
+ (eg. 450 MHz). */
+ DEFAULT (speed_precision, 10000);
+ goto choose_times;
+ }
+ else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
+ {
+ use_grus = 1;
+ speed_unittime = grus_unittime = 1.0e-6;
+ DEFAULT (speed_precision, 1000);
+ strcpy (speed_time_string, "microsecond accurate getrusage()");
+ }
+ else if (have_gtod && gettimeofday_microseconds_p())
+ {
+ use_gtod = 1;
+ speed_unittime = gtod_unittime = 1.0e-6;
+ DEFAULT (speed_precision, 1000);
+ strcpy (speed_time_string, "microsecond accurate gettimeofday()");
+ }
+ else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
+ {
+ /* use clock_gettime if 1 tick or better resolution */
+ goto choose_cgt;
+ }
+ else if (have_times)
+ {
+ use_tick_boundary = 1;
+ DEFAULT (speed_precision, 200);
+ choose_times:
+ use_times = 1;
+ speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
+ sprintf (speed_time_string, "%s clock tick times()",
+ unittime_string (speed_unittime));
+ }
+ else if (have_grus)
+ {
+ use_grus = 1;
+ use_tick_boundary = 1;
+ speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
+ DEFAULT (speed_precision, 200);
+ sprintf (speed_time_string, "%s clock tick getrusage()\n",
+ unittime_string (speed_unittime));
+ }
+ else if (have_gtod)
+ {
+ use_gtod = 1;
+ use_tick_boundary = 1;
+ speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
+ DEFAULT (speed_precision, 200);
+ sprintf (speed_time_string, "%s clock tick gettimeofday()",
+ unittime_string (speed_unittime));
+ }
+ else
+ {
+ fprintf (stderr, "No time measuring method available\n");
+ fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
+ abort ();
+ }
+
+ if (speed_option_verbose)
+ {
+ printf ("speed_time_init: %s\n", speed_time_string);
+ printf (" speed_precision %d\n", speed_precision);
+ printf (" speed_unittime %.2g\n", speed_unittime);
+ if (supplement_unittime)
+ printf (" supplement_unittime %.2g\n", supplement_unittime);
+ printf (" use_tick_boundary %d\n", use_tick_boundary);
+ if (have_cycles)
+ printf (" cycles_limit %.2g seconds\n", cycles_limit);
+ }
+}
+
+
+
+/* Burn up CPU until a clock tick boundary, for greater accuracy. Set the
+ corresponding "start_foo" appropriately too. */
+
+void
+grus_tick_boundary (void)
+{
+ struct_rusage prev;
+ getrusage (0, &prev);
+ do {
+ getrusage (0, &start_grus);
+ } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
+}
+
+void
+gtod_tick_boundary (void)
+{
+ struct_timeval prev;
+ gettimeofday (&prev, NULL);
+ do {
+ gettimeofday (&start_gtod, NULL);
+ } while (start_gtod.tv_usec == prev.tv_usec);
+}
+
+void
+times_tick_boundary (void)
+{
+ struct_tms prev;
+ times (&prev);
+ do
+ times (&start_times);
+ while (start_times.tms_utime == prev.tms_utime);
+}
+
+
+/* "have_" values are tested to let unused code go dead. */
+
+void
+speed_starttime (void)
+{
+ speed_time_init ();
+
+ if (have_grus && use_grus)
+ {
+ if (use_tick_boundary)
+ grus_tick_boundary ();
+ else
+ getrusage (0, &start_grus);
+ }
+
+ if (have_gtod && use_gtod)
+ {
+ if (use_tick_boundary)
+ gtod_tick_boundary ();
+ else
+ gettimeofday (&start_gtod, NULL);
+ }
+
+ if (have_times && use_times)
+ {
+ if (use_tick_boundary)
+ times_tick_boundary ();
+ else
+ times (&start_times);
+ }
+
+ if (have_cgt && use_cgt)
+ clock_gettime (CGT_ID, &start_cgt);
+
+ if (have_rrt && use_rrt)
+ read_real_time (&start_rrt, sizeof(start_rrt));
+
+ if (have_sgi && use_sgi)
+ start_sgi = *sgi_addr;
+
+ if (have_mftb && use_mftb)
+ MFTB (start_mftb);
+
+ if (have_stck && use_stck)
+ STCK (start_stck);
+
+ /* Cycles sampled last for maximum accuracy. */
+ if (have_cycles && use_cycles)
+ speed_cyclecounter (start_cycles);
+}
+
+
+/* Calculate the difference between two cycle counter samples, as a "double"
+ counter of cycles.
+
+ The start and end values are allowed to cancel in integers in case the
+ counter values are bigger than the 53 bits that normally fit in a double.
+
+ This works even if speed_cyclecounter() puts a value bigger than 32-bits
+ in the low word (the high word always gets a 2**32 multiplier though). */
+
+double
+speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
+{
+ unsigned d;
+ double t;
+
+ if (have_cycles == 1)
+ {
+ t = (end[0] - start[0]);
+ }
+ else
+ {
+ d = end[0] - start[0];
+ t = d - (d > end[0] ? M_2POWU : 0.0);
+ t += (end[1] - start[1]) * M_2POW32;
+ }
+ return t;
+}
+
+
+double
+speed_mftb_diff (const unsigned end[2], const unsigned start[2])
+{
+ unsigned d;
+ double t;
+
+ d = end[0] - start[0];
+ t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
+ t += (end[1] - start[1]) * M_2POW32;
+ return t;
+}
+
+
+/* Calculate the difference between "start" and "end" using fields "sec" and
+ "psec", where each "psec" is a "punit" of a second.
+
+ The seconds parts are allowed to cancel before being combined with the
+ psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
+ double.
+
+ Total time is only calculated in a "double" since an integer count of
+ psecs might overflow. 2^32 microseconds is only a bit over an hour, or
+ 2^32 nanoseconds only about 4 seconds.
+
+ The casts to "long" are for the benefit of timebasestruct_t, where the
+ fields are only "unsigned int", but we want a signed difference. */
+
+#define DIFF_SECS_ROUTINE(sec, psec, punit) \
+ { \
+ long sec_diff, psec_diff; \
+ sec_diff = (long) end->sec - (long) start->sec; \
+ psec_diff = (long) end->psec - (long) start->psec; \
+ return (double) sec_diff + punit * (double) psec_diff; \
+ }
+
+double
+timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
+{
+ DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
+}
+
+double
+rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
+{
+ DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
+}
+
+double
+timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
+{
+ DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
+}
+
+/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
+double
+timebasestruct_diff_secs (const timebasestruct_t *end,
+ const timebasestruct_t *start)
+{
+ DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
+}
+
+
+double
+speed_endtime (void)
+{
+#define END_USE(name,value) \
+ do { \
+ if (speed_option_verbose >= 3) \
+ printf ("speed_endtime(): used %s\n", name); \
+ result = value; \
+ goto done; \
+ } while (0)
+
+#define END_ENOUGH(name,value) \
+ do { \
+ if (speed_option_verbose >= 3) \
+ printf ("speed_endtime(): %s gives enough precision\n", name); \
+ result = value; \
+ goto done; \
+ } while (0)
+
+#define END_EXCEED(name,value) \
+ do { \
+ if (speed_option_verbose >= 3) \
+ printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
+ name); \
+ result = value; \
+ goto done; \
+ } while (0)
+
+ unsigned end_cycles[2];
+ stck_t end_stck;
+ unsigned end_mftb[2];
+ unsigned end_sgi;
+ timebasestruct_t end_rrt;
+ struct_timespec end_cgt;
+ struct_timeval end_gtod;
+ struct_rusage end_grus;
+ struct_tms end_times;
+ double t_gtod, t_grus, t_times, t_cgt;
+ double t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
+ double result;
+
+ /* Cycles sampled first for maximum accuracy.
+ "have_" values tested to let unused code go dead. */
+
+ if (have_cycles && use_cycles) speed_cyclecounter (end_cycles);
+ if (have_stck && use_stck) STCK (end_stck);
+ if (have_mftb && use_mftb) MFTB (end_mftb);
+ if (have_sgi && use_sgi) end_sgi = *sgi_addr;
+ if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt));
+ if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt);
+ if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL);
+ if (have_grus && use_grus) getrusage (0, &end_grus);
+ if (have_times && use_times) times (&end_times);
+
+ result = -1.0;
+
+ if (speed_option_verbose >= 4)
+ {
+ printf ("speed_endtime():\n");
+ if (use_cycles)
+ printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n",
+ start_cycles[1], start_cycles[0],
+ end_cycles[1], end_cycles[0]);
+
+ if (use_stck)
+ printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck);
+
+ if (use_mftb)
+ printf (" mftb 0x%X,%08X -> 0x%X,%08X\n",
+ start_mftb[1], start_mftb[0],
+ end_mftb[1], end_mftb[0]);
+
+ if (use_sgi)
+ printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi);
+
+ if (use_rrt)
+ printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n",
+ start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
+ end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
+
+ if (use_cgt)
+ printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n",
+ (long) start_cgt.tv_sec, (long) start_cgt.tv_nsec,
+ (long) end_cgt.tv_sec, (long) end_cgt.tv_nsec);
+
+ if (use_gtod)
+ printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n",
+ (long) start_gtod.tv_sec, (long) start_gtod.tv_usec,
+ (long) end_gtod.tv_sec, (long) end_gtod.tv_usec);
+
+ if (use_grus)
+ printf (" getrusage %ld.%06ld -> %ld.%06ld\n",
+ (long) start_grus.ru_utime.tv_sec,
+ (long) start_grus.ru_utime.tv_usec,
+ (long) end_grus.ru_utime.tv_sec,
+ (long) end_grus.ru_utime.tv_usec);
+
+ if (use_times)
+ printf (" times %ld -> %ld\n",
+ start_times.tms_utime, end_times.tms_utime);
+ }
+
+ if (use_rrt)
+ {
+ time_base_to_time (&start_rrt, sizeof(start_rrt));
+ time_base_to_time (&end_rrt, sizeof(end_rrt));
+ t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
+ END_USE ("read_real_time()", t_rrt);
+ }
+
+ if (use_cgt)
+ {
+ t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
+ END_USE ("clock_gettime()", t_cgt);
+ }
+
+ if (use_grus)
+ {
+ t_grus = rusage_diff_secs (&end_grus, &start_grus);
+
+ /* Use getrusage() if the cycle counter limit would be exceeded, or if
+ it provides enough accuracy already. */
+ if (use_cycles)
+ {
+ if (t_grus >= speed_precision*grus_unittime)
+ END_ENOUGH ("getrusage()", t_grus);
+ if (t_grus >= cycles_limit)
+ END_EXCEED ("getrusage()", t_grus);
+ }
+ }
+
+ if (use_times)
+ {
+ t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
+
+ /* Use times() if the cycle counter limit would be exceeded, or if
+ it provides enough accuracy already. */
+ if (use_cycles)
+ {
+ if (t_times >= speed_precision*times_unittime)
+ END_ENOUGH ("times()", t_times);
+ if (t_times >= cycles_limit)
+ END_EXCEED ("times()", t_times);
+ }
+ }
+
+ if (use_gtod)
+ {
+ t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
+
+ /* Use gettimeofday() if it measured a value bigger than the cycle
+ counter can handle. */
+ if (use_cycles)
+ {
+ if (t_gtod >= cycles_limit)
+ END_EXCEED ("gettimeofday()", t_gtod);
+ }
+ }
+
+ if (use_mftb)
+ {
+ t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
+ END_USE ("mftb", t_mftb);
+ }
+
+ if (use_stck)
+ {
+ t_stck = (end_stck - start_stck) * STCK_PERIOD;
+ END_USE ("stck", t_stck);
+ }
+
+ if (use_sgi)
+ {
+ t_sgi = (end_sgi - start_sgi) * sgi_unittime;
+ END_USE ("SGI hardware counter", t_sgi);
+ }
+
+ if (use_cycles)
+ {
+ t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
+ * speed_cycletime;
+ END_USE ("cycle counter", t_cycles);
+ }
+
+ if (use_grus && getrusage_microseconds_p())
+ END_USE ("getrusage()", t_grus);
+
+ if (use_gtod && gettimeofday_microseconds_p())
+ END_USE ("gettimeofday()", t_gtod);
+
+ if (use_times) END_USE ("times()", t_times);
+ if (use_grus) END_USE ("getrusage()", t_grus);
+ if (use_gtod) END_USE ("gettimeofday()", t_gtod);
+
+ fprintf (stderr, "speed_endtime(): oops, no time method available\n");
+ abort ();
+
+ done:
+ if (result < 0.0)
+ {
+ if (speed_option_verbose >= 2)
+ fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
+ result = 0.0;
+ }
+ return result;
+}