/*
 * a timing utilities library
 *
 * Requires 64bit integers to work.
 *
 * $Id: lib_timing.c,v 1.28 1997/11/20 05:10:51 lm Exp $
 *
 * Copyright (c) 1994-1996 Larry McVoy.
 */
#define	 _LIB /* bench.h needs this */
#include "bench.h"

/*
 * nz(x): yields 1 when x compares equal to 0, otherwise x.  Used to keep a
 * divisor from being zero.  The argument is evaluated twice.
 */
#define	nz(x)	((x) == 0 ? 1 : (x))

/*
 * I know you think these should be 2^10 and 2^20, but people are quoting
 * disk sizes in powers of 10, and bandwidths are all power of ten.
 * Deal with it.
 *
 * Both constants are doubles, so dividing an integer byte count by one of
 * them produces a floating-point result.
 */
#define	MB	(1000*1000.0)
#define	KB	(1000.0)

/*
 * start_tv/stop_tv bound the interval that the reporting routines print;
 * dummy_tv is the scratch target used by timing_start().
 */
static struct timeval start_tv, stop_tv, dummy_tv;
/*
 * Stream the reporting routines write to; replaced by timing().
 * NOTE(review): a file-scope initializer must be a constant expression and
 * `stderr` is not one on every C library -- confirm on the target platforms.
 */
FILE	*ftiming = stderr;
uint64		use_result_dummy;	/* !static for optimizers. */
/* Value stored by save_n() and handed back by get_n(). */
static	uint64	iterations;
static	void	init_timing(void);


#ifndef	hpux
#define RUSAGE
#endif
#ifdef	RUSAGE
#include <sys/resource.h>
/* Convert a struct timeval lvalue (not a pointer) to seconds as a double. */
#define	SECS(tv)	(tv.tv_sec + tv.tv_usec / 1000000.0)
/* Change in rusage field `f` between ru_start and ru_stop, cast to int. */
#define	mine(f)		(int)(ru_stop.f - ru_start.f)

/* Resource-usage snapshots taken by start() and stop() respectively. */
static struct rusage ru_start, ru_stop;

/*
 * Print a one-line resource-usage summary for the last start()/stop()
 * interval to ftiming: wall-clock, system, user and idle seconds, the idle
 * share as a percentage, followed by the block I/O, page-fault and
 * context-switch deltas taken from the two getrusage() snapshots.
 */
void
rusage(void)
{
	double	real_secs = timespent();
	double	sys_secs = SECS(ru_stop.ru_stime) - SECS(ru_start.ru_stime);
	double	user_secs = SECS(ru_stop.ru_utime) - SECS(ru_start.ru_utime);
	double	idle_secs = real_secs - (sys_secs + user_secs);
	double	stall_pct = idle_secs / real_secs * 100;
	int	switches = mine(ru_nvcsw) + mine(ru_nivcsw);

	fprintf(ftiming, "real=%.2f sys=%.2f user=%.2f idle=%.2f stall=%.0f%% ",
	    real_secs, sys_secs, user_secs, idle_secs, stall_pct);
	fprintf(ftiming, "rd=%d wr=%d min=%d maj=%d ctx=%d\n",
	    mine(ru_inblock), mine(ru_oublock),
	    mine(ru_minflt), mine(ru_majflt),
	    switches);
}

#endif	/* RUSAGE */
/*
 * Redirect output someplace else: every later report printed through
 * ftiming goes to `out` instead.  The stream is stored, not copied, and is
 * never closed here.
 */
void
timing(FILE *out)
{
	ftiming = out;
}

/*
 * Start timing now.
 *
 * Records the current time of day in *tv, or in the file-level start_tv
 * when tv is NULL.  When RUSAGE is defined the resource-usage snapshot
 * ru_start is taken first.
 */
void
start(struct timeval *tv)
{
	struct timeval	*dest = (tv != NULL) ? tv : &start_tv;

#ifdef	RUSAGE
	getrusage(RUSAGE_SELF, &ru_start);
#endif
	(void) gettimeofday(dest, (struct timezone *) 0);
}

/*
 * Stop ftiming and return real time in microseconds.
 */
uint64
stop(struct timeval *begin, struct timeval *end)
{
	struct timeval tdiff;
	uint64	m;

	if (end == NULL) {
		end = &stop_tv;
	}
	(void) gettimeofday(end, (struct timezone *) 0);
#ifdef	RUSAGE
	getrusage(RUSAGE_SELF, &ru_stop);
#endif

	if (begin == NULL) {
		begin = &start_tv;
	}
	tvsub(&tdiff, end, begin);
	m = tdiff.tv_sec;
	m *= 1000000;
	m += tdiff.tv_usec;
	return (m);
}

uint64
now(void)
{
	struct timeval t;
	uint64	m;

	(void) gettimeofday(&t, (struct timezone *) 0);
	m = t.tv_sec;
	m *= 1000000;
	m += t.tv_usec;
	return (m);
}

/*
 * Floating-point variant of now(): the current time of day in
 * microseconds, returned as a double.
 */
double
Now(void)
{
	struct timeval	tv;
	double	usecs;

	(void) gettimeofday(&tv, (struct timezone *) 0);
	usecs = tv.tv_sec * 1000000.0;
	usecs += tv.tv_usec;
	return (usecs);
}

/*
 * Return the number of microseconds elapsed since the previous call to
 * delta().  The very first call has nothing to compare against: it only
 * records the current time and returns 0.
 *
 * An explicit flag marks whether a previous sample exists.  Testing
 * last.tv_usec for non-zero (as was done before) misreads a genuine sample
 * that happens to land on an exact second boundary as "never called".
 */
uint64
delta(void)
{
	static struct timeval last;
	static int	have_last = 0;
	struct timeval t;
	struct timeval diff;
	uint64	m;

	(void) gettimeofday(&t, (struct timezone *) 0);
	if (!have_last) {
		have_last = 1;
		last = t;
		return (0);
	}
	tvsub(&diff, &t, &last);
	last = t;
	m = diff.tv_sec;
	m *= 1000000;
	m += diff.tv_usec;
	return (m);
}

/*
 * Return the seconds (as a double) elapsed between start_tv and the
 * current time of day.  Neither start_tv nor stop_tv is modified.
 */
double
Delta(void)
{
	struct timeval	current;
	struct timeval	elapsed;
	double	secs;

	(void) gettimeofday(&current, (struct timezone *) 0);
	tvsub(&elapsed, &current, &start_tv);
	secs = elapsed.tv_usec / 1000000.0;
	secs += elapsed.tv_sec;
	return (secs);
}

/* Remember `n` in the file-level `iterations`; get_n() returns it. */
void
save_n(uint64 n)
{
	iterations = n;
}

/* Return the value most recently stored by save_n() (0 if never set). */
uint64
get_n(void)
{
	return (iterations);
}

/*
 * Make the time spent be `usecs` microseconds.
 *
 * start_tv is zeroed and stop_tv is set to `usecs` split into seconds and
 * microseconds, so every routine that reports stop_tv - start_tv sees
 * exactly `usecs`.
 */
void
settime(uint64 usecs)
{
	bzero((void*)&start_tv, sizeof(start_tv));
	stop_tv.tv_sec = usecs / 1000000;
	stop_tv.tv_usec = usecs % 1000000;
}

/*
 * Report a bandwidth figure for the interval start_tv..stop_tv.
 *
 *	bytes	- amount of data moved per run
 *	times	- number of runs inside the interval; the elapsed time is
 *		  divided by it
 *	verbose	- non-zero prints a full sentence, zero prints just
 *		  "<MB> <MB/sec>" on one line
 *
 * In the terse form, values below 1 are printed with six decimals and
 * everything else with two.
 */
void
bandwidth(uint64 bytes, uint64 times, int verbose)
{
	struct timeval	elapsed;
	double	megabytes, secs_per_run, rate;

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs_per_run = elapsed.tv_sec;
	secs_per_run *= 1000000;
	secs_per_run += elapsed.tv_usec;
	secs_per_run /= 1000000;
	secs_per_run /= times;
	megabytes = bytes / MB;
	rate = megabytes / secs_per_run;

	if (verbose) {
		(void) fprintf(ftiming,
		    "%.4f MB in %.4f secs, %.4f MB/sec\n",
		    megabytes, secs_per_run, rate);
		return;
	}
	(void) fprintf(ftiming, megabytes < 1 ? "%.6f " : "%.2f ", megabytes);
	(void) fprintf(ftiming, rate < 1 ? "%.6f\n" : "%.2f\n", rate);
}

/*
 * Print the throughput for `bytes` moved during start_tv..stop_tv as
 * "<n> KB/sec".  An elapsed time of exactly zero is treated as one second.
 */
void
kb(uint64 bytes)
{
	struct timeval	elapsed;
	double	secs, rate;

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
	if (secs == 0) {
		secs = 1;
	}
	rate = bytes / secs;
	(void) fprintf(ftiming, "%.0f KB/sec\n", rate / KB);
}

/*
 * Print the throughput for `bytes` moved during start_tv..stop_tv as
 * "<n.nn> MB/sec".  An elapsed time of exactly zero is treated as one
 * second.
 */
void
mb(uint64 bytes)
{
	struct timeval	elapsed;
	double	secs, rate;

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
	if (secs == 0) {
		secs = 1;
	}
	rate = bytes / secs;
	(void) fprintf(ftiming, "%.2f MB/sec\n", rate / MB);
}

/*
 * Print a latency/throughput line for `xfers` transfers of `size` bytes
 * each that took place during start_tv..stop_tv.
 *
 * The line has three parts: what was transferred, the milliseconds per
 * transfer (no decimals above 100 ms, four decimals otherwise) and the
 * throughput, shown in MB/sec when it exceeds 1 MB/sec and in KB/sec
 * otherwise.
 */
void
latency(uint64 xfers, uint64 size)
{
	struct timeval	elapsed;
	double	secs, msec_each, total_bytes;
	char	*unit = (xfers > 1) ? "/xfer" : "s";

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;

	if (xfers > 1) {
		fprintf(ftiming, "%d %dKB xfers in %.2f secs, ",
		    (int) xfers, (int) (size / KB), secs);
	} else {
		fprintf(ftiming, "%.1fKB in ", size / KB);
	}

	msec_each = secs * 1000 / xfers;
	fprintf(ftiming,
	    msec_each > 100 ? "%.0f millisec%s, " : "%.4f millisec%s, ",
	    msec_each, unit);

	/* the product is formed in 64-bit integer arithmetic, then converted */
	total_bytes = xfers * size;
	if (total_bytes / (MB * secs) > 1) {
		fprintf(ftiming, "%.2f MB/sec\n", total_bytes / (MB * secs));
	} else {
		fprintf(ftiming, "%.2f KB/sec\n", total_bytes / (KB * secs));
	}
}

/*
 * Print how many context switches (`xfers`) happened during
 * start_tv..stop_tv, the elapsed seconds and the microseconds per switch.
 */
void
context(uint64 xfers)
{
	struct timeval	elapsed;
	double	secs, usec_each;

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
	usec_each = secs * 1000000 / xfers;
	fprintf(ftiming,
	    "%d context switches in %.2f secs, %.0f microsec/switch\n",
	    (int)xfers, secs, usec_each);
}

/*
 * Print "<s>: <t> nanoseconds", where t is the start_tv..stop_tv interval
 * divided by `n`, i.e. the time per operation.
 *
 * The interval is accumulated in floating point: multiplying tv_sec by an
 * integer 1000000 overflows where time_t/long is only 32 bits wide.
 */
void
nano(char *s, uint64 n)
{
	struct timeval td;
	double  nsecs;

	tvsub(&td, &stop_tv, &start_tv);
	nsecs = td.tv_sec * 1000000.0 + td.tv_usec;
	nsecs *= 1000;
	fprintf(ftiming, "%s: %.0f nanoseconds\n", s, nsecs / n);
}

/*
 * Print "<s>: <t> microseconds" with four decimals, where t is the
 * start_tv..stop_tv interval divided by `n`.
 *
 * The interval is accumulated in floating point: multiplying tv_sec by an
 * integer 1000000 overflows where time_t/long is only 32 bits wide.
 */
void
micro(char *s, uint64 n)
{
	struct timeval td;
	double	usecs;

	tvsub(&td, &stop_tv, &start_tv);
	usecs = td.tv_sec * 1000000.0 + td.tv_usec;
	usecs /= n;
	fprintf(ftiming, "%s: %.4f microseconds\n", s, usecs);
}

/*
 * Print "<size in MB> <microseconds per operation>" on one line.
 *
 *	sz	- size in bytes; printed divided by MB with six decimals
 *	n	- number of operations the start_tv..stop_tv interval covers
 *
 * Times of 10 microseconds or more are printed without decimals, shorter
 * ones with three.  The interval is accumulated in floating point because
 * multiplying tv_sec by an integer 1000000 overflows where time_t/long is
 * only 32 bits wide.
 */
void
micromb(uint64 sz, uint64 n)
{
	struct timeval td;
	double	megabytes, usecs;

	tvsub(&td, &stop_tv, &start_tv);
	usecs = td.tv_sec * 1000000.0 + td.tv_usec;
	usecs /= n;
	megabytes = sz;
	megabytes /= MB;
	if (usecs >= 10) {
		fprintf(ftiming, "%.6f %.0f\n", megabytes, usecs);
	} else {
		fprintf(ftiming, "%.6f %.3f\n", megabytes, usecs);
	}
}

/*
 * Print "<s>: <t> milliseconds", where t is the start_tv..stop_tv interval
 * in whole milliseconds divided by `n` (integer division).
 *
 * A count of zero is treated as one: unlike the floating-point reporters,
 * this routine divides integers, and dividing by zero is undefined.
 */
void
milli(char *s, uint64 n)
{
	struct timeval td;
	uint64 msecs;

	tvsub(&td, &stop_tv, &start_tv);
	msecs = td.tv_sec * 1000 + td.tv_usec / 1000;
	msecs /= nz(n);
	fprintf(ftiming, "%s: %d milliseconds\n", s, (int)msecs);
}

/*
 * Print "<n> in <secs> secs, <usecs> microseconds each" for the interval
 * start_tv..stop_tv.
 */
void
ptime(uint64 n)
{
	struct timeval	elapsed;
	double	secs, usec_each;

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
	usec_each = secs * 1000000 / n;
	fprintf(ftiming,
	    "%d in %.2f secs, %.0f microseconds each\n",
	    (int)n, secs, usec_each);
}

uint64
tvdelta(struct timeval *start, struct timeval *stop)
{
	struct timeval td;
	uint64	usecs;

	tvsub(&td, stop, start);
	usecs = td.tv_sec;
	usecs *= 1000000;
	usecs += td.tv_usec;
	return (usecs);
}

/*
 * Store t1 - t0 in *tdiff.  When the microsecond difference comes out
 * negative, one second is borrowed so that tv_usec ends up non-negative.
 */
void
tvsub(struct timeval * tdiff, struct timeval * t1, struct timeval * t0)
{
	tdiff->tv_sec = t1->tv_sec - t0->tv_sec;
	tdiff->tv_usec = t1->tv_usec - t0->tv_usec;
	if (tdiff->tv_usec >= 0) {
		return;
	}
	/* borrow one second */
	tdiff->tv_sec -= 1;
	tdiff->tv_usec += 1000000;
}

/*
 * Return the length of the interval start_tv..stop_tv in microseconds.
 */
uint64
gettime(void)
{
	return (tvdelta(&start_tv, &stop_tv));
}

/*
 * Return the length of the interval start_tv..stop_tv in seconds, as a
 * double.
 */
double
timespent(void)
{
	struct timeval	elapsed;
	double	secs;

	tvsub(&elapsed, &stop_tv, &start_tv);
	secs = elapsed.tv_usec / 1000000.0;
	secs += elapsed.tv_sec;
	return (secs);
}

/*
 * Ring of ten 20-byte result buffers shared by p64() and p64sz(); `n` is
 * the index of the next slot to hand out.  A returned string is overwritten
 * once ten further calls have been made.
 */
static	char	p64buf[10][20];
static	int	n;

/*
 * Format `big` as hexadecimal with a "0x" prefix and no leading zeros.
 *
 * Returns a pointer to one slot of the shared p64buf ring; the string stays
 * valid until the slot is reused.  The longest possible result is "0x" plus
 * 16 digits plus the terminator, which fits the 20-byte slot.
 *
 * A single portable conversion is used on every platform.  The earlier
 * linux-only code read the value through an int pointer, which assumes
 * 32-bit ints and little-endian layout, and platforms that were neither
 * linux nor __sgi wrote nothing into the buffer at all.
 */
char	*
p64(uint64 big)
{
	char	*s = p64buf[n++];

	if (n == 10) n = 0;
	sprintf(s, "0x%llx", (unsigned long long)big);
	return (s);
}

/*
 * Format `big` as a size scaled by powers of 1024, followed by a one
 * character unit tag (' ', K, M, G, T, P or E).
 *
 * The value is divided by 1024 for as long as it exceeds 512.  Results
 * below 100 get four decimals, larger ones two.  Zero yields the constant
 * string "0"; in every case one slot of the shared p64buf ring is consumed.
 */
char	*
p64sz(uint64 big)
{
	char	*units = " KMGTPE";
	double	scaled = big;
	int	unit = 0;
	char	*out = p64buf[n];

	if (++n == 10) {
		n = 0;
	}
	for (; scaled > 512; scaled /= 1024) {
		unit++;
	}
	if (scaled == 0) {
		return ("0");
	}
	sprintf(out, scaled < 100 ? "%.4f%c" : "%.2f%c", scaled, units[unit]);
	return (out);
}

/*
 * Return the final character of the string `s`.
 *
 * A NULL pointer or an empty string yields '\0'; the previous version read
 * the byte in front of the buffer for an empty string.
 */
char
last(char *s)
{
	if (s == NULL || *s == '\0') {
		return ('\0');
	}
	while (s[1] != '\0') {
		s++;
	}
	return (*s);
}

/*
 * Parse a size such as "512", "64k" or "8M" and return it in bytes.
 *
 * The leading digits are converted with atoi(); a trailing 'k'/'K'
 * multiplies by 1024 and a trailing 'm'/'M' by 1024*1024.  Any other
 * trailing character leaves the number unscaled.  NULL and the empty string
 * yield 0.
 *
 * The final character is located here, after the emptiness check, so an
 * empty argument never reaches a "last character" lookup that has nothing
 * to look at.
 */
int
bytes(char *s)
{
	int	n;
	char	*tail;

	if (s == NULL || *s == '\0') {
		return (0);
	}
	n = atoi(s);
	for (tail = s; tail[1] != '\0'; ++tail)
		;
	switch (*tail) {
	case 'k':
	case 'K':
		n *= 1024;
		break;
	case 'm':
	case 'M':
		n *= (1024 * 1024);
		break;
	default:
		break;
	}
	return (n);
}

/*
 * Fold `result` into the externally visible use_result_dummy so the
 * computation that produced it has an observable use.
 */
void
use_int(int result) { use_result_dummy += result; }

/*
 * Pointer counterpart of use_int(): folds the pointer value, cast to int,
 * into use_result_dummy.
 * NOTE(review): the cast drops the upper bits where pointers are wider
 * than int; only the existence of the use seems to matter here -- confirm.
 */
void
use_pointer(void *result) { use_result_dummy += (int)result; }

/*
 * Reset the `u` field of the first `n` entries of `r` to zero.  The `n`
 * fields of the entries are left untouched.
 */
void
insertinit(result_t *r, int n)
{
	int	slot = 0;

	while (slot < n) {
		r[slot].u = 0;
		slot++;
	}
}

/*
 * Insert the pair (u, n) into the array `r` of `l` entries, which is kept
 * ordered from biggest to smallest `u`.
 *
 * The pair goes in front of the first entry whose `u` is strictly smaller;
 * the entries behind it move down one place and the last one drops off.
 * If no entry is smaller, nothing is stored.
 */
void
insertsort(uint64 u, uint64 n, result_t *r, int l)
{
	int	pos, k;

	/* locate the first entry that the new value beats */
	for (pos = 0; pos < l && !(u > r[pos].u); ++pos)
		;
	if (pos >= l) {
		return;
	}
	/* open a gap at pos */
	for (k = l - 1; k > pos; --k) {
		r[k] = r[k - 1];
	}
	r[pos].u = u;
	r[pos].n = n;
}
/*
 * Fake one for timing_overhead: reads the clock the way start() does, but
 * stores into dummy_tv when tv is NULL (so start_tv is never disturbed) and
 * takes no resource-usage snapshot.
 */
static void
timing_start(struct timeval *tv)
{
	struct timeval	*dest = (tv != NULL) ? tv : &dummy_tv;

	(void) gettimeofday(dest, (struct timezone *) 0);
}

/* Number of samples kept by one_op(), two_op() and time_N(). */
#define	LTRIES	(TRIES * 1)
/*
 * The inner loop tracks bench.h but uses a different results array.
 *
 * Runs the single dependent load `p = (long *)*p` under BENCH1 LTRIES
 * times.  After each run the pair (gettime(), get_n()) is inserted into a
 * local array sorted biggest first; at the end the middle entry is
 * published through save_n()/settime().
 *
 * BENCH1 comes from bench.h; presumably it times the statement and leaves
 * the result where gettime()/get_n() can read it -- confirm there.
 *
 * Returns the final value of p (l_overhead() hands it to use_pointer()).
 */
static long *
one_op(register long *p)
{
	result_t r[LTRIES];
	int	i;

	insertinit(r, LTRIES);
	for (i = 0; i < LTRIES; ++i) {
		BENCH1(p = (long *)*p, 0);
		insertsort(gettime(), get_n(), r, LTRIES);
	}
	/*
	 * Debugging aid, disabled:
	for (i = 0; i < LTRIES; ++i)
		printf("%d\t%d\t%f%s\n",
		    (int)r[i].n, (int)r[i].u, (double)r[i].u/r[i].n,
		    i==LTRIES/2?" *":"");
	*/
	save_n(r[LTRIES/2].n); settime(r[LTRIES/2].u);
	return (p);
}

/*
 * Same procedure as one_op(), but the timed body performs two dependent
 * loads per iteration (`p = (long *)*q; q = (long*)*p`).  The middle of the
 * LTRIES sorted samples is published through save_n()/settime().
 *
 * Returns the final value of p.
 */
static long *
two_op(register long *p, register long *q)
{
	result_t r[LTRIES];
	int	i;

	insertinit(r, LTRIES);
	for (i = 0; i < LTRIES; ++i) {
		BENCH1(p = (long *)*q; q = (long*)*p, 0);
		insertsort(gettime(), get_n(), r, LTRIES);
	}
	/*
	 * Debugging aid, disabled:
	for (i = 0; i < LTRIES; ++i)
		printf("%d\t%d\t%f%s\n",
		    (int)r[i].n, (int)r[i].u, (double)r[i].u/r[i].n,
		    i==LTRIES/2?" *":"");
	*/
	save_n(r[LTRIES/2].n); settime(r[LTRIES/2].u);
	return (p);
}

/*
 * Each pointer holds its own address, so a load through it yields the same
 * pointer again.  They are statics, so the addresses are fixed for a run.
 */
static long	*p = (long *)&p;
static long	*q = (long *)&q;

/*
 * Carl - this was busted in the following ways:
 *	The compiler optimized the p = *, p = *p into one op.
 *	We want the variables to be statics at fixed addresses in each run.
 *	I also changed it to take the median, not the min.
 *
 * Return the estimated per-iteration loop overhead, in the unit gettime()
 * reports (microseconds).  The value is computed once and cached; later
 * calls return the cached number.  If the environment variable LOOP_O is
 * set, its value (parsed with atof) is used instead of measuring.
 */
double
l_overhead(void)
{
	uint64	u1, u2, n1, n2;
	double	tmp;
	static	double o;
	static	int done = 0;

	init_timing();
	if (done) { return (o); }

	done = 1;
	if (getenv("LOOP_O")) {
		o = atof(getenv("LOOP_O"));
		return (o);
	}
	/* time and iteration count for one load per iteration ... */
	use_pointer((void *)one_op(p));
	u1 = gettime();
	n1 = get_n();
	/* ... and for two loads per iteration */
	use_pointer((void *)two_op(p, q));
	u2 = gettime();
	n2 = get_n();
	/*
	fprintf(stderr, "%lld %lld  %f  %lld  %lld  %f  ",
	    u1,n1,(double)u1/n1,u2,n2,(double)u2/n2);
	fprintf(stderr, "%f  %f  ", (double)u1/n1,(double)u2/n2);
    	*/

	/*
	 * u1 = (n1 * (overhead + work))
	 * u2 = (n2 * (overhead + 2 * work))
	 *
	 * hence overhead = 2 * u1 / n1 - u2 / n2
	 */
	o = 2 * u1;
	o /= n1;
	tmp = u2;
	tmp /= n2;
	o -= tmp;
	if (o < 0) o = 0;	/* Gag */
	return (o);
}
/*
 * Figure out the timing overhead.  This has to track bench.h
 *
 * Returns the cost of one timing_start() call: gettime() divided by
 * get_n() after running BENCH(timing_start(0), 0).  The environment
 * variable TIMING_O, when set, supplies the value instead (atof).  The
 * measurement is attempted at most once, and is skipped -- leaving the
 * overhead at 0.0 -- when get_enough(0) exceeds 50000.
 *
 * The caller's saved iteration count and elapsed time are captured before
 * the measurement and put back afterwards.
 */
double
t_overhead(void)
{
	uint64		N, usecs;
	static int	initialized = 0;
	static double	overhead = 0.0;

	init_timing();
	/* 0.0 doubles as "not determined yet" */
	if (overhead == 0.0) {
		if (getenv("TIMING_O")) {
			overhead = atof(getenv("TIMING_O"));
		} else if (!initialized) {
			initialized = 1;
			if (get_enough(0) > 50000) {
				/* it is in the noise, so ignore it */
				overhead = 0.0;
			} else {
				/* preserve the caller's results across BENCH */
				N = get_n(); usecs = gettime();
				BENCH(timing_start(0), 0);
				overhead = gettime();
				overhead /= get_n();
				save_n(N); settime(usecs);
			}
		}
	}
	return overhead;
}

/*
 * Figure out how long to run it.
 * If enough == 0, then they want us to figure it out.
 * If enough is !0 then return it unless we think it is too short.
 *
 * In other words: the result is the larger of `e` and the interval that
 * init_timing() computed into long_enough.
 */
static	int	long_enough;
static	int	compute_enough();

int
get_enough(int e)
{
	init_timing();
	if (long_enough > e) {
		return (long_enough);
	}
	return (e);
}


/*
 * One-time initialization: computes long_enough, then calls t_overhead()
 * and l_overhead() so that both have run once.
 *
 * `done` is set before any of that work.  t_overhead(), l_overhead() and
 * get_enough() all call back into init_timing(), and those nested calls
 * must return immediately.
 */
static void
init_timing(void)
{
	static	int done = 0;

	if (done) return;
	done = 1;
	long_enough = compute_enough();
	t_overhead();
	l_overhead();
}

typedef long TYPE;

/*
 * Perform N rounds of ten dependent pointer loads (`p = (TYPE **) *p`)
 * each and return the final pointer, so the caller can pass it on and keep
 * the loads from being discarded.
 */
static TYPE **
enough_duration(register long N, register TYPE ** p)
{
/* Repeat the given statement ten times. */
#define	ENOUGH_DURATION_TEN(one)	one one one one one one one one one one
	while (N-- > 0) {
		ENOUGH_DURATION_TEN(p = (TYPE **) *p;);
	}
	return p;
}

/*
 * Time enough_duration(N, ...) once and return the elapsed microseconds.
 *
 * `x` holds its own address, so the pointer chase keeps loading the same
 * location.  The result of stop() is a uint64 that is narrowed to int on
 * assignment.  This call overwrites start_tv and stop_tv.
 */
static int
duration(int N)
{
	int     usecs;
	TYPE   *x = (TYPE *)&x;
	TYPE  **p = (TYPE **)&x;

	start(0);
	p = enough_duration(N, p);
	usecs = stop(0, 0);
	use_pointer((void *)p);
	return usecs;
}

/*
 * find the median time that work "N" takes in "tries" tests
 *
 * Runs duration(N) repeatedly, keeps the results sorted biggest first and
 * returns the entry in the middle of the LTRIES-sized array.  The index is
 * derived from LTRIES, the size the array is declared with, so the two
 * cannot drift apart if LTRIES is ever changed.
 *
 * NOTE(review): the loop starts at 1, so only LTRIES - 1 samples are taken
 * and the last slot keeps the 0 written by insertinit().  Left as is;
 * confirm whether that is intended.
 */
static int
time_N(long N)
{
	int     i;
	result_t r[LTRIES];

	insertinit(r, LTRIES);
	for (i = 1; i < LTRIES; ++i) {
		insertsort(duration(N), N, r, LTRIES);
	}
	return r[LTRIES/2].u;
}

/*
 * return the amount of work needed to run "enough" microseconds
 *
 * N and the time it was last measured to take are kept in statics, so
 * each call starts from where the previous one ended.  Up to ten
 * adjustments are made: while a run takes less than 1000 usecs N grows
 * tenfold, after that N is rescaled in proportion to enough / usecs.
 * N is returned as soon as the measured time is within 2% of `enough`;
 * -1 is returned when that never happens.
 */
static int
find_N(int enough)
{
	int	tries;
	static int N = 10000;
	static int usecs = 0;

	/*
	 * Take the initial measurement only when none is cached.  This must
	 * be a comparison: an assignment here would wipe the cached time on
	 * every call and skip the measurement altogether.
	 */
	if (usecs == 0) usecs = time_N(N);

	for (tries = 0; tries < 10; ++tries) {
		if (0.98 * enough < usecs && usecs < 1.02 * enough)
			return N;
		if (usecs < 1000)
			N *= 10;
		else {
			double  n = N;

			n /= usecs;
			n *= enough;
			N = n + 1;
		}
		usecs = time_N(N);
	}
	return -1;
}

/*
 * We want to verify that small modifications proportionally affect the runtime
 *
 * Returns 1 when the interval `enough` looks usable and 0 otherwise.
 * Starting from N = find_N(enough), POINTS workloads are timed, each 0.5%
 * larger than the one before.  The interval is rejected when a measured
 * time strays too far from `enough`, or when the time-per-unit-of-work of
 * any two points differs by more than 0.001.
 *
 * NOTE(review): expected[] is filled in but never read.
 */
#define	POINTS	4
static int
test_time(int enough)
{
	int     i, j, N = find_N(enough);
	int     n[POINTS], usecs[POINTS];
	int	expected[POINTS];
	double  rate[POINTS];

	/* find_N() could not settle on a workload */
	if (N <= 0)
		return 0;

	for (i = 0; i < POINTS; ++i) {
		double fraction = 1.0 + (double) i * 0.005;
		n[i] = (int)((double) N * fraction);
		usecs[i] = time_N(n[i]);
		expected[i] = (int)((double)usecs[0] * fraction);
		rate[i] = (double) usecs[i] / n[i];
		if (ABS(enough - usecs[i]) * fraction / (double)enough > 0.03)
			return 0;
		/* compare against every earlier point */
		for (j = 0; j < i; ++j) {
			if (ABS(rate[i] - rate[j]) > 0.001)
				return 0;
		}
	}
	return 1;
}

/* Candidate timing intervals in microseconds, tried in ascending order. */
static	int     possibilities[] = { 5000, 10000, 50000, 100000 };

/*
 * We want to find the smallest timing interval that has accurate timing
 *
 * The environment variable ENOUGH, when set, decides the answer outright
 * (parsed with atoi).  Otherwise the first candidate accepted by
 * test_time() is returned, and SHORT when none is.
 */
static int
compute_enough()
{
	char	*override = getenv("ENOUGH");
	int	count = (int)(sizeof(possibilities) / sizeof(possibilities[0]));
	int	idx;

	if (override != NULL) {
		return (atoi(override));
	}
	for (idx = 0; idx < count; ++idx) {
		if (test_time(possibilities[idx])) {
			return possibilities[idx];
		}
	}

	/* no interval was accurate enough: fall back to SHORT */
	return SHORT;
}

/*
 * This stuff isn't really lib_timing, but ...
 */
/*
 * Raise the soft limit on open file descriptors to the hard limit.
 * Compiles to nothing when RLIMIT_NOFILE is not defined.
 *
 * This is best effort: if the current limits cannot be read, nothing is
 * changed (the structure would be uninitialized), and a failure to set the
 * new limit is ignored.
 */
void
morefds(void)
{
#ifdef	RLIMIT_NOFILE
	struct	rlimit r;

	if (getrlimit(RLIMIT_NOFILE, &r) != 0) {
		return;
	}
	r.rlim_cur = r.rlim_max;
	(void) setrlimit(RLIMIT_NOFILE, &r);
#endif
}

/*
 * Write one byte at the start of every page-sized step of
 * buf[0 .. nbytes-1], so each page of the buffer gets written.  The page
 * size is queried once with getpagesize() and cached.
 *
 * The return type used to be an implicit int; it is now spelled out, and
 * 0 is always returned so that the value is defined.
 */
int
touch(char *buf, int nbytes)
{
	static int	psize;

	if (!psize) {
		psize = getpagesize();
	}
	while (nbytes > 0) {
		*buf = 1;
		buf += psize;
		nbytes -= psize;
	}
	return (0);
}
