/* init.c - MemTest-86  Version 2.9
 *
 * Released under version 2 of the Gnu Public License.
 * By Chris Brady, cbrady@sgi.com
 */

#include "test.h"
#include "defs.h"
#include "config.h"
#include "io.h"

/*
 * Accessors for the raw memory-information table stored in mem_info[].
 * E88, E801, E820NR and E820MAP are byte offsets into that table; they are
 * defined outside this file.
 */
#define PARAM   ((unsigned char *)&mem_info)
/* 16-bit value at offset E88 (copied into v->ext_mem_k by init()) */
#define EXT_MEM_K (*(unsigned short *) (PARAM+E88))
/* ulong value at offset E801 (copied into v->alt_mem_k by init()) */
#define ALT_MEM_K (*(ulong *) (PARAM+E801))
/* Number of E820 map entries; read as a plain char, so its signedness */
/* depends on the compiler */
#define E820_MAP_NR (*(char*) (PARAM+E820NR))
/* First entry of the E820 map */
#define E820_MAP    ((struct e820entry *) (PARAM+E820MAP))

/* Global test state, defined elsewhere */
extern struct vars *v;
/* Defined elsewhere; not referenced in the code visible here */
extern short serial_cons;

/* Raw memory-information table read through the macros above. */
/* NOTE(review): presumably filled in before init() runs - confirm */
char mem_info[800];
short debug = 0;
/* CPU identification data examined by cpu_type() */
struct cpu_ident cpu_id;
/* Scratch storage for time stamp counter readings (low/high 32 bits): */
/* start time, end time / elapsed time, and calibration overhead */
ulong st_low, st_high;
ulong end_low, end_high;
ulong cal_low, cal_high;

/*
 * Initialize test, setup screen and find out how much memory there is.
 *
 * In order this routine:
 *   - stops the floppy motor and initializes the serial console,
 *   - clears the text screen at SCREEN_ADR and draws the static layout,
 *   - copies the memory info table (E88, E801 and E820 data) into *v,
 *   - determines the memory map with mem_size(),
 *   - identifies the CPU with cpu_type() and, when the time stamp counter
 *     is usable (v->rdtsc), measures the cacheable range,
 *   - prints the fixed headings and the footer and calls find_ticks().
 */
void init()
{
	int i;
	volatile char *pp;
	struct e820entry *bm;

	outb(0x8, 0x3f2);  /* Kill Floppy Motor */

	serial_echo_init();

	/* ANSI control sequences must start with ESC (octal 033) followed */
	/* by '['.  The escape is written explicitly so that the source does */
	/* not depend on raw control characters embedded in string literals. */
	/* NOTE(review): LINE_SCROLL is inside the string literal and is */
	/* therefore sent as text, not as the row number - confirm the */
	/* intended scroll region before changing it. */
	serial_echo_print("\033[LINE_SCROLL;24r"); /* Set scroll area */
	serial_echo_print("\033[H\033[2J");   /* Clear Screen */
	serial_echo_print("\033[37m\033[44m");
	serial_echo_print("\033[0m");
	serial_echo_print("\033[37m\033[44m");

	/* Clear screen & set background to blue */
	/* Each cell is a character byte followed by an attribute byte */
	for(i=0, pp=(char *)(SCREEN_ADR); i<80*24; i++) {
		*pp++ = ' ';
		*pp++ = 0x17;
	}

	/* Make the name background red */
	for(i=0, pp=(char *)(SCREEN_ADR+1); i<TITLE_WIDTH; i++, pp+=2) {
		*pp = 0x47;
	}
	cprint(0, 0, "      Memtest-86 v2.9       ");

	/* Do reverse video for the bottom display line */
	for(i=0, pp=(char *)(SCREEN_ADR+1+(24 * 160)); i<80; i++, pp+=2) {
		*pp = 0x71;
	}

	serial_echo_print("\033[0m");

	/* Make a copy of the memory info table so that we can re-evaluate */
	/* The memory map later */
	/* NOTE(review): the entry count comes straight from the table and is */
	/* not checked against the capacity of v->e820[] - confirm a bound */
	/* is enforced where the table is built. */
	v->ext_mem_k = EXT_MEM_K;
	v->alt_mem_k = ALT_MEM_K;
	v->e820_nr = E820_MAP_NR;
	for (i=0, bm = E820_MAP; i<E820_MAP_NR; i++) {
		v->e820[i].addr = bm[i].addr;
		v->e820[i].size = bm[i].size;
		v->e820[i].type = bm[i].type;
	}

	/* Determine the memory map */
	v->memsz_mode = SZ_MODE_BIOS;
	mem_size();

	v->test = 0;
	v->testsel = -1;
	v->msg_line = LINE_SCROLL-1;
	v->scroll_start = v->msg_line * 160;	/* 160 bytes per screen row */

	/* Defaults; cpu_type() overwrites the cache lines when it knows more */
	cprint(LINE_CPU+1, 0, "L1 Cache - Unknown");
	cprint(LINE_CPU+2, 0, "L2 Cache - Unknown");
	cprint(LINE_CPU+3, 0, "Memory ");
	aprint(LINE_CPU+3, 10, v->test_mem);

	cpu_type();

	/* Timing based features need the time stamp counter */
	if (v->rdtsc) {
		cacheable();
		cprint(LINE_TIME, 0, "Elapsed Time");
		cprint(LINE_TIME, COL_TIME+4, ":  :");
	}
	cprint(0, COL_MID,"Pass   %");
	cprint(1, COL_MID,"Test   %");
	cprint(2, COL_MID,"Test #");
	cprint(3, COL_MID,"Testing: ");
	/* NOTE(review): row 6 is written twice below (first at COL_MID, */
	/* then from column 0) - confirm the intended header layout. */
	cprint(5, COL_MID,"MemoryMap  Cache    Pattern    Test  Pass  Errors");
	cprint(6, COL_MID,"---------  -----  -----------  ----  ----  ------");
	cprint(6, 0,
"WallTime  Cached  RsvdMem  MemoryMap  Cache    Pattern    Test  Pass  Errors");
	cprint(7, 0,
"--------  ------  -------  ---------  -----  -----------  ----  ----  ------");
	cprint(LINE_INFO, COL_TST, "Std");
	cprint(LINE_INFO, COL_PASS, "    0");
	cprint(LINE_INFO, COL_ERR, "     0");
	cprint(LINE_INFO+1, 0, "------------------------------------------------------------------------------");

	/* Vertical separator between the left and right halves */
	for(i=0; i <= LINE_INFO; i++) {
		cprint(i, COL_MID-2, "| ");
	}
	footer();
	v->printmode=PRINTMODE_ADDRESSES;
	v->numpatn=0;
	find_ticks();
}

/*
 * Find CPU type and cache sizes
 *
 * Prints the CPU name on LINE_CPU based on the contents of cpu_id.  For
 * CPUs that are known to have a time stamp counter the routine goes on to
 * print the clock speed, the L1/L2 cache sizes with their measured speeds
 * and the main memory speed, records the starting time stamp in
 * v->startl/v->starth (and v->snapl/v->snaph) and sets v->rdtsc to 1.
 *
 * Every early return leaves v->rdtsc at 0, which tells the caller that no
 * timing information is available.
 */
void cpu_type()
{
	int i, off=0;		/* off = column just past the CPU name */
	int l1_cache=0, l2_cache=0;	/* sizes in KB, 0 = unknown */
	int khz;
	ulong speed;

	v->rdtsc = 0;

#ifdef CPUID_DEBUG
	dprint(9,0,cpu_id.type,3,1);
	dprint(10,0,cpu_id.model,3,1);
	dprint(11,0,cpu_id.cpuid,3,1);
#endif

	/* If the CPUID instruction is not supported then this is */
	/* a 386, 486 or one of the early Cyrix CPU's */
	if (cpu_id.cpuid < 1) {
		switch (cpu_id.type) {
		case 2:
			/* This is a Cyrix CPU without CPUID */
			/* The upper nibble of register 0xfe selects the name */
			i = getCx86(0xfe);
			i &= 0xf0;
			i >>= 4;
			switch(i) {
			case 0:
			case 1:
				cprint(LINE_CPU, 0, "Cyrix Cx486");
				break;
			case 2:
				cprint(LINE_CPU, 0,"Cyrix 5x86");
				break;
			case 3:
				cprint(LINE_CPU, 0,"Cyrix 6x86");
				break;
			case 4:
				cprint(LINE_CPU, 0,"Cyrix MediaGX");
				break;
			case 5:
				cprint(LINE_CPU, 0,"Cyrix 6x86MX");
				break;
			case 6:
				cprint(LINE_CPU, 0,"Cyrix MII");
				break;
			default:
				cprint(LINE_CPU, 0,"Cyrix ???");
				break;
			}
			break;
		case 3:
			cprint(LINE_CPU, 0, "386");
			break;
		case 4:
			cprint(LINE_CPU, 0, "486");
			break;
		}
		/* No speed or cache information is printed for these CPUs */
		return;
	}

	switch(cpu_id.vend_id[0]) {
	/* AMD Processors */
	case 'A':
		switch(cpu_id.type) {
		case 4:
			switch(cpu_id.model) {
			case 3:
				cprint(LINE_CPU, 0, "AMD 486DX2");
				break;
			case 7:
				cprint(LINE_CPU, 0, "AMD 486DX2-WB");
				break;
			case 8:
				cprint(LINE_CPU, 0, "AMD 486DX4");
				break;
			case 9:
				cprint(LINE_CPU, 0, "AMD 486DX4-WB");
				break;
			case 14:
				cprint(LINE_CPU, 0, "AMD 5x86-WT");
				break;
			}
			/* Since we can't get CPU speed or cache info return */
			return;
		case 5:
			switch(cpu_id.model) {
			case 0:
			case 1:
			case 2:
			case 3:
				cprint(LINE_CPU, 0, "AMD K5");
				off = 6;
				break;
			case 6:
			case 7:
				cprint(LINE_CPU, 0, "AMD K6");
				off = 6;
				l1_cache = cpu_id.cache_info[3];
				l1_cache += cpu_id.cache_info[7];
				break;
			case 8:
				cprint(LINE_CPU, 0, "AMD K6-2");
				off = 8;
				l1_cache = cpu_id.cache_info[3];
				l1_cache += cpu_id.cache_info[7];
				break;
			case 9:
				cprint(LINE_CPU, 0, "AMD K6-III");
				off = 10;
				l1_cache = cpu_id.cache_info[3];
				l1_cache += cpu_id.cache_info[7];
				l2_cache = (cpu_id.cache_info[11] << 8);
				l2_cache += cpu_id.cache_info[10];
				break;
			}
			break;
		case 6:
			switch(cpu_id.model) {
			case 1:
			case 2:
			case 4:
			case 6:
				cprint(LINE_CPU, 0, "AMD Athlon");
				off = 10;
				l2_cache = (cpu_id.cache_info[11] << 8);
				l2_cache += cpu_id.cache_info[10];
				break;
			case 3:
			case 7:
				cprint(LINE_CPU, 0, "AMD Duron");
				off = 9;
				/* Duron stepping 0 CPUID for L2 is broken */
				/* (AMD errata T13)*/
				if (cpu_id.step == 0) { /* stepping 0 */
					/* Hard code the right size */
					l2_cache = 64;
				} else {
					l2_cache = (cpu_id.cache_info[11] << 8);
					l2_cache += cpu_id.cache_info[10];
				}
				break;
			}
			/* L1 size is taken from cache_info for every family 6 model */
			l1_cache = cpu_id.cache_info[3];
			l1_cache += cpu_id.cache_info[7];
			break;
		}
		break;

	/* Intel Processors */
	case 'G':
		if (cpu_id.type == 4) {
			switch(cpu_id.model) {
			case 0:
			case 1:
				cprint(LINE_CPU, 0, "Intel 486DX");
				break;
			case 2:
				cprint(LINE_CPU, 0, "Intel 486SX");
				break;
			case 3:
				cprint(LINE_CPU, 0, "Intel 486DX2");
				break;
			case 4:
				cprint(LINE_CPU, 0, "Intel 486SL");
				break;
			case 5:
				cprint(LINE_CPU, 0, "Intel 486SX2");
				break;
			case 7:
				cprint(LINE_CPU, 0, "Intel 486DX2-WB");
				break;
			case 8:
				cprint(LINE_CPU, 0, "Intel 486DX4");
				break;
			case 9:
				cprint(LINE_CPU, 0, "Intel 486DX4-WB");
				break;
			}
			/* Since we can't get CPU speed or cache info return */
			return;
		}

		/* Get the cache info */
		/* Each byte of cache_info is a descriptor code; L1 */
		/* descriptors accumulate, the L2 descriptor sets the size */
		for (i=0; i<16; i++) {
#ifdef CPUID_DEBUG
			dprint(12,i*3,cpu_id.cache_info[i],2,1);
#endif
			switch(cpu_id.cache_info[i]) {
			case 0x6:
			case 0xa:
			case 0x66:
				l1_cache += 8;
				break;
			case 0x8:
			case 0xc:
			case 0x67:
				l1_cache += 16;
				break;
			case 0x68:
				l1_cache += 32;
				break;
			case 0x40:
				l2_cache = 0;
				break;
			case 0x41:
			case 0x79:
				l2_cache = 128;
				break;
			case 0x42:
			case 0x7a:
			case 0x82:
				l2_cache = 256;
				break;
			case 0x43:
			case 0x7b:
			case 0x83:
				l2_cache = 512;
				break;
			case 0x44:
			case 0x7c:
			case 0x84:
				l2_cache = 1024;
				break;
			case 0x45:
			case 0x85:
				l2_cache = 2048;
				break;
			}
		}

		switch(cpu_id.type) {
		case 5:
			switch(cpu_id.model) {
			case 0:
			case 1:
			case 2:
			case 3:
			case 7:
				cprint(LINE_CPU, 0, "Pentium");
				if (l1_cache == 0) {
					l1_cache = 8;
				}
				off = 7;
				break;
			case 4:
			case 8:
				cprint(LINE_CPU, 0, "Pentium-MMX");
				if (l1_cache == 0) {
					l1_cache = 16;
				}
				off = 11;
				break;
			}
			break;
		case 6:
			switch(cpu_id.model) {
			case 0:
			case 1:
				cprint(LINE_CPU, 0, "Pentium Pro");
				off = 11;
				break;
			case 3:
				cprint(LINE_CPU, 0, "Pentium II");
				off = 10;
				break;
			case 5:
				/* Model 5 without L2 cache is a Celeron */
				if (l2_cache == 0) {
					cprint(LINE_CPU, 0, "Celeron");
					off = 7;
				} else {
					cprint(LINE_CPU, 0, "Pentium II");
					off = 10;
				}
				break;
			case 6:
				/* Model 6 with 128K of L2 cache is a Celeron */
				if (l2_cache == 128) {
					cprint(LINE_CPU, 0, "Celeron");
					off = 7;
				} else {
					cprint(LINE_CPU, 0, "Pentium II");
					off = 10;
				}
				break;
			case 7:
			case 8:
			case 10:
			case 11:
				cprint(LINE_CPU, 0, "Pentium III");
				off = 11;
				break;
			}
			break;
		case 15:
			cprint(LINE_CPU, 0, "Pentium 4");
			off = 9;
			break;
		}
		break;

	/* Cyrix Processors with CPUID */
	case 'C':
		switch(cpu_id.model) {
		case 0:
			cprint(LINE_CPU, 0, "Cyrix 6x86MX/MII");
			break;
		case 4:
			cprint(LINE_CPU, 0, "Cyrix GXm");
			break;
		}
		/* No speed or cache information is printed for these CPUs */
		return;

	/* Unknown processor */
	default:
		off = 3;
		/* Make a guess at the family */
		switch(cpu_id.type) {
		case 5:
			cprint(LINE_CPU, 0, "586");
			return;
		case 6:
			cprint(LINE_CPU, 0, "686");
			return;
		}
	}

	/* We are here only if the CPU type supports the rdtsc instruction */

	/* Print CPU speed */
	/* cpuspeed() returns -1 on failure, so the result must be tested */
	/* as a signed value before it is stored in the unsigned speed */
	khz = cpuspeed();
	if (khz > 0) {
		speed = khz;
		if (speed < 1000000) {
			speed += 50; /* for rounding */
			cprint(LINE_CPU, off, "    . MHz");
			dprint(LINE_CPU, off+1, speed/1000, 3, 1);
			dprint(LINE_CPU, off+5, (speed/100)%10, 1, 0);
		} else {
			speed += 500; /* for rounding */
			cprint(LINE_CPU, off, "      Mhz");
			dprint(LINE_CPU, off+1, speed/1000, 5, 0);
		}
	}

	/* Print out L1 cache info */
	/* To measure L1 cache speed we use a block size that is 1/4th */
	/* of the total L1 cache size since half of it is for instructions */
	if (l1_cache) {
		cprint(LINE_CPU+1, 9, "     K     ");
		dprint(LINE_CPU+1, 10, l1_cache, 4, 0);
		/* A zero length block would make memspeed() divide by zero */
		i = (l1_cache / 4) * 1024;
		if (i > 0 && (speed=memspeed(0x100000, i, 50))) {
			cprint(LINE_CPU+1, 15, "      MB/s");
			dprint(LINE_CPU+1, 15, speed, 6, 0);
		}
	}

	/* Print out L2 cache info */
	/* We measure the L2 cache speed by using a block size that is */
	/* the size of the L1 cache.  We have to fudge if the L1 */
	/* cache is bigger than the L2 */
	if (l2_cache) {
		cprint(LINE_CPU+2, 9, "     K     ");
		cprint(LINE_CPU+2, 0, "L2 Cache    ?K");
		dprint(LINE_CPU+2, 10, l2_cache, 4, 0);

		if (l2_cache < l1_cache) {
			i = l1_cache / 4 + l2_cache / 4;
		} else {
			i = l1_cache;
		}
		/* Skip the measurement when the L1 size is unknown (i == 0) */
		if (i > 0 && (speed=memspeed(0x100000, i*1024, 50))) {
			cprint(LINE_CPU+2, 15, "      MB/s");
			dprint(LINE_CPU+2, 15, speed, 6, 0);
		}
	}

	/* Determine memory speed.  To find the memory speed we use */
	/* A block size that is 5x the sum of the L1 and L2 caches */
	i = (l2_cache + l1_cache) * 5;

	/* Make sure that we have enough memory to do the test */
	if ((1 + (i * 2)) *  1024 > v->lim_upper) {
		i = ((v->lim_upper / 1024) - 1) / 2;
	}
	/* With no cache information the block size is zero; skip the */
	/* measurement rather than pass a zero length to memspeed() */
	if (i > 0 && (speed = memspeed(0x100000, i*1024, 40))) {
		cprint(LINE_CPU+3, 15, "      MB/s");
		dprint(LINE_CPU+3, 15, speed, 6, 0);
	}

	/* Record the starting time */
	asm __volatile__ ("rdtsc":"=a" (v->startl),"=d" (v->starth));
	v->snapl = v->startl;
	v->snaph = v->starth;
	v->rdtsc = 1;
}

/*
 * Find cache-able memory size
 *
 * Starting at 2MB and stepping by 4MB, the copy speed of a 128K block is
 * measured with memspeed().  The first block that is slower than 70% of
 * the block measured before it is taken as the end of cacheable memory.
 * When no such block is found the upper memory limit is reported.  The
 * result is printed on line LINE_CPU+4.
 */
void cacheable()
{
	ulong speed, pspeed, addr, caddr;

	caddr = v->lim_upper;
	pspeed = 0;	/* 70% of the previous speed, 0 = no sample yet */

	/* memspeed() touches 2 * 128K starting at addr, so the whole */
	/* 256K must lie below the limit.  The test is written as an */
	/* addition so that a limit below 256K can not wrap around. */
	for (addr=0x200000; addr + 262144 <= v->lim_upper; addr+=0x400000) {
		speed = memspeed(addr, 131072, 1);

		/* A marked slowdown relative to the previous block marks */
		/* the start of uncached memory */
		if (pspeed && speed < pspeed) {
			caddr = addr;
			break;
		}

		/* Threshold for the next block.  This must be updated on */
		/* every pass, including the first one, or the comparison */
		/* above would never be made.  Integer arithmetic is enough */
		/* here and avoids floating point. */
		pspeed = (speed * 7) / 10;

		/* check for overflow */
		if ((addr + 0x400000) < addr) {
			break;
		}
	}
	cprint(LINE_CPU+4, 0, "Cacheable");
	aprint(LINE_CPU+4, 10, caddr);
}


/* Earlier calibration values, kept for reference: */
/* #define TICKS 5 * 11832 (count = 6376)*/
/* #define TICKS (65536 - 12752) */

/*
 * Number of timer ticks to wait while counting CPU clocks.  Assuming the
 * standard 1.193182 MHz PC timer input clock, 57265 ticks take about 48
 * milliseconds, which is why cpuspeed() divides the clock count by 48.
 */
#define TICKS (65536 - 8271)

/*
 * Returns CPU clock in khz
 *
 * Timer channel 2 is programmed to count TICKS ticks and the time stamp
 * counter is read before and after.  The elapsed count is left in
 * end_low/end_high and the number of clocks per millisecond is stored in
 * v->clks_msec and returned.  Returns -1 when the measurement is not
 * credible; v->clks_msec is left unchanged in that case.
 */
int cpuspeed()
{
	int loops;

	/* Setup timer */
	/* Port and bit meanings below follow the standard PC/AT layout: */
	/* port 0x61 bit 0 gates timer 2, bit 1 drives the speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
	/* Timer 2, low byte then high byte, mode 0, binary */
	outb(0xb0, 0x43);
	outb(TICKS & 0xff, 0x42);
	outb(TICKS >> 8, 0x42);

	asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));

	/* Wait for the timer 2 output (port 0x61 bit 5) to go high */
	loops = 0;
	do {
		loops++;
	} while ((inb(0x61) & 0x20) == 0);

	asm __volatile__ ("rdtsc":"=a" (end_low),"=d" (end_high));

	/* Compute the elapsed time as a 64 bit difference.  The high half */
	/* must be subtracted with borrow (sbbl).  The start values are */
	/* passed as operands so the compiler knows they are read here. */
	asm __volatile__ (
		"subl %2,%0\n\t"
		"sbbl %3,%1"
		:"=a" (end_low), "=d" (end_high)
		:"g" (st_low), "g" (st_high),
		"0" (end_low), "1" (end_high)
	);

	/* Make sure we have a credible result */
	if (loops < 4 || end_low < 50000) {
		return(-1);
	}
	v->clks_msec = end_low/48;
	return(v->clks_msec);
}

/*
 * Measure cache/memory speed by copying a block of memory.
 *
 * src  - start address of the source block
 * len  - block length in bytes; the destination block starts at src + len,
 *        so 2 * len bytes starting at src are touched
 * iter - number of timed copies
 *
 * The returned value is v->clks_msec divided by the number of clocks
 * needed per KB, i.e. KB per millisecond.  The callers in this file print
 * it as MB/s.  Zero is returned when no meaningful result is available:
 * nothing to copy, a time that does not fit in 32 bits, or a time that
 * rounds down to zero clocks per KB.
 *
 * The globals st_low/st_high, cal_low/cal_high and end_low/end_high are
 * used as scratch storage.
 */
ulong memspeed(ulong src, ulong len, int iter)
{
	ulong dst;
	ulong wlen;
	int i;

	dst = src + len;
	wlen = len / 4;  /* Length is bytes */

	/* Nothing to measure.  This also protects the divisions by len */
	/* and iter further down against a division by zero. */
	if (wlen == 0 || iter <= 0) {
		return(0);
	}

	/* Calibrate the overhead with a zero word copy */
	asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
	for (i=0; i<iter; i++) {
		asm __volatile__ (
			"movl %0,%%esi\n\t" \
			"movl %1,%%edi\n\t" \
			"movl %2,%%ecx\n\t" \
			"cld\n\t" \
			"rep\n\t" \
			"movsl\n\t" \
			:: "g" (src), "g" (dst), "g" (0)
			: "esi", "edi", "ecx", "memory"
		);
	}
	asm __volatile__ ("rdtsc":"=a" (cal_low),"=d" (cal_high));

	/* Compute the overhead time */
	asm __volatile__ (
		"subl %2,%0\n\t"
		"sbbl %3,%1"
		:"=a" (cal_low), "=d" (cal_high)
		:"g" (st_low), "g" (st_high),
		"0" (cal_low), "1" (cal_high)
	);

	/* Do the first copy to prime the cache */
	/* The copy writes memory the compiler can not see, hence the */
	/* "memory" clobber on the copy statements */
	asm __volatile__ (
		"movl %0,%%esi\n\t" \
		"movl %1,%%edi\n\t" \
		"movl %2,%%ecx\n\t" \
		"cld\n\t" \
		"rep\n\t" \
		"movsl\n\t" \
		:: "g" (src), "g" (dst), "g" (wlen)
		: "esi", "edi", "ecx", "memory"
	);

	/* Now measure the speed */
	asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
	for (i=0; i<iter; i++) {
		asm __volatile__ (
			"movl %0,%%esi\n\t" \
			"movl %1,%%edi\n\t" \
			"movl %2,%%ecx\n\t" \
			"cld\n\t" \
			"rep\n\t" \
			"movsl\n\t" \
			:: "g" (src), "g" (dst), "g" (wlen)
			: "esi", "edi", "ecx", "memory"
		);
	}
	asm __volatile__ ("rdtsc":"=a" (end_low),"=d" (end_high));

	/* Compute the elapsed time */
	asm __volatile__ (
		"subl %2,%0\n\t"
		"sbbl %3,%1"
		:"=a" (end_low), "=d" (end_high)
		:"g" (st_low), "g" (st_high),
		"0" (end_low), "1" (end_high)
	);
	/* Subtract the overhead time */
	asm __volatile__ (
		"subl %2,%0\n\t"
		"sbbl %3,%1"
		:"=a" (end_low), "=d" (end_high)
		:"g" (cal_low), "g" (cal_high),
		"0" (end_low), "1" (end_high)
	);

	/* Make sure that the result fits in 32 bits */
	/* (this also rejects a negative result, where the overhead */
	/* was larger than the measured time) */
	if (end_high) {
		return(0);
	}

	/* Since a copy does both a read & write we need to adjust the time */
	end_low /= 2;

	/* Convert to clocks/KB */
	end_low /= len;
	end_low *= 1024;
	end_low /= iter;
	if (end_low == 0) {
		return(0);
	}

	/* Convert to KB per millisecond (displayed as MB/s) */
	return((v->clks_msec)/end_low);
}
