/*
 * raid_io.c : Utility for the Linux Multiple Devices driver
 *             Copyright (C) 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID IO operations.
 *
 * This source is covered by the GNU GPL, the same as all Linux kernel
 * sources.
 *
 * 97-09-23: >4GB limit fix, Rob Hagopian <hagopiar@vuser.vu.union.edu>
 */

#include "common.h"
#include "config.h"
#include "parser.h"

/*
 * Configuration entry and superblock of the raid set currently being
 * cleared.  Both are assigned in init_set() and read by writer_thread().
 */
md_cfg_entry_t *p;
md_superblock_t *sb;


/*
 * Per-disk state shared between init_set() and writer_thread(); every
 * array is indexed by the disk number that is passed to the thread.
 */
static pthread_t parallel_thread [MD_SB_DISKS];	/* handles filled in by pthread_create() */
static pthread_mutex_t start_work [MD_SB_DISKS];	/* held by init_set() until all threads exist */
static pthread_mutex_t stop_work [MD_SB_DISKS];	/* unlocked by a writer when it is finished */
static int fd[MD_SB_DISKS];			/* descriptors opened O_WRONLY in init_set() */

/*
 * Progress accounting, in the block units of sb_block_offset[].
 */
static int divider;			/* nr_disks for raid level 1, otherwise 1 */
static __u32 sum;			/* sum of sb_block_offset[] over all disks */
static __u32 curr[MD_SB_DISKS];		/* blocks written so far, per disk */

/* Zero-filled buffer of ZERO_BUFFER_SIZE * MD_BLK_SIZ bytes, allocated in init_set() */
static char *buffer;


/* Type used for timestamps and durations derived from T() */
#define TIME long long

/*
 * NOTE(review): progress() declares its own static begin_t which shadows
 * this one; no other use of this variable is visible in this file.
 */
static TIME begin_t = 0;

/*
 * T - current wall-clock time in microseconds.
 *
 * Returns tv_sec * 1000000 + tv_usec as reported by gettimeofday().
 * The arithmetic is done in long long: with a 32-bit long the product
 * tv_sec * 1000000 does not fit, and the callers store the result in a
 * long long (TIME) anyway.
 *
 * The timeval is preset to one second and one microsecond, so the
 * result stays non-zero even if gettimeofday() fails and leaves the
 * structure untouched.
 */
static long long T (void)
{
	struct timeval t;

	t.tv_usec = 1;
	t.tv_sec = 1;
	gettimeofday(&t, NULL);

	return (long long) t.tv_sec * 1000000LL + (long long) t.tv_usec;
}


#define F float
/*
 * progress - print a one-line status report, at most once per second.
 *
 * blocks:  total amount of work
 * current: amount of work done so far, in the same unit as blocks
 *
 * The first call only records the start time.  Later calls print the
 * percentage done, an estimate of the remaining time, the elapsed time
 * and the average rate (current per elapsed second) through OUT(),
 * followed by a carriage return on stderr so that the next report
 * overwrites the line.
 *
 * The percentage is computed as current * 100 / blocks in 64 bits; the
 * former current / (blocks / 100) divided by zero for blocks < 100 and
 * was distorted by the truncated divisor.  A zero blocks or current
 * yields 0% and a zero estimate instead of a division by zero.
 */
void progress (unsigned long blocks, unsigned long current)
{
	static TIME begin_t = 0, last_status_t = 0;
	TIME left_t, elapsed_t;
	TIME now_t;
	int percent;

	if (!begin_t)
		begin_t = last_status_t = T();

	now_t = T();
	/*
	 * do max one status update per second
	 */
	if (now_t-last_status_t < (TIME)1e6)
		return;

	elapsed_t = (now_t-begin_t)/(TIME)1e6;
	/* keeps the rate below well defined whatever the clock did */
	if (elapsed_t < 1)
		elapsed_t = 1;

	if (blocks)
		percent = (int) ((unsigned long long) current * 100 / blocks);
	else
		percent = 0;

	/* remaining time by linear extrapolation of the time spent so far */
	if (current)
		left_t = (TIME) ((F)(elapsed_t) * ((F)blocks/(F)current-1.0));
	else
		left_t = 0;
	if (left_t < 0)
		left_t = 0;

	OUT("(%2d%% done; ~%d:%02d left %d:%02d elapsed [%.1f KB/sec])",
		percent,
		(int) left_t/60, (int) left_t%60,
		(int) elapsed_t/60, (int) elapsed_t%60,
		((F)current)/(F)elapsed_t
	);
	fflush(stderr);
	putc('\r', stderr);

	last_status_t=now_t;
}
#undef F

/*
 * Number of the _llseek system call.
 * NOTE(review): presumably consumed by the _syscall5() macro below (which
 * is defined outside this file) and presumably the i386 value -- confirm
 * before building for another architecture.
 */
#define __NR__llseek            140

/*
 * Prototype of the stub defined right below: fd, upper and lower 32 bits
 * of the byte offset, where to store the resulting position, and the
 * seek origin.
 */
static int _llseek (unsigned int, unsigned long,
		unsigned long, long long *, unsigned int);

/* Definition of _llseek(), generated by the _syscall5() macro */
static _syscall5( int, _llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, long long *, result,
		unsigned int, origin)

/*
 * raidseek - position fd at the start of block blk.
 *
 * The byte offset blk * MD_BLK_SIZ is computed in 64 bits and handed to
 * _llseek() as two 32-bit halves, relative to the start of the file.
 *
 * Returns the position reported by _llseek(), or -1 if _llseek() failed.
 */
long long raidseek (unsigned int fd, unsigned long blk)
{
	unsigned long long byte_pos;
	unsigned long upper, lower;
	long long where;

	byte_pos = (unsigned long long) blk * (unsigned long long) MD_BLK_SIZ;
	upper = byte_pos >> 32;
	lower = byte_pos & 0xffffffff;

	if (_llseek (fd, upper, lower, &where, SEEK_SET) == -1)
		return -1;

	return where;
}


/*
 * get_random - produce a 32-bit value used as the raid set magic.
 *
 * Four bytes are read from /dev/urandom.  If the device cannot be opened
 * or does not deliver all four bytes, rand() is used instead.
 *
 * The descriptor is closed as soon as the read has been attempted, so it
 * is no longer leaked when open() succeeds but read() fails or is short.
 */
static __u32 get_random(void)
{
	__u32 num;
	int fd;
	int got = 0;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd != -1) {
		got = (read(fd, &num, 4) == 4);
		close(fd);
	}

	if (got) {
#if DEBUG
		printf("raid set magic: %x\n", num);
#endif
	} else {
		num = rand();
#if DEBUG
		printf("raid set magic (pseudo-random): %x\n", num);
#endif
	}
	return num;
}

/*
 * print_sb - dump a raid superblock to stdout in human-readable form.
 *
 * Prints the header fields first (magic, version, set id, timestamps,
 * state, level, sizes, disk counters) and then one line per entry of
 * sb->disks[] with the disk number, its raid_disk slot and the decoded
 * faulty/active/sync state bits.  The chunk size is printed for levels 4
 * and 5 only, the parity algorithm for level 5 only.
 */
void print_sb (md_superblock_t *sb)
{
	md_descriptor_t *d;
	const char *health, *activity, *syncstate;
	time_t stamp;
	int n, algo;

	printf("MD ID:                   %x\n", sb->md_magic);
	printf("Conforms to MD version:  %d.%d.%d\n", sb->major_version, sb->minor_version, sb->patch_version);
	printf("Raid set ID:             %x\n", sb->set_magic);

	stamp = (time_t) sb->ctime;
	printf("Creation time:           %s", ctime(&stamp));
	stamp = (time_t) sb->utime;
	printf("Update time:             %s", ctime(&stamp));

	printf("State:                   %d%s\n", sb->state, (sb->state & (1 << MD_SB_CLEAN)) ? " (clean)" : "");
	printf("Raid level:              %d\n", sb->level);
	printf("Individual disk size:    %uMB (%ukB)\n", sb->size / MD_BLK_SIZ, sb->size);

	if (sb->level == 4 || sb->level == 5)
		printf("Chunk size:              %dkB\n", sb->chunk_size / MD_BLK_SIZ);

	/* indices of 4 and above have no entry in the name table */
	algo = sb->parity_algorithm;
	if (sb->level == 5)
		printf("Parity algorithm:        %d (%s)\n", algo, algo < 4 ? parity_algorithm_table[algo] : "unknown");

	printf("Total number of disks:   %d\n", sb->nr_disks);
	printf("Number of raid disks:    %d\n", sb->raid_disks);
	printf("Number of active disks:  %d\n", sb->active_disks);
	printf("Number of working disks: %d\n", sb->working_disks);
	printf("Number of failed disks:  %d\n", sb->failed_disks);
	printf("Number of spare disks:   %d\n", sb->spare_disks);
	printf("\n");

	for (n = 0; n < sb->nr_disks; n++) {
		d = &sb->disks[n];

		if (d->state & (1 << MD_FAULTY_DEVICE))
			health = "faulty";
		else
			health = "operational";

		if (d->state & (1 << MD_ACTIVE_DEVICE))
			activity = "active";
		else
			activity = "not active";

		if (d->state & (1 << MD_SYNC_DEVICE))
			syncstate = "sync";
		else
			syncstate = "not in sync";

		printf("Disk %d: raid_disk %d, ", d->number, d->raid_disk);
		printf("state: %d (%s, %s, %s)\n", d->state,
			health, activity, syncstate);
	}
}

/*
 * sanity_checks - refuse devices that look like they are in use.
 *
 * name:         device name, searched for in /etc/mtab and used in messages
 * fd:           descriptor of the device, open for reading
 * block_offset: block at which a raid superblock would be stored
 *
 * Returns 1 if the device must not be used (or a check could not be
 * carried out), 0 if it passed.
 *
 * The mount check is always done.  The ext2 and raid superblock checks
 * are skipped when force_flag is set.
 */
static int sanity_checks (char *name, int fd, __u32 block_offset)
{
	FILE *fp;
	char line[MAX_LINE_LENGTH];
	unsigned char buffer[MD_SB_BYTES];
	md_superblock_t *sb;

	/*
	 * Check if the device is mounted.  The lines go into a buffer that
	 * really is MAX_LINE_LENGTH bytes long, and the stream is closed
	 * only if it was opened: fclose(NULL) is undefined.
	 */
	fp = fopen("/etc/mtab", "r");
	if (fp != NULL) {
		while (fgets(line, sizeof line, fp) != NULL) {
			if (strstr(line, name)) {
				fprintf(stderr, "%s is mounted\n", name);
				fclose(fp);
				return 1;
			}
		}
		fclose(fp);
	}

	if (force_flag)
		return 0;

	/*
	 * Check if the device contains an ext2 filesystem: read block 1 and
	 * look at the signature bytes.
	 */
	if ((raidseek(fd, 1)) == -1)
		return 1;
	if ((read(fd, buffer, MD_BLK_SIZ)) != MD_BLK_SIZ)
		return 1;
	if (buffer[56] == 0x53 && buffer[57] == 0xef && buffer[33] == 0x20 && buffer[37] == 0x20) {
		fprintf(stderr, "%s appears to contain an ext2 filesystem -- use -f to override\n", name);
		return 1;
	}

	/*
	 * Check if the device was already a part of a raid array
	 */
	if (raidseek(fd, block_offset) == -1)
		return 1;
	if ((read(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES)
		return 1;
	sb = (md_superblock_t *) buffer;
	if (sb->md_magic == MD_SB_MAGIC) {
		fprintf(stderr, "%s appears to be part of a MD raid array -- use -f to override\n", name);
		return 1;
	}
	return 0;
}

/*
 * analyze_sb - fill in and validate the new superblock of every
 * configured MD device.
 *
 * Walks the configuration list starting at cfg_head (using the global
 * cfg as cursor).  For each entry the superblock header is initialised,
 * the raid level and disk counts are checked, and for each component
 * device the device numbers, state, size and superblock location are
 * determined.  sb->size ends up as the smallest superblock offset of all
 * component devices (rounded down to the chunk size for levels 4 and 5).
 * Every device also has to pass sanity_checks().
 *
 * Returns 0 on success, 1 on the first error or if there is no
 * configuration at all.
 */
int analyze_sb (void)
{
	int fd, i, j;
	md_superblock_t *sb;
	md_descriptor_t *disk;
	struct stat stat_buf;
	__u32 nr_blocks;

	cfg = cfg_head;
	if (!cfg)
		return 1;
	while (cfg) {
		printf("handling MD device %s\n", cfg->md_name);
		printf("analyzing super-block\n");
		sb = &cfg->sb;
		sb->md_magic = MD_SB_MAGIC;
		sb->major_version = MKRAID_MAJOR_VERSION;
		sb->minor_version = MKRAID_MINOR_VERSION;
		sb->patch_version = MKRAID_PATCHLEVEL_VERSION;
		sb->gvalid_words = 15;
		sb->set_magic = get_random();
		sb->ctime = sb->utime = (__u32) time(NULL);
		sb->state = 1 << MD_SB_CLEAN;
		if (sb->level != 4 && sb->level != 5 && sb->level != 1) {
			fprintf(stderr, "%s: raid level %d not supported\n", cfg->md_name, sb->level);
			return 1;
		}
		sb->working_disks = sb->nr_disks;
		sb->active_disks = sb->raid_disks;
		if (sb->raid_disks + sb->spare_disks != sb->nr_disks) {
			fprintf(stderr, "raid_disks + spare_disks != nr_disks\n");
			return 1;
		}
		for (i = 0; i < sb->nr_disks; i++) {
			disk = sb->disks + i;
			disk->number = i;

			if (stat(cfg->device_name[i], &stat_buf) == -1) {
				fprintf(stderr, "couldn't call stat() on device %s -- %s\n", cfg->device_name[i], strerror(errno));
				return 1;
			}
			/*
			 * For a device special file st_rdev identifies the
			 * device itself; st_dev would be the filesystem that
			 * holds the device node.
			 */
			disk->major = MAJOR(stat_buf.st_rdev);
			disk->minor = MINOR(stat_buf.st_rdev);
			if (disk->raid_disk >= sb->nr_disks) {
				fprintf(stderr, "raid_disk for %s (%d) > nr_disks (%d)\n", cfg->device_name[i], disk->raid_disk, sb->nr_disks);
				return 1;
			}
			/* every raid_disk slot may be claimed by one device only */
			for (j = 0; j < i; j++)
				if (sb->disks[j].raid_disk == disk->raid_disk) {
					fprintf(stderr, "raid_disk conflict on %s and %s (%d)\n", cfg->device_name[j], cfg->device_name[i], disk->raid_disk);
					return 1;
				}
			/* slots below raid_disks are array members, the rest stay spares */
			if (disk->raid_disk < sb->raid_disks)
				disk->state = (1 << MD_ACTIVE_DEVICE | 1 << MD_SYNC_DEVICE);
			if ((sb->level == 4 || sb->level == 5) && (!sb->chunk_size || sb->chunk_size % 4)) {
				fprintf(stderr, "invalid chunk-size (%dkB)\n", sb->chunk_size);
				return 1;
			}
			fd = open(cfg->device_name[i], O_RDONLY);
			if (fd == -1) {
				fprintf(stderr, "couldn't open device %s -- %s\n", cfg->device_name[i], strerror(errno));
				return 1;
			}
			if (ioctl(fd, BLKGETSIZE, &nr_blocks) == -1) {
				fprintf(stderr, "couldn't get device size for %s -- %s\n", cfg->device_name[i], strerror(errno));
				close(fd);
				return 1;
			}
			/* halve the ioctl result; the messages below report the value as kB */
			nr_blocks >>= 1;
			if (nr_blocks < MD_RESERVED_BLOCKS * 2) {
				fprintf(stderr, "%s: device too small (%dkB)\n", cfg->device_name[i], nr_blocks);
				close(fd);
				return 1;
			}
			cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
			printf("disk %d: %s, %ukB, raid superblock at %dkB\n", i, cfg->device_name[i], nr_blocks, cfg->sb_block_offset[i]);
			/* the usable size is dictated by the smallest device */
			if (!sb->size || cfg->sb_block_offset[i] < sb->size)
				sb->size = cfg->sb_block_offset[i];
			/* NOTE(review): the mask only rounds correctly for power-of-two chunk sizes -- confirm the parser enforces that */
			if (sb->level == 4 || sb->level == 5)
				sb->size &= ~(sb->chunk_size / MD_BLK_SIZ - 1);
			if (sanity_checks(cfg->device_name[i], fd, cfg->sb_block_offset[i])) {
				close(fd);
				return 1;
			}
			close(fd);
		}
		cfg = cfg->next;
	}
	return 0;
}

/*
 * writer_thread - thread body that zeroes one component device.
 *
 * data: the disk index, passed as an integer disguised as a pointer.
 *
 * The thread first blocks on start_work[i], which init_set() holds until
 * all threads have been created.  It then writes p->sb_block_offset[i]
 * blocks from the shared zero buffer to fd[i], at most ZERO_BUFFER_SIZE
 * blocks per chunk, calling sync() after every 256 chunks.  The thread
 * for disk 0 additionally reports the overall progress.  On completion
 * or on a write error stop_work[i] is unlocked to tell init_set() that
 * this thread is done.
 *
 * A write() that transfers fewer bytes than requested is continued until
 * the whole chunk is on the device; previously the missing part was
 * counted as written.  A return of 0 or -1 is treated as an error.
 *
 * NOTE(review): stop_work[i] is unlocked by a thread that did not lock
 * it, and curr[] is read without synchronisation; both are part of the
 * existing handshake with init_set() and are left untouched here.
 */
void * writer_thread (void * data)
{
	/* convert through long rather than casting a pointer straight to int */
	int i = (int) (long) data, j, k=0;
	__u32 blocks, count, curr_sum;
	size_t todo;
	ssize_t done;

	pthread_mutex_lock(&start_work[i]);
	blocks = p->sb_block_offset[i];
	curr[i] = 0;
	while (blocks) {
		count = MIN(ZERO_BUFFER_SIZE, blocks);

		/*
		 * The buffer holds nothing but zeroes, so after a short
		 * write the remainder can be written from its start again.
		 */
		todo = (size_t) count * MD_BLK_SIZ;
		while (todo) {
			done = write(fd[i], buffer, todo);
			if (done <= 0) {
				ERR("couldn't clear %s\n",
					p->device_name[i]);
				goto abort;
			}
			todo -= (size_t) done;
		}
		k++;
		if (!(k&255))
			sync();

		blocks -= count;
		curr[i] += count;

		curr_sum = 0;
		for (j = 0; j < sb->nr_disks; j++)
			curr_sum += curr[j];

		if (!i)
			progress(sum/divider,curr_sum/divider);
	}
abort:
	pthread_mutex_unlock(&stop_work[i]);
	pthread_exit(0);
	return NULL;	/* not reached */
}

/*
 * This routine clears the raid set
 */
int init_set (void)
{
	int i, N;

	p = cfg_head;
	sb = &p->sb;
	N = sb->nr_disks;

	printf("initializing raid set\n");
	if ((buffer = malloc (ZERO_BUFFER_SIZE * MD_BLK_SIZ)) == NULL) {
		ERR("couldn't allocate %u bytes for zero buffer\n",
			(ZERO_BUFFER_SIZE * MD_BLK_SIZ));
		goto abort;
	}
	memset(buffer, 0, ZERO_BUFFER_SIZE * MD_BLK_SIZ);

	sum = 0;
	for (i = 0; i < N; i++)
		sum += p->sb_block_offset[i];

	if (sb->level == 1)
		divider = N;
	else
		divider = 1;

	for (i = 0; i < N; i++) {
		pthread_mutex_init(&start_work[i], NULL);
		pthread_mutex_lock(&start_work[i]);

		pthread_mutex_init(&stop_work[i], NULL);
		pthread_mutex_lock(&stop_work[i]);

		fd[i] = open(p->device_name[i], O_WRONLY);
		if (fd[i] == -1)
			goto abort;

		OUT("clearing device %s\n", p->device_name[i]);
		pthread_create(&parallel_thread[i], NULL,
			writer_thread, (void *)i);
	}

	for (i = 0; i < N; i++)
		pthread_mutex_unlock(&start_work[i]);

	for (i = 0; i < N; i++)
		pthread_mutex_lock(&stop_work[i]);

	printf("\n");
	free(buffer);
	return 0;
abort:
	if (buffer)
		free(buffer);
	return 1;
}

/*
 * write_sb - write the raid superblock to every component device.
 *
 * old: non-zero to write the superblock kept in sb_old, zero to write
 *      the newly built one in sb.
 *
 * For every configuration entry the superblock is marked clean, its
 * update time and disk counters are refreshed, it is printed, and then
 * written at sb_block_offset[i] of each device.  Before each write the
 * descriptor of that device is copied into sb->descriptor.
 *
 * Returns 0 on success, 1 on the first failure.  The descriptor of the
 * device being processed is closed on every error path, including a
 * failed seek, which used to return without closing it.
 */
int write_sb (int old)
{
	md_cfg_entry_t *p = cfg_head;
	md_superblock_t *sb;
	md_descriptor_t *disk;
	int i, fd = -1;

	printf("writing raid superblock\n");
	while (p) {
		sb = old ? &p->sb_old : &p->sb;
		sb->state = 1 << MD_SB_CLEAN;
		sb->utime = (__u32) time(NULL);
		sb->working_disks = sb->nr_disks;
		sb->active_disks = sb->raid_disks;
		sb->failed_disks = 0;
		for (i = 0; i < sb->nr_disks; i++) {
			disk = sb->disks + i;
			if (disk->raid_disk < sb->raid_disks)
				disk->state = (1 << MD_ACTIVE_DEVICE | 1 << MD_SYNC_DEVICE);
		}
		print_sb(sb);
		for (i = 0; i < sb->nr_disks; i++) {
			fd = open(p->device_name[i], O_WRONLY);
			if (fd == -1) {
				fprintf(stderr, "couldn't open device %s -- %s\n", p->device_name[i], strerror(errno));
				goto abort;
			}
			if (raidseek(fd, p->sb_block_offset[i]) == -1) {
				fprintf(stderr, "%s: couldn't seek to superblock\n", p->device_name[i]);
				goto abort;
			}
			/* each device gets its own descriptor in the per-device slot */
			memcpy(&sb->descriptor, sb->disks + i, MD_SB_DESCRIPTOR_WORDS * 4);
			if ((write(fd, sb, MD_SB_BYTES)) != MD_SB_BYTES) {
				fprintf(stderr, "%s: couldn't write superblock\n", p->device_name[i]);
				goto abort;
			}
			close(fd);
			fd = -1;	/* nothing left for the abort path to close */
		}
		sync();
		p = p->next;
	}
	return 0;
abort:
	if (fd != -1)
		close(fd);
	return 1;
}

/*
 * check_active - find out from /proc/mdstat whether an MD device is
 * running.
 *
 * The name looked for is "md" followed by the decimal number that comes
 * after the first "/md" in p->md_name.  A line of /proc/mdstat counts as
 * a hit if it contains that name and does not contain "inactive".
 *
 * All digits of the device number are used and a match must not be
 * followed by a further digit, so md1 is no longer confused with md10.
 * If no digit follows "/md", the single next character is used and no
 * boundary check is made, as before.
 *
 * Returns 1 (after printing a message) if the device is active, 0 if it
 * is not, if md_name contains no "/md", or if /proc/mdstat cannot be
 * opened.
 */
int check_active (md_cfg_entry_t *p)
{
	char name[MAX_LINE_LENGTH], line[MAX_LINE_LENGTH], *ch, *hit;
	size_t digits, namelen;
	FILE *fp;

	if ((ch = strstr(p->md_name, "/md")) == NULL)
		return 0;
	ch += 3;
	digits = strspn(ch, "0123456789");
	snprintf(name, sizeof name, "md%.*s", (int) (digits ? digits : 1), ch);
	namelen = strlen(name);

	if ((fp = fopen("/proc/mdstat", "r")) == NULL)
		return 0;
	while (fgets(line, MAX_LINE_LENGTH, fp) != NULL) {
		if (strstr(line, "inactive"))
			continue;
		for (hit = strstr(line, name); hit != NULL; hit = strstr(hit + 1, name)) {
			/* only the prefix of a longer device number, e.g. md1 in md10 */
			if (digits && hit[namelen] >= '0' && hit[namelen] <= '9')
				continue;
			fprintf(stderr, "%s: active -- run mdstop\n", p->md_name);
			fclose(fp);
			return 1;
		}
	}
	fclose(fp);
	return 0;
}

/*
 * read_sb - load the existing raid superblock of every configured MD
 * device into sb_old.
 *
 * Each component device is opened and the superblock at
 * sb_block_offset[i] is read.  Devices that cannot be opened or read, or
 * whose superblock has the wrong magic, are skipped.  Of the valid
 * superblocks the one with the highest update time is kept in sb_old
 * (the first one found wins a tie), and sb_present[i] is set for every
 * device that had a valid one.
 *
 * Returns 1 if a device set has no valid superblock at all or if the
 * version of the superblock kept is not supported, 0 otherwise.
 */
int read_sb (void)
{
	md_cfg_entry_t *entry;
	md_superblock_t scratch;
	int disk, dev_fd, have_sb;

	printf("reading raid superblock\n");
	for (entry = cfg_head; entry; entry = entry->next) {
		have_sb = 0;
		for (disk = 0; disk < entry->sb.nr_disks; disk++) {
			dev_fd = open(entry->device_name[disk], O_RDONLY);
			if (dev_fd == -1)
				continue;

			/* a failed seek and a failed read are reported alike */
			if (raidseek(dev_fd, entry->sb_block_offset[disk]) == -1 ||
			    read(dev_fd, &scratch, MD_SB_BYTES) != MD_SB_BYTES) {
				fprintf(stderr, "%s: couldn't read superblock\n", entry->device_name[disk]);
				close(dev_fd);
				continue;
			}
			close(dev_fd);

			if (scratch.md_magic != MD_SB_MAGIC) {
				fprintf(stderr, "%s: invalid raid superblock magic (%x)\n", entry->device_name[disk], scratch.md_magic);
				continue;
			}

			/* keep the first valid superblock, then any strictly newer one */
			if (!have_sb || scratch.utime > entry->sb_old.utime)
				memcpy(&entry->sb_old, &scratch, MD_SB_BYTES);
			have_sb = 1;
			entry->sb_present[disk] = 1;
		}

		if (!have_sb) {
			fprintf(stderr, "couldn't find a valid raid superblock\n");
			return 1;
		}

		/* same major version required, minor version must not be newer than ours */
		if (entry->sb_old.major_version != MKRAID_MAJOR_VERSION ||
		    entry->sb_old.minor_version > MKRAID_MINOR_VERSION) {
			printf("unsupported raid array version %d.%d.%d\n",
				entry->sb_old.major_version,
				entry->sb_old.minor_version,
				entry->sb_old.patch_version);
			return 1;
		}
	}
	return 0;
}
