/*****
This module is similar to mod_core, but it kicks out data in XML.
If you have an XSL browser, you should be able to read these files
directly.
*****/
static const char rcsid[] = "$Id$";

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/wait.h>
#include "magpie.h"

/* Directory (relative to the working directory) that receives every
   file written by this module; xml_init() creates it with mkdir(). */
#define	OUTPUT_DIR	"xml"
/* Path of the document that holds all packages plus the pseudo-targets. */
#define	OUTPUT_FILE	"xml/all.xml"

/* Local prototype for mkdir().
   NOTE(review): this is normally declared by <sys/stat.h> - confirm
   whether the hand-written prototype is still needed. */
extern int mkdir (const char *, mode_t);

/*
 * Internal DTD subset that is copied verbatim into the generated
 * documents.
 *
 * The predefined entities lt, gt and amp are declared with the
 * character references the XML specification prescribes for such
 * redeclarations; a literal '<' or '&' is not a legal entity value.
 *
 * Keep the text free of '%' in case it is handed to a printf-style
 * function as the format string.
 */
static const char dtd[] = "\
<!DOCTYPE magpie\n\
 [\n\
  <!ELEMENT magpie (package|target)* >\n\
\n\
  <!ELEMENT target EMPTY>\n\
  <!ATTLIST target name ID #REQUIRED>\n\
\n\
  <!ELEMENT package\n\
     (name?, version, maintainer, depends?, recommends?, suggests?, provides?, filename?, size?, md5sum?, summary, description, installed-size?, source?) >\n\
  <!ATTLIST package\n\
     name        ID #REQUIRED\n\
     section	(main|contrib|non-free|non-us|local|affinity|alien) #REQUIRED\n\
     priority   (required|important|standard|optional|extra) #REQUIRED\n\
     category   (admin|base|comm|devel|doc|editors|electronics|games|graphics|hamradio|interpreters|libs|mail|math|misc|net|news|oldlibs|otherosfs|shells|sound|tex|text|utils|web|x11|science|unspecified) #IMPLIED\n\
     essential  (yes|no) #IMPLIED\n\
	 predepends IDREFS   #IMPLIED\n\
	 depends    IDREFS   #IMPLIED\n\
	 suggests   IDREFS   #IMPLIED\n\
	 recommends IDREFS   #IMPLIED\n\
	 provides   IDREFS   #IMPLIED\n\
     >\n\
\n\
  <!ELEMENT maintainer (#PCDATA)>\n\
  <!ELEMENT name (#PCDATA)>\n\
  <!ELEMENT version (#PCDATA)>\n\
  <!ELEMENT filename (#PCDATA)>\n\
  <!ELEMENT depends (#PCDATA)>\n\
  <!ELEMENT recommends (#PCDATA)>\n\
  <!ELEMENT suggests (#PCDATA)>\n\
  <!ELEMENT provides (#PCDATA)>\n\
  <!ELEMENT size (#PCDATA)>\n\
  <!ELEMENT md5sum (#PCDATA)>\n\
  <!ELEMENT summary (#PCDATA)>\n\
  <!ELEMENT description (#PCDATA)>\n\
  <!ELEMENT installed-size (#PCDATA)>\n\
  <!ELEMENT source (#PCDATA)>\n\
\n\
  <!ENTITY lt '&#38;#60;'>\n\
  <!ENTITY gt '&#62;'>\n\
  <!ENTITY amp '&#38;#38;'>\n\
 ]>\n";

/* XML comment that is written in front of the hard-coded <target> list. */
static const char pseudotargets[] =
	"<!--\n"
	"these targets are currently hardcoded (*ugh*), but it's a necessary\n"
	"stopgap measure until we generate them automatically.\n"
	"-->\n";

/*
 * Hard-coded names that xml_init() writes as <target> elements into the
 * combined document, one element per entry, after passing each name
 * through xml_quote_id().
 *
 * NOTE(review): presumably these are virtual or otherwise missing
 * package names that the IDREFS attributes may point at - confirm.
 * The list is kept in roughly alphabetical order.
 */
static const char *targets[] = {
	"9fonts",
	"ada-rm",
	"ale-clone-data",
	"alias",
	"alsadriver",
	"alsalib",
	"alsalib-dev",
	"alsaplayer-output",
	"aolserver",
	"asis",
	"asis-runtime",
	"atlas-dev",
	"awe-patch",
	"awe-utils",
	"awk",
	"base",
	"bibtex",
	"binutils-m68k-palmos-coff",
	"bnlib-dev",
	"browser-45",
	"c++-compiler",
	"c-compiler",
	"c-shell",
	"cdgrab",
	"cfontsdeb",
	"chill-compiler",
	"ckermit",
	"communicator-browser-408",
	"communicator-browser-461",
	"communicator-browser-47",
	"communicator-spell-408",
	"console-tools-data",
	"console-utilities",
	"cplaindeb",
	"csfontsdeb",
	"cslatexdeb",
	"csplaindeb",
	"ctags",
	"cupsys-dev",
	"darxite-client",
	"darxite-monitor",
	"data-dumper",
	"delimmatch",
	"device3dfx-module",
	"dhcpd",
	"diskless-image",
	"docbk-xml",
	"docbook-to-man-ans",
	"doom-engine",
	"doom-wad",
	"doom-wad-editor",
	"dotfile-module",
	"dvips",
	"dvipsk",
	"dvipsk-ja",
	"e2fslibsg",
	"e2p-dev",
	"editor",
	"egcc",
	"elf-binutils",
	"elf-libgdbm",
	"elm",
	"emacs-dl-canna",
	"emacs-dl-wnn",
	"emacsen",
	"emusic",
	"enlightenment-theme",
	"epic4-script",
	"erlang-dev",
	"etags",
	"ex2fs-dev",
	"expect",
	"expect-dev",
	"expectk",
	"ext2fs-dev",
	"ezmlm-idx",
	"festvox-16k",
	"festvox-8k",
	"fortify-linux-ppc",
	"fortify-os2",
	"fortify-unix-mips",
	"fortify-unix-sparc",
	"fortify-unix-x86",
	"fortify-win32",
	"fortran77-compiler",
	"fortune",
	"fortune-cookie-db",
	"freeciv-client",
	"ftape-module",
	"ftp-server",
	"fvwm2",
	"gap4",
	"gas",
	"gcc-m68k-palmos-coff",
	"gdb-m68k-palmos-coff",
	"gdk-imlib",
	"gdk-imlib-development",
	"ghostscript",
	"giflib-dev",
	"gimp-data-min",
	"glibc-pic",
	"gltt2",
	"gltt2-dev",
	"glut-dev",
	"gmt-coastline-data",
	"gnupg",
	"gpg-rsa",
	"gpg-rsaref",
	"gs-pdfencrypt",
	"gstep-base",
	"gstep-base-dbg",
	"gstep-base-dev",
	"gstep-gui-dev",
	"gstep-xgps",
	"gstep-xgps-dev",
	"gtkicq",
	"guile",
	"honyaku-damashii-server",
	"honyakudamashii-server",
	"httpd",
	"hugs98",
	"hylafax-client",
	"hztty",
	"ident-server",
	"ifcico-cm",
	"ilu",
	"imap-client",
	"imap-server",
	"imap-serverdoc",
	"imlib",
	"imlib-development",
	"info-browser",
	"intlfonts-chinese",
	"intlfonts-european",
	"intlfonts-japanese",
	"io",
	"irc",
	"ircii",
	"ish",
	"ispell-dictionary",
	"itcl-dev",
	"itclsh",
	"itk-dev",
	"itkwish",
	"iwidgets-dev",
	"java-compiler",
	"java-virtual-machine",
	"jcodepl",
	"jdk-common",
	"jdk1.1-runtime",
	"jdk1.2",
	"kakasi-dev",
	"kernel-headers",
	"kernel-image",
	"kernel-image-2.0.35",
	"kernel-patch-kdb",
	"kernel-source",
	"kinput2",
	"knfs",
	"komirr",
	"kpilot",
	"lam-dev",
	"lambdamoo-core",
	"lambdamoo-server",
	"latex",
	"ldap-server",
	"ldconfig",
	"lg-issue",
	"lib-fesi-java",
	"libapache-mod-auth-sys",
	"libapache-mod-python",
	"libapache-mod-put",
	"libapache-mod-roaming",
	"libapache-mod-ssl",
	"libapt-pkg2.6",
	"libart-pkg2.6",
	"libasound0",
	"libawe-dev",
	"libbfd-dev",
	"libc-dev",
	"libc-doc",
	"libc-pic",
	"libc6.1",
	"libcomerr2",
	"libcq-plugin",
	"libdbd-msql-perl",
	"libdl1",
	"libe2p2",
	"libeb",
	"libeb-deb",
	"libeb-dev",
	"libext2fs",
	"libext2fs2",
	"libfreecdb-dev",
	"libg++",
	"libg++-dev",
	"libgc-dev",
	"libgcj-dev",
	"libgd-dev",
	"libgdk-imlib1",
	"libgg-dev",
	"libgg0",
	"libgg0-dev",
	"libggi-dev",
	"libggi-doc",
	"libggi-target",
	"libgii-dev",
	"libgl-dev",
	"libgl1",
	"libglade-dev",
	"libglade-gnome-dev",
	"libglib-dev",
	"libglib1.1-dev",
	"libglide-dev",
	"libglide2",
	"libgtk1.1-dev",
	"libgtkgl-dev",
	"libguile3",
	"libguile4",
	"libguile4-dev",
	"libid3-dev",
	"libimlib1",
	"libjpeg-dev",
	"libjpeg-gif",
	"libjpeg6b",
	"libjsw1",
	"liblam",
	"libmbonecommon",
	"libmbonecommon-dev",
	"libmd5-perl",
	"libmikmod-dev",
	"libmm-dev",
	"libmsql2",
	"libncurses-dev",
	"libnet",
	"libnet-dev",
	"libopenldap1-dev",
	"libpam-dev",
	"libpam-mkhomedir",
	"libpam-motd",
	"libpng-dev",
	"libproplist-dev",
	"libreadline-dbg",
	"libreadline-dev",
	"libreadline-doc",
	"librrds-perl",
	"libsmi-dev",
	"libsnmp",
	"libsnmp-dev",
	"libss2",
	"libstdc++-dev",
	"libtclobjc",
	"libtermreadkey-perl",
	"libtiff-dev",
	"libtsp-dev",
	"libucl-common",
	"libucl-common-dev",
	"libuuid1",
	"libwmaker-dev",
	"libwrap-dev",
	"libwraster-dev",
	"libxml0",
	"libxmlo",
	"libxmpi",
	"libxpm4",
	"libxpm4-dev",
	"libz-dev",
	"libz1",
	"licq-plugin",
	"linuxdoc-sgml",
	"lisp-compiler",
	"lisp-core",
	"lm-sensors-doc",
	"lm-sensors-mod",
	"lpd",
	"lsof",
	"ltxgraph",
	"mail-reader",
	"mail-transport-agent",
	"mail-user-agent",
	"man",
	"man-browser",
	"metafont",
	"modules",
	"mpi",
	"mule2",
	"multiarch-binutils",
	"nat",
	"navigator-browser-408",
	"navigator-browser-461",
	"navigator-browser-47",
	"navigator-smotif-45",
	"navigator-spellchk-408",
	"ncbi-tools-dev",
	"ncurses-dev",
	"ncurses-runtime",
	"netcdf",
	"netcdfg",
	"netscape-browser-408",
	"netscape-browser-461",
	"netscape-browser-47",
	"news-reader",
	"news-transport-system",
	"nntpcache-doc",
	"objc-compiler",
	"offix-editor",
	"offix-files",
	"open",
	"palmpython",
	"pam-doc",
	"pascal-compiler",
	"pbmplus",
	"pcmcia-modules",
	"pdf-viewer",
	"perl-curses",
	"perl-doc",
	"perl5",
	"perl5-base",
	"perl5-suid",
	"perl5-thread",
	"pgp",
	"pgp-i",
	"pgp5",
	"php3-cgi-database",
	"php3-database",
	"php3-msql",
	"picons",
	"pike-crypto",
	"pine",
	"pop2-server",
	"pop3-server",
	"popclient",
	"postscript-preview",
	"postscript-viewer",
	"ppmtoagafb",
	"ptex-base",
	"pw",
	"python",
	"python-bsddb",
	"python-curses",
	"python-misc",
	"python-net",
	"qmail",
	"qt2-dev",
	"quake",
	"radiusd",
	"ratfor77",
	"rgrep",
	"rvplayer",
	"rx1g",
	"rx1g-dev",
	"scalapack-mpi",
	"scalapack-pvm",
	"scalapack1-lam-test",
	"sdr",
	"serialmail",
	"siag",
	"slang-dev",
	"slang-dev6",
	"slang-pic",
	"squake",
	"ssh-askpass",
	"svgalib1-bin",
	"syslogd",
	"tcd",
	"tcl-dev",
	"tcl-doc",
	"tcl76",
	"tclsh",
	"tclx-dev",
	"tclx80",
	"telnet98",
	"tex",
	"timedate",
	"tinytable-zope",
	"tip",
	"tix",
	"tix-dev",
	"tk-dev",
	"tk-doc",
	"tk42",
	"tkman",
	"tm",
	"toshiba-fan",
	"toshiba-hotkey",
	"tpctl-modules",
	"tput",
	"translation-dictionary",
	"troffcnv",
	"tyvis-dev",
	"ucspi-tcp",
	"ups-monitor",
	"userlink",
	"vflib",
	"virtual-mysql-server",
	"vncviewer",
	"vrml-browser",
	"w3-el",
	"watch",
	"web-browser",
	"wish",
	"wmaker-gnome",
	"wmaker-plain",
	"wmaker-usersguide",
	"wnn",
	"wnn6",
	"wordlist",
	"www-browser",
	"www-search",
	"x-terminal-emulator",
	"x-window-manager",
	"xarchon-theme",
	"xcoral-doc",
	"xdvi",
	"xemacs19",
	"xemacs20",
	"xfnt100",
	"xfnt75",
	"xfntbase",
	"xfntbig",
	"xfntbig5-cmex24m",
	"xfntbig5p-cmex24m",
	"xfntcyr",
	"xfntpex",
	"xfntscl",
	"xfonts-biznet-iso-8859-base",
	"xfonts-johab",
	"xforms0.86",
	"xmhtml-dev",
	"xmodmap",
	"xmpi",
	"xmpi-dev",
	"xmpi-runtime",
	"xpilot-client",
	"xquake",
	"xserver",
	"xshipwars-images",
	"xshipwars-sounds",
	"xswallow",
	"xvile",
	"zcode-game",
	"zcode-interpreter",
	"zip-crypt"
	};


/*+
Write 'p' to 'fp' as a complete attribute value: an opening double
quote, the text with the characters < > & ' " replaced by their
predefined entity references, and a closing double quote.
'p' must not be NULL.
+*/
static void xml_quote_attribute (FILE *fp, const char *p)
{
	const char *cursor;

	assert (p);

	fputc ('"', fp);
	for (cursor = p; *cursor != '\0'; cursor++) {
		if (*cursor == '<')
			fputs ("&lt;", fp);
		else if (*cursor == '>')
			fputs ("&gt;", fp);
		else if (*cursor == '&')
			fputs ("&amp;", fp);
		else if (*cursor == '\'')
			fputs ("&apos;", fp);
		else if (*cursor == '"')
			fputs ("&quot;", fp);
		else
			fputc (*cursor, fp);
	}
	fputc ('"', fp);
}

/*+
Write 'p' to 'fp' as element content: the characters < > & are
replaced by their entity references, everything else is copied
unchanged.  No surrounding quotes are written.  'p' must not be NULL.
+*/
static void xml_quote_data (FILE *fp, const char *p)
{
	const char *entity;

	assert (p);

	for (; *p != '\0'; p++) {
		switch (*p) {
		case '<':  entity = "&lt;";  break;
		case '>':  entity = "&gt;";  break;
		case '&':  entity = "&amp;"; break;
		default:   entity = NULL;    break;
		}

		if (entity != NULL)
			fputs (entity, fp);
		else
			fputc (*p, fp);
	}
}

/*+
Write 'p' to 'fp' in a form usable as the value of an 'id' attribute.

An 'N' is written first when the text starts with a digit.  Every "++"
becomes ".plusplus." and every remaining single '+' becomes ".and.";
all other characters are copied unchanged.  No quotes are written.
'p' must not be NULL.
+*/
static void xml_quote_id (FILE *fp, const char *p)
{
	char ch;

	assert (p);

	/* the <ctype.h> functions are only defined for unsigned char
	   values (and EOF), so a plain char must be converted first */
	if (isdigit ((unsigned char) *p))
		fputc ('N', fp);

	while ((ch = *p++)) {
		if (ch != '+') {
			fputc (ch, fp);
		}
		else if (*p == '+') {
			fputs (".plusplus.", fp);
			p++;	/* the second '+' has been consumed as well */
		}
		else {
			fputs (".and.", fp);
		}
	}
}

/*+
Write a package list to 'fp' as one double-quoted, space-separated
list of ids.  Every entry on the 'next' chain is written with
xml_quote_id(), followed by the entries hanging off its 'down'
pointer.  An empty list yields just "".

NOTE(review): the entries below 'down' are followed through 'next'
here, whereas put_list() follows them through 'down' - confirm which
link is the intended one.
+*/
static void xml_quote_ids (FILE *fp, struct package_list *d)
{
	struct package_list *entry;
	struct package_list *alt;

	fputc ('"', fp);
	for (entry = d; entry != NULL; entry = entry->next) {
		xml_quote_id (fp, entry->name);

		for (alt = entry->down; alt != NULL; alt = alt->next) {
			fputc (' ', fp);
			xml_quote_id (fp, alt->name);
		}

		/* separator only between entries, never after the last */
		if (entry->next != NULL)
			fputc (' ', fp);
	}
	fputc ('"', fp);
}


/*+
Write the <description> element of package 'p': each of the
p->desccnt description lines is indented by four spaces, escaped with
xml_quote_data() and put on a line of its own.
+*/
static void put_description (FILE *fp, struct package_info *p)
{
	int line;

	fputs ("  <description>\n", fp);
	for (line = 0; line < p->desccnt; line++) {
		fputs ("    ", fp);
		xml_quote_data (fp, p->description[line]);
		fputc ('\n', fp);
	}
	fputs ("  </description>\n", fp);
}

/*+
Write <name>value</name> with the value escaped by xml_quote_data().
Nothing is written when 'value' is NULL or empty.  Short pairs go on
a single line; longer ones put the value on an indented line of its
own between the tags.
+*/
static void xml_name_value (FILE *fp, const char *name, const char *value)
{
	const char *after_open;
	const char *before_close;

	if (value == NULL || *value == '\0')
		return;

	/* the name appears twice (open and close tag), hence the factor */
	if (2 * strlen (name) + strlen (value) < 78) {
		after_open = "";
		before_close = "";
	}
	else {
		after_open = "\n    ";
		before_close = "\n  ";
	}

	fprintf (fp, "  <%s>%s", name, after_open);
	xml_quote_data (fp, value);
	fprintf (fp, "%s</%s>\n", before_close, name);
}


/*+
Write one list entry as "name" or "name restriction".  Both parts are
escaped with xml_quote_data(): version restrictions can contain '<',
which must not appear literally in element content.
+*/
static void put_list_item (FILE *fp, struct package_list *item)
{
	xml_quote_data (fp, item->name);
	if (item->restriction) {
		fputc (' ', fp);
		xml_quote_data (fp, item->restriction);
	}
}

/*+
Write a package list as the content of the element 'name'.

Entries on the 'next' chain are separated by ", "; the entries hanging
off an entry's 'down' pointer are appended to it, each preceded by
" | ".  Names and restrictions are XML-escaped.

NOTE(review): the entries below 'down' are followed through 'down'
here, whereas xml_quote_ids() follows them through 'next' - confirm
which link is the intended one.
+*/
static void put_list (FILE *fp, const char *name, struct package_list *d)
{
	struct package_list *q;

	fprintf (fp, "  <%s>", name);
	while (d) {
		put_list_item (fp, d);

		for (q = d->down; q; q = q->down) {
			fputs (" | ", fp);
			put_list_item (fp, q);
		}

		d = d->next;
		if (d)
			fputs (", ", fp);
	}
	fprintf (fp, "</%s>\n", name);
}


/*+
Write the XML information about a single package.

The <package> start tag carries the id-quoted name, the section, the
category (left out for section "non-US"), the priority, an optional
essential="yes" and the dependency lists as id lists.  All attribute
values are quoted, as XML requires.  The child elements follow: name,
version, maintainer, the dependency lists in readable form, filename,
size, md5sum, summary, description and, when present, installed-size
and source.
+*/
static void put_package (FILE *fp, struct package_info *p)
{
	fputs ("<package name=\"", fp);
	xml_quote_id (fp, p->name);
	fputc ('"', fp);

	/* xml_quote_attribute() supplies the surrounding quotes */
	fputs (" section=", fp);
	xml_quote_attribute (fp, sections[p->section]);
	if (strcmp (sections[p->section], "non-US") != 0) {
		fputs (" category=", fp);
		xml_quote_attribute (fp, categories[p->category]);
	}
	fputs (" priority=", fp);
	xml_quote_attribute (fp, priorities[p->priority]);

	if (p->essential)
		fputs (" essential=\"yes\"", fp);
#if 0
	if (p->installed)
		fprintf (fp, " installed");
	if (p->unpacked)
		fprintf (fp, " unpacked");
#endif
	if (p->predepends) {
		fprintf (fp, "\n         predepends=");
		xml_quote_ids (fp, p->predepends);
	}
	if (p->depends) {
		fprintf (fp, "\n         depends=");
		xml_quote_ids (fp, p->depends);
	}
	if (p->suggests) {
		fprintf (fp, "\n         suggests=");
		xml_quote_ids (fp, p->suggests);
	}
	if (p->recommends) {
		fprintf (fp, "\n         recommends=");
		xml_quote_ids (fp, p->recommends);
	}
	if (p->provides) {
		fprintf (fp, "\n         provides=");
		xml_quote_ids (fp, p->provides);
	}
	fprintf (fp, ">\n");

	xml_name_value (fp, "name", p->name);
	xml_name_value (fp, "version", p->version);
	xml_name_value (fp, "maintainer", p->maintainer);

	if (p->depends)
		put_list (fp, "depends", p->depends);
	if (p->recommends)
		put_list (fp, "recommends", p->recommends);
	if (p->suggests)
		put_list (fp, "suggests", p->suggests);
	if (p->provides)
		put_list (fp, "provides", p->provides);

	xml_name_value (fp, "filename", p->filename);
	fprintf (fp, "  <size>%ld</size>\n", p->size);
	xml_name_value (fp, "md5sum", p->md5sum);

	xml_name_value (fp, "summary", p->summary);
	put_description (fp, p);

	if (p->installed_size)
		fprintf (fp, "  <installed-size>%ld</installed-size>\n", 
			p->installed_size);
	if (p->source)
		put_list (fp, "source", p->source);

	fprintf (fp, "</package>\n");
	fprintf (fp, "\n");
}


/*+
Create 'pathname' and write everything that precedes the first
package: the XML declaration, the DTD, a comment describing the
selection and the opening <magpie> tag.

With 'kind' == NULL the comment reads "all content", otherwise
"<kind> '<value>' only".

Returns the open stream, or NULL (after a perror() report) when the
file cannot be created.
+*/
static FILE *xml_open_document (const char *pathname, const char *kind,
	const char *value)
{
	FILE *fp;

	fp = fopen (pathname, "w");
	if (!fp) {
		perror (pathname);
		return NULL;
	}

	/* the XML declaration is only allowed at the very start of a
	   document, so it must be written before the DOCTYPE */
	fputs ("<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n", fp);
	fputs ("\n", fp);
	fputs (dtd, fp);
	fputs ("\n", fp);
	if (kind)
		fprintf (fp, "<!-- %s '%s' only -->\n", kind, value);
	else
		fputs ("<!-- all content -->\n", fp);
	fputs ("\n", fp);
	fputs ("<magpie>\n", fp);
	fputs ("\n", fp);

	return fp;
}

/*+
Write the closing </magpie> tag and close the stream.
Returns 0 on success, -1 (after a message on stderr) if any write to
the stream or the close itself failed.
+*/
static int xml_close_document (FILE *fp, const char *pathname)
{
	int failed;

	fputs ("</magpie>\n", fp);
	failed = ferror (fp);
	if (fclose (fp) != 0)
		failed = 1;

	if (failed) {
		fprintf (stderr, "%s: write error\n", pathname);
		return -1;
	}
	return 0;
}

/*+
Create basic XML documents

Writes, below OUTPUT_DIR:
  - OUTPUT_FILE with every cached package plus the hard-coded targets,
  - one file per section (only the first five sections),
  - one file per priority.
Each file is handed to gzip() once it has been closed.

Returns 0 on success and -1 as soon as a file cannot be created or
written.
NOTE(review): -1 is assumed to be understood as failure by whatever
calls the module's init hook - confirm.
+*/
static int xml_init (void)
{
	FILE *fp;
	int i, j, len;
	size_t t;
	struct package_info *p;
	char pathname[256];

	/* result deliberately ignored: the directory may already exist,
	   and a real failure is caught by the fopen() that follows */
	mkdir (OUTPUT_DIR, 0755);

	fp = xml_open_document (OUTPUT_FILE, NULL, NULL);
	if (!fp)
		return -1;

	for (i = 0; i < cachecnt; i++)
		put_package (fp, cache[i]);

	fputs ("\n", fp);
	fputs (pseudotargets, fp);
	for (t = 0; t < sizeof targets / sizeof targets[0]; t++) {
		/* <target> is declared EMPTY, so use an empty-element tag */
		fputs ("<target name=\"", fp);
		xml_quote_id (fp, targets[t]);
		fputs ("\"/>\n", fp);
	}

	if (xml_close_document (fp, OUTPUT_FILE) != 0)
		return -1;
	gzip (OUTPUT_FILE);

	/*
	 *	Also dump some individual files as well
	 */
	/* NOTE(review): the limit of 5 sections is hard-coded; the reason
	   is not visible in this file */
	for (j = 0; j < CNT_SECTIONS && j < 5; j++) {
		len = snprintf (pathname, sizeof pathname, "%s/%s.xml",
			OUTPUT_DIR, sections[j]);
		if (len < 0 || (size_t) len >= sizeof pathname) {
			fprintf (stderr, "%s/%s.xml: path too long\n",
				OUTPUT_DIR, sections[j]);
			return -1;
		}

		fp = xml_open_document (pathname, "section", sections[j]);
		if (!fp)
			return -1;

		for (i = 0; i < cachecnt; i++) {
			p = cache[i];
			if (p->section != j)
				continue;

			put_package (fp, p);
		}

		if (xml_close_document (fp, pathname) != 0)
			return -1;
		gzip (pathname);
	}

	for (j = 0; j < CNT_PRIORITIES; j++) {
		len = snprintf (pathname, sizeof pathname, "%s/%s.xml",
			OUTPUT_DIR, priorities[j]);
		if (len < 0 || (size_t) len >= sizeof pathname) {
			fprintf (stderr, "%s/%s.xml: path too long\n",
				OUTPUT_DIR, priorities[j]);
			return -1;
		}

		fp = xml_open_document (pathname, "priority", priorities[j]);
		if (!fp)
			return -1;

		for (i = 0; i < cachecnt; i++) {
			p = cache[i];
			if (p->priority != j)
				continue;

			put_package (fp, p);
		}

		if (xml_close_document (fp, pathname) != 0)
			return -1;
		gzip (pathname);
	}

	return 0;
}


/*+
Module descriptor for the XML flavour of the core module: it carries
the magpie version, a short description and xml_init as the
initialisation hook.  Standard C99 designated initializers replace
the obsolete GNU "field: value" form.
+*/
struct magpie_module mod_core_xml = {
	.version     = MAGPIE_VERSION,
	.description = "core module (XML version)",
	.init        = xml_init
};
