# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is RDFGrabber version 1.0.
#
# The Initial Developer of the Original Code is European Environment
# Agency (EEA).  Portions created by EEA are
# Copyright (C) European Environment Agency.  All
# Rights Reserved.
#
# Contributor(s):
# Soren Roug, EEA
# Tomas Hjelmberg, CMG
#

# Here we put the Zope class stuff
# Zope imports
from DateTime import *
import Globals
from Globals import Persistent, Acquisition
import OFS
import AccessControl

import binascii,md5
import rdfparser, objects

import pickle, types, os, string, re
from os.path import join, isfile

# Directory that holds the external pickle file of every RDFGrabber.
# NOTE(review): CLIENT_HOME is neither defined nor imported in this file;
# presumably Zope provides it as a builtin at startup -- confirm.
_repos = join(CLIENT_HOME, 'RDFGrabber')

# Translation table that maps '/' to '-'; _get_new_ufn() applies it to
# base64 text so the result can be used as a file name.
fixslash = string.maketrans('/','-')

class SPO:
    # One RDF statement: a (subject, predicate, object) triple.
    # The terms are stored exactly as passed in.  The plain accessors hand
    # back the stored values, the str* accessors their str() form.
    # (Comments instead of docstrings on purpose: the original methods had
    # no docstrings.)

    # Allow (reluctantly) access to unprotected attributes
    __allow_access_to_unprotected_subobjects__ = 1

    def __init__(self, subject, predicate, object):
        self._subject, self._predicate, self._object = \
            subject, predicate, object

    # --- raw terms ---
    def subject(self):
        return self._subject

    def predicate(self):
        return self._predicate

    def object(self):
        return self._object

    # --- terms converted with str() ---
    def strsubject(self):
        return str(self.subject())

    def strpredicate(self):
        return str(self.predicate())

    def strobject(self):
        return str(self.object())

class RDFGrabber (
    Acquisition.Implicit,
    Persistent,
    AccessControl.Role.RoleManager,
    OFS.SimpleItem.Item):
    "Retrieve RDF from other websites."

    # Permission declarations: each entry is (permission name, tuple of
    # method names[, third tuple]).
    # NOTE(review): the third element ('Manager',) is presumably the list of
    # roles that get the permission by default -- confirm.
    # NOTE(review): 'triples_html' is offered as a management tab below but
    # is not listed under any permission here -- verify that is intended.
    __ac_permissions__=(
	('View management screens', ('manage_main',)),
	('View', ('', 'index_html', 'update','dumbdown',
	  'query', 'query_html', 'show_source','label_of',
	  'rdfsources',
	  )),
	('Change RDFGrabbers', ('manage_edit',), ('Manager',)),
	)

    # Entries of the management screen: a label and the attribute of this
    # object it points to, followed by the options from SimpleItem.
    manage_options=(
	{'label':'Properties', 'action':'manage_main'},
        {'label':'Query', 'action':'query_html'},
	{'label':'Triples', 'action':'triples_html'},
	{'label':'Update', 'action':'update'},
	{'label':'Source', 'action':'show_source'},
	) + OFS.SimpleItem.SimpleItem.manage_options

    meta_type = 'RDF Grabber'

    # Pages built from the DTML files "htmlview", "source", "edit_prop",
    # "results" and "index_html" (looked up through this module's globals()).
    triples_html = Globals.DTMLFile("htmlview", globals())
    show_source = Globals.DTMLFile("source", globals())
    manage_main = Globals.DTMLFile("edit_prop", globals())
    query_html = Globals.DTMLFile("results", globals())
    index_html = Globals.DTMLFile("index_html", globals())

    def rdfsources(self,key):
	"Returns a dictionary of sources where the key is the url"
        return self._v_rdfsources[key]

    def lastupdated(self):
	return self._v_updatedate

    def triples(self):
        return self._v_triples

    def subjects(self):
        return self._v_subjects

    def predicates(self):
        return self._v_predicates

    def objects(self):
        return self._v_objects

    def filename(self):
	return self._filename

    def lookup_subject(self,subject):
	if not self._v_subjects.has_key(str(subject)):
	    return []
        return self._v_subjects[str(subject)]

    def lookup_predicate(self,predicate):
	if not self._v_predicates.has_key(str(predicate)):
	    return []
        return self._v_predicates[str(predicate)]

    def lookup_object(self,object):
	if not self._v_objects.has_key(str(object)):
	    return []
        return self._v_objects[str(object)]

    def query(self,subject=None,predicate=None,object=None,onehit=None):
	"Search triples that match the query"
	res = []
	try:
	    if subject:
		    tmp =  self._v_subjects[str(subject)]
		    subject=None
	    elif predicate:
		tmp =  self._v_predicates[str(predicate)]
		predicate=None
	    elif object:
		tmp =  self._v_objects[str(object)]
		object=None
	    else:
		if onehit:
		    return self._v_triples[0]
		else:
		    return self._v_triples
	except:
	    tmp = []

	for x in tmp:
	    c = x
	    if subject and str(subject) != str(x.subject()):
		c = None
	    if c and predicate and str(predicate) != str(c.predicate()):
		c = None
	    if c and object and str(object) != str(c.object()):
		c = None
	    if c:
		res.append(c)

#	if self._v_subjects.has_key(str(subject)):
#	    for x in self._v_subjects[str(subject)]:
#		if str(x.predicate()) == str(predicate):
#		    res.append(x)
	if onehit:
	    if len(res) > 0:
		return res[0]
	    else:
		return None
	else:
	    return res
	

    def label_of(self,predicate):
        """Convenience to find the label for a predicate
	   Assumes the RDF schema for that class has been loaded
	"""
	for item in self.lookup_subject(predicate):
	    if item.strpredicate() == "http://www.w3.org/2000/01/rdf-schema#label":
		return item.strobject()
	return predicate

    def dumbdown(self,object):
	"""
	This convenience method will find follow the pointers
	(the generated identifiers) for a subject,predicate pair
	and return a list of objects when it finds
	an rdf:Bag or an rdf:value
	"""
	list = []
	return self._dumbdown(self.lookup_object(str(object)),list)

    def _dumbdown(self,spo,list):
        for i in spo:
            if i.object().is_anonymous() is None:
                if i.strpredicate() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#value" \
                or i.strpredicate()[:44] == "http://www.w3.org/1999/02/22-rdf-syntax-ns#_":
                    list.append(i.strobject())
            else:
                self._dumbdown(self.lookup_subject(i.strobject()),list)
        return list

    def _loadpickles(self):
        try:
            f = open(self.physicalpath(self._filename), 'r')
	    self._v_updatedate = pickle.load(f)
            self._v_rdfsources = pickle.load(f)
            self._v_triples = pickle.load(f)
            self._v_subjects = pickle.load(f)
            self._v_predicates = pickle.load(f)
            self._v_objects = pickle.load(f)
            f.close()
        except IOError:
	    self._v_updatedate = None
            self._v_rdfsources = {}
	    self._v_triples = []
	    self._v_subjects = {}
	    self._v_predicates = {}
	    self._v_objects = {}

    def __init__(self, id, title, rdfurls, http_proxy):
        self.id = id
        self.title = title
        self.rdfurls = rdfurls
        self.http_proxy = http_proxy
	self._filename = ''

        self._v_rdfsources = {}
	self._v_updatedate = None
	self._v_triples = []
	self._v_subjects = {}
	self._v_predicates = {}
	self._v_objects = {}

    def __setstate__(self,state):
        # Restore the stored attributes, upgrade instances saved by older
        # versions of this class, then read the triple store back from the
        # external file.
        Persistent.__setstate__(self,state)

        # backwards compatibility: the file used to be named after the id
        if not hasattr(self, "_filename"):
            self._filename = self.id
        # backwards compatibility: a single 'rdfurl' became the list 'rdfurls'
        if not hasattr(self, "rdfurls"):
            self.rdfurls = [ self.rdfurl ]
            del self.rdfurl
        self._loadpickles()

    def _ICanAdd(self,subject, predicate, object):
        # Callback handed to the RDF parser: store one statement.
        # A single SPO instance is appended to the triple list and shared
        # by the three indexes, each keyed by the str() of its term.
        spo = SPO(subject, predicate, object)
        self._v_triples.append(spo)

        indexed_terms = (
            (self._v_subjects, subject),
            (self._v_predicates, predicate),
            (self._v_objects, object),
        )
        for index, term in indexed_terms:
            index.setdefault(str(term), []).append(spo)

    def manage_edit(self, title, rdfurls, http_proxy, REQUEST=None):
	"Edits the grabber's characteristics"
	self.title = title
	self.rdfurls = rdfurls
	self.http_proxy = http_proxy
        return self.update(REQUEST)

    def update(self, REQUEST=None):
        """Call this function to get it to update its content.

        Empties the in-memory store, parses every URL in self.rdfurls
        (the parser adds triples through _ICanAdd) and records the source
        reported for each URL.  The result is then pickled to this object's
        external file; the previous file, if any, is kept as
        '<file>.undo'.

        Returns a MessageDialog when REQUEST is given, otherwise None.
        Raises OSError when the repository directory cannot be created.
        """
        # make the directories
        if not os.path.isdir(_repos):
            try:
                os.makedirs(_repos)
            except OSError:
                # Fine if somebody else created it in the meantime.
                if not os.path.isdir(_repos):
                    raise OSError('Can\'t create directory %s' % _repos)

        self._v_updatedate = DateTime()
        self._v_triples = []
        self._v_subjects = {}
        self._v_predicates = {}
        self._v_objects = {}
        self._v_rdfsources = {}
        p = rdfparser.RDFParser(self._ICanAdd, http_proxy=self.http_proxy)
        for rdfurl in self.rdfurls:
            p.parse_url(rdfurl)
            self._v_rdfsources[rdfurl] = p.rdfsource

        # Without a file name physicalpath() is the repository directory
        # itself; renaming and overwriting that would wreck the files of
        # every grabber, so nothing is written in that case (_undo() skips
        # an empty name in the same way).
        if self._filename != '':
            fn = self.physicalpath(self._filename)
            try:
                os.rename(fn, fn+'.undo')
            except OSError:
                # no previous file to keep
                pass

            # write objects -- _loadpickles() reads them in this order
            f = open(fn, 'w')
            try:
                for part in (self._v_updatedate, self._v_rdfsources,
                             self._v_triples, self._v_subjects,
                             self._v_predicates, self._v_objects):
                    pickle.dump(part, f)
            finally:
                f.close()

        if REQUEST is not None:
            return Globals.MessageDialog(
                title='Updated',
                message='Content of <strong>%s</strong> has been updated.<br>' %
                self.id, action ='manage_main')

    def _undo(self):
        """ Restore the external file after undo or copy-paste.

        If the file is gone but its '.undo' copy exists, the copy is
        renamed back; the store is then reloaded.  Does nothing for an
        object that has no file name yet.
        """
        name = self._filename
        if name == '':
            return
        current = self.physicalpath(name)
        if not isfile(current):
            backup = current + '.undo'
            if isfile(backup):
                os.rename(backup, current)
        self._loadpickles()

    def _copy(self, infile, outfile):
	""" read binary data from infile and write it to outfile
	    infile and outfile my be strings, in which case a file with that
	    name is opened, or filehandles, in which case they are accessed
	    directly.
	"""
	if type(infile) is types.StringType:
		try:
			instream = open(infile, 'rb')
		except IOError:
			self._undo()
			try:
				instream = open(infile, 'rb')
			except IOError:
				raise IOError, ("%s (%s)" %(self.id, infile))
		close_in = 1
	else:
		instream = infile
		close_in = 0
	if type(outfile) is types.StringType:
		try:
			outstream = open(outfile, 'wb')
		except IOError:
			raise IOError, ("%s (%s)" %(self.id, outfile))
		close_out = 1
	else:
		outstream = outfile
		close_out = 0
	try:
		blocksize = 2<<16
		block = instream.read(blocksize)
		outstream.write(block)
		while len(block)==blocksize:
			block = instream.read(blocksize)
			outstream.write(block)
	except IOError:
		raise IOError, ("%s (%s)" %(self.id, filename))
	try: instream.seek(0)
	except: pass
	if close_in: instream.close()
	if close_out: outstream.close()

    def _get_new_ufn(self):
        """ Create the file name for this object's external file.

            The name is the base64 text of the MD5 digest of
            self.absolute_url(1).  The base64 set of characters (rfc1341)
            unfortunately includes the / character, which cannot appear in
            a UNIX file name, so it is replaced by '-'; the line ending
            that the encoder appends is dropped.
        """
        digest = md5.new(self.absolute_url(1)).digest()
        encoded = binascii.b2a_base64(digest)
        return string.translate(encoded, fixslash, '\r\n')

    def physicalpath(self, filename=''):
        """ Generate the full filename below the repository directory.

            filename is either one name or a list of path components to
            be joined onto _repos; for '' (the default) the repository
            directory itself is returned.
        """
        if type(filename)==types.ListType:
            return join(_repos, *filename)
        if filename == '':
            return _repos
        return join(_repos, filename)


    ################################
    ## Special management methods  #
    ################################

    def manage_afterAdd(self, item, container, new_fn=None):
        """ This method is called, whenever _setObject in ObjectManager gets
        called. This is the case after a normal add and if the object is a
        result of cut-paste- or rename-operation.

        The object gets a new external file name (new_fn, or a generated
        one).  If it already had a file, that file -- or, failing that, its
        '.undo' copy -- is copied to the new name before the store is
        reloaded from it.
        """
        if not new_fn:
            new_fn = self._get_new_ufn()
        if self._filename != '':
            old_fn = self.physicalpath(self._filename)
            # prefer the live file, fall back to the '.undo' copy
            for candidate in (old_fn, old_fn+'.undo'):
                if isfile(candidate):
                    self._copy(candidate, self.physicalpath(new_fn))
                    break
        self._filename = new_fn
        self._loadpickles()
        inherited = RDFGrabber.inheritedAttribute ("manage_afterAdd")
        return inherited(self, item, container)

    def manage_beforeDelete(self, item, container):
        """ This method is called, when the object is deleted. To support
        undo-functionality and because this happens too, when the object
        is moved (cut-paste) or renamed, the external file is not deleted.
        It is just renamed to filename.undo and remains in the
        repository, until it is deleted manually.

        An older filename.undo is removed first.  Nothing is touched on
        disk when the object has no file name yet.
        """
        # Without a file name physicalpath() is the repository directory
        # itself, and renaming that would take away the files of every
        # grabber (_undo() skips an empty name in the same way).
        if self._filename != '':
            fn = self.physicalpath(self._filename)
            try:
                os.unlink(fn+'.undo')
            except OSError:
                # no older '.undo' copy to remove
                pass
            try:
                os.rename(fn, fn+'.undo')
            except OSError:
                # no file to preserve
                pass
        return RDFGrabber.inheritedAttribute ("manage_beforeDelete") \
               (self, item, container)

    def manage_undo_transactions(self, transaction_info, REQUEST=None):
        """ This method is called, when the user has chosen an Undo-action.
        To support undo-functionality the external file is just renamed back
        from filename.undo to filename and the store is reloaded from it;
        the undo itself is then left to the inherited method.
        """
        current = self.physicalpath(self._filename)
        backup = current + '.undo'
        try:
            os.rename(backup, current)
            self._loadpickles()
        except OSError:
            # nothing could be renamed back, so there is nothing to reload
            pass
        inherited = RDFGrabber.inheritedAttribute ("manage_undo_transactions")
        return inherited(self, transaction_info, REQUEST)

# Run Zope's class initialiser on RDFGrabber.
# NOTE(review): presumably this is what puts the __ac_permissions__
# declarations into effect -- confirm against the Globals module.
Globals.default__class_init__(RDFGrabber)

def manage_addRDFGrabber(self, id, title, rdfurls, http_proxy, REQUEST=None):
    """Create an RDFGrabber and install it in its parent Folder.

    The argument 'self' will be bound to the parent Folder.  When called
    with a REQUEST the Folder's manage_main page is returned, otherwise
    None.
    """
    self._setObject(id, RDFGrabber(id, title, rdfurls, http_proxy))
    if REQUEST is None:
        return None
    return self.manage_main(self, REQUEST)

# Page built from the DTML file 'add_grabber' (looked up through this
# module's globals()).
# NOTE(review): presumably the form that collects the arguments of
# manage_addRDFGrabber -- confirm against the product's registration code.
manage_addRDFGrabberForm = Globals.DTMLFile('add_grabber', globals())
