import ParsedSummary
import feedparser
import SummaryItem
import utils
import error
import string
import sgmllib
import types

class ImageParser(sgmllib.SGMLParser):
    """SGML parser that records the ``src`` of every ``<img>`` tag it sees.

    Each ``src`` value is passed through ``utils.complete_url`` together with
    the ``location`` of the feed given at construction time, and the result
    is appended to an internal list. The list is never cleared by this
    class, so URLs accumulate for the lifetime of the instance.
    """

    def __init__(self, feed):
        """Initialise the base parser and remember *feed*."""
        sgmllib.SGMLParser.__init__(self)
        self._image_urls = []
        self._feed = feed

    def do_img(self, attrs):
        """Store the completed URL for each ``src`` attribute in *attrs*.

        *attrs* is iterated as (name, value) pairs; attributes other than
        ``src`` are ignored.
        """
        for attr_name, attr_value in attrs:
            if attr_name != 'src':
                continue
            completed = utils.complete_url(attr_value, self._feed.location)
            self._image_urls.append(completed)

    def get_image_urls(self):
        """Return the internal list of collected URLs (not a copy)."""
        return self._image_urls

    def close(self):
        """Delegate to ``SGMLParser.close``; collected URLs are kept."""
        sgmllib.SGMLParser.close(self)

def _remove_ids_if_duplicates(items):
    """Clear all guids when any non-empty guid appears more than once.

    items: iterable of objects exposing writable ``guid`` and ``guidislink``
    attributes. It is iterated twice, so it must be re-iterable.

    Guids that are ``None`` or the empty string are ignored when looking for
    duplicates. If a duplicate is found, *every* item (not only the
    colliding ones) gets ``guid = None`` and ``guidislink = False``.
    Otherwise the items are left untouched. Returns ``None``.
    """
    seen = set()
    has_duplicates = False
    for item in items:
        guid = item.guid
        if guid is None or guid == "":
            continue
        if guid in seen:
            # One collision is enough to distrust every guid in the list.
            has_duplicates = True
            break
        seen.add(guid)

    if has_duplicates:
        for item in items:
            item.guid = None
            item.guidislink = False

def parse(content, feed):
    """Parse a feed document into a ``ParsedSummary``.

    content: the feed document, handed unchanged to ``feedparser.parse``.
    feed: the feed the resulting items belong to. It is stored on each item
        as ``item.feed``, given to ``ImageParser`` for resolving image URLs,
        and its ``title`` is used in the image-scan error log message.

    Returns a ``ParsedSummary`` whose feed-level fields (title, description,
    link and, when present, copyright, last_build_date, creator) are filled
    in and which holds one ``SummaryItem`` per feedparser entry. After all
    items are added, ``_remove_ids_if_duplicates`` is applied to
    ``parsed.items``.

    Errors raised while scanning a description for images are logged via
    ``error.log`` and do not abort parsing; other exceptions propagate.
    """
    parsed = ParsedSummary.ParsedSummary()

    parsed_content = feedparser.parse(content)
    # Fall back to latin-1 when the attribute is missing and also when it is
    # present but empty/None, since unicode() rejects an empty codec name.
    encoding = getattr(parsed_content, 'encoding', None) or 'iso-8859-1'

    def toUnicode(text):
        # Decode byte strings with the feed's encoding; None and values that
        # are already unicode pass through unchanged.
        if text is not None and not isinstance(text, types.UnicodeType):
            text = unicode(text, encoding)
        return text

    parsed.title = toUnicode(getattr(parsed_content.feed, 'title', ""))
    parsed.description = toUnicode(getattr(parsed_content.feed, 'tagline', ""))
    parsed.link = toUnicode(getattr(parsed_content.feed, 'link', ""))

    if parsed_content.feed.has_key('copyright'):
        parsed.copyright = toUnicode(parsed_content.feed.copyright)

    if parsed_content.feed.has_key('modified'):
        parsed.last_build_date = parsed_content.feed.modified

    if parsed_content.feed.has_key('author'):
        parsed.creator = toUnicode(parsed_content.feed.author)

    # item properties
    for entry in parsed_content.entries:
        item = SummaryItem.SummaryItem()
        item.feed = feed

        if entry.has_key('content'):
            # it can have multiple content, so we just aggregate them for now.
            parts = []
            for c in entry.content:
                try:
                    parts.append(c['value'])
                except TypeError:
                    # Best effort: skip content elements that cannot be
                    # indexed by 'value'.
                    pass
            description = "<br /><br />".join(parts)
        else:
            # try summary
            description = entry.get('summary', "")

        if len(description):
            # Use a fresh parser per entry: ImageParser never clears its URL
            # list, so a shared instance would hand this item the images of
            # every entry processed before it.
            imp = ImageParser(feed)
            try:
                imp.feed(description)
                for im in imp.get_image_urls():
                    item.add_image(im)
            except Exception as ex:
                error.log("Error while checkin description for images. %s: %s %s" % (item.feed.title, type(ex), ex))
            finally:
                imp.close()

        # When the entry has no title, use the start of the description.
        title = toUnicode(getattr(entry, 'title', utils.read_text(description,60) + ' ...'))
        item.title = utils.convert_title(title, description)
        item.title_converted = True

        item.guidislink = getattr(entry, 'guidislink', False)
        item.link = toUnicode(entry.get('link', None))
        item.guid = toUnicode(entry.get('id', None))

        if entry.has_key('author'):
            item.creator = toUnicode(entry.author)

        if entry.has_key('contributors'):
            # entry.contributors is a list of dicts
            item.contributors = entry.contributors

        if entry.has_key('modified_parsed'):
            item.pub_date = entry.modified_parsed

        if entry.has_key('source'):
            # NOTE(review): assumes entry.source exposes 'url' and 'value';
            # confirm against the feedparser version in use.
            item.source = {'url': toUnicode(entry.source.url), 'text': toUnicode(entry.source.value)}

        if entry.has_key('license'):
            item.license_urls.append(toUnicode(entry.license))

        if entry.has_key('fm_license'):
            item.fm_license = toUnicode(entry.get('fm_license', None))
            item.fm_changes = toUnicode(entry.get('fm_changes', None))

        if entry.has_key('fm_screenshoturl'):
            # Read the same key that was just tested; a differently named
            # attribute would raise AttributeError here.
            # This runs after the image scan above, so the screenshot is only
            # embedded in the description, not registered via add_image.
            description += "<img src='%s' />" % entry['fm_screenshoturl']

        # PRISM
        if entry.has_key('prism_publicationname'):
            # Same as above: fetch the value under the key that was tested.
            item.publication_name = toUnicode(entry['prism_publicationname'])
            item.publication_volume = toUnicode(entry.get('prism_volume', None))
            item.publication_number = toUnicode(entry.get('prism_number', None))
            item.publication_section = toUnicode(entry.get('prism_section', None))
            item.publication_starting_page = toUnicode(entry.get('prism_startingpage', None))

        item.description = toUnicode(description)
        parsed.addItem(item)
    _remove_ids_if_duplicates(parsed.items)
    return parsed

