feedify: support any type (json, xml, html)
This commit is contained in:
		@@ -88,16 +88,21 @@ def parse_rules(filename=None):
 | 
			
		||||
    return rules
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse(data, url=None, encoding=None):
 | 
			
		||||
def parse(data, url=None, encoding=None, ruleset=None):
 | 
			
		||||
    " Determine which ruleset to use "
 | 
			
		||||
 | 
			
		||||
    rulesets = parse_rules()
 | 
			
		||||
    if ruleset is not None:
 | 
			
		||||
        rulesets = [ruleset]
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        rulesets = parse_rules().values()
 | 
			
		||||
 | 
			
		||||
    parsers = [FeedXML, FeedHTML, FeedJSON]
 | 
			
		||||
 | 
			
		||||
    # 1) Look for a ruleset based on path
 | 
			
		||||
 | 
			
		||||
    if url is not None:
 | 
			
		||||
        for ruleset in rulesets.values():
 | 
			
		||||
        for ruleset in rulesets:
 | 
			
		||||
            if 'path' in ruleset:
 | 
			
		||||
                for path in ruleset['path']:
 | 
			
		||||
                    if fnmatch(url, path):
 | 
			
		||||
@@ -111,9 +116,6 @@ def parse(data, url=None, encoding=None):
 | 
			
		||||
        # 3b) See if .items matches anything
 | 
			
		||||
 | 
			
		||||
    for parser in parsers:
 | 
			
		||||
        ruleset_candidates = [x for x in rulesets.values() if x['mode'] == parser.mode and 'path' not in x]
 | 
			
		||||
            # exclude rulesets with 'path', as they should have been caught beforehand
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            feed = parser(data, encoding=encoding)
 | 
			
		||||
 | 
			
		||||
@@ -124,13 +126,17 @@ def parse(data, url=None, encoding=None):
 | 
			
		||||
        else:
 | 
			
		||||
            # parsing worked, now we try the rulesets
 | 
			
		||||
 | 
			
		||||
            ruleset_candidates = [x for x in rulesets if x.get('mode', None) in (parser.mode, None) and 'path' not in x]
 | 
			
		||||
                # exclude rulesets with 'path', as they should have been caught beforehand
 | 
			
		||||
                # try anyway if no 'mode' specified
 | 
			
		||||
 | 
			
		||||
            for ruleset in ruleset_candidates:
 | 
			
		||||
                feed.rules = ruleset
 | 
			
		||||
 | 
			
		||||
                try:
 | 
			
		||||
                    feed.items[0]
 | 
			
		||||
 | 
			
		||||
                except (AttributeError, IndexError):
 | 
			
		||||
                except (AttributeError, IndexError, TypeError):
 | 
			
		||||
                    # parsing and or item picking did not work out
 | 
			
		||||
                    pass
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -104,6 +104,13 @@ class Options:
 | 
			
		||||
    def __contains__(self, key):
 | 
			
		||||
        return key in self.options
 | 
			
		||||
 | 
			
		||||
    def get(self, key, default=None):
        """Return the option stored under `key`, or `default` if it is absent.

        Membership is tested against `self.options` first, so a missing key
        yields `default` instead of raising.
        """
        if key not in self.options:
            return default

        return self.options[key]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ItemFix(item, options, feedurl='/'):
 | 
			
		||||
    """ Improves feed items (absolute links, resolve feedburner links, etc) """
 | 
			
		||||
@@ -276,22 +283,23 @@ def FeedFetch(url, options):
 | 
			
		||||
 | 
			
		||||
    if options.items:
 | 
			
		||||
        # using custom rules
 | 
			
		||||
        rss = feeds.FeedHTML(req['data'], encoding=req['encoding'])
 | 
			
		||||
        ruleset = {}
 | 
			
		||||
 | 
			
		||||
        rss.rules['title'] = options.title              if options.title        else '//head/title'
 | 
			
		||||
        rss.rules['desc'] = options.desc                if options.desc         else '//head/meta[@name="description"]/@content'
 | 
			
		||||
        ruleset['items'] = options.items
 | 
			
		||||
 | 
			
		||||
        rss.rules['items'] = options.items
 | 
			
		||||
        ruleset['title'] = options.get('title', '//head/title')
 | 
			
		||||
        ruleset['desc'] = options.get('desc', '//head/meta[@name="description"]/@content')
 | 
			
		||||
 | 
			
		||||
        rss.rules['item_title'] = options.item_title    if options.item_title   else '.'
 | 
			
		||||
        rss.rules['item_link'] = options.item_link      if options.item_link    else './@href|.//a/@href|ancestor::a/@href'
 | 
			
		||||
        ruleset['item_title'] = options.get('item_title', '.')
 | 
			
		||||
        ruleset['item_link'] = options.get('item_link', './@href|.//a/@href|ancestor::a/@href')
 | 
			
		||||
 | 
			
		||||
        if options.item_content:
 | 
			
		||||
            rss.rules['item_content'] = options.item_content
 | 
			
		||||
            ruleset['item_content'] = options.item_content
 | 
			
		||||
 | 
			
		||||
        if options.item_time:
 | 
			
		||||
            rss.rules['item_time'] = options.item_time
 | 
			
		||||
            ruleset['item_time'] = options.item_time
 | 
			
		||||
 | 
			
		||||
        rss = feeds.parse(req['data'], encoding=req['encoding'], ruleset=ruleset)
 | 
			
		||||
        rss = rss.convert(feeds.FeedXML)
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user