Compare commits

..

No commits in common. "961a31141f5353a76c4101a488f4a776171d8f9e" and "fbcb23cf88711cdd3a57ffa26f0135fef0504e2a" have entirely different histories.

2 changed files with 6 additions and 12 deletions

View File

@ -284,26 +284,24 @@ def ItemAfter(item, options):
return item return item
def UrlFix(url): def FeedFetch(url, options):
# basic url clean-up
if url is None: if url is None:
raise MorssException('No url provided') raise MorssException('No url provided')
if isinstance(url, bytes):
url = url.decode()
if urlparse(url).scheme not in PROTOCOL: if urlparse(url).scheme not in PROTOCOL:
url = 'http://' + url url = 'http://' + url
log(url) log(url)
url = url.replace(' ', '%20') url = url.replace(' ', '%20')
return url if isinstance(url, bytes):
url = url.decode()
def FeedFetch(url, options):
# allow for code execution for feedify # allow for code execution for feedify
pre = feedify.pre_worker(url) pre = feedify.pre_worker(url)
if pre: if pre:
url = UrlFix(pre) url = pre
log('url redirect') log('url redirect')
log(url) log(url)
@ -326,7 +324,7 @@ def FeedFetch(url, options):
if options.items: if options.items:
# using custom rules # using custom rules
rss = feeds.FeedHTML(xml) rss = feeds.FeedHTML(xml, url, contenttype)
feed.rule feed.rule
rss.rules['items'] = options.items rss.rules['items'] = options.items
@ -477,7 +475,6 @@ def process(url, cache=None, options=None):
if cache: if cache:
crawler.default_cache = crawler.SQLiteCache(cache) crawler.default_cache = crawler.SQLiteCache(cache)
url = UrlFix(url)
rss = FeedFetch(url, options) rss = FeedFetch(url, options)
rss = FeedGather(rss, url, options) rss = FeedGather(rss, url, options)
@ -540,7 +537,6 @@ def cgi_app(environ, start_response):
crawler.default_cache = crawler.SQLiteCache(os.path.join(os.getcwd(), 'morss-cache.db')) crawler.default_cache = crawler.SQLiteCache(os.path.join(os.getcwd(), 'morss-cache.db'))
# get the work done # get the work done
url = UrlFix(url)
rss = FeedFetch(url, options) rss = FeedFetch(url, options)
if headers['content-type'] == 'text/xml': if headers['content-type'] == 'text/xml':
@ -612,7 +608,6 @@ def cli_app():
crawler.default_cache = crawler.SQLiteCache(os.path.expanduser('~/.cache/morss-cache.db')) crawler.default_cache = crawler.SQLiteCache(os.path.expanduser('~/.cache/morss-cache.db'))
url = UrlFix(url)
rss = FeedFetch(url, options) rss = FeedFetch(url, options)
rss = FeedGather(rss, url, options) rss = FeedGather(rss, url, options)
out = FeedFormat(rss, options) out = FeedFormat(rss, options)

View File

@ -93,7 +93,6 @@ def score_node(node):
class_id = node.get('class', '') + node.get('id', '') class_id = node.get('class', '') + node.get('id', '')
if (isinstance(node, lxml.html.HtmlComment) if (isinstance(node, lxml.html.HtmlComment)
or isinstance(node, lxml.html.HtmlProcessingInstruction)
or node.tag in tags_bad or node.tag in tags_bad
or regex_bad.search(class_id)): or regex_bad.search(class_id)):
return 0 return 0