Compare commits
No commits in common. "961a31141f5353a76c4101a488f4a776171d8f9e" and "fbcb23cf88711cdd3a57ffa26f0135fef0504e2a" have entirely different histories.
961a31141f ... fbcb23cf88
@@ -284,26 +284,24 @@ def ItemAfter(item, options):
     return item
 
 
-def UrlFix(url):
+def FeedFetch(url, options):
+    # basic url clean-up
     if url is None:
         raise MorssException('No url provided')
 
-    if isinstance(url, bytes):
-        url = url.decode()
-
     if urlparse(url).scheme not in PROTOCOL:
         url = 'http://' + url
         log(url)
 
     url = url.replace(' ', '%20')
 
-    return url
+    if isinstance(url, bytes):
+        url = url.decode()
 
 
-def FeedFetch(url, options):
     # allow for code execution for feedify
     pre = feedify.pre_worker(url)
     if pre:
-        url = UrlFix(pre)
+        url = pre
         log('url redirect')
         log(url)
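
The hunk above folds the old UrlFix helper into the top of FeedFetch: the scheme default, space escaping and bytes decoding now run before feedify.pre_worker is consulted, and pre-worker redirects are used as-is. Below is a minimal standalone sketch of that clean-up, kept in the same order as the new code; PROTOCOL, log and MorssException are module-level names in the real source, so the stand-ins here are purely illustrative.

from urllib.parse import urlparse

PROTOCOL = ['http', 'https']   # illustrative stand-in for the module-level list


def clean_url(url):
    # hypothetical helper mirroring the clean-up now inlined at the top of FeedFetch
    if url is None:
        raise ValueError('No url provided')   # the real code raises MorssException

    if urlparse(url).scheme not in PROTOCOL:
        url = 'http://' + url                 # default to http when no known scheme

    url = url.replace(' ', '%20')             # crude escaping of spaces

    if isinstance(url, bytes):                # same (late) position as in the new code
        url = url.decode()

    return url


print(clean_url('example.com/feed with space'))
# -> http://example.com/feed%20with%20space
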
@@ -326,7 +324,7 @@ def FeedFetch(url, options):
 
     if options.items:
         # using custom rules
-        rss = feeds.FeedHTML(xml)
+        rss = feeds.FeedHTML(xml, url, contenttype)
         feed.rule
 
         rss.rules['items'] = options.items
@@ -477,7 +475,6 @@ def process(url, cache=None, options=None):
     if cache:
         crawler.default_cache = crawler.SQLiteCache(cache)
 
-    url = UrlFix(url)
     rss = FeedFetch(url, options)
     rss = FeedGather(rss, url, options)
 
@@ -540,7 +537,6 @@ def cgi_app(environ, start_response):
     crawler.default_cache = crawler.SQLiteCache(os.path.join(os.getcwd(), 'morss-cache.db'))
 
     # get the work done
-    url = UrlFix(url)
     rss = FeedFetch(url, options)
 
     if headers['content-type'] == 'text/xml':
@@ -612,7 +608,6 @@ def cli_app():
 
     crawler.default_cache = crawler.SQLiteCache(os.path.expanduser('~/.cache/morss-cache.db'))
 
-    url = UrlFix(url)
     rss = FeedFetch(url, options)
     rss = FeedGather(rss, url, options)
     out = FeedFormat(rss, options)
@@ -93,7 +93,6 @@ def score_node(node):
     class_id = node.get('class', '') + node.get('id', '')
 
     if (isinstance(node, lxml.html.HtmlComment)
-            or isinstance(node, lxml.html.HtmlProcessingInstruction)
             or node.tag in tags_bad
             or regex_bad.search(class_id)):
         return 0
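
After this last hunk, score_node's early-out only rejects HTML comments, blacklisted tag names and class/id values matching regex_bad; processing instructions are no longer filtered here. A small self-contained sketch of that guard, with illustrative stand-ins for the module-level tags_bad and regex_bad:

import re

import lxml.html

# illustrative stand-ins; the real values are defined elsewhere in the module
tags_bad = ('script', 'head')
regex_bad = re.compile(r'comment|footer|sidebar', re.I)


def is_filtered(node):
    # mirrors the early-out at the top of score_node after this change
    class_id = node.get('class', '') + node.get('id', '')
    return (isinstance(node, lxml.html.HtmlComment)
            or node.tag in tags_bad
            or bool(regex_bad.search(class_id)))


node = lxml.html.fromstring('<div id="footer">about</div>')
print(is_filtered(node))   # True: the id matches regex_bad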