From 7f4589c5784313a4e433c44fcb3c20556a24a109 Mon Sep 17 00:00:00 2001
From: pictuga <contact@pictuga.com>
Date: Tue, 28 Apr 2020 22:10:20 +0200
Subject: [PATCH] crawler: return dict instead of tuple

---
 morss/crawler.py     | 12 +++++++++---
 morss/feeds.py       |  4 ++--
 morss/morss.py       | 26 +++++++++++++-------------
 morss/readabilite.py |  4 ++--
 4 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/morss/crawler.py b/morss/crawler.py
index 4e68593..35804b4 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -55,7 +55,7 @@ PROTOCOL = ['http', 'https']
 
 
 def get(*args, **kwargs):
-    return adv_get(*args, **kwargs)[0]
+    return adv_get(*args, **kwargs)['data']
 
 
 def adv_get(url, timeout=None, *args, **kwargs):
@@ -72,7 +72,13 @@ def adv_get(url, timeout=None, *args, **kwargs):
     contenttype = con.info().get('Content-Type', '').split(';')[0]
     encoding= detect_encoding(data, con)
 
-    return data, con, contenttype, encoding
+    return {
+        'data':data,
+        'url': con.geturl(),
+        'con': con,
+        'contenttype': contenttype,
+        'encoding': encoding
+    }
 
 
 def custom_handler(follow=None, delay=None, encoding=None):
@@ -624,4 +630,4 @@ if __name__ == '__main__':
-    data, con, contenttype, encoding = adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
+    req = adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
 
     if not sys.flags.interactive:
-        print(data.decode(encoding))
+        print(req['data'].decode(req['encoding']))
diff --git a/morss/feeds.py b/morss/feeds.py
index 6b9df2f..1be9b27 100644
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -759,8 +759,8 @@ class ItemJSON(Item, ParserJSON):
 if __name__ == '__main__':
     from . import crawler
 
-    data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
-    feed = parse(data, url=con.geturl(), encoding=encoding)
+    req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
+    feed = parse(req['data'], url=req['url'], encoding=req['encoding'])
 
     if not sys.flags.interactive:
         for item in feed.items:
diff --git a/morss/morss.py b/morss/morss.py
index e6b25a1..83990ec 100644
--- a/morss/morss.py
+++ b/morss/morss.py
@@ -248,17 +248,17 @@ def ItemFill(item, options, feedurl='/', fast=False):
         delay = -2
 
     try:
-        data, con, contenttype, encoding = crawler.adv_get(url=link, delay=delay, timeout=TIMEOUT)
+        req = crawler.adv_get(url=link, delay=delay, timeout=TIMEOUT)
 
     except (IOError, HTTPException) as e:
         log('http error')
         return False # let's just delete errors stuff when in cache mode
 
-    if contenttype not in crawler.MIMETYPE['html'] and contenttype != 'text/plain':
+    if req['contenttype'] not in crawler.MIMETYPE['html'] and req['contenttype'] != 'text/plain':
         log('non-text page')
         return True
 
-    out = readabilite.get_article(data, url=con.geturl(), encoding_in=encoding, encoding_out='unicode')
+    out = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='unicode')
 
     if out is not None:
         item.content = out
@@ -303,14 +303,14 @@ def FeedFetch(url, options):
         delay = 0
 
     try:
-        xml, con, contenttype, encoding = crawler.adv_get(url=url, follow='rss', delay=delay, timeout=TIMEOUT * 2)
+        req = crawler.adv_get(url=url, follow='rss', delay=delay, timeout=TIMEOUT * 2)
 
     except (IOError, HTTPException):
         raise MorssException('Error downloading feed')
 
     if options.items:
         # using custom rules
-        rss = feeds.FeedHTML(xml, encoding=encoding)
+        rss = feeds.FeedHTML(req['data'], encoding=req['encoding'])
 
         rss.rules['title'] = options.title              if options.title        else '//head/title'
         rss.rules['desc'] = options.desc                if options.desc         else '//head/meta[@name="description"]/@content'
@@ -330,13 +330,13 @@ def FeedFetch(url, options):
 
     else:
         try:
-            rss = feeds.parse(xml, url, encoding=encoding)
+            rss = feeds.parse(req['data'], url=url, encoding=req['encoding'])
             rss = rss.convert(feeds.FeedXML)
                 # contains all fields, otherwise much-needed data can be lost
 
         except TypeError:
             log('random page')
-            log(contenttype)
+            log(req['contenttype'])
             raise MorssException('Link provided is not a valid feed')
 
     return rss
@@ -594,12 +594,12 @@ def cgi_get(environ, start_response):
     url, options = cgi_parse_environ(environ)
 
     # get page
-    data, con, contenttype, encoding = crawler.adv_get(url=url, timeout=TIMEOUT)
+    req = crawler.adv_get(url=url, timeout=TIMEOUT)
 
-    if contenttype in ['text/html', 'application/xhtml+xml', 'application/xml']:
+    if req['contenttype'] in ['text/html', 'application/xhtml+xml', 'application/xml']:
         if options.get == 'page':
-            html = readabilite.parse(data, encoding=encoding)
-            html.make_links_absolute(con.geturl())
+            html = readabilite.parse(req['data'], encoding=req['encoding'])
+            html.make_links_absolute(req['url'])
 
             kill_tags = ['script', 'iframe', 'noscript']
 
@@ -610,13 +610,13 @@ def cgi_get(environ, start_response):
             output = lxml.etree.tostring(html.getroottree(), encoding='utf-8')
 
         elif options.get == 'article':
-            output = readabilite.get_article(data, url=con.geturl(), encoding_in=encoding, encoding_out='utf-8', debug=options.debug)
+            output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
 
         else:
             raise MorssException('no :get option passed')
 
     else:
-        output = data
+        output = req['data']
 
     # return html page
     headers = {'status': '200 OK', 'content-type': 'text/html; charset=utf-8'}
diff --git a/morss/readabilite.py b/morss/readabilite.py
index e14f88b..a4514b6 100644
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -348,8 +348,8 @@ if __name__ == '__main__':
     import sys
     from . import crawler
 
-    data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
-    article = get_article(data, url=con.geturl(), encoding_in=encoding, encoding_out='unicode')
+    req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
+    article = get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='unicode')
 
     if not sys.flags.interactive:
         print(article)