Move custom_handler to crawler

Makes more sense and is easier to reuse. Also cleaned up the code a bit.
2017-03-18 22:51:27 -10:00
parent beec6469cc
commit 2003e2760b
2 changed files with 24 additions and 17 deletions

@@ -48,8 +48,6 @@ THREADS = 10 # number of threads (1 for single-threaded)
DEBUG = False
PORT = 8080
DEFAULT_UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0'
PROTOCOL = ['http', 'https', 'ftp']
@@ -127,19 +125,6 @@ def parseOptions(options):
return out
-default_handlers = [crawler.GZIPHandler(), crawler.UAHandler(DEFAULT_UA),
-                    crawler.AutoRefererHandler(), crawler.HTTPEquivHandler(),
-                    crawler.HTTPRefreshHandler()]
-def custom_handler(accept, strict=False, delay=DELAY, encoding=None):
-    handlers = default_handlers[:]
-    handlers.append(crawler.EncodingFixHandler(encoding))
-    handlers.append(crawler.ContentNegociationHandler(crawler.MIMETYPE[accept], strict))
-    handlers.append(crawler.SQliteCacheHandler(delay))
-    return build_opener(*handlers)
def ItemFix(item, feedurl='/'):
""" Improves feed items (absolute links, resolve feedburner links, etc) """
@@ -267,7 +252,7 @@ def ItemFill(item, options, feedurl='/', fast=False):
delay = -2
try:
-con = custom_handler('html', False, delay, options.encoding).open(link, timeout=TIMEOUT)
+con = crawler.custom_handler('html', False, delay, options.encoding).open(link, timeout=TIMEOUT)
data = con.read()
except (IOError, HTTPException) as e:
@@ -368,7 +353,7 @@ def FeedFetch(url, options):
delay = 0
try:
-con = custom_handler('xml', True, delay, options.encoding).open(url, timeout=TIMEOUT * 2)
+con = crawler.custom_handler('xml', True, delay, options.encoding).open(url, timeout=TIMEOUT * 2)
xml = con.read()
except (HTTPError) as e:
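
For reference, a minimal sketch of what the relocated custom_handler presumably looks like once it lives in crawler.py. The crawler.py side of this commit is not shown above, so the module layout and the default DELAY value below are assumptions; the handler classes and MIMETYPE are the objects crawler already defines (they were referenced as crawler.* before the move), and DEFAULT_UA is the string that was in morss.py.

# Sketch only: custom_handler as it might read inside crawler.py after the move.
# GZIPHandler, UAHandler, AutoRefererHandler, HTTPEquivHandler, HTTPRefreshHandler,
# EncodingFixHandler, ContentNegociationHandler, SQliteCacheHandler and MIMETYPE
# are assumed to be defined earlier in crawler.py; the DELAY value is a placeholder.
try:
    from urllib.request import build_opener  # Python 3
except ImportError:
    from urllib2 import build_opener  # Python 2

DEFAULT_UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0'
DELAY = 10 * 60  # placeholder default cache delay, in seconds

def custom_handler(accept, strict=False, delay=DELAY, encoding=None):
    # handlers applied to every request
    handlers = [GZIPHandler(), UAHandler(DEFAULT_UA),
                AutoRefererHandler(), HTTPEquivHandler(),
                HTTPRefreshHandler()]

    # request-specific handlers: charset fix-up, content negotiation, caching
    handlers.append(EncodingFixHandler(encoding))
    handlers.append(ContentNegociationHandler(MIMETYPE[accept], strict))
    handlers.append(SQliteCacheHandler(delay))

    return build_opener(*handlers)

Call sites in morss.py then build an opener on demand, as the two hunks above show, e.g. crawler.custom_handler('xml', True, delay, options.encoding).open(url, timeout=TIMEOUT * 2).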