crawler: replace ContentNegoHandler with AlternateHandler

The new handler is more basic: it sends the same headers no matter what, which makes requests more "replicable".
Also, drop "text/xml" from the RSS content types: it is too broad and matches garbage.
This commit is contained in:
2020-04-05 16:05:59 +02:00
parent 8a4d68d72c
commit d20f6237bd
2 changed files with 12 additions and 28 deletions

View File

@@ -252,7 +252,7 @@ def ItemFill(item, options, feedurl='/', fast=False):
delay = -2
try:
con = crawler.custom_handler('html', False, delay, options.encoding).open(link, timeout=TIMEOUT)
con = crawler.custom_handler(delay=delay, encoding=options.encoding).open(link, timeout=TIMEOUT)
data = con.read()
except (IOError, HTTPException) as e:
@@ -335,8 +335,7 @@ def FeedFetch(url, options):
delay = 0
try:
con = crawler.custom_handler(accept='xml', strict=True, delay=delay,
encoding=options.encoding) \
con = crawler.custom_handler(follow='rss', delay=delay, encoding=options.encoding) \
.open(url, timeout=TIMEOUT * 2)
xml = con.read()