2to3: using unicode/str to please py3

parent cbeb01e555
commit 656b29e0ef
@@ -2,14 +2,13 @@ import ssl
 import socket
 from gzip import GzipFile
+from io import BytesIO
 
 try:
-    from StringIO import StringIO
     from urllib2 import URLError
     from urllib2 import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
     from httplib import HTTPException, HTTPConnection, HTTPS_PORT
 except ImportError:
-    from io import StringIO
     from urllib.error import URLError
     from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
     from http.client import HTTPException, HTTPConnection, HTTPS_PORT
 
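Note on the import shuffle: io.BytesIO exists under the same name on Python 2 and 3, so it can sit above the try/except, while both StringIO imports disappear because every consumer below actually handles raw bytes. A minimal sketch of the same compatibility pattern (urlopen stands in for the handler imports):

    # io.BytesIO is common to Python 2 and 3; only the renamed
    # urllib/httplib modules need the try/except fallback.
    from io import BytesIO

    try:
        from urllib2 import urlopen          # Python 2
    except ImportError:
        from urllib.request import urlopen   # Python 3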
@@ -118,9 +117,9 @@ class GZIPHandler(BaseHandler):
         if 200 <= resp.code < 300:
             if resp.headers.get('Content-Encoding') == 'gzip':
                 data = resp.read()
-                data = GzipFile(fileobj=StringIO(data), mode='r').read()
+                data = GzipFile(fileobj=BytesIO(data), mode='r').read()
 
-                fp = StringIO(data)
+                fp = BytesIO(data)
                 old_resp = resp
                 resp = addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)
                 resp.msg = old_resp.msg
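The StringIO-to-BytesIO swap is the heart of the port: resp.read() returns bytes on Python 3, and gzip.GzipFile only accepts a binary file object, so StringIO(data) raises a TypeError there. A standalone illustration (Python 3; gzip.compress merely fabricates test input):

    import gzip
    from io import BytesIO

    payload = b'hello'
    blob = gzip.compress(payload)   # compressed bytes

    # GzipFile needs a binary file object; BytesIO provides one
    data = gzip.GzipFile(fileobj=BytesIO(blob), mode='r').read()
    assert data == payload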
@@ -135,13 +134,13 @@ def detect_encoding(data, con=None):
     if con is not None and con.info().get('charset'):
         return con.info().get('charset')
 
-    match = re.search('charset=["\']?([0-9a-zA-Z-]+)', data[:1000])
+    match = re.search(b'charset=["\']?([0-9a-zA-Z-]+)', data[:1000])
     if match:
-        return match.groups()[0]
+        return match.groups()[0].lower().decode()
 
-    match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
+    match = re.search(b'encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
     if match:
-        return match.groups()[0].lower()
+        return match.groups()[0].lower().decode()
 
     return None
 
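Two Python 3 issues are fixed at once in detect_encoding: the pattern must be bytes to match bytes data (a str pattern raises TypeError), and the captured group then comes back as bytes, so it is lowercased and decoded before being returned as a charset name. For example:

    import re

    data = b'<?xml version="1.0" encoding="UTF-8"?><feed/>'

    # bytes pattern against bytes data; the group is bytes too
    match = re.search(b'encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
    if match:
        print(match.groups()[0].lower().decode())   # -> utf-8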
@@ -157,7 +156,7 @@ class EncodingFixHandler(BaseHandler):
             data = data.decode(enc, 'replace')
             data = data.encode(enc)
 
-            fp = StringIO(data)
+            fp = BytesIO(data)
             old_resp = resp
             resp = addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
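The same four-line re-wrap (BytesIO, addinfourl, copy msg) recurs across GZIPHandler, EncodingFixHandler and, below, MetaRedirectHandler, so it could be factored into a helper. A sketch of such a hypothetical helper (rewrap is an illustrative name, not part of the code):

    from io import BytesIO

    try:
        from urllib2 import addinfourl           # Python 2
    except ImportError:
        from urllib.request import addinfourl    # Python 3

    def rewrap(resp, data):
        # hand back a fresh file-like response carrying the transformed
        # bytes, preserving headers, URL, status code and msg
        new_resp = addinfourl(BytesIO(data), resp.headers, resp.url, resp.code)
        new_resp.msg = resp.msg
        return new_resp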
@@ -231,7 +230,7 @@ class MetaRedirectHandler(BaseHandler):
         if 200 <= resp.code < 300 and contenttype.startswith('text/'):
             if contenttype in MIMETYPE['html']:
                 data = resp.read()
-                match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
+                match = re.search(b'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
                 if match:
                     new_url = match.groups()[0]
                     new_headers = dict((k, v) for k, v in list(req.headers.items())
@@ -243,7 +242,7 @@ class MetaRedirectHandler(BaseHandler):
 
                     return self.parent.open(new, timeout=req.timeout)
                 else:
-                    fp = StringIO(data)
+                    fp = BytesIO(data)
                     old_resp = resp
                     resp = addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)
                     resp.msg = old_resp.msg
@@ -273,7 +272,7 @@ class EtagHandler(BaseHandler):
             headers.addheader('etag', self.etag)
         if self.lastmodified:
             headers.addheader('last-modified', self.lastmodified)
-        resp = addinfourl(StringIO(self.cache), headers, req.get_full_url(), 200)
+        resp = addinfourl(BytesIO(self.cache), headers, req.get_full_url(), 200)
         return resp
 
     https_request = http_request
@@ -88,9 +88,9 @@ class FeedException(Exception):
 
 def parse(data):
     # encoding
-    match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
+    match = re.search(b'encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
     if match:
-        enc = match.groups()[0].lower()
+        enc = match.groups()[0].lower().decode()
         if isinstance(data, bytes):
             data = data.decode(enc, 'ignore')
         data = data.encode(enc)
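The decode(enc, 'ignore')/encode(enc) round-trip in parse() looks redundant, but it guarantees that downstream parsers receive bytes that really are in the sniffed encoding, silently dropping any undecodable sequences; for well-formed input it is lossless:

    # bytes whose XML declaration names their actual encoding
    xml = u'<?xml version="1.0" encoding="ISO-8859-1"?><feed>café</feed>'.encode('latin-1')

    enc = 'iso-8859-1'                       # as sniffed above
    clean = xml.decode(enc, 'ignore').encode(enc)
    assert clean == xml                      # valid input survives unchanged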
@@ -182,6 +182,9 @@ class Cache:
         return None
 
     def set(self, key, content):
+        if sys.version > '3' and isinstance(content, bytes):
+            content = content.decode('utf-8')
+
         self._cache[key] = {'last': time.time(), 'value': content}
 
     __getitem__ = get
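Cache.set now normalises values to text before storing. The sys.version > '3' string comparison is crude but deliberate: on Python 2, str is bytes, so without the version guard the isinstance test would also catch legacy str values and decode them to unicode. The guard in isolation (to_text is an illustrative name):

    import sys

    def to_text(content):
        # Python 2 str *is* bytes, so the version check keeps legacy
        # values untouched; Python 3 bytes are decoded (UTF-8 assumed)
        if sys.version > '3' and isinstance(content, bytes):
            content = content.decode('utf-8')
        return content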
@@ -383,8 +386,7 @@ def Fill(item, cache, options, feedurl='/', fast=False):
 
     # download
     try:
-        url = link.encode('utf-8')
-        con = build_opener(*accept_handler(('html', 'text/*'), True)).open(url, timeout=TIMEOUT)
+        con = build_opener(*accept_handler(('html', 'text/*'), True)).open(link, timeout=TIMEOUT)
         data = con.read()
     except (IOError, HTTPException) as e:
         log('http error: %s' % e.message)
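Fill no longer encodes the link before opening it: Python 3's opener expects a str URL, and the old encode('utf-8') handed it bytes instead. Stripped of the custom accept handler, the call reduces to something like this (example.com is a stand-in URL and the timeout value is arbitrary):

    try:
        from urllib2 import build_opener          # Python 2
    except ImportError:
        from urllib.request import build_opener   # Python 3

    link = 'http://example.com/page'
    con = build_opener().open(link, timeout=4)    # str URL works on both versions
    data = con.read()                             # body is bytes on Python 3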
@@ -423,6 +425,9 @@ def Init(url, cache_path, options):
 
     url = url.replace(' ', '%20')
 
+    if isinstance(url, bytes):
+        url = url.decode()
+
     # cache
     cache = Cache(cache_path, url)
     log(cache._hash)
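Init gets the mirror-image guard: a URL arriving as bytes is decoded to str up front, so the replace() and startswith() calls that follow all operate on text. In isolation (ensure_text is an illustrative name; byte URLs are assumed ASCII/UTF-8):

    def ensure_text(url):
        if isinstance(url, bytes):
            url = url.decode()                # UTF-8/ASCII assumed
        return url.replace(' ', '%20')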
@@ -464,7 +469,7 @@ def Fetch(url, cache, options):
 
     if url.startswith('https://itunes.apple.com/lookup?id='):
         style = 'itunes'
-    elif xml.startswith('<?xml') or contenttype in MIMETYPE['xml']:
+    elif xml.startswith(b'<?xml') or contenttype in MIMETYPE['xml']:
         style = 'normal'
     elif feedify.supported(url):
         style = 'feedify'
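Conversely, where the body is deliberately kept as raw bytes, the comparison constant has to follow suit: on Python 3, startswith on bytes requires a bytes prefix. For instance:

    xml = b'<?xml version="1.0"?><rss/>'   # raw, undecoded response body
    assert xml.startswith(b'<?xml')        # bytes prefix against bytes
    # xml.startswith('<?xml') raises TypeError on Python 3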