parent
c3f06da947
commit
cb69e3167f
|
@ -12,10 +12,14 @@ import random
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
||||||
|
from urllib import quote
|
||||||
|
from urlparse import urlparse, urlunparse
|
||||||
import mimetools
|
import mimetools
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
||||||
|
from urllib.parse import quote
|
||||||
|
from urllib.parse import urlparse, urlunparse
|
||||||
import email
|
import email
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -52,6 +56,8 @@ def get(*args, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
def adv_get(url, timeout=None, *args, **kwargs):
|
def adv_get(url, timeout=None, *args, **kwargs):
|
||||||
|
url = encode_url(url)
|
||||||
|
|
||||||
if timeout is None:
|
if timeout is None:
|
||||||
con = custom_handler(*args, **kwargs).open(url)
|
con = custom_handler(*args, **kwargs).open(url)
|
||||||
|
|
||||||
|
@ -95,6 +101,34 @@ def custom_handler(follow=None, delay=None, encoding=None):
|
||||||
return build_opener(*handlers)
|
return build_opener(*handlers)
|
||||||
|
|
||||||
|
|
||||||
|
def is_ascii(string):
    """Return True if `string` contains only 7-bit ASCII characters.

    Accepts text strings as well as byte strings. Byte strings are checked
    by decoding them as ASCII: on Python 3 `bytes` has no `.encode()`, and
    on Python 2 `str.encode('ascii')` performed that same implicit decode,
    so both interpreters now give the same answer for the same input.

    Returns False (rather than raising) when a non-ASCII character or byte
    is found.
    """
    # there's a native function in py3, but home-made fix for backward compatibility
    try:
        if isinstance(string, bytes):
            # py2 `str` is `bytes`, so this branch also covers py2 byte strings
            string.decode('ascii')

        else:
            string.encode('ascii')

    except UnicodeError:
        # covers both UnicodeEncodeError and UnicodeDecodeError
        return False

    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
# Characters that must survive percent-encoding untouched: the RFC 3986
# reserved delimiters (so 'a=1&b=2', ';params' and '/' keep their meaning),
# '%' (so already-escaped sequences are not double-encoded) and '~'
# (unreserved, but not in the default safe set of the py2 `quote`).
_URL_SAFE_CHARS = "/:@!$&'()*+,;=?%~"


def _encode_netloc(netloc):
    " IDNA-encode the host of a netloc and percent-escape non-ascii userinfo "
    userinfo, at, hostport = netloc.rpartition('@')
    host, colon, port = hostport.rpartition(':')

    if not colon or (port and not port.isdigit()):
        # no port separator, or the last ':' is not one (e.g. inside '[::1]')
        host, colon, port = hostport, '', ''

    if not is_ascii(host):
        # only the hostname is a domain name; feeding 'user@host:port' to the
        # idna codec would fold the delimiters into a punycode label
        host = host.encode('idna').decode('ascii')

    if not is_ascii(userinfo):
        userinfo = quote(userinfo.encode('utf-8'), safe=_URL_SAFE_CHARS)

    return userinfo + at + host + colon + port


def encode_url(url):
    """Escape non-ascii unicode characters

    The url is split with `urlparse`; components that are already pure
    ASCII are passed through unchanged. A non-ASCII hostname is converted
    with the `idna` codec, every other non-ASCII component is UTF-8 encoded
    and percent-escaped, leaving URL delimiters and existing `%XX` escapes
    intact.

    Returns the re-assembled url (via `urlunparse`).

    Raises UnicodeError if the hostname cannot be IDNA-encoded (e.g. a
    label that is empty or too long).
    """
    # https://stackoverflow.com/a/4391299
    parts = []

    for index, part in enumerate(urlparse(url)):
        if is_ascii(part):
            parts.append(part)

        elif index == 1:
            # index 1 of the urlparse result is the network location
            parts.append(_encode_netloc(part))

        else:
            parts.append(quote(part.encode('utf-8'), safe=_URL_SAFE_CHARS))

    return urlunparse(parts)
|
||||||
|
|
||||||
|
|
||||||
class DebugHandler(BaseHandler):
|
class DebugHandler(BaseHandler):
|
||||||
handler_order = 2000
|
handler_order = 2000
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue