Drop HTTPS SSL certificate verification
Breaks everything with python 3. Now built-in in recent python 2.7.9 and python 3.4-ish
This commit is contained in:
		@@ -5,13 +5,9 @@ from gzip import GzipFile
 | 
			
		||||
from io import BytesIO
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from urllib2 import URLError
 | 
			
		||||
    from urllib2 import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
 | 
			
		||||
    from httplib import HTTPException, HTTPConnection, HTTPS_PORT
 | 
			
		||||
    from urllib2 import BaseHandler, addinfourl, parse_keqv_list, parse_http_list
 | 
			
		||||
except ImportError:
 | 
			
		||||
    from urllib.error import URLError
 | 
			
		||||
    from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
 | 
			
		||||
    from http.client import HTTPException, HTTPConnection, HTTPS_PORT
 | 
			
		||||
    from urllib.request import BaseHandler, addinfourl, parse_keqv_list, parse_http_list
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
@@ -25,89 +21,6 @@ MIMETYPE = {
 | 
			
		||||
    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
 | 
			
		||||
    'html': ['text/html', 'application/xhtml+xml', 'application/xml']}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# SSL-related code proudly copy-pasted from https://stackoverflow.com/questions/1087227/validate-ssl-certificates-with-python
 | 
			
		||||
 | 
			
		||||
class InvalidCertificateException(HTTPException, URLError):
 | 
			
		||||
    def __init__(self, host, cert, reason):
 | 
			
		||||
        HTTPException.__init__(self)
 | 
			
		||||
        self.host = host
 | 
			
		||||
        self.cert = cert
 | 
			
		||||
        self.reason = reason
 | 
			
		||||
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        return ('Host %s returned an invalid certificate (%s) %s\n' %
 | 
			
		||||
                (self.host, self.reason, self.cert))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CertValidatingHTTPSConnection(HTTPConnection):
 | 
			
		||||
    default_port = HTTPS_PORT
 | 
			
		||||
 | 
			
		||||
    def __init__(self, host, port=None, key_file=None, cert_file=None,
 | 
			
		||||
                             ca_certs=None, strict=None, **kwargs):
 | 
			
		||||
        HTTPConnection.__init__(self, host, port, strict, **kwargs)
 | 
			
		||||
        self.key_file = key_file
 | 
			
		||||
        self.cert_file = cert_file
 | 
			
		||||
        self.ca_certs = ca_certs
 | 
			
		||||
        if self.ca_certs:
 | 
			
		||||
            self.cert_reqs = ssl.CERT_REQUIRED
 | 
			
		||||
        else:
 | 
			
		||||
            self.cert_reqs = ssl.CERT_NONE
 | 
			
		||||
 | 
			
		||||
    def _GetValidHostsForCert(self, cert):
 | 
			
		||||
        if 'subjectAltName' in cert:
 | 
			
		||||
            return [x[1] for x in cert['subjectAltName']
 | 
			
		||||
                         if x[0].lower() == 'dns']
 | 
			
		||||
        else:
 | 
			
		||||
            return [x[0][1] for x in cert['subject']
 | 
			
		||||
                            if x[0][0].lower() == 'commonname']
 | 
			
		||||
 | 
			
		||||
    def _ValidateCertificateHostname(self, cert, hostname):
 | 
			
		||||
        hosts = self._GetValidHostsForCert(cert)
 | 
			
		||||
        for host in hosts:
 | 
			
		||||
            host_re = host.replace('.', '\.').replace('*', '[^.]*')
 | 
			
		||||
            if re.search('^%s$' % (host_re,), hostname, re.I):
 | 
			
		||||
                return True
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    def connect(self):
 | 
			
		||||
        sock = socket.create_connection((self.host, self.port))
 | 
			
		||||
        self.sock = ssl.wrap_socket(sock, keyfile=self.key_file,
 | 
			
		||||
                                          certfile=self.cert_file,
 | 
			
		||||
                                          cert_reqs=self.cert_reqs,
 | 
			
		||||
                                          ca_certs=self.ca_certs)
 | 
			
		||||
        if self.cert_reqs & ssl.CERT_REQUIRED:
 | 
			
		||||
            cert = self.sock.getpeercert()
 | 
			
		||||
            hostname = self.host.split(':', 0)[0]
 | 
			
		||||
            if not self._ValidateCertificateHostname(cert, hostname):
 | 
			
		||||
                raise InvalidCertificateException(hostname, cert,
 | 
			
		||||
                                                  'hostname mismatch')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VerifiedHTTPSHandler(HTTPSHandler):
 | 
			
		||||
    def __init__(self, **kwargs):
 | 
			
		||||
        AbstractHTTPHandler.__init__(self)
 | 
			
		||||
        self._connection_args = kwargs
 | 
			
		||||
 | 
			
		||||
    def https_open(self, req):
 | 
			
		||||
        def http_class_wrapper(host, **kwargs):
 | 
			
		||||
            full_kwargs = dict(self._connection_args)
 | 
			
		||||
            full_kwargs.update(kwargs)
 | 
			
		||||
            return CertValidatingHTTPSConnection(host, **full_kwargs)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            return self.do_open(http_class_wrapper, req)
 | 
			
		||||
        except URLError as e:
 | 
			
		||||
            if type(e.reason) == ssl.SSLError and e.reason.args[0] == 1:
 | 
			
		||||
                raise InvalidCertificateException(req.host, '',
 | 
			
		||||
                                                  e.reason.args[1])
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
    https_request = HTTPSHandler.do_request_
 | 
			
		||||
 | 
			
		||||
# end of copy-paste code
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GZIPHandler(BaseHandler):
 | 
			
		||||
    def http_request(self, req):
 | 
			
		||||
        req.add_unredirected_header('Accept-Encoding', 'gzip')
 | 
			
		||||
 
 | 
			
		||||
@@ -232,8 +232,7 @@ class Cache:
 | 
			
		||||
            return self
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
default_handlers = [crawler.VerifiedHTTPSHandler(ca_certs=CA_CERT),
 | 
			
		||||
                    crawler.GZIPHandler(), crawler.UAHandler(DEFAULT_UA),
 | 
			
		||||
default_handlers = [crawler.GZIPHandler(), crawler.UAHandler(DEFAULT_UA),
 | 
			
		||||
                    crawler.AutoRefererHandler(), crawler.MetaRedirectHandler(),
 | 
			
		||||
                    crawler.EncodingFixHandler()]
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user