From 7bd448789d64d99cb1b1c561acf0025cd4c10303 Mon Sep 17 00:00:00 2001 From: pictuga Date: Thu, 26 Feb 2015 00:50:23 +0800 Subject: [PATCH] 2to3: first attempt to fix strings --- morss/crawler.py | 6 +++++- morss/feedify.py | 7 ++++++- morss/feeds.py | 11 ++++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/morss/crawler.py b/morss/crawler.py index a5d8707..5a6c859 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -14,9 +14,13 @@ except ImportError: from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl from http.client import HTTPException, HTTPConnection, HTTPS_PORT - import re +try: + basestring +except NameError: + basestring = str + MIMETYPE = { 'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'], diff --git a/morss/feedify.py b/morss/feedify.py index 0ab83d5..8dc58ec 100644 --- a/morss/feedify.py +++ b/morss/feedify.py @@ -18,6 +18,11 @@ except ImportError: from urllib.parse import urlparse, urljoin from urllib.request import urlopen +try: + basestring +except NameError: + basestring = str + def to_class(query): pattern = r'\[class=([^\]]+)\]' @@ -108,7 +113,7 @@ class Builder(object): self.rule = get_rule(link) if self.rule['mode'] == 'xpath': - if not isinstance(self.data, unicode): + if isinstance(self.data, bytes): self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace') self.doc = lxml.html.fromstring(self.data) elif self.rule['mode'] == 'json': diff --git a/morss/feeds.py b/morss/feeds.py index 449bfcd..4225aaa 100644 --- a/morss/feeds.py +++ b/morss/feeds.py @@ -28,6 +28,11 @@ except ImportError: from io import StringIO from urllib.request import urlopen +try: + basestring +except NameError: + basestring = unicode = str + Element = etree.Element @@ -79,7 +84,7 @@ def parse(data): match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100]) if match: enc = match.groups()[0].lower() - if not isinstance(data, unicode): + if isinstance(data, bytes): data = data.decode(enc, 'ignore') data = data.encode(enc) @@ -373,8 +378,8 @@ class FeedParser(FeedBase): out = StringIO() c = csv.writer(out, dialect=csv.excel) for item in self.items: - row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if - isinstance(x[1], basestring)] + row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item] # str + #isinstance(x[1], basestring)] # bytes or str c.writerow(row) out.seek(0) return out.read()