diff --git a/morss.py b/morss.py
index e235495..c7a95b0 100644
--- a/morss.py
+++ b/morss.py
@@ -3,6 +3,7 @@ import sys
 import os
 from os.path import expanduser
 from lxml import etree
+import re
 import string
 import urllib2
 from cookielib import CookieJar
@@ -37,11 +38,33 @@ class Info:
 		self.opener = False
 		self.enc = False
 
-		self.link = self.item.findtext('link')
+		self.link = self.item.xpath('link')[0]
 		self.desc = self.item.xpath('description')[0]
 
+	def checkURL(self):
+		"""Rewrite a feedsportal.com redirect link to the URL encoded in it.
+
+		Leaves the link untouched when its text is empty, does not start with
+		the feedsportal prefix, or does not end in the expected path layout.
+		"""
+		link = self.link.text
+		if not link or not link.startswith("http://rss.feedsportal.com"):
+			return
+		log('feedsportal')
+		# The encoded target is the second-to-last path segment.
+		match = re.search(r'/([0-9a-zA-Z]+)/[a-zA-Z0-9\.]+$', link)
+		if match is None:
+			return  # unexpected layout: nothing to decode
+		# Tokens are '0'-separated; the first one is dropped. The leading
+		# character of each remaining token is swapped using t ('=' if unknown).
+		url = match.group(1).split('0')
+		t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'L':'ww', 'S':'w.'}
+		self.link.text = 'http://' + "".join([(t[s[0]] if s[0] in t else "=") + s[1:] for s in url[1:]])
+		log(self.link.text)
+
 	def fetch(self):
-		log(self.link)
+		log(self.link.text)
+		self.checkURL()
 		if not self.findCache():
 			self.download()
 			self.chardet()
@@ -64,7 +87,7 @@ class Info:
 
 	def findCache(self):
 		if self.feed.cache is not False:
-			xpath = "//link[text()='" + self.link + "']/../description/text()"
+			xpath = "//link[text()='" + self.link.text + "']/../description/text()"
 			match = self.feed.cache.xpath(xpath)
 			if len(match):
 				log('cached')
@@ -87,7 +110,7 @@ class Info:
 		try:
 			cj = CookieJar()
 			self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
-			self.con = self.opener.open(self.link.encode('utf-8'))
+			self.con = self.opener.open(self.link.text.encode('utf-8'))
 			self.data = self.con.read()
 		except (urllib2.HTTPError, urllib2.URLError) as error:
 			log(error)