From 51fe6ce81b9a66c588ad8d0c7618e5d0f93507ef Mon Sep 17 00:00:00 2001
From: pictuga
Date: Mon, 25 Feb 2013 15:50:32 +0100
Subject: [PATCH] First commit

---
Review note: line breaks and indentation of this patch were reconstructed.
The "index" blob ids are kept from the original patch and no longer match
the revised morss script.

 liferea.css | 20 ++++++++++++++++++++
 morss       | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rules       | 14 ++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 liferea.css
 create mode 100755 morss
 create mode 100644 rules

diff --git a/liferea.css b/liferea.css
new file mode 100644
index 0000000..7e26984
--- /dev/null
+++ b/liferea.css
@@ -0,0 +1,20 @@
+img
+{
+    max-width: 80%;
+    height: auto;
+}
+
+.bbx_container, /*TT*/
+.share-help, /*BBC*/
+div.video_container iframe, /*LM*/
+.story-info, .story-share.bluelinks, .story-content img:last-child, .pager /*CI*/
+{
+    display: none;
+}
+
+h2.txt15_140, /*LM*/
+h2.chapo /*FranceInfo*/
+{
+    font-size: 1em;
+    font-weight: normal;
+}
diff --git a/morss b/morss
new file mode 100755
index 0000000..e50050f
--- /dev/null
+++ b/morss
@@ -0,0 +1,74 @@
+#! /usr/bin/env python2.7
+"""Replace the description of each RSS item with content from its link.
+
+Usage: cat feed.xml | morss XPATH [DEBUG]
+
+The feed is read from stdin. For each item, the page behind its link is
+downloaded and the first node matching XPATH is serialized into the text
+of the item's description. Without a DEBUG argument the rewritten feed is
+printed; with one, per-item progress is printed instead.
+"""
+import sys
+from lxml import etree
+import urllib2
+
+if len(sys.argv) < 2:
+    print "argument please"
+    sys.exit(1)
+
+# Any argument after the XPath turns on debug output, which replaces the
+# feed output, so exactly one of the two is always printed.
+debug = len(sys.argv) > 2
+
+node = sys.argv[1]
+
+xml = sys.stdin.read()
+rss = etree.fromstring(xml)
+items = rss.xpath('//item')
+
+# Only the first 30 items are rewritten; comment out the line below to
+# process every item of a long feed (like BBC). The list is shortened, not
+# the tree, so the remaining items are still printed, unchanged.
+del items[30:]
+
+for item in items:
+    title = item.findtext('title')
+    link = item.findtext('link')
+    descs = item.xpath('description')
+
+    if debug:
+        print title
+        print link
+
+    # An item without a link or a description cannot be rewritten: skip it
+    # instead of aborting the whole feed.
+    if not link or not descs:
+        if debug:
+            print "error"
+        continue
+    desc = descs[0]
+
+    try:
+        data = urllib2.urlopen(link).read()
+        html = etree.HTML(data)
+        match = html.xpath(node)
+
+        if len(match):
+            text = etree.tostring(match[0])
+
+            if debug:
+                print text
+
+            desc.text = text
+        else:
+            if debug:
+                print "no match"
+
+    # URLError is the base class of HTTPError and also covers failures that
+    # produce no HTTP response (unknown host, refused connection).
+    except urllib2.URLError:
+        if debug:
+            print "error"
+
+if not debug:
+    print etree.tostring(rss)
diff --git a/rules b/rules
new file mode 100644
index 0000000..212a96b
--- /dev/null
+++ b/rules
@@ -0,0 +1,14 @@
+LeMonde
+//*[contains(@class,'article') or contains(@id,'content')]
+
+TehranTimes
+//div[@class='article-indent']
+
+BBC
+//h1/..
+
+FranceInfo
+//h2[@class='chapo']/..
+
+Courrier International
+//div[@class='story-content']