because it is said to be good for SEO. Debug now requires the env variable "DEBUG" to be set to something other than "".
67 lines
1.3 KiB
Python
Executable File
67 lines
1.3 KiB
Python
Executable File
#! /usr/bin/env python2.7
|
|
import hashlib
import os
import sys
import urllib
import urllib2
from cookielib import CookieJar
from os.path import expanduser

from lxml import etree
|
|
|
|
def log(txt):
    """Print ``txt`` to stdout, but only when debugging is enabled.

    Debugging is enabled when the ``DEBUG`` environment variable is set to a
    non-empty string; an unset or empty variable silences the output.
    """
    # With a single argument, print(x) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    if os.getenv('DEBUG', False):
        print(txt)
|
|
|
|
# XPath expression selecting the part of each linked page to embed. It is
# taken from the first command-line argument when one is given; the default
# selects the parent element of any <h1>.
node = sys.argv[1] if len(sys.argv) > 1 else "//h1/.."

# The feed to rewrite is read in full from standard input and parsed as XML.
xml = sys.stdin.read()
rss = etree.fromstring(xml)
items = rss.xpath('//item')

# Directory under the user's home where fetched extracts are cached.
cache = os.path.join(expanduser("~"), ".cache", "morss")
try:
    os.makedirs(cache)
except OSError:
    # The directory already existing is fine (it may have been created by a
    # concurrent run between a check and the call); anything else, such as a
    # permission problem, is a real error and must surface.
    if not os.path.isdir(cache):
        raise
|
|
|
|
# Replace each item's <description> with the fragment of the linked page
# selected by the ``node`` XPath, using an on-disk cache keyed by the link.
for item in items:
    title = item.findtext('title')
    link = item.findtext('link')

    log(title)
    log(link)

    # An item lacking a <description> or a <link> gives nothing to fill in
    # or to fetch; skip it rather than aborting the whole feed.
    descriptions = item.xpath('description')
    if not descriptions:
        log("no description")
        continue
    desc = descriptions[0]

    if not link:
        log("no link")
        continue

    # Name the cache file after a digest of the link. The builtin hash() is
    # not guaranteed to be stable across interpreter builds or runs, and its
    # narrower range makes collisions (serving another article's text) more
    # likely. NOTE: cache files written under the old hash()-based names are
    # simply no longer looked up.
    cached = cache + "/" + hashlib.sha1(link.encode('utf-8')).hexdigest()
    log(cached)

    if os.path.exists(cached):
        log("cached")
        with open(cached, 'r') as cache_file:
            desc.text = cache_file.read()
        continue

    try:
        # A fresh cookie jar and opener are built for every item.
        cj = CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        response = opener.open(link)
        try:
            data = response.read()
        finally:
            response.close()
    except urllib2.HTTPError as error:
        log(error)
        log("http error")
        continue
    except (urllib2.URLError, ValueError) as error:
        # URLError: the server could not be reached (DNS failure, refused
        # connection, ...). ValueError: urllib2 rejects the link itself,
        # e.g. an unknown URL type.
        log(error)
        log("url error")
        continue

    # NOTE(review): depending on the lxml version, an empty document seems
    # to either raise XMLSyntaxError or make etree.HTML return None; both
    # are treated as "nothing to extract" -- confirm against the installed
    # lxml.
    try:
        html = etree.HTML(data)
    except etree.XMLSyntaxError:
        html = None
    if html is None:
        log("html error")
        continue

    match = html.xpath(node)
    if not match:
        log("no match")
        continue

    try:
        text = etree.tostring(match[0])
        log("ok txt")
    except etree.SerialisationError:
        log('serialisation')
        continue

    try:
        desc.text = text
    except ValueError:
        # lxml refused the text as element content; leave the original
        # description untouched and do not cache the extract.
        log('xml error')
    else:
        with open(cached, 'w') as cache_file:
            cache_file.write(text)
|
|
|
|
# Write the rewritten feed to stdout, except in debug mode (DEBUG set to a
# non-empty string), where stdout carries the log() output instead.
# print(x) with one argument is equivalent under the Python 2 print statement
# and the Python 3 print function.
if not os.getenv('DEBUG', False):
    print(etree.tostring(rss))
|