Default to "//h1/.." since most websites use it
because it is said to be good for SEO. Debug now requires the environment variable "DEBUG" to be set to something other than "".
master
parent
253bc27f17
commit
ed8a45875c
14
morss
14
morss
|
@ -7,15 +7,11 @@ import urllib2
|
||||||
import urllib
|
import urllib
|
||||||
from cookielib import CookieJar
|
from cookielib import CookieJar
|
||||||
|
|
||||||
def log(str):
|
def log(txt):
|
||||||
if (len(sys.argv) == 3):
|
if os.getenv('DEBUG', False):
|
||||||
print str
|
print txt
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
node = sys.argv[1] if len(sys.argv) > 1 else "//h1/.."
|
||||||
print "argument please"
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
node = sys.argv[1]
|
|
||||||
|
|
||||||
xml = sys.stdin.read()
|
xml = sys.stdin.read()
|
||||||
rss = etree.fromstring(xml)
|
rss = etree.fromstring(xml)
|
||||||
|
@ -66,5 +62,5 @@ for item in items:
|
||||||
log(error)
|
log(error)
|
||||||
log("http error")
|
log("http error")
|
||||||
|
|
||||||
if len(sys.argv) == 2:
|
if not os.getenv('DEBUG', False):
|
||||||
print etree.tostring(rss)
|
print etree.tostring(rss)
|
||||||
|
|
12
rules
12
rules
|
@ -1,23 +1,11 @@
|
||||||
LeMonde
|
|
||||||
http://www.lemonde.fr/rss/une.xml
|
|
||||||
//*[contains(@class,'article') or contains(@id,'content')]
|
|
||||||
|
|
||||||
TehranTimes
|
TehranTimes
|
||||||
http://www.tehrantimes.com/component/ninjarsssyndicator/?feed_id=1&format=raw
|
http://www.tehrantimes.com/component/ninjarsssyndicator/?feed_id=1&format=raw
|
||||||
//div[@class='article-indent']
|
//div[@class='article-indent']
|
||||||
|
|
||||||
BBC
|
|
||||||
http://feeds.bbci.co.uk/news/rss.xml
|
|
||||||
//h1/..
|
|
||||||
|
|
||||||
FranceInfo
|
FranceInfo
|
||||||
http://www.franceinfo.fr/rss.xml
|
http://www.franceinfo.fr/rss.xml
|
||||||
//h2[@class='chapo']/..
|
//h2[@class='chapo']/..
|
||||||
|
|
||||||
Courrier International
|
|
||||||
http://www.courrierinternational.com/rss/all/rss.xml
|
|
||||||
//div[@class='story-content']
|
|
||||||
|
|
||||||
Spiegel
|
Spiegel
|
||||||
http://www.spiegel.de/schlagzeilen/tops/index.rss
|
http://www.spiegel.de/schlagzeilen/tops/index.rss
|
||||||
//div[@id='spArticleSection']
|
//div[@id='spArticleSection']
|
||||||
|
|
Loading…
Reference in New Issue