Compare commits
1 Commits
v1.1
...
d94750d25a
Author | SHA1 | Date | |
---|---|---|---|
d94750d25a |
28
README.md
28
README.md
@@ -55,9 +55,7 @@ You do need:
|
||||
|
||||
Simplest way to get these:
|
||||
|
||||
```shell
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
|
||||
You may also need:
|
||||
|
||||
@@ -160,12 +158,8 @@ You can change the port and the location of the `www/` folder like this `python
|
||||
|
||||
#### Passing arguments
|
||||
|
||||
Then visit:
|
||||
```
|
||||
http://PATH/TO/MORSS/[main.py/][:argwithoutvalue[:argwithvalue=value[...]]]/FEEDURL
|
||||
```
|
||||
For example: `http://morss.example/:clip/https://twitter.com/pictuga`
|
||||
|
||||
Then visit: **`http://PATH/TO/MORSS/[main.py/][:argwithoutvalue[:argwithvalue=value[...]]]/FEEDURL`**
|
||||
For example: `http://morss.example/:clip/https://twitter.com/pictuga`
|
||||
*(Brackets indicate optional text)*
|
||||
|
||||
The `main.py` part is only needed if your server doesn't support the Apache redirect rule set in the provided `.htaccess`.
|
||||
@@ -174,12 +168,8 @@ Works like a charm with [Tiny Tiny RSS](http://tt-rss.org/redmine/projects/tt-rs
|
||||
|
||||
### As a CLI application
|
||||
|
||||
Run:
|
||||
```
|
||||
python[2.7] -m morss [argwithoutvalue] [argwithvalue=value] [...] FEEDURL
|
||||
```
|
||||
For example: `python -m morss debug http://feeds.bbci.co.uk/news/rss.xml`
|
||||
|
||||
Run: **`python[2.7] -m morss [argwithoutvalue] [argwithvalue=value] [...] FEEDURL`**
|
||||
For example: `python -m morss debug http://feeds.bbci.co.uk/news/rss.xml`
|
||||
*(Brackets indicate optional text)*
|
||||
|
||||
### As a newsreader hook
|
||||
@@ -189,12 +179,8 @@ To use it, the newsreader [Liferea](http://lzone.de/liferea/) is required
|
||||
scripts can be run on top of the RSS feed, using its
|
||||
[output](http://lzone.de/liferea/scraping.htm) as an RSS feed.
|
||||
|
||||
To use this script, you have to enable "(Unix) command" in liferea feed settings, and use the command:
|
||||
```
|
||||
[python[2.7]] PATH/TO/MORSS/main.py [argwithoutvalue] [argwithvalue=value] [...] FEEDURL
|
||||
```
|
||||
For example: `python2.7 PATH/TO/MORSS/main.py http://feeds.bbci.co.uk/news/rss.xml`
|
||||
|
||||
To use this script, you have to enable "(Unix) command" in liferea feed settings, and use the command: **`[python2.7] PATH/TO/MORSS/main.py [argwithoutvalue] [argwithvalue=value] [...] FEEDURL`**
|
||||
For example: `python2.7 PATH/TO/MORSS/main.py http://feeds.bbci.co.uk/news/rss.xml`
|
||||
*(Brackets indicate optional text)*
|
||||
|
||||
### As a python library
|
||||
|
100
morss/feeds.py
100
morss/feeds.py
@@ -21,10 +21,12 @@ json.encoder.c_make_encoder = None
|
||||
try:
|
||||
# python 2
|
||||
from StringIO import StringIO
|
||||
from urllib2 import urlopen
|
||||
from ConfigParser import RawConfigParser
|
||||
except ImportError:
|
||||
# python 3
|
||||
from io import StringIO
|
||||
from urllib.request import urlopen
|
||||
from configparser import RawConfigParser
|
||||
|
||||
try:
|
||||
@@ -162,7 +164,7 @@ class ParserBase(object):
|
||||
return self.convert(FeedHTML).tostring(**k)
|
||||
|
||||
def convert(self, TargetParser):
|
||||
if type(self) == TargetParser:
|
||||
if isinstance(self, TargetParser):
|
||||
return self
|
||||
|
||||
target = TargetParser()
|
||||
@@ -206,11 +208,11 @@ class ParserBase(object):
|
||||
pass
|
||||
|
||||
def rule_remove(self, rule):
|
||||
# remove node from its parent. Returns nothing
|
||||
# remove node from its parent
|
||||
pass
|
||||
|
||||
def rule_set(self, rule, value):
|
||||
# set the value. Returns nothing
|
||||
# value is always a str?
|
||||
pass
|
||||
|
||||
def rule_str(self, rule):
|
||||
@@ -245,30 +247,25 @@ class ParserBase(object):
|
||||
|
||||
return self.rule_search_all(self.rules[rule_name])
|
||||
|
||||
def get(self, rule_name):
|
||||
def get_str(self, rule_name):
|
||||
# simple function to get nice text from the rule name
|
||||
# for use in @property, ie. self.get('title')
|
||||
# for use in @property, ie. self.get_str('title')
|
||||
if rule_name not in self.rules:
|
||||
return None
|
||||
|
||||
return self.rule_str(self.rules[rule_name]) or None
|
||||
return self.rule_str(self.rules[rule_name])
|
||||
|
||||
def set(self, rule_name, value):
|
||||
# simple function to set nice text from the rule name. Returns nothing
|
||||
def set_str(self, rule_name, value):
|
||||
if rule_name not in self.rules:
|
||||
return
|
||||
|
||||
if value is None:
|
||||
self.rmv(rule_name)
|
||||
return
|
||||
return None
|
||||
|
||||
try:
|
||||
self.rule_set(self.rules[rule_name], value)
|
||||
return self.rule_set(self.rules[rule_name], value)
|
||||
|
||||
except AttributeError:
|
||||
# does not exist, have to create it
|
||||
self.rule_create(self.rules[rule_name])
|
||||
self.rule_set(self.rules[rule_name], value)
|
||||
return self.rule_set(self.rules[rule_name], value)
|
||||
|
||||
def rmv(self, rule_name):
|
||||
# easy deleter
|
||||
@@ -294,7 +291,7 @@ class ParserXML(ParserBase):
|
||||
'rssfake': 'http://purl.org/rss/1.0/'}
|
||||
|
||||
def parse(self, raw):
|
||||
parser = etree.XMLParser(recover=True, remove_blank_text=True, remove_pis=True) # remove_blank_text needed for pretty_print
|
||||
parser = etree.XMLParser(recover=True)
|
||||
return etree.fromstring(raw, parser)
|
||||
|
||||
def remove(self):
|
||||
@@ -372,6 +369,10 @@ class ParserXML(ParserBase):
|
||||
match.getparent().append(element)
|
||||
return element
|
||||
|
||||
# try duplicating from template
|
||||
# FIXME
|
||||
# >>> self.xml.getroottree().getpath(ff.find('a'))
|
||||
|
||||
return None
|
||||
|
||||
def rule_remove(self, rule):
|
||||
@@ -431,7 +432,7 @@ class ParserXML(ParserBase):
|
||||
return etree.tostring(match, method='text', encoding='unicode').strip()
|
||||
|
||||
else:
|
||||
return match # might be None if no match
|
||||
return match or ""
|
||||
|
||||
|
||||
class ParserHTML(ParserXML):
|
||||
@@ -440,8 +441,7 @@ class ParserHTML(ParserXML):
|
||||
mimetype = ['text/html', 'application/xhtml+xml']
|
||||
|
||||
def parse(self, raw):
|
||||
parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print
|
||||
return etree.fromstring(raw, parser)
|
||||
return lxml.html.fromstring(raw)
|
||||
|
||||
def tostring(self, encoding='unicode', **k):
|
||||
return lxml.html.tostring(self.root, encoding=encoding, **k)
|
||||
@@ -467,12 +467,11 @@ class ParserHTML(ParserXML):
|
||||
element = deepcopy(match)
|
||||
match.getparent().append(element)
|
||||
|
||||
# TODO def rule_set for the html part
|
||||
|
||||
|
||||
def parse_time(value):
|
||||
if value is None or value == 0:
|
||||
return None
|
||||
|
||||
elif isinstance(value, basestring):
|
||||
if isinstance(value, basestring):
|
||||
if re.match(r'^[0-9]+$', value):
|
||||
return datetime.fromtimestamp(int(value), tz.UTC)
|
||||
|
||||
@@ -484,9 +483,8 @@ def parse_time(value):
|
||||
|
||||
elif isinstance(value, datetime):
|
||||
return value
|
||||
|
||||
else:
|
||||
return None
|
||||
return False
|
||||
|
||||
|
||||
class ParserJSON(ParserBase):
|
||||
@@ -498,9 +496,8 @@ class ParserJSON(ParserBase):
|
||||
return json.loads(raw)
|
||||
|
||||
def remove(self):
|
||||
# impossible to "delete" oneself per se but can clear all its items
|
||||
for attr in self.root:
|
||||
del self.root[attr]
|
||||
# delete oneself FIXME
|
||||
pass
|
||||
|
||||
def tostring(self, encoding='unicode', **k):
|
||||
dump = json.dumps(self.root, ensure_ascii=False, **k) # ensure_ascii = False to have proper (unicode) string and not \u00
|
||||
@@ -560,15 +557,10 @@ class ParserJSON(ParserBase):
|
||||
rrule = self._rule_parse(rule)
|
||||
cur = self.root
|
||||
|
||||
try:
|
||||
for node in rrule[:-1]:
|
||||
cur = cur[node]
|
||||
for node in rrule[:-1]:
|
||||
cur = cur[node]
|
||||
|
||||
del cur[rrule[-1]]
|
||||
|
||||
except KeyError:
|
||||
# nothing to delete
|
||||
pass
|
||||
del cur[rrule[-1]]
|
||||
|
||||
def rule_set(self, rule, value):
|
||||
if '[]' in rule:
|
||||
@@ -616,12 +608,12 @@ class Feed(object):
|
||||
return [itemsClass(x, self.rules, self) for x in items]
|
||||
|
||||
title = property(
|
||||
lambda f: f.get('title'),
|
||||
lambda f,x: f.set('title', x),
|
||||
lambda f: f.get_str('title'),
|
||||
lambda f,x: f.set_str('title', x),
|
||||
lambda f: f.rmv('title') )
|
||||
description = desc = property(
|
||||
lambda f: f.get('desc'),
|
||||
lambda f,x: f.set('desc', x),
|
||||
lambda f: f.get_str('desc'),
|
||||
lambda f,x: f.set_str('desc', x),
|
||||
lambda f: f.rmv('desc') )
|
||||
items = property(
|
||||
lambda f: f )
|
||||
@@ -668,28 +660,28 @@ class Item(Uniq):
|
||||
return id(xml)
|
||||
|
||||
title = property(
|
||||
lambda f: f.get('item_title'),
|
||||
lambda f,x: f.set('item_title', x),
|
||||
lambda f: f.get_str('item_title'),
|
||||
lambda f,x: f.set_str('item_title', x),
|
||||
lambda f: f.rmv('item_title') )
|
||||
link = property(
|
||||
lambda f: f.get('item_link'),
|
||||
lambda f,x: f.set('item_link', x),
|
||||
lambda f: f.get_str('item_link'),
|
||||
lambda f,x: f.set_str('item_link', x),
|
||||
lambda f: f.rmv('item_link') )
|
||||
description = desc = property(
|
||||
lambda f: f.get('item_desc'),
|
||||
lambda f,x: f.set('item_desc', x),
|
||||
lambda f: f.get_str('item_desc'),
|
||||
lambda f,x: f.set_str('item_desc', x),
|
||||
lambda f: f.rmv('item_desc') )
|
||||
content = property(
|
||||
lambda f: f.get('item_content'),
|
||||
lambda f,x: f.set('item_content', x),
|
||||
lambda f: f.get_str('item_content'),
|
||||
lambda f,x: f.set_str('item_content', x),
|
||||
lambda f: f.rmv('item_content') )
|
||||
time = property(
|
||||
lambda f: f.time_prs(f.get('item_time')),
|
||||
lambda f,x: f.set('item_time', f.time_fmt(x)),
|
||||
lambda f: f.time_prs(f.get_str('item_time')),
|
||||
lambda f,x: f.set_str('item_time', f.time_fmt(x)),
|
||||
lambda f: f.rmv('item_time') )
|
||||
updated = property(
|
||||
lambda f: f.time_prs(f.get('item_updated')),
|
||||
lambda f,x: f.set('item_updated', f.time_fmt(x)),
|
||||
lambda f: f.time_prs(f.get_str('item_updated')),
|
||||
lambda f,x: f.set_str('item_updated', f.time_fmt(x)),
|
||||
lambda f: f.rmv('item_updated') )
|
||||
|
||||
|
||||
@@ -698,10 +690,6 @@ class FeedXML(Feed, ParserXML):
|
||||
|
||||
def tostring(self, encoding='unicode', **k):
|
||||
# override needed due to "getroottree" inclusion
|
||||
|
||||
if self.root.getprevious() is None:
|
||||
self.root.addprevious(etree.PI('xml-stylesheet', 'type="text/xsl" href="/sheet.xsl"'))
|
||||
|
||||
return etree.tostring(self.root.getroottree(), encoding=encoding, **k)
|
||||
|
||||
|
||||
|
@@ -204,7 +204,7 @@ def ItemFill(item, options, feedurl='/', fast=False):
|
||||
|
||||
# twitter
|
||||
if urlparse(feedurl).netloc == 'twitter.com':
|
||||
match = lxml.html.fromstring(item.desc).xpath('//a/@data-expanded-url')
|
||||
match = lxml.html.fromstring(item.content).xpath('//a/@data-expanded-url')
|
||||
if len(match):
|
||||
link = match[0]
|
||||
log(link)
|
||||
@@ -341,8 +341,6 @@ def FeedFetch(url, options):
|
||||
else:
|
||||
try:
|
||||
rss = feeds.parse(xml, url, contenttype)
|
||||
rss = rss.convert(feeds.FeedXML)
|
||||
# contains all fields, otherwise much-needed data can be lost
|
||||
|
||||
except TypeError:
|
||||
log('random page')
|
||||
@@ -437,10 +435,8 @@ def FeedFormat(rss, options):
|
||||
if options.callback:
|
||||
if re.match(r'^[a-zA-Z0-9\.]+$', options.callback) is not None:
|
||||
return '%s(%s)' % (options.callback, rss.tojson())
|
||||
|
||||
else:
|
||||
raise MorssException('Invalid callback var name')
|
||||
|
||||
elif options.json:
|
||||
if options.indent:
|
||||
return rss.tojson(encoding='UTF-8', indent=4)
|
||||
@@ -452,11 +448,7 @@ def FeedFormat(rss, options):
|
||||
return rss.tocsv(encoding='UTF-8')
|
||||
|
||||
elif options.reader:
|
||||
if options.indent:
|
||||
return rss.tohtml(encoding='UTF-8', pretty_print=True)
|
||||
|
||||
else:
|
||||
return rss.tohtml(encoding='UTF-8')
|
||||
return rss.tohtml(encoding='UTF-8')
|
||||
|
||||
else:
|
||||
if options.indent:
|
||||
|
@@ -2,7 +2,7 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>morss</title>
|
||||
<meta name="viewport" content="width=device-width; initial-scale=1.0; maximum-scale=1.0;" />
|
||||
<meta name="viewport" content="width=device-width; initial-scale=1.0; maximum-scale=1.0;">
|
||||
<meta charset="UTF-8" />
|
||||
<style type="text/css">
|
||||
body
|
||||
|
122
www/sheet.xsl
122
www/sheet.xsl
@@ -1,122 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<xsl:stylesheet version="1.1" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="html"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<title>RSS feed by morss</title>
|
||||
<meta name="viewport" content="width=device-width; initial-scale=1.0; maximum-scale=1.0;" />
|
||||
|
||||
<style type="text/css">
|
||||
body {
|
||||
overflow-wrap: anywhere;
|
||||
word-wrap: anywhere;
|
||||
}
|
||||
|
||||
#url {
|
||||
background-color: rgba(255, 165, 0, 0.25);
|
||||
padding: 1% 5%;
|
||||
display: inline-block;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
body > ul {
|
||||
background-color: #FFFAF4;
|
||||
padding: 1%;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
ul {
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
.tag {
|
||||
color: darkred;
|
||||
}
|
||||
|
||||
.attr {
|
||||
color: darksalmon;
|
||||
}
|
||||
|
||||
.value {
|
||||
color: darkblue;
|
||||
}
|
||||
|
||||
.comment {
|
||||
color: lightgrey;
|
||||
}
|
||||
|
||||
pre {
|
||||
margin: 0;
|
||||
max-width: 100%;
|
||||
white-space: normal;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>RSS feed by morss</h1>
|
||||
|
||||
<p>Your RSS feed is <strong style="color: green">ready</strong>. You
|
||||
can enter the following url in your newsreader:</p>
|
||||
|
||||
<div id="url"></div>
|
||||
|
||||
<ul>
|
||||
<xsl:apply-templates/>
|
||||
</ul>
|
||||
|
||||
<script>
|
||||
document.getElementById("url").innerHTML = window.location.href;
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="*">
|
||||
<li>
|
||||
<span class="element">
|
||||
&lt;
|
||||
<span class="tag"><xsl:value-of select="name()"/></span>
|
||||
|
||||
<xsl:for-each select="@*">
|
||||
<span class="attr"> <xsl:value-of select="name()"/></span>
|
||||
=
|
||||
"<span class="value"><xsl:value-of select="."/></span>"
|
||||
</xsl:for-each>
|
||||
>
|
||||
</span>
|
||||
|
||||
<xsl:if test="node()">
|
||||
<ul>
|
||||
<xsl:apply-templates/>
|
||||
</ul>
|
||||
</xsl:if>
|
||||
|
||||
<span class="element">
|
||||
&lt;/
|
||||
<span class="tag"><xsl:value-of select="name()"/></span>
|
||||
>
|
||||
</span>
|
||||
</li>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="comment()">
|
||||
<li>
|
||||
<pre class="comment"><![CDATA[<!--]]><xsl:value-of select="."/><![CDATA[-->]]></pre>
|
||||
</li>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()">
|
||||
<li>
|
||||
<pre>
|
||||
<xsl:value-of select="normalize-space(.)"/>
|
||||
</pre>
|
||||
</li>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()[not(normalize-space())]"/>
|
||||
|
||||
</xsl:stylesheet>
|
Reference in New Issue
Block a user