Compare commits
8 Commits
5865af64f9
...
d24734110a
Author | SHA1 | Date |
---|---|---|
pictuga | d24734110a | |
pictuga | a41c2a3a62 | |
pictuga | dd2651061f | |
pictuga | 912c323c40 | |
pictuga | 5705a0be17 | |
pictuga | 4735ffba45 | |
pictuga | 08e39f5631 | |
pictuga | 765a43511e |
|
@ -21,12 +21,10 @@ json.encoder.c_make_encoder = None
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
from urllib2 import urlopen
|
|
||||||
from ConfigParser import RawConfigParser
|
from ConfigParser import RawConfigParser
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from urllib.request import urlopen
|
|
||||||
from configparser import RawConfigParser
|
from configparser import RawConfigParser
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -164,7 +162,7 @@ class ParserBase(object):
|
||||||
return self.convert(FeedHTML).tostring(**k)
|
return self.convert(FeedHTML).tostring(**k)
|
||||||
|
|
||||||
def convert(self, TargetParser):
|
def convert(self, TargetParser):
|
||||||
if isinstance(self, TargetParser):
|
if type(self) == TargetParser:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
target = TargetParser()
|
target = TargetParser()
|
||||||
|
@ -208,11 +206,11 @@ class ParserBase(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def rule_remove(self, rule):
|
def rule_remove(self, rule):
|
||||||
# remove node from its parent
|
# remove node from its parent. Returns nothing
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def rule_set(self, rule, value):
|
def rule_set(self, rule, value):
|
||||||
# value is always a str?
|
# set the value. Returns nothing
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def rule_str(self, rule):
|
def rule_str(self, rule):
|
||||||
|
@ -247,25 +245,30 @@ class ParserBase(object):
|
||||||
|
|
||||||
return self.rule_search_all(self.rules[rule_name])
|
return self.rule_search_all(self.rules[rule_name])
|
||||||
|
|
||||||
def get_str(self, rule_name):
|
def get(self, rule_name):
|
||||||
# simple function to get nice text from the rule name
|
# simple function to get nice text from the rule name
|
||||||
# for use in @property, ie. self.get_str('title')
|
# for use in @property, ie. self.get('title')
|
||||||
if rule_name not in self.rules:
|
if rule_name not in self.rules:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return self.rule_str(self.rules[rule_name])
|
return self.rule_str(self.rules[rule_name]) or None
|
||||||
|
|
||||||
def set_str(self, rule_name, value):
|
def set(self, rule_name, value):
|
||||||
|
# simple function to set nice text from the rule name. Returns nothing
|
||||||
if rule_name not in self.rules:
|
if rule_name not in self.rules:
|
||||||
return None
|
return
|
||||||
|
|
||||||
|
if value is None:
|
||||||
|
self.rmv(rule_name)
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self.rule_set(self.rules[rule_name], value)
|
self.rule_set(self.rules[rule_name], value)
|
||||||
|
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
# does not exist, have to create it
|
# does not exist, have to create it
|
||||||
self.rule_create(self.rules[rule_name])
|
self.rule_create(self.rules[rule_name])
|
||||||
return self.rule_set(self.rules[rule_name], value)
|
self.rule_set(self.rules[rule_name], value)
|
||||||
|
|
||||||
def rmv(self, rule_name):
|
def rmv(self, rule_name):
|
||||||
# easy deleter
|
# easy deleter
|
||||||
|
@ -369,10 +372,6 @@ class ParserXML(ParserBase):
|
||||||
match.getparent().append(element)
|
match.getparent().append(element)
|
||||||
return element
|
return element
|
||||||
|
|
||||||
# try duplicating from template
|
|
||||||
# FIXME
|
|
||||||
# >>> self.xml.getroottree().getpath(ff.find('a'))
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def rule_remove(self, rule):
|
def rule_remove(self, rule):
|
||||||
|
@ -432,7 +431,7 @@ class ParserXML(ParserBase):
|
||||||
return etree.tostring(match, method='text', encoding='unicode').strip()
|
return etree.tostring(match, method='text', encoding='unicode').strip()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return match or ""
|
return match # might be None if no match
|
||||||
|
|
||||||
|
|
||||||
class ParserHTML(ParserXML):
|
class ParserHTML(ParserXML):
|
||||||
|
@ -468,8 +467,6 @@ class ParserHTML(ParserXML):
|
||||||
element = deepcopy(match)
|
element = deepcopy(match)
|
||||||
match.getparent().append(element)
|
match.getparent().append(element)
|
||||||
|
|
||||||
# TODO def rule_set for the html part
|
|
||||||
|
|
||||||
|
|
||||||
def parse_time(value):
|
def parse_time(value):
|
||||||
if isinstance(value, basestring):
|
if isinstance(value, basestring):
|
||||||
|
@ -484,6 +481,7 @@ def parse_time(value):
|
||||||
|
|
||||||
elif isinstance(value, datetime):
|
elif isinstance(value, datetime):
|
||||||
return value
|
return value
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -497,8 +495,9 @@ class ParserJSON(ParserBase):
|
||||||
return json.loads(raw)
|
return json.loads(raw)
|
||||||
|
|
||||||
def remove(self):
|
def remove(self):
|
||||||
# delete oneself FIXME
|
# impossible to "delete" oneself per se but can clear all its items
|
||||||
pass
|
for attr in self.root:
|
||||||
|
del self.root[attr]
|
||||||
|
|
||||||
def tostring(self, encoding='unicode', **k):
|
def tostring(self, encoding='unicode', **k):
|
||||||
dump = json.dumps(self.root, ensure_ascii=False, **k) # ensure_ascii = False to have proper (unicode) string and not \u00
|
dump = json.dumps(self.root, ensure_ascii=False, **k) # ensure_ascii = False to have proper (unicode) string and not \u00
|
||||||
|
@ -558,11 +557,16 @@ class ParserJSON(ParserBase):
|
||||||
rrule = self._rule_parse(rule)
|
rrule = self._rule_parse(rule)
|
||||||
cur = self.root
|
cur = self.root
|
||||||
|
|
||||||
|
try:
|
||||||
for node in rrule[:-1]:
|
for node in rrule[:-1]:
|
||||||
cur = cur[node]
|
cur = cur[node]
|
||||||
|
|
||||||
del cur[rrule[-1]]
|
del cur[rrule[-1]]
|
||||||
|
|
||||||
|
except KeyError:
|
||||||
|
# nothing to delete
|
||||||
|
pass
|
||||||
|
|
||||||
def rule_set(self, rule, value):
|
def rule_set(self, rule, value):
|
||||||
if '[]' in rule:
|
if '[]' in rule:
|
||||||
raise ValueError('not supported') # FIXME
|
raise ValueError('not supported') # FIXME
|
||||||
|
@ -609,12 +613,12 @@ class Feed(object):
|
||||||
return [itemsClass(x, self.rules, self) for x in items]
|
return [itemsClass(x, self.rules, self) for x in items]
|
||||||
|
|
||||||
title = property(
|
title = property(
|
||||||
lambda f: f.get_str('title'),
|
lambda f: f.get('title'),
|
||||||
lambda f,x: f.set_str('title', x),
|
lambda f,x: f.set('title', x),
|
||||||
lambda f: f.rmv('title') )
|
lambda f: f.rmv('title') )
|
||||||
description = desc = property(
|
description = desc = property(
|
||||||
lambda f: f.get_str('desc'),
|
lambda f: f.get('desc'),
|
||||||
lambda f,x: f.set_str('desc', x),
|
lambda f,x: f.set('desc', x),
|
||||||
lambda f: f.rmv('desc') )
|
lambda f: f.rmv('desc') )
|
||||||
items = property(
|
items = property(
|
||||||
lambda f: f )
|
lambda f: f )
|
||||||
|
@ -661,28 +665,28 @@ class Item(Uniq):
|
||||||
return id(xml)
|
return id(xml)
|
||||||
|
|
||||||
title = property(
|
title = property(
|
||||||
lambda f: f.get_str('item_title'),
|
lambda f: f.get('item_title'),
|
||||||
lambda f,x: f.set_str('item_title', x),
|
lambda f,x: f.set('item_title', x),
|
||||||
lambda f: f.rmv('item_title') )
|
lambda f: f.rmv('item_title') )
|
||||||
link = property(
|
link = property(
|
||||||
lambda f: f.get_str('item_link'),
|
lambda f: f.get('item_link'),
|
||||||
lambda f,x: f.set_str('item_link', x),
|
lambda f,x: f.set('item_link', x),
|
||||||
lambda f: f.rmv('item_link') )
|
lambda f: f.rmv('item_link') )
|
||||||
description = desc = property(
|
description = desc = property(
|
||||||
lambda f: f.get_str('item_desc'),
|
lambda f: f.get('item_desc'),
|
||||||
lambda f,x: f.set_str('item_desc', x),
|
lambda f,x: f.set('item_desc', x),
|
||||||
lambda f: f.rmv('item_desc') )
|
lambda f: f.rmv('item_desc') )
|
||||||
content = property(
|
content = property(
|
||||||
lambda f: f.get_str('item_content'),
|
lambda f: f.get('item_content'),
|
||||||
lambda f,x: f.set_str('item_content', x),
|
lambda f,x: f.set('item_content', x),
|
||||||
lambda f: f.rmv('item_content') )
|
lambda f: f.rmv('item_content') )
|
||||||
time = property(
|
time = property(
|
||||||
lambda f: f.time_prs(f.get_str('item_time')),
|
lambda f: f.time_prs(f.get('item_time')),
|
||||||
lambda f,x: f.set_str('item_time', f.time_fmt(x)),
|
lambda f,x: f.set('item_time', f.time_fmt(x)),
|
||||||
lambda f: f.rmv('item_time') )
|
lambda f: f.rmv('item_time') )
|
||||||
updated = property(
|
updated = property(
|
||||||
lambda f: f.time_prs(f.get_str('item_updated')),
|
lambda f: f.time_prs(f.get('item_updated')),
|
||||||
lambda f,x: f.set_str('item_updated', f.time_fmt(x)),
|
lambda f,x: f.set('item_updated', f.time_fmt(x)),
|
||||||
lambda f: f.rmv('item_updated') )
|
lambda f: f.rmv('item_updated') )
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -204,7 +204,7 @@ def ItemFill(item, options, feedurl='/', fast=False):
|
||||||
|
|
||||||
# twitter
|
# twitter
|
||||||
if urlparse(feedurl).netloc == 'twitter.com':
|
if urlparse(feedurl).netloc == 'twitter.com':
|
||||||
match = lxml.html.fromstring(item.content).xpath('//a/@data-expanded-url')
|
match = lxml.html.fromstring(item.desc).xpath('//a/@data-expanded-url')
|
||||||
if len(match):
|
if len(match):
|
||||||
link = match[0]
|
link = match[0]
|
||||||
log(link)
|
log(link)
|
||||||
|
@ -341,6 +341,8 @@ def FeedFetch(url, options):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
rss = feeds.parse(xml, url, contenttype)
|
rss = feeds.parse(xml, url, contenttype)
|
||||||
|
rss = rss.convert(feeds.FeedXML)
|
||||||
|
# contains all fields, otherwise much-needed data can be lost
|
||||||
|
|
||||||
except TypeError:
|
except TypeError:
|
||||||
log('random page')
|
log('random page')
|
||||||
|
@ -435,8 +437,10 @@ def FeedFormat(rss, options):
|
||||||
if options.callback:
|
if options.callback:
|
||||||
if re.match(r'^[a-zA-Z0-9\.]+$', options.callback) is not None:
|
if re.match(r'^[a-zA-Z0-9\.]+$', options.callback) is not None:
|
||||||
return '%s(%s)' % (options.callback, rss.tojson())
|
return '%s(%s)' % (options.callback, rss.tojson())
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise MorssException('Invalid callback var name')
|
raise MorssException('Invalid callback var name')
|
||||||
|
|
||||||
elif options.json:
|
elif options.json:
|
||||||
if options.indent:
|
if options.indent:
|
||||||
return rss.tojson(encoding='UTF-8', indent=4)
|
return rss.tojson(encoding='UTF-8', indent=4)
|
||||||
|
|
Loading…
Reference in New Issue