Remove "clip" from Fill
Put that in Gather. Also removed from feeds.py. "alone" mode was also added (it removes the description).
master
parent
1b7fe8fbee
commit
78706952fe
7
feeds.py
7
feeds.py
|
@ -343,14 +343,11 @@ class FeedItem(FeedBase):
|
|||
description = desc = FeedDescriptor('desc')
|
||||
content = FeedDescriptor('content')
|
||||
|
||||
def pushContent(self, value, clip=False):
|
||||
def pushContent(self, value):
|
||||
if not self.desc and self.content:
|
||||
self.desc = self.content
|
||||
|
||||
if self.desc and clip:
|
||||
self.content = self.desc + "<br/><br/>* * *<br/><br/>" + value
|
||||
else:
|
||||
self.content = value
|
||||
self.content = value
|
||||
|
||||
def remove(self):
|
||||
self.xml.getparent().remove(self.xml)
|
||||
|
|
18
morss.py
18
morss.py
|
@ -252,7 +252,7 @@ def decodeHTML(data, con=None):
|
|||
log(enc)
|
||||
return data.decode(enc, 'replace')
|
||||
|
||||
def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
||||
def Fill(item, cache, feedurl='/', fast=False):
|
||||
""" Returns True when it has done its best """
|
||||
|
||||
if not item.link:
|
||||
|
@ -309,7 +309,6 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
|||
match = lxml.html.fromstring(item.content).xpath('//a/@data-expanded-url')
|
||||
if len(match):
|
||||
link = match[0]
|
||||
clip = True
|
||||
log(link)
|
||||
else:
|
||||
link = None
|
||||
|
@ -330,7 +329,7 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
|||
log('old error')
|
||||
else:
|
||||
log('cached')
|
||||
item.pushContent(cache.get(link), clip)
|
||||
item.pushContent(cache.get(link))
|
||||
return True
|
||||
|
||||
# super-fast mode
|
||||
|
@ -356,7 +355,7 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
|||
out = readability.Document(data, url=con.url).summary(True)
|
||||
|
||||
if countWord(out) > max(count_content, count_desc) > 0:
|
||||
item.pushContent(out, clip)
|
||||
item.pushContent(out)
|
||||
cache.set(link, out)
|
||||
else:
|
||||
log('not bigger enough')
|
||||
|
@ -435,11 +434,20 @@ def Gather(url, cachePath, options):
|
|||
|
||||
if i+1 > LIM_ITEM > 0:
|
||||
item.remove()
|
||||
continue
|
||||
elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
|
||||
if Fill(item, cache, url, True) is False:
|
||||
item.remove()
|
||||
continue
|
||||
else:
|
||||
Fill(item, cache, url, clip='clip' in options)
|
||||
Fill(item, cache, url)
|
||||
|
||||
if item.desc and item.content:
|
||||
if 'clip' in options:
|
||||
item.content = item.desc + "<br/><br/>* * *<br/><br/>" + item.content
|
||||
del item.desc
|
||||
if 'alone' in options:
|
||||
del item.desc
|
||||
|
||||
log(len(rss.items))
|
||||
log(time.time() - startTime)
|
||||
|
|
Loading…
Reference in New Issue