Add :smart :noref modes, update README
parent
f991802d9e
commit
7211093cc5
|
@ -40,6 +40,8 @@ The arguments are:
|
|||
- Advanced
|
||||
- `csv`: export to csv
|
||||
- `md`: convert articles to Markdown
|
||||
- `nolink`: drop links, but keep the links' inner text
|
||||
- `noref`: drop items' link
|
||||
- `cache`: only take articles from the cache (i.e. don't grab new articles' content), so as to save time
|
||||
- `debug`: get some feedback from the script's execution. Useful for debugging
|
||||
- `theforce`: force download the rss feed
|
||||
|
|
|
@ -11,6 +11,7 @@ from fnmatch import fnmatch
|
|||
import re
|
||||
import json
|
||||
|
||||
import lxml.etree
|
||||
import lxml.html
|
||||
|
||||
import feeds
|
||||
|
@ -628,6 +629,16 @@ def After(rss, options):
|
|||
if not options.keep:
|
||||
del item.desc
|
||||
|
||||
if options.nolink and item.content:
|
||||
content = lxml.html.fromstring(item.content)
|
||||
for link in content.xpath('//a'):
|
||||
log(link.text_content())
|
||||
link.drop_tag()
|
||||
item.content = lxml.etree.tostring(content)
|
||||
|
||||
if options.noref:
|
||||
item.link = ''
|
||||
|
||||
if options.md:
|
||||
conv = HTML2Text(baseurl=item.link)
|
||||
conv.unicode_snob = True
|
||||
|
|
Loading…
Reference in New Issue