Use internal readability fork

Much simpler: it doesn't clean the HTML and is probably less effective, but it is much faster
This commit is contained in:
2016-05-31 02:50:03 +02:00
parent 2b9bfb47e5
commit b14381f575
2 changed files with 98 additions and 9 deletions

View File

@@ -79,16 +79,10 @@ def log(txt, force=False):
print(repr(txt))
try:
from readability.readability import Document
from . import readabilite
def readability(html, url):
return readabilite.get_article(html)
def readability(html, url=None):
return Document(html, url=url).summary()
except ImportError:
import breadability.readable
def readability(html, url=None):
return breadability.readable.Article(html, url=url).readable
def len_html(txt):