From 7d005e9a65d2e8b471fe10796a41f4bfc807c0e9 Mon Sep 17 00:00:00 2001
From: pictuga
Date: Thu, 25 Oct 2018 01:08:25 +0200
Subject: [PATCH] readabilite: run the new cleaning code

---
 morss/readabilite.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/morss/readabilite.py b/morss/readabilite.py
index e8a061a..b4c43e8 100644
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -132,6 +132,12 @@ def write_score_all(root, grades):
         node.attrib['score'] = str(int(grades.get(node, 0)))
 
 
+def clean_root(root):
+    for node in list(root):
+        clean_root(node)
+        clean_node(node)
+
+
 def clean_node(node):
     parent = node.getparent()
 
@@ -277,4 +283,6 @@ def get_article(data, url=None, encoding=None):
     if url:
         best.make_links_absolute(url)
 
+    clean_root(best)
+
     return lxml.etree.tostring(best, pretty_print=True)