From 72d03f21fe24a6a8801615d039a77b78d54c7508 Mon Sep 17 00:00:00 2001 From: pictuga Date: Thu, 25 Oct 2018 01:11:29 +0200 Subject: [PATCH] readabilite: forgot count_content Was meant to be in an earlier commit --- morss/readabilite.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/morss/readabilite.py b/morss/readabilite.py index 50f64ec..007fa5c 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -38,6 +38,11 @@ def count_words(string): return count +def count_content(node): + # count words and imgs + return count_words(node.text_content()) + len(node.findall('.//img')) + + regex_bad = re.compile('|'.join(['comment', 'community', 'extra', 'foot', 'sponsor', 'pagination', 'pager', 'tweet', 'twitter', 'com-', 'masthead', 'media', 'meta', 'related', 'shopping', 'tags', 'tool', 'author', 'about']),