From b47e40246cc92a66b21776ef2a591e5c79238eef Mon Sep 17 00:00:00 2001 From: pictuga Date: Thu, 19 Mar 2020 11:35:51 +0100 Subject: [PATCH] feeds: clean up html code handling --- morss/feeds.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/morss/feeds.py b/morss/feeds.py index 4f5e964..d10dda5 100644 --- a/morss/feeds.py +++ b/morss/feeds.py @@ -320,8 +320,10 @@ class ParserXML(ParserBase): @staticmethod def _clean_node(xml): - if xml is not None and len(xml): - [xml.remove(child) for child in xml] + if xml is not None: + if len(xml): + [xml.remove(child) for child in xml] + xml.text = None def rule_search_all(self, rule): @@ -401,9 +403,12 @@ class ParserXML(ParserBase): else: if html_rich: # atom stuff + if 'atom' in rule: + match.attrib['type'] = 'xhtml' + self._clean_node(match) - match.attrib['type'] = 'xhtml' match.append(lxml.html.fragment_fromstring(value, create_parent='div')) + match.find('div').drop_tag() else: if match is not None and len(match):