From 5865af64f9470fdc3311cdc6a439dfa3425f4bc2 Mon Sep 17 00:00:00 2001
From: pictuga
Date: Fri, 20 Mar 2020 12:18:13 +0100
Subject: [PATCH] Fix indent output for html/xml

---
NOTE(review): line breaks and leading indentation were restored from a
whitespace-collapsed copy of this patch. Hunk line counts match the @@
headers, but the exact leading whitespace of context lines is presumed from
Python block structure -- confirm against the upstream commit, or apply with
`git apply --ignore-whitespace`.

 morss/feeds.py | 5 +++--
 morss/morss.py | 6 +++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/morss/feeds.py b/morss/feeds.py
index 6956708..8eec47a 100644
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -291,7 +291,7 @@ class ParserXML(ParserBase):
         'rssfake': 'http://purl.org/rss/1.0/'}
 
     def parse(self, raw):
-        parser = etree.XMLParser(recover=True)
+        parser = etree.XMLParser(recover=True, remove_blank_text=True) # remove_blank_text needed for pretty_print
         return etree.fromstring(raw, parser)
 
     def remove(self):
@@ -441,7 +441,8 @@ class ParserHTML(ParserXML):
     mimetype = ['text/html', 'application/xhtml+xml']
 
     def parse(self, raw):
-        return lxml.html.fromstring(raw)
+        parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print
+        return etree.fromstring(raw, parser)
 
     def tostring(self, encoding='unicode', **k):
         return lxml.html.tostring(self.root, encoding=encoding, **k)
diff --git a/morss/morss.py b/morss/morss.py
index 517594f..af4507f 100644
--- a/morss/morss.py
+++ b/morss/morss.py
@@ -448,7 +448,11 @@ def FeedFormat(rss, options):
         return rss.tocsv(encoding='UTF-8')
 
     elif options.reader:
-        return rss.tohtml(encoding='UTF-8')
+        if options.indent:
+            return rss.tohtml(encoding='UTF-8', pretty_print=True)
+
+        else:
+            return rss.tohtml(encoding='UTF-8')
 
     else:
         if options.indent: