From dbdca910d8cc8b2af893172ab20586203c4234d8 Mon Sep 17 00:00:00 2001 From: pictuga Date: Mon, 3 Jan 2022 17:51:49 +0000 Subject: [PATCH] readabilite: fix new parser code & drop PIs --- morss/readabilite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/morss/readabilite.py b/morss/readabilite.py index a7ac92f..716bdd1 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -25,7 +25,7 @@ import lxml.html.soupparser class CustomTreeBuilder(bs4.builder._lxml.LXMLTreeBuilder): def default_parser(self, encoding): - return lxml.html.HTMLParser(remove_comments=True, encoding=encoding) + return lxml.html.HTMLParser(target=self, remove_comments=True, remove_pis=True, encoding=encoding) def parse(data, encoding=None):