Use etree.tostring 'method' arg

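Passing method='html' makes lxml serialize the tree as HTML instead of its default XML output (which, for example, writes empty elements as self-closing tags), giving appropriately formatted HTML code.
Without it, some pages might be rendered as blank.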
This commit is contained in:
2020-05-13 11:44:34 +02:00
parent 7d0d416610
commit 22005065e8
3 changed files with 6 additions and 6 deletions

View File

@@ -294,7 +294,7 @@ def ItemAfter(item, options):
for link in content.xpath('//a'):
log(link.text_content())
link.drop_tag()
item.content = lxml.etree.tostring(content)
item.content = lxml.etree.tostring(content, method='html')
if options.noref:
item.link = ''
@@ -612,7 +612,7 @@ def cgi_get(environ, start_response):
for elem in html.xpath('//'+tag):
elem.getparent().remove(elem)
output = lxml.etree.tostring(html.getroottree(), encoding='utf-8')
output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')
elif options.get == 'article':
output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)