readabilite: remove duplicate code
This commit is contained in:
@@ -207,8 +207,10 @@ def clean_root(root, keep_threshold=None):
|
||||
def clean_node(node, keep_threshold=None):
|
||||
parent = node.getparent()
|
||||
|
||||
# remove comments
|
||||
if (isinstance(node, lxml.html.HtmlComment)
|
||||
or isinstance(node, lxml.html.HtmlProcessingInstruction)):
|
||||
parent.remove(node)
|
||||
return
|
||||
|
||||
if parent is None:
|
||||
@@ -242,11 +244,6 @@ def clean_node(node, keep_threshold=None):
|
||||
parent.remove(node)
|
||||
return
|
||||
|
||||
# remove comments
|
||||
if isinstance(node, lxml.html.HtmlComment) or isinstance(node, lxml.html.HtmlProcessingInstruction):
|
||||
parent.remove(node)
|
||||
return
|
||||
|
||||
# remove if too many kids & too high link density
|
||||
wc = count_words(node.text_content())
|
||||
if wc != 0 and len(list(node.iter())) > 3:
|
||||
|
Reference in New Issue
Block a user