readabilite: improve score for <p>
Helps a lot with BBC and Le Monde. Might backfire on other websites, though...
master
parent
a8ac2ed1ca
commit
3fc89d5359
|
@ -63,6 +63,9 @@ def score_node(node):
|
|||
if node.tag in ['h1', 'h2', 'article']:
|
||||
score += 8
|
||||
|
||||
if node.tag in ['p']:
|
||||
score += 3
|
||||
|
||||
class_id = node.get('class', '') + node.get('id', '')
|
||||
|
||||
score += len(regex_good.findall(class_id) * 4)
|
||||
|
|
Loading…
Reference in New Issue