readabilite: some technical improvements for score
Linear, removed misplaced debugging code
master
parent
040d2cb889
commit
787d90fac0
|
@ -130,6 +130,9 @@ def score_all(root):
|
||||||
|
|
||||||
|
|
||||||
def spread_score(node, score, grades):
|
def spread_score(node, score, grades):
|
||||||
|
" Spread the node's score to its parents, on a linear way "
|
||||||
|
|
||||||
|
delta = score / 2
|
||||||
for ancestor in [node,] + list(node.iterancestors()):
|
for ancestor in [node,] + list(node.iterancestors()):
|
||||||
if score >= 1 or ancestor is node:
|
if score >= 1 or ancestor is node:
|
||||||
try:
|
try:
|
||||||
|
@ -137,7 +140,7 @@ def spread_score(node, score, grades):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
grades[ancestor] = score
|
grades[ancestor] = score
|
||||||
|
|
||||||
score /= 2
|
score -= delta
|
||||||
|
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
@ -145,7 +148,7 @@ def spread_score(node, score, grades):
|
||||||
|
|
||||||
def write_score_all(root, grades):
|
def write_score_all(root, grades):
|
||||||
for node in root.iter():
|
for node in root.iter():
|
||||||
node.attrib['score'] = str(int(grades[node]))
|
node.attrib['score'] = str(int(grades.get(node, 0)))
|
||||||
|
|
||||||
|
|
||||||
def clean_node(node):
|
def clean_node(node):
|
||||||
|
@ -243,15 +246,15 @@ def rank_nodes(grades):
|
||||||
return sorted(grades.items(), key=lambda x: x[1], reverse=True)
|
return sorted(grades.items(), key=lambda x: x[1], reverse=True)
|
||||||
|
|
||||||
|
|
||||||
def get_best_node(grades, highlight=False):
|
def get_best_node(grades):
|
||||||
|
" To pick the best (raw) node. Another function will clean it "
|
||||||
|
|
||||||
|
if len(grades) == 1:
|
||||||
|
return grades[0]
|
||||||
|
|
||||||
top = rank_nodes(grades)
|
top = rank_nodes(grades)
|
||||||
lowest = lowest_common_ancestor(top[0][0], top[1][0], 3)
|
lowest = lowest_common_ancestor(top[0][0], top[1][0], 3)
|
||||||
|
|
||||||
if highlight:
|
|
||||||
top[0][0].attrib['style'] = 'border: 2px solid blue'
|
|
||||||
top[1][0].attrib['style'] = 'border: 2px solid green'
|
|
||||||
lowest.attrib['style'] = 'outline: 2px solid red'
|
|
||||||
|
|
||||||
return lowest
|
return lowest
|
||||||
|
|
||||||
|
|
||||||
|
@ -259,8 +262,11 @@ def get_article(data, url=None, encoding=None):
|
||||||
html = parse(data, encoding)
|
html = parse(data, encoding)
|
||||||
br2p(html)
|
br2p(html)
|
||||||
scores = score_all(html)
|
scores = score_all(html)
|
||||||
best = get_best_node(scores)
|
|
||||||
|
|
||||||
|
if not len(scores):
|
||||||
|
return None
|
||||||
|
|
||||||
|
best = get_best_node(scores)
|
||||||
wc = count_words(best.text_content())
|
wc = count_words(best.text_content())
|
||||||
wca = count_words(' '.join([x.text_content() for x in best.findall('.//a')]))
|
wca = count_words(' '.join([x.text_content() for x in best.findall('.//a')]))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue