From e1ed33f3207612ee9b96e60ac7697089ad2a896a Mon Sep 17 00:00:00 2001
From: pictuga <contact@pictuga.com>
Date: Wed, 9 Feb 2022 15:57:12 +0100
Subject: [PATCH] crawler: improve html iter code

The tag matcher now ignores tags that have no attributes. Avoids a bug with unclosed tags.
---
 morss/crawler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/morss/crawler.py b/morss/crawler.py
index 4e44cf9..955cf02 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -368,7 +368,7 @@ class BrowserlyHeaderHandler(BaseHandler):
 def iter_html_tag(html_str, tag_name):
     " To avoid parsing whole pages when looking for a simple tag "
 
-    re_tag = r'<%s(\s*[^>])*>' % tag_name
+    re_tag = r'<%s\s+[^>]+>' % tag_name
     re_attr = r'(?P<key>[^=\s]+)=[\'"](?P<value>[^\'"]+)[\'"]'
 
     for tag_match in re.finditer(re_tag, html_str):