crawler: use chardet again
Falling back to chardet is always nice in case no encoding is specified. The chardet call somehow got dropped with commit 245ba99, most probably by accident.
			
			
This commit is contained in:
		@@ -6,6 +6,7 @@ import socket
 | 
			
		||||
from gzip import GzipFile
 | 
			
		||||
from io import BytesIO, StringIO
 | 
			
		||||
import re
 | 
			
		||||
import chardet
 | 
			
		||||
import sqlite3
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
@@ -58,10 +59,14 @@ def detect_encoding(data, con=None):
 | 
			
		||||
    if match:
 | 
			
		||||
        return match.groups()[0].lower().decode()
 | 
			
		||||
 | 
			
		||||
    match = re.search(b'encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
 | 
			
		||||
    match = re.search(b'encoding=["\']?([0-9a-zA-Z-]+)', data[:1000])
 | 
			
		||||
    if match:
 | 
			
		||||
        return match.groups()[0].lower().decode()
 | 
			
		||||
 | 
			
		||||
    enc = chardet.detect(data[:1000])['encoding']
 | 
			
		||||
    if enc:
 | 
			
		||||
            return enc
 | 
			
		||||
 | 
			
		||||
    return 'utf-8'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user