# Requires the third-party `pymysql` package (also listed in README.md and
# requirements.txt).
import pymysql.cursors


class MySQLCacheHandler(BaseCache):
    """Cache backend that stores entries in a MySQL table named ``data``.

    Entries are keyed by URL; each value is a
    ``(code, msg, headers, data, timestamp)`` tuple, matching the column
    order of the table created in ``__init__``.

    NB. Requires mono-threading, as pymysql isn't thread-safe.
    """

    def __init__(self, user, password, database, host='localhost'):
        """Connect to the MySQL server and create the cache table if needed.

        The connection is opened with ``autocommit=True``, so every
        statement issued by this class is committed immediately.
        """
        self.con = pymysql.connect(host=host, user=user, password=password,
                                   database=database, charset='utf8',
                                   autocommit=True)

        # NOTE(review): MySQL's BLOB/TEXT types cap at 65,535 bytes and the
        # url key at 255 characters -- confirm these limits suit the cached
        # payloads.
        with self.con.cursor() as cursor:
            cursor.execute(
                'CREATE TABLE IF NOT EXISTS data '
                '(url VARCHAR(255) NOT NULL PRIMARY KEY, code INT, msg TEXT, '
                'headers TEXT, data BLOB, timestamp INT)')

    def __del__(self):
        """Close the connection when the handler is garbage-collected."""
        # `con` is missing when pymysql.connect() raised inside __init__;
        # __del__ still runs on the half-built object in that case.
        con = getattr(self, 'con', None)

        if con is not None:
            con.close()

    def __getitem__(self, url):
        """Return the ``(code, msg, headers, data, timestamp)`` tuple for url.

        Raises:
            KeyError: if no row is stored for ``url``.
        """
        # The context manager closes the cursor once the row is fetched.
        with self.con.cursor() as cursor:
            cursor.execute('SELECT * FROM data WHERE url=%s', (url,))
            row = cursor.fetchone()

        if not row:
            raise KeyError(url)

        # Drop the leading url column; callers only want the cached value.
        return row[1:]

    def __setitem__(self, url, value):  # (code, msg, headers, data, timestamp)
        """Insert the entry for ``url``, or overwrite it if it already exists.

        ``value`` is a ``(code, msg, headers, data, timestamp)`` sequence.
        """
        value = tuple(value)

        # Single-statement upsert: avoids the separate existence check (an
        # extra round trip) and the window between that check and the write.
        with self.con.cursor() as cursor:
            cursor.execute(
                'INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s) '
                'ON DUPLICATE KEY UPDATE '
                'code=%s, msg=%s, headers=%s, data=%s, timestamp=%s',
                (url,) + value + value)