From c71cf5d5ce3d0decb9fe71d9813ff2b102d62e81 Mon Sep 17 00:00:00 2001 From: pictuga Date: Mon, 8 Nov 2021 21:57:43 +0100 Subject: [PATCH] caching: fix diskcache implementation --- README.md | 10 ++++++---- morss/caching.py | 7 +++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b06e924..5ad1035 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Some features of morss: - Follow 301/meta redirects - Recover xml feeds with corrupt encoding - Supports gzip-compressed http content -- HTTP caching with 3 different backends (in-memory/sqlite/mysql) +- HTTP caching with different backends (in-memory/sqlite/mysql/redis/diskcache) - Works as server/cli tool - Deobfuscate various tracking links @@ -60,8 +60,8 @@ Full installation (including optional dependencies) pip install git+https://git.pictuga.com/pictuga/morss.git#[full] ``` -The full install includes mysql and redis (possible cache backends). Otherwise, -only in-memory and sqlite3 caches are available. +The full install includes mysql, redis and diskcache (possible cache backends). +Otherwise, only in-memory and sqlite3 caches are available. The dependency `lxml` is fairly long to install (especially on Raspberry Pi, as C code needs to be compiled). If possible on your distribution, try installing @@ -390,12 +390,14 @@ will be cleared every time the program is run). Path can be defined with environment variables: `MYSQL_USER`, `MYSQL_PWD`, `MYSQL_DB`, `MYSQL_HOST` - `CACHE=redis`: Redis cache. Connection can be defined with the following environment variables: `REDIS_HOST`, `REDIS_PORT`, `REDIS_DB`, `REDIS_PWD` +- `CACHE=diskcache`: disk-based cache. Target directory can be defined with +`DISKCAHE_DIR`. To limit the size of the cache: - `CACHE_SIZE` sets the target number of items in the cache (further items will be deleted but the cache might be temporarily bigger than that). Defaults to 1k -entries. +entries. NB. 
When using `diskcache`, this is the cache max size in Bytes. - `CACHE_LIFESPAN` (seconds) sets how often the cache must be trimmed (i.e. cut down to the number of items set in `CACHE_SIZE`). Defaults to 1min. diff --git a/morss/caching.py b/morss/caching.py index b5f55d6..3d1ecec 100644 --- a/morss/caching.py +++ b/morss/caching.py @@ -168,11 +168,14 @@ class DiskCacheHandler(BaseCache): def __init__(self, directory=None, **kwargs): self.cache = diskcache.Cache(directory=directory, eviction_policy='least-frequently-used', **kwargs) + def __del__(self): + self.cache.close() + def trim(self): self.cache.cull() def __getitem__(self, key): - return self.cache['key'] + return self.cache[key] def __setitem__(self, key, data): self.cache.set(key, data) @@ -203,7 +206,7 @@ if 'CACHE' in os.environ: elif os.environ['CACHE'] == 'diskcache': default_cache = DiskCacheHandler( directory = os.getenv('DISKCAHE_DIR', '/tmp/morss-diskcache'), - size_limit = CACHE_SIZE * 102400 # assuming 1 cache item is 100kiB + size_limit = CACHE_SIZE # in Bytes ) else: