crawler: do not re-save cached response
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
Otherwise cache never gets invalidated!
master
parent
3e2b81286f
commit
917aa0fbc5
|
@ -500,6 +500,8 @@ class CacheHandler(BaseHandler):
|
||||||
self.cache[key] = pickle.dumps(data, 0)
|
self.cache[key] = pickle.dumps(data, 0)
|
||||||
|
|
||||||
def cached_response(self, req, fallback=None):
|
def cached_response(self, req, fallback=None):
|
||||||
|
req.from_morss_cache = True
|
||||||
|
|
||||||
data = self.load(req.get_full_url())
|
data = self.load(req.get_full_url())
|
||||||
|
|
||||||
if data is not None:
|
if data is not None:
|
||||||
|
@ -512,6 +514,10 @@ class CacheHandler(BaseHandler):
|
||||||
return fallback
|
return fallback
|
||||||
|
|
||||||
def save_response(self, req, resp):
|
def save_response(self, req, resp):
|
||||||
|
if req.from_morss_cache:
|
||||||
|
# do not re-save (would reset the timing)
|
||||||
|
return resp
|
||||||
|
|
||||||
data = resp.read()
|
data = resp.read()
|
||||||
|
|
||||||
self.save(req.get_full_url(), {
|
self.save(req.get_full_url(), {
|
||||||
|
@ -530,6 +536,8 @@ class CacheHandler(BaseHandler):
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
def http_request(self, req):
|
def http_request(self, req):
|
||||||
|
req.from_morss_cache = False # to track whether it comes from cache
|
||||||
|
|
||||||
data = self.load(req.get_full_url())
|
data = self.load(req.get_full_url())
|
||||||
|
|
||||||
if data is not None:
|
if data is not None:
|
||||||
|
@ -621,8 +629,7 @@ class CacheHandler(BaseHandler):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def http_response(self, req, resp):
|
def http_response(self, req, resp):
|
||||||
# code for after-fetch, to know whether to save to hard-drive (if stiking to http headers' will)
|
# code for after-fetch, to know whether to save to hard-drive (if sticking to http headers' will)
|
||||||
# NB. It might re-save requests pulled from cache, which will re-set the time() to the latest, i.e. lenghten its useful life
|
|
||||||
|
|
||||||
if resp.code == 304 and resp.url in self.cache:
|
if resp.code == 304 and resp.url in self.cache:
|
||||||
# we are hopefully the first after the HTTP handler, so no need
|
# we are hopefully the first after the HTTP handler, so no need
|
||||||
|
|
Loading…
Reference in New Issue