crawler: comment on how urllib works

master
pictuga 2021-03-25 23:49:58 +01:00
parent 981da9e66a
commit 7342ab26d2
1 changed files with 9 additions and 0 deletions

View File

@ -114,6 +114,15 @@ def custom_handler(follow=None, delay=None):
# HTTPDefaultErrorHandler, HTTPRedirectHandler,
# FTPHandler, FileHandler, HTTPErrorProcessor]
# & HTTPSHandler
#
# When processing a request:
# (1) all the *_request methods are run
# (2) the *_open methods are run until one returns something other than None
# (3) all the *_response methods are run
#
# During (3), if an HTTP error occurs (i.e. the response code is not 2XX),
# the http_error_* methods are run until one returns something other than
# None. If they all return None, a Python exception is raised.
#handlers.append(DebugHandler()) #handlers.append(DebugHandler())
handlers.append(SizeLimitHandler(500*1024)) # 500KiB handlers.append(SizeLimitHandler(500*1024)) # 500KiB