parent 036e5190f1
commit 7e45b2611d
@@ -85,7 +85,6 @@ The arguments are:
 - `noref`: drop items' link
 - `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time
 - `debug`: to have some feedback from the script execution. Useful for debugging
-- `mono`: disable multithreading while fetching, makes debugging easier
 - `theforce`: force download the rss feed and ignore cached http errors
 - `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
 - http server only
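As a quick illustration of how boolean flags like these end up being consumed, here is a minimal sketch mirroring the `options.cache` / `options.mono` checks visible in the code diff further down. The `Options` class below is a hypothetical stand-in, not the actual morss implementation:

```python
# Hypothetical illustration only -- morss has its own options handling.
class Options:
    def __init__(self, names=()):
        self.names = set(names)

    def __getattr__(self, key):
        # flags that were not passed simply read as False,
        # so tests like `options.mono` are always safe
        return key in self.names


options = Options(['cache', 'debug'])

if options.cache:
    print('will only use cached articles')

if options.mono:
    print('multithreading disabled')
```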
@@ -262,7 +261,7 @@ morss uses caching to make loading faster. There are 2 possible cache backends
 
 - `SQLiteCache`: sqlite3 cache. Default file location is in-memory (i.e. it will
   be cleared every time the program is run)
-- `MySQLCacheHandler`: /!\ Does NOT support multi-threading
+- `MySQLCacheHandler`
 
 ## Configuration
 ### Length limitation
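To illustrate why the in-memory default does not persist between runs, here is a small standalone sketch using Python's standard `sqlite3` module. This is not the morss `SQLiteCache` implementation, only the underlying behaviour it relies on:

```python
import sqlite3

# ':memory:' creates a database that lives only as long as this connection,
# so anything cached here disappears when the program exits
con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE cache (url TEXT PRIMARY KEY, data BLOB)')
con.execute('INSERT INTO cache VALUES (?, ?)', ('http://example.com/feed', b'<rss/>'))
print(con.execute('SELECT COUNT(*) FROM cache').fetchone()[0])  # prints 1

# to keep the cache across runs, a file path would be used instead, e.g.:
# con = sqlite3.connect('morss-cache.db')
```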
@@ -281,7 +280,6 @@ different values at the top of the script.
 
 - `DELAY` sets the browser cache delay, only for HTTP clients
 - `TIMEOUT` sets the HTTP timeout when fetching rss feeds and articles
-- `THREADS` sets the number of threads to use. `1` makes no use of multithreading.
 
 ### Content matching
 
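A tiny sketch of the "edit the values at the top of the script" approach these settings describe. The constant names come from the README; the surrounding helpers, values and header are assumptions for illustration only:

```python
# module-level knobs, edited directly in the script
DELAY = 10 * 60  # browser cache delay for HTTP clients, in seconds (example value)
TIMEOUT = 4      # HTTP timeout for fetching feeds/articles, in seconds (example value)

try:
    from urllib.request import urlopen  # python 3
except ImportError:
    from urllib2 import urlopen  # python 2


def fetch(url):
    # hypothetical helper: the timeout applies to every feed/article download
    return urlopen(url, timeout=TIMEOUT).read()


def http_headers():
    # the delay only matters when running as an HTTP server: it tells the
    # browser how long it may reuse its cached copy of the generated feed
    return [('Cache-Control', 'max-age=%s' % DELAY)]
```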
@@ -6,8 +6,6 @@ import time
 from datetime import datetime
 from dateutil import tz
 
-import threading
-
 from fnmatch import fnmatch
 import re
 
@@ -25,13 +23,11 @@ import cgitb
 
 try:
     # python 2
-    from Queue import Queue
     from httplib import HTTPException
     from urllib import unquote
     from urlparse import urlparse, urljoin, parse_qs
 except ImportError:
     # python 3
-    from queue import Queue
     from http.client import HTTPException
     from urllib.parse import unquote
     from urllib.parse import urlparse, urljoin, parse_qs
@@ -374,35 +370,22 @@ def FeedGather(rss, url, options):
     lim_time = LIM_TIME
     max_item = MAX_ITEM
     max_time = MAX_TIME
-    threads = THREADS
 
     if options.cache:
         max_time = 0
 
-    if options.mono:
-        threads = 1
-
-    # set
-    def runner(queue):
-        while True:
-            value = queue.get()
-            try:
-                worker(*value)
-
-            except Exception as e:
-                log('Thread Error: %s' % e.message)
-            queue.task_done()
-
-    def worker(i, item):
+    now = datetime.now(tz.tzutc())
+    sorted_items = sorted(rss.items, key=lambda x:x.updated or x.time or now, reverse=True)
+    for i, item in enumerate(sorted_items):
         if time.time() - start_time > lim_time >= 0 or i + 1 > lim_item >= 0:
             log('dropped')
             item.remove()
-            return
+            continue
 
         item = ItemBefore(item, options)
 
         if item is None:
-            return
+            continue
 
         item = ItemFix(item, url)
 
@@ -410,7 +393,7 @@ def FeedGather(rss, url, options):
             if not options.proxy:
                 if ItemFill(item, options, url, True) is False:
                     item.remove()
-                    return
+                    continue
 
         else:
             if not options.proxy:
@@ -418,25 +401,6 @@ def FeedGather(rss, url, options):
 
         item = ItemAfter(item, options)
 
-    queue = Queue()
-
-    for i in range(threads):
-        t = threading.Thread(target=runner, args=(queue,))
-        t.daemon = True
-        t.start()
-
-    now = datetime.now(tz.tzutc())
-    sorted_items = sorted(rss.items, key=lambda x:x.updated or x.time or now, reverse=True)
-    for i, item in enumerate(sorted_items):
-        if threads == 1:
-            worker(*[i, item])
-
-        else:
-            queue.put([i, item])
-
-    if threads != 1:
-        queue.join()
-
     if options.ad:
         new = rss.items.append()
         new.title = "Are you hungry?"
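For context on the block removed above: it is the classic `queue.Queue` plus daemon worker-thread pattern, where every `put()` is matched by a `task_done()` and `join()` blocks until all queued work is finished. Here is a self-contained sketch of that pattern, independent of morss, with a trivial `process` function standing in for the per-item work:

```python
import threading
from queue import Queue  # spelled `Queue` on python 2, as the diff above shows


def process(i, item):
    print('item %d: %s' % (i, item))


def runner(queue):
    # daemon workers loop forever, pulling work until the program exits
    while True:
        i, item = queue.get()
        try:
            process(i, item)
        except Exception as e:
            print('Thread Error: %s' % e)
        queue.task_done()  # must be called once per get() for join() to return


queue = Queue()
for _ in range(4):
    t = threading.Thread(target=runner, args=(queue,))
    t.daemon = True
    t.start()

for i, item in enumerate(['a', 'b', 'c']):
    queue.put((i, item))

queue.join()  # blocks until every queued item has been marked task_done()
```

With the threads gone, this commit does the same work inline in the `for i, item in enumerate(sorted_items)` loop, which is why each early `return` inside the old `worker()` becomes a `continue`.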