Compare commits
11 Commits
master
...
046f3f9f3d
Author | SHA1 | Date | |
---|---|---|---|
046f3f9f3d | |||
db8e046eae | |||
b4b1e93289 | |||
8a329fbb6d | |||
77159b99ca | |||
c158e65192 | |||
0d64964a02 | |||
e8271ae9a0 | |||
2abe061422 | |||
57bd94d42f | |||
69cdf05341 |
15
.drone.yml
Normal file
15
.drone.yml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
kind: pipeline
|
||||||
|
name: default
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: isort
|
||||||
|
image: python:alpine
|
||||||
|
commands:
|
||||||
|
- pip install isort
|
||||||
|
- isort --check-only --diff .
|
||||||
|
- name: pylint
|
||||||
|
image: alpine
|
||||||
|
commands:
|
||||||
|
- apk add --no-cache python3 py3-lxml py3-pip py3-wheel py3-pylint py3-enchant hunspell-en
|
||||||
|
- pip3 install --no-cache-dir .
|
||||||
|
- pylint morss --rcfile=.pylintrc --disable=C,R,W --fail-under=8
|
50
.pylintrc
Normal file
50
.pylintrc
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
[MASTER]
|
||||||
|
ignore=CVS
|
||||||
|
suggestion-mode=yes
|
||||||
|
extension-pkg-allow-list=lxml.etree
|
||||||
|
|
||||||
|
[MESSAGES CONTROL]
|
||||||
|
disable=missing-function-docstring,
|
||||||
|
missing-class-docstring,
|
||||||
|
missing-module-docstring,
|
||||||
|
wrong-spelling-in-comment,
|
||||||
|
|
||||||
|
[REPORTS]
|
||||||
|
reports=yes
|
||||||
|
score=yes
|
||||||
|
|
||||||
|
[SPELLING]
|
||||||
|
spelling-dict=en_GB
|
||||||
|
spelling-ignore-words=morss
|
||||||
|
|
||||||
|
[STRING]
|
||||||
|
check-quote-consistency=yes
|
||||||
|
check-str-concat-over-line-jumps=yes
|
||||||
|
|
||||||
|
[VARIABLES]
|
||||||
|
allow-global-unused-variables=no
|
||||||
|
init-import=no
|
||||||
|
|
||||||
|
[FORMAT]
|
||||||
|
expected-line-ending-format=LF
|
||||||
|
indent-string=' '
|
||||||
|
max-line-length=120
|
||||||
|
max-module-lines=1000
|
||||||
|
|
||||||
|
[BASIC]
|
||||||
|
argument-naming-style=snake_case
|
||||||
|
attr-naming-style=snake_case
|
||||||
|
class-attribute-naming-style=snake_case
|
||||||
|
class-const-naming-style=UPPER_CASE
|
||||||
|
class-naming-style=PascalCase
|
||||||
|
const-naming-style=UPPER_CASE
|
||||||
|
function-naming-style=snake_case
|
||||||
|
inlinevar-naming-style=snake_case
|
||||||
|
method-naming-style=snake_case
|
||||||
|
module-naming-style=snake_case
|
||||||
|
variable-naming-style=snake_case
|
||||||
|
|
||||||
|
include-naming-hint=yes
|
||||||
|
|
||||||
|
bad-names=foo, bar
|
||||||
|
good-names=i, j, k
|
@@ -1,5 +1,7 @@
|
|||||||
# Morss - Get full-text RSS feeds
|
# Morss - Get full-text RSS feeds
|
||||||
|
|
||||||
|
[Build status](https://ci.pictuga.com/pictuga/morss)
|
||||||
|
|
||||||
_GNU AGPLv3 code_
|
_GNU AGPLv3 code_
|
||||||
_Provided logo is CC BY-NC-SA 4.0_
|
_Provided logo is CC BY-NC-SA 4.0_
|
||||||
|
|
||||||
|
@@ -16,5 +16,8 @@
|
|||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
# ran on `import morss`
|
# ran on `import morss`
|
||||||
|
|
||||||
|
# pylint: disable=unused-import,unused-variable
|
||||||
|
|
||||||
from .morss import *
|
from .morss import *
|
||||||
from .wsgi import application
|
from .wsgi import application
|
||||||
|
@@ -20,9 +20,7 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from . import wsgi
|
from . import cli, wsgi
|
||||||
from . import cli
|
|
||||||
|
|
||||||
from .morss import MorssException
|
from .morss import MorssException
|
||||||
|
|
||||||
|
|
||||||
|
@@ -15,12 +15,11 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License along
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import sys
|
|
||||||
import os.path
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
from .morss import FeedFetch, FeedGather, FeedFormat
|
from .morss import FeedFetch, FeedFormat, FeedGather, Options
|
||||||
from .morss import Options
|
|
||||||
|
|
||||||
|
|
||||||
def cli_app():
|
def cli_app():
|
||||||
|
@@ -16,30 +16,34 @@
|
|||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
|
|
||||||
import zlib
|
|
||||||
from io import BytesIO, StringIO
|
|
||||||
import re
|
|
||||||
import chardet
|
|
||||||
from cgi import parse_header
|
|
||||||
import time
|
|
||||||
import threading
|
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import zlib
|
||||||
|
from cgi import parse_header
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from io import BytesIO, StringIO
|
||||||
|
|
||||||
|
import chardet
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
from urlparse import urlparse, urlunparse
|
|
||||||
import mimetools
|
import mimetools
|
||||||
|
from urllib2 import (BaseHandler, HTTPCookieProcessor, HTTPRedirectHandler,
|
||||||
|
Request, addinfourl, build_opener, parse_http_list,
|
||||||
|
parse_keqv_list)
|
||||||
|
from urlparse import urlparse, urlunparse
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
|
||||||
from urllib.parse import quote
|
|
||||||
from urllib.parse import urlparse, urlunparse
|
|
||||||
import email
|
import email
|
||||||
|
from urllib.parse import quote, urlparse, urlunparse
|
||||||
|
from urllib.request import (BaseHandler, HTTPCookieProcessor,
|
||||||
|
HTTPRedirectHandler, Request, addinfourl,
|
||||||
|
build_opener, parse_http_list, parse_keqv_list)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
@@ -131,6 +135,7 @@ def custom_opener(follow=None, delay=None):
|
|||||||
handlers.append(SizeLimitHandler(500*1024)) # 500KiB
|
handlers.append(SizeLimitHandler(500*1024)) # 500KiB
|
||||||
handlers.append(HTTPCookieProcessor())
|
handlers.append(HTTPCookieProcessor())
|
||||||
handlers.append(GZIPHandler())
|
handlers.append(GZIPHandler())
|
||||||
|
handlers.append(HTTPAllRedirectHandler())
|
||||||
handlers.append(HTTPEquivHandler())
|
handlers.append(HTTPEquivHandler())
|
||||||
handlers.append(HTTPRefreshHandler())
|
handlers.append(HTTPRefreshHandler())
|
||||||
handlers.append(UAHandler(random.choice(DEFAULT_UAS)))
|
handlers.append(UAHandler(random.choice(DEFAULT_UAS)))
|
||||||
@@ -397,6 +402,11 @@ class HTTPEquivHandler(RespStrHandler):
|
|||||||
resp.headers[meta.get('http-equiv').lower()] = meta.get('content')
|
resp.headers[meta.get('http-equiv').lower()] = meta.get('content')
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPAllRedirectHandler(HTTPRedirectHandler):
|
||||||
|
def http_error_308(self, req, fp, code, msg, headers):
|
||||||
|
return self.http_error_301(req, fp, 301, msg, headers)
|
||||||
|
|
||||||
|
|
||||||
class HTTPRefreshHandler(BaseHandler):
|
class HTTPRefreshHandler(BaseHandler):
|
||||||
handler_order = 700 # HTTPErrorProcessor has a handler_order of 1000
|
handler_order = 700 # HTTPErrorProcessor has a handler_order of 1000
|
||||||
|
|
||||||
@@ -620,7 +630,7 @@ class BaseCache:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3 # isort:skip
|
||||||
|
|
||||||
|
|
||||||
class SQLiteCache(BaseCache):
|
class SQLiteCache(BaseCache):
|
||||||
@@ -657,7 +667,7 @@ class SQLiteCache(BaseCache):
|
|||||||
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
|
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
|
||||||
|
|
||||||
|
|
||||||
import pymysql.cursors
|
import pymysql.cursors # isort:skip
|
||||||
|
|
||||||
|
|
||||||
class MySQLCacheHandler(BaseCache):
|
class MySQLCacheHandler(BaseCache):
|
||||||
|
@@ -15,35 +15,35 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License along
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import sys
|
|
||||||
import os.path
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
from datetime import datetime
|
sys.path.append('/home/paul/Documents/Code/morss/lib')
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from copy import deepcopy
|
||||||
|
from datetime import datetime
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from dateutil import tz
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
from copy import deepcopy
|
|
||||||
|
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
from dateutil import tz
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from .readabilite import parse as html_parse
|
from .readabilite import parse as html_parse
|
||||||
|
|
||||||
json.encoder.c_make_encoder = None
|
json.encoder.c_make_encoder = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from StringIO import StringIO
|
|
||||||
from ConfigParser import RawConfigParser
|
from ConfigParser import RawConfigParser
|
||||||
|
from StringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from io import StringIO
|
|
||||||
from configparser import RawConfigParser
|
from configparser import RawConfigParser
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
|
@@ -16,30 +16,25 @@
|
|||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil import tz
|
|
||||||
|
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
import re
|
|
||||||
|
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
from dateutil import tz
|
||||||
|
|
||||||
from . import feeds
|
from . import crawler, feeds, readabilite
|
||||||
from . import crawler
|
|
||||||
from . import readabilite
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from httplib import HTTPException
|
from httplib import HTTPException
|
||||||
from urlparse import urlparse, urljoin, parse_qs
|
from urlparse import parse_qs, urljoin, urlparse
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from http.client import HTTPException
|
from http.client import HTTPException
|
||||||
from urllib.parse import urlparse, urljoin, parse_qs
|
from urllib.parse import parse_qs, urljoin, urlparse
|
||||||
|
|
||||||
|
|
||||||
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
|
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
|
||||||
|
@@ -15,10 +15,11 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License along
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
import lxml.html
|
import lxml.html
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def parse(data, encoding=None):
|
def parse(data, encoding=None):
|
||||||
@@ -352,6 +353,7 @@ def get_article(data, url=None, encoding_in=None, encoding_out='unicode', debug=
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from . import crawler
|
from . import crawler
|
||||||
|
|
||||||
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
||||||
|
@@ -15,16 +15,16 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License along
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import sys
|
import cgitb
|
||||||
|
import mimetypes
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import lxml.etree
|
import sys
|
||||||
|
|
||||||
import cgitb
|
|
||||||
import wsgiref.util
|
|
||||||
import wsgiref.simple_server
|
|
||||||
import wsgiref.handlers
|
import wsgiref.handlers
|
||||||
import mimetypes
|
import wsgiref.simple_server
|
||||||
|
import wsgiref.util
|
||||||
|
|
||||||
|
import lxml.etree
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
@@ -33,11 +33,9 @@ except ImportError:
|
|||||||
# python 3
|
# python 3
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
from . import crawler
|
from . import crawler, readabilite
|
||||||
from . import readabilite
|
from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
|
||||||
from .morss import FeedFetch, FeedGather, FeedFormat
|
MorssException, Options, log)
|
||||||
from .morss import Options, log, TIMEOUT, DELAY, MorssException
|
|
||||||
|
|
||||||
|
|
||||||
PORT = int(os.getenv('PORT', 8080))
|
PORT = int(os.getenv('PORT', 8080))
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user