Compare commits

...

11 Commits

Author SHA1 Message Date
046f3f9f3d crawler: support 308 redirects
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-11 11:34:16 +02:00
db8e046eae ci: add spell check dict
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-08 22:21:49 +02:00
b4b1e93289 ci: fix spell check
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:15:53 +02:00
8a329fbb6d ci: fix pylint install
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:09:56 +02:00
77159b99ca drone: use alpine image (to benefit from pkgs)
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:07:21 +02:00
c158e65192 ci: added pylint (triggered upon error w/ score < 8 only)
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:01:49 +02:00
0d64964a02 README: add ci badge 2021-09-08 21:39:12 +02:00
e8271ae9a0 ci/cd: fix isort args
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-08 21:31:42 +02:00
2abe061422 Further isort implementation
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-08 21:29:56 +02:00
57bd94d42f ci/cd attempt
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 21:24:50 +02:00
69cdf05341 Apply isort 2021-09-08 20:54:34 +02:00
12 changed files with 131 additions and 58 deletions

15
.drone.yml Normal file
View File

@@ -0,0 +1,15 @@
# Drone CI pipeline with two steps: an import-order check (isort) and a lint pass (pylint).
kind: pipeline
name: default
steps:
# Step "isort": verify import ordering without modifying any file.
- name: isort
image: python:alpine
commands:
- pip install isort
# --check-only reports files that would be changed; --diff prints the proposed changes.
- isort --check-only --diff .
# Step "pylint": lint the `morss` package using the repository's .pylintrc.
- name: pylint
image: alpine
commands:
# Python, lxml and pylint are installed from Alpine packages instead of pip.
# NOTE(review): py3-enchant and hunspell-en presumably back pylint's spelling checker — confirm.
- apk add --no-cache python3 py3-lxml py3-pip py3-wheel py3-pylint py3-enchant hunspell-en
# Install the project itself from the checkout.
- pip3 install --no-cache-dir .
# Convention (C), refactor (R) and warning (W) messages are disabled here;
# the step fails when the resulting score is below 8.
- pylint morss --rcfile=.pylintrc --disable=C,R,W --fail-under=8

50
.pylintrc Normal file
View File

@@ -0,0 +1,50 @@
# Pylint configuration for the morss package (passed to pylint via --rcfile in .drone.yml).
[MASTER]
ignore=CVS
suggestion-mode=yes
# Allow pylint to load the lxml.etree C extension so its members can be inspected.
extension-pkg-allow-list=lxml.etree
# Docstring checks are switched off, and spelling is not checked in comments.
[MESSAGES CONTROL]
disable=missing-function-docstring,
missing-class-docstring,
missing-module-docstring,
wrong-spelling-in-comment,
# Print the full report and the global score.
[REPORTS]
reports=yes
score=yes
# Spell checking uses the British English dictionary; the project name is whitelisted.
[SPELLING]
spelling-dict=en_GB
spelling-ignore-words=morss
[STRING]
check-quote-consistency=yes
check-str-concat-over-line-jumps=yes
[VARIABLES]
allow-global-unused-variables=no
init-import=no
[FORMAT]
expected-line-ending-format=LF
# NOTE(review): the value below appears as a single space in this view; whitespace may
# have been collapsed — confirm the intended indent width against the real file.
indent-string=' '
max-line-length=120
max-module-lines=1000
# Naming conventions: snake_case everywhere except PascalCase classes and UPPER_CASE constants.
[BASIC]
argument-naming-style=snake_case
attr-naming-style=snake_case
class-attribute-naming-style=snake_case
class-const-naming-style=UPPER_CASE
class-naming-style=PascalCase
const-naming-style=UPPER_CASE
function-naming-style=snake_case
inlinevar-naming-style=snake_case
method-naming-style=snake_case
module-naming-style=snake_case
variable-naming-style=snake_case
include-naming-hint=yes
bad-names=foo, bar
good-names=i, j, k

View File

@@ -1,5 +1,7 @@
# Morss - Get full-text RSS feeds # Morss - Get full-text RSS feeds
[![Build Status](https://ci.pictuga.com/api/badges/pictuga/morss/status.svg)](https://ci.pictuga.com/pictuga/morss)
_GNU AGPLv3 code_ _GNU AGPLv3 code_
_Provided logo is CC BY-NC-SA 4.0_ _Provided logo is CC BY-NC-SA 4.0_

View File

@@ -16,5 +16,8 @@
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
# ran on `import morss` # ran on `import morss`
# pylint: disable=unused-import,unused-variable
from .morss import * from .morss import *
from .wsgi import application from .wsgi import application

View File

@@ -20,9 +20,7 @@
import os import os
import sys import sys
from . import wsgi from . import cli, wsgi
from . import cli
from .morss import MorssException from .morss import MorssException

View File

@@ -15,12 +15,11 @@
# You should have received a copy of the GNU Affero General Public License along # You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
import os.path
import argparse import argparse
import os.path
import sys
from .morss import FeedFetch, FeedGather, FeedFormat from .morss import FeedFetch, FeedFormat, FeedGather, Options
from .morss import Options
def cli_app(): def cli_app():

View File

@@ -16,30 +16,34 @@
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
import os import os
import sys
import zlib
from io import BytesIO, StringIO
import re
import chardet
from cgi import parse_header
import time
import threading
import random import random
import re
import sys
import threading
import time
import zlib
from cgi import parse_header
from collections import OrderedDict from collections import OrderedDict
from io import BytesIO, StringIO
import chardet
try: try:
# python 2 # python 2
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
from urllib import quote from urllib import quote
from urlparse import urlparse, urlunparse
import mimetools import mimetools
from urllib2 import (BaseHandler, HTTPCookieProcessor, HTTPRedirectHandler,
Request, addinfourl, build_opener, parse_http_list,
parse_keqv_list)
from urlparse import urlparse, urlunparse
except ImportError: except ImportError:
# python 3 # python 3
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
from urllib.parse import quote
from urllib.parse import urlparse, urlunparse
import email import email
from urllib.parse import quote, urlparse, urlunparse
from urllib.request import (BaseHandler, HTTPCookieProcessor,
HTTPRedirectHandler, Request, addinfourl,
build_opener, parse_http_list, parse_keqv_list)
try: try:
# python 2 # python 2
@@ -131,6 +135,7 @@ def custom_opener(follow=None, delay=None):
handlers.append(SizeLimitHandler(500*1024)) # 500KiB handlers.append(SizeLimitHandler(500*1024)) # 500KiB
handlers.append(HTTPCookieProcessor()) handlers.append(HTTPCookieProcessor())
handlers.append(GZIPHandler()) handlers.append(GZIPHandler())
handlers.append(HTTPAllRedirectHandler())
handlers.append(HTTPEquivHandler()) handlers.append(HTTPEquivHandler())
handlers.append(HTTPRefreshHandler()) handlers.append(HTTPRefreshHandler())
handlers.append(UAHandler(random.choice(DEFAULT_UAS))) handlers.append(UAHandler(random.choice(DEFAULT_UAS)))
@@ -397,6 +402,11 @@ class HTTPEquivHandler(RespStrHandler):
resp.headers[meta.get('http-equiv').lower()] = meta.get('content') resp.headers[meta.get('http-equiv').lower()] = meta.get('content')
class HTTPAllRedirectHandler(HTTPRedirectHandler):
    """Redirect handler that additionally follows HTTP 308 responses.

    A 308 response is handed to the inherited 301 handling.
    """

    def http_error_308(self, req, fp, code, msg, headers):
        """Handle a 308 response by delegating to ``http_error_301``.

        The received ``code`` is not forwarded: the literal 301 is passed on
        instead, so the response is processed exactly like a 301 redirect.
        """
        # NOTE(review): presumably 301 is substituted because the inherited
        # redirect logic may not recognise 308 on every Python version — confirm.
        handle_as_301 = self.http_error_301
        return handle_as_301(req, fp, 301, msg, headers)
class HTTPRefreshHandler(BaseHandler): class HTTPRefreshHandler(BaseHandler):
handler_order = 700 # HTTPErrorProcessor has a handler_order of 1000 handler_order = 700 # HTTPErrorProcessor has a handler_order of 1000
@@ -620,7 +630,7 @@ class BaseCache:
return True return True
import sqlite3 import sqlite3 # isort:skip
class SQLiteCache(BaseCache): class SQLiteCache(BaseCache):
@@ -657,7 +667,7 @@ class SQLiteCache(BaseCache):
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value) self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
import pymysql.cursors import pymysql.cursors # isort:skip
class MySQLCacheHandler(BaseCache): class MySQLCacheHandler(BaseCache):

View File

@@ -15,35 +15,35 @@
# You should have received a copy of the GNU Affero General Public License along # You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
import os.path import os.path
import sys
from datetime import datetime sys.path.append('/home/paul/Documents/Code/morss/lib')
import re
import json
import csv import csv
import json
import re
from copy import deepcopy
from datetime import datetime
from fnmatch import fnmatch from fnmatch import fnmatch
from lxml import etree
from dateutil import tz
import dateutil.parser import dateutil.parser
from copy import deepcopy
import lxml.html import lxml.html
from dateutil import tz
from lxml import etree
from .readabilite import parse as html_parse from .readabilite import parse as html_parse
json.encoder.c_make_encoder = None json.encoder.c_make_encoder = None
try: try:
# python 2 # python 2
from StringIO import StringIO
from ConfigParser import RawConfigParser from ConfigParser import RawConfigParser
from StringIO import StringIO
except ImportError: except ImportError:
# python 3 # python 3
from io import StringIO
from configparser import RawConfigParser from configparser import RawConfigParser
from io import StringIO
try: try:
# python 2 # python 2

View File

@@ -16,30 +16,25 @@
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
import os import os
import re
import time import time
from datetime import datetime from datetime import datetime
from dateutil import tz
from fnmatch import fnmatch from fnmatch import fnmatch
import re
import lxml.etree import lxml.etree
import lxml.html import lxml.html
from dateutil import tz
from . import feeds from . import crawler, feeds, readabilite
from . import crawler
from . import readabilite
try: try:
# python 2 # python 2
from httplib import HTTPException from httplib import HTTPException
from urlparse import urlparse, urljoin, parse_qs from urlparse import parse_qs, urljoin, urlparse
except ImportError: except ImportError:
# python 3 # python 3
from http.client import HTTPException from http.client import HTTPException
from urllib.parse import urlparse, urljoin, parse_qs from urllib.parse import parse_qs, urljoin, urlparse
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond

View File

@@ -15,10 +15,11 @@
# You should have received a copy of the GNU Affero General Public License along # You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
import re
import lxml.etree import lxml.etree
import lxml.html import lxml.html
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re
def parse(data, encoding=None): def parse(data, encoding=None):
@@ -352,6 +353,7 @@ def get_article(data, url=None, encoding_in=None, encoding_out='unicode', debug=
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys
from . import crawler from . import crawler
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it') req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')

View File

@@ -15,16 +15,16 @@
# You should have received a copy of the GNU Affero General Public License along # You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>. # with this program. If not, see <https://www.gnu.org/licenses/>.
import sys import cgitb
import mimetypes
import os.path import os.path
import re import re
import lxml.etree import sys
import cgitb
import wsgiref.util
import wsgiref.simple_server
import wsgiref.handlers import wsgiref.handlers
import mimetypes import wsgiref.simple_server
import wsgiref.util
import lxml.etree
try: try:
# python 2 # python 2
@@ -33,11 +33,9 @@ except ImportError:
# python 3 # python 3
from urllib.parse import unquote from urllib.parse import unquote
from . import crawler from . import crawler, readabilite
from . import readabilite from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
from .morss import FeedFetch, FeedGather, FeedFormat MorssException, Options, log)
from .morss import Options, log, TIMEOUT, DELAY, MorssException
PORT = int(os.getenv('PORT', 8080)) PORT = int(os.getenv('PORT', 8080))

View File

@@ -1,6 +1,7 @@
from setuptools import setup
from glob import glob from glob import glob
from setuptools import setup
package_name = 'morss' package_name = 'morss'
setup( setup(