Compare commits

...

11 Commits

Author SHA1 Message Date
046f3f9f3d crawler: support 308 redirects
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-11 11:34:16 +02:00
db8e046eae ci: add spell check dict
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-08 22:21:49 +02:00
b4b1e93289 ci: fix spell check
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:15:53 +02:00
8a329fbb6d ci: fix pylint install
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:09:56 +02:00
77159b99ca drone: use alpine image (to benefit from pkgs)
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:07:21 +02:00
c158e65192 ci: added pylint (triggered upon error w/ score < 8 only)
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 22:01:49 +02:00
0d64964a02 README: add ci badge 2021-09-08 21:39:12 +02:00
e8271ae9a0 ci/cd: fix isort args
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-08 21:31:42 +02:00
2abe061422 Further isort implementation
All checks were successful
continuous-integration/drone/push Build is passing
2021-09-08 21:29:56 +02:00
57bd94d42f ci/cd attempt
Some checks failed
continuous-integration/drone/push Build is failing
2021-09-08 21:24:50 +02:00
69cdf05341 Apply isort 2021-09-08 20:54:34 +02:00
12 changed files with 131 additions and 58 deletions

15
.drone.yml Normal file
View File

@@ -0,0 +1,15 @@
kind: pipeline
name: default
steps:
- name: isort
image: python:alpine
commands:
- pip install isort
- isort --check-only --diff .
- name: pylint
image: alpine
commands:
- apk add --no-cache python3 py3-lxml py3-pip py3-wheel py3-pylint py3-enchant hunspell-en
- pip3 install --no-cache-dir .
- pylint morss --rcfile=.pylintrc --disable=C,R,W --fail-under=8

50
.pylintrc Normal file
View File

@@ -0,0 +1,50 @@
[MASTER]
ignore=CVS
suggestion-mode=yes
extension-pkg-allow-list=lxml.etree
[MESSAGES CONTROL]
disable=missing-function-docstring,
missing-class-docstring,
missing-module-docstring,
wrong-spelling-in-comment,
[REPORTS]
reports=yes
score=yes
[SPELLING]
spelling-dict=en_GB
spelling-ignore-words=morss
[STRING]
check-quote-consistency=yes
check-str-concat-over-line-jumps=yes
[VARIABLES]
allow-global-unused-variables=no
init-import=no
[FORMAT]
expected-line-ending-format=LF
indent-string=' '
max-line-length=120
max-module-lines=1000
[BASIC]
argument-naming-style=snake_case
attr-naming-style=snake_case
class-attribute-naming-style=snake_case
class-const-naming-style=UPPER_CASE
class-naming-style=PascalCase
const-naming-style=UPPER_CASE
function-naming-style=snake_case
inlinevar-naming-style=snake_case
method-naming-style=snake_case
module-naming-style=snake_case
variable-naming-style=snake_case
include-naming-hint=yes
bad-names=foo, bar
good-names=i, j, k

View File

@@ -1,5 +1,7 @@
# Morss - Get full-text RSS feeds
[![Build Status](https://ci.pictuga.com/api/badges/pictuga/morss/status.svg)](https://ci.pictuga.com/pictuga/morss)
_GNU AGPLv3 code_
_Provided logo is CC BY-NC-SA 4.0_

View File

@@ -16,5 +16,8 @@
# with this program. If not, see <https://www.gnu.org/licenses/>.
# run on `import morss`
# pylint: disable=unused-import,unused-variable
from .morss import *
from .wsgi import application

View File

@@ -20,9 +20,7 @@
import os
import sys
from . import wsgi
from . import cli
from . import cli, wsgi
from .morss import MorssException

View File

@@ -15,12 +15,11 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
import os.path
import argparse
import os.path
import sys
from .morss import FeedFetch, FeedGather, FeedFormat
from .morss import Options
from .morss import FeedFetch, FeedFormat, FeedGather, Options
def cli_app():

View File

@@ -16,30 +16,34 @@
# with this program. If not, see <https://www.gnu.org/licenses/>.
import os
import sys
import zlib
from io import BytesIO, StringIO
import re
import chardet
from cgi import parse_header
import time
import threading
import random
import re
import sys
import threading
import time
import zlib
from cgi import parse_header
from collections import OrderedDict
from io import BytesIO, StringIO
import chardet
try:
# python 2
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
from urllib import quote
from urlparse import urlparse, urlunparse
import mimetools
from urllib2 import (BaseHandler, HTTPCookieProcessor, HTTPRedirectHandler,
Request, addinfourl, build_opener, parse_http_list,
parse_keqv_list)
from urlparse import urlparse, urlunparse
except ImportError:
# python 3
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
from urllib.parse import quote
from urllib.parse import urlparse, urlunparse
import email
from urllib.parse import quote, urlparse, urlunparse
from urllib.request import (BaseHandler, HTTPCookieProcessor,
HTTPRedirectHandler, Request, addinfourl,
build_opener, parse_http_list, parse_keqv_list)
try:
# python 2
@@ -131,6 +135,7 @@ def custom_opener(follow=None, delay=None):
handlers.append(SizeLimitHandler(500*1024)) # 500KiB
handlers.append(HTTPCookieProcessor())
handlers.append(GZIPHandler())
handlers.append(HTTPAllRedirectHandler())
handlers.append(HTTPEquivHandler())
handlers.append(HTTPRefreshHandler())
handlers.append(UAHandler(random.choice(DEFAULT_UAS)))
@@ -397,6 +402,11 @@ class HTTPEquivHandler(RespStrHandler):
resp.headers[meta.get('http-equiv').lower()] = meta.get('content')
class HTTPAllRedirectHandler(HTTPRedirectHandler):
    """Redirect handler that also follows HTTP 308 (Permanent Redirect).

    The stock ``HTTPRedirectHandler`` only gained ``http_error_308`` in
    Python 3.11; on older interpreters (and on Python 2's ``urllib2``) a 308
    response surfaces as an ``HTTPError`` instead of being followed.
    """

    def http_error_308(self, req, fp, code, msg, headers):
        """Follow a 308 response using the 307 redirect rules.

        A 308 must not change the request method (RFC 7538). Replaying it as
        a 301 would let ``redirect_request`` turn a POST into a body-less GET,
        so it is replayed as a 307 instead: GET/HEAD requests are redirected,
        any other method raises ``HTTPError``. The literal 307 is passed
        because older ``redirect_request`` implementations do not know 308.

        Returns whatever the inherited ``http_error_307`` returns, i.e. the
        response obtained from the redirect target.
        """
        return self.http_error_307(req, fp, 307, msg, headers)
class HTTPRefreshHandler(BaseHandler):
handler_order = 700 # HTTPErrorProcessor has a handler_order of 1000
@@ -620,7 +630,7 @@ class BaseCache:
return True
import sqlite3
import sqlite3 # isort:skip
class SQLiteCache(BaseCache):
@@ -657,7 +667,7 @@ class SQLiteCache(BaseCache):
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
import pymysql.cursors
import pymysql.cursors # isort:skip
class MySQLCacheHandler(BaseCache):

View File

@@ -15,35 +15,35 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
import os.path
import sys
from datetime import datetime
sys.path.append('/home/paul/Documents/Code/morss/lib')
import re
import json
import csv
import json
import re
from copy import deepcopy
from datetime import datetime
from fnmatch import fnmatch
from lxml import etree
from dateutil import tz
import dateutil.parser
from copy import deepcopy
import lxml.html
from dateutil import tz
from lxml import etree
from .readabilite import parse as html_parse
json.encoder.c_make_encoder = None
try:
# python 2
from StringIO import StringIO
from ConfigParser import RawConfigParser
from StringIO import StringIO
except ImportError:
# python 3
from io import StringIO
from configparser import RawConfigParser
from io import StringIO
try:
# python 2

View File

@@ -16,30 +16,25 @@
# with this program. If not, see <https://www.gnu.org/licenses/>.
import os
import re
import time
from datetime import datetime
from dateutil import tz
from fnmatch import fnmatch
import re
import lxml.etree
import lxml.html
from dateutil import tz
from . import feeds
from . import crawler
from . import readabilite
from . import crawler, feeds, readabilite
try:
# python 2
from httplib import HTTPException
from urlparse import urlparse, urljoin, parse_qs
from urlparse import parse_qs, urljoin, urlparse
except ImportError:
# python 3
from http.client import HTTPException
from urllib.parse import urlparse, urljoin, parse_qs
from urllib.parse import parse_qs, urljoin, urlparse
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond

View File

@@ -15,10 +15,11 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import re
import lxml.etree
import lxml.html
from bs4 import BeautifulSoup
import re
def parse(data, encoding=None):
@@ -352,6 +353,7 @@ def get_article(data, url=None, encoding_in=None, encoding_out='unicode', debug=
if __name__ == '__main__':
import sys
from . import crawler
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')

View File

@@ -15,16 +15,16 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
import cgitb
import mimetypes
import os.path
import re
import lxml.etree
import cgitb
import wsgiref.util
import wsgiref.simple_server
import sys
import wsgiref.handlers
import mimetypes
import wsgiref.simple_server
import wsgiref.util
import lxml.etree
try:
# python 2
@@ -33,11 +33,9 @@ except ImportError:
# python 3
from urllib.parse import unquote
from . import crawler
from . import readabilite
from .morss import FeedFetch, FeedGather, FeedFormat
from .morss import Options, log, TIMEOUT, DELAY, MorssException
from . import crawler, readabilite
from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
MorssException, Options, log)
PORT = int(os.getenv('PORT', 8080))

View File

@@ -1,6 +1,7 @@
from setuptools import setup
from glob import glob
from setuptools import setup
package_name = 'morss'
setup(