Fn to access data_files & pkg files
parent
11bc9f643e
commit
51f1d330a4
|
@ -17,9 +17,7 @@
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import json
|
import json
|
||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
@ -30,6 +28,7 @@ from dateutil import tz
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from .readabilite import parse as html_parse
|
from .readabilite import parse as html_parse
|
||||||
|
from .util import *
|
||||||
|
|
||||||
json.encoder.c_make_encoder = None
|
json.encoder.c_make_encoder = None
|
||||||
|
|
||||||
|
@ -52,7 +51,7 @@ except NameError:
|
||||||
|
|
||||||
def parse_rules(filename=None):
|
def parse_rules(filename=None):
|
||||||
if not filename:
|
if not filename:
|
||||||
filename = os.path.join(os.path.dirname(__file__), 'feedify.ini')
|
filename = pkg_path('feedify.ini')
|
||||||
|
|
||||||
config = RawConfigParser()
|
config = RawConfigParser()
|
||||||
config.read(filename)
|
config.read(filename)
|
||||||
|
@ -66,18 +65,9 @@ def parse_rules(filename=None):
|
||||||
# for each rule
|
# for each rule
|
||||||
|
|
||||||
if rules[section][arg].startswith('file:'):
|
if rules[section][arg].startswith('file:'):
|
||||||
paths = [os.path.join(sys.prefix, 'share/morss/www', rules[section][arg][5:]),
|
file_raw = open(data_path(rules[section][arg][5:])).read()
|
||||||
os.path.join(os.path.dirname(__file__), '../www', rules[section][arg][5:]),
|
file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw)
|
||||||
os.path.join(os.path.dirname(__file__), '../..', rules[section][arg][5:])]
|
rules[section][arg] = file_clean
|
||||||
|
|
||||||
for path in paths:
|
|
||||||
try:
|
|
||||||
file_raw = open(path).read()
|
|
||||||
file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw)
|
|
||||||
rules[section][arg] = file_clean
|
|
||||||
|
|
||||||
except IOError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
elif '\n' in rules[section][arg]:
|
elif '\n' in rules[section][arg]:
|
||||||
rules[section][arg] = rules[section][arg].split('\n')[1:]
|
rules[section][arg] = rules[section][arg].split('\n')[1:]
|
||||||
|
@ -810,6 +800,8 @@ class FeedJSON(Feed, ParserJSON):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
|
||||||
from . import crawler
|
from . import crawler
|
||||||
|
|
||||||
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
|
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
# This file is part of morss
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2020 pictuga <contact@pictuga.com>
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify it under
|
||||||
|
# the terms of the GNU Affero General Public License as published by the Free
|
||||||
|
# Software Foundation, either version 3 of the License, or (at your option) any
|
||||||
|
# later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
||||||
|
# details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def pkg_path(path=''):
    """Return `path` joined onto the directory containing this module.

    With the default empty `path`, the result is the module's directory
    itself (as produced by `os.path.join(directory, '')`).
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, path)
|
||||||
|
|
||||||
|
|
||||||
|
# Base directory of the data files. Stays None until data_path() finds a
# requested file under one of its candidate directories, then holds that
# directory for every later call.
data_path_base = None


def data_path(path=''):
    """Return the location of the data file `path`.

    The first successful lookup decides which base directory is used and
    stores it in the module-level `data_path_base`; later calls simply join
    `path` onto that cached base without touching the filesystem again.

    Until a base has been cached, `path` must name an existing regular file
    under one of the candidate directories: the check is `os.path.isfile`,
    so a directory (including the default empty `path`) does not match.

    Raises:
        IOError: no candidate directory contains `path`. The exception
            carries `errno.ENOENT` and the requested `path` as its filename.
    """
    global data_path_base

    if data_path_base is not None:
        return os.path.join(data_path_base, path)

    # Tried in order; the first directory that actually holds `path` wins.
    # NOTE(review): presumably these cover installed and source-checkout
    # layouts -- confirm against the packaging setup.
    candidates = (
        os.path.join(sys.prefix, 'share/morss/www'),
        os.path.join(pkg_path(), './../../../../share/morss/www'),
        os.path.join(pkg_path(), '../www'),
        os.path.join(pkg_path(), '../..'),
    )

    for base in candidates:
        full_path = os.path.join(base, path)

        if os.path.isfile(full_path):
            data_path_base = base
            return full_path

    # Local import: errno is only needed on this failure path.
    import errno

    # Still an IOError for existing callers, but now it says what was missing.
    raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), path)
|
Loading…
Reference in New Issue