Move iTunes code to feedify.py

master
pictuga 2017-03-18 23:41:37 -10:00
parent d4937812a8
commit 0b6e553054
2 changed files with 15 additions and 11 deletions

View File

@@ -15,10 +15,12 @@ try:
from ConfigParser import ConfigParser from ConfigParser import ConfigParser
from urlparse import urlparse, urljoin from urlparse import urlparse, urljoin
from urllib2 import urlopen from urllib2 import urlopen
from httplib import HTTPException
except ImportError: except ImportError:
from configparser import ConfigParser from configparser import ConfigParser
from urllib.parse import urlparse, urljoin from urllib.parse import urlparse, urljoin
from urllib.request import urlopen from urllib.request import urlopen
from http.client import HTTPException
try: try:
basestring basestring
@@ -95,12 +97,20 @@ def format_string(string, getter, error=False):
def pre_worker(url): def pre_worker(url):
if urlparse(url).netloc == 'itunes.apple.com': if url.startswith('http://itunes.apple.com/') or url.startswith('https://itunes.apple.com/'):
match = re.search('/id([0-9]+)(\?.*)?$', url) match = re.search('/id([0-9]+)(\?.*)?$', url)
if match: if match:
iid = match.groups()[0] iid = match.groups()[0]
redirect = 'https://itunes.apple.com/lookup?id={id}'.format(id=iid) redirect = 'https://itunes.apple.com/lookup?id=%s' % iid
return redirect
try:
con = crawler.custom_handler(basic=True).open(redirect, timeout=4)
data = con.read()
except (IOError, HTTPException):
raise
return json.loads(data.decode('utf-8', 'replace'))['results'][0]['feedUrl']
return None return None

View File

@@ -7,7 +7,6 @@ import threading
from fnmatch import fnmatch from fnmatch import fnmatch
import re import re
import json
import lxml.etree import lxml.etree
import lxml.html import lxml.html
@@ -335,7 +334,7 @@ def FeedFetch(url, options):
if isinstance(url, bytes): if isinstance(url, bytes):
url = url.decode() url = url.decode()
# do some useful facebook work # allow for code execution for feedify
pre = feedify.pre_worker(url) pre = feedify.pre_worker(url)
if pre: if pre:
url = pre url = pre
@@ -357,12 +356,7 @@ def FeedFetch(url, options):
contenttype = con.info().get('Content-Type', '').split(';')[0] contenttype = con.info().get('Content-Type', '').split(';')[0]
if url.startswith('https://itunes.apple.com/lookup?id='): if re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
link = json.loads(xml.decode('utf-8', 'replace'))['results'][0]['feedUrl']
log('itunes redirect: %s' % link)
return FeedFetch(link, options)
elif re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
rss = feeds.parse(xml) rss = feeds.parse(xml)
elif feedify.supported(url): elif feedify.supported(url):