Move iTunes code to feedify.py
parent
d4937812a8
commit
0b6e553054
|
@ -15,10 +15,12 @@ try:
|
||||||
from ConfigParser import ConfigParser
|
from ConfigParser import ConfigParser
|
||||||
from urlparse import urlparse, urljoin
|
from urlparse import urlparse, urljoin
|
||||||
from urllib2 import urlopen
|
from urllib2 import urlopen
|
||||||
|
from httplib import HTTPException
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from urllib.parse import urlparse, urljoin
|
from urllib.parse import urlparse, urljoin
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
from http.client import HTTPException
|
||||||
|
|
||||||
try:
|
try:
|
||||||
basestring
|
basestring
|
||||||
|
@ -95,12 +97,20 @@ def format_string(string, getter, error=False):
|
||||||
|
|
||||||
|
|
||||||
def pre_worker(url):
|
def pre_worker(url):
|
||||||
if urlparse(url).netloc == 'itunes.apple.com':
|
if url.startswith('http://itunes.apple.com/') or url.startswith('https://itunes.apple.com/'):
|
||||||
match = re.search('/id([0-9]+)(\?.*)?$', url)
|
match = re.search('/id([0-9]+)(\?.*)?$', url)
|
||||||
if match:
|
if match:
|
||||||
iid = match.groups()[0]
|
iid = match.groups()[0]
|
||||||
redirect = 'https://itunes.apple.com/lookup?id={id}'.format(id=iid)
|
redirect = 'https://itunes.apple.com/lookup?id=%s' % iid
|
||||||
return redirect
|
|
||||||
|
try:
|
||||||
|
con = crawler.custom_handler(basic=True).open(redirect, timeout=4)
|
||||||
|
data = con.read()
|
||||||
|
|
||||||
|
except (IOError, HTTPException):
|
||||||
|
raise
|
||||||
|
|
||||||
|
return json.loads(data.decode('utf-8', 'replace'))['results'][0]['feedUrl']
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,6 @@ import threading
|
||||||
|
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
@ -335,7 +334,7 @@ def FeedFetch(url, options):
|
||||||
if isinstance(url, bytes):
|
if isinstance(url, bytes):
|
||||||
url = url.decode()
|
url = url.decode()
|
||||||
|
|
||||||
# do some useful facebook work
|
# allow for code execution for feedify
|
||||||
pre = feedify.pre_worker(url)
|
pre = feedify.pre_worker(url)
|
||||||
if pre:
|
if pre:
|
||||||
url = pre
|
url = pre
|
||||||
|
@ -357,12 +356,7 @@ def FeedFetch(url, options):
|
||||||
|
|
||||||
contenttype = con.info().get('Content-Type', '').split(';')[0]
|
contenttype = con.info().get('Content-Type', '').split(';')[0]
|
||||||
|
|
||||||
if url.startswith('https://itunes.apple.com/lookup?id='):
|
if re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
|
||||||
link = json.loads(xml.decode('utf-8', 'replace'))['results'][0]['feedUrl']
|
|
||||||
log('itunes redirect: %s' % link)
|
|
||||||
return FeedFetch(link, options)
|
|
||||||
|
|
||||||
elif re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
|
|
||||||
rss = feeds.parse(xml)
|
rss = feeds.parse(xml)
|
||||||
|
|
||||||
elif feedify.supported(url):
|
elif feedify.supported(url):
|
||||||
|
|
Loading…
Reference in New Issue