Move iTunes code to feedify.py
parent
d4937812a8
commit
0b6e553054
|
@ -15,10 +15,12 @@ try:
|
|||
from ConfigParser import ConfigParser
|
||||
from urlparse import urlparse, urljoin
|
||||
from urllib2 import urlopen
|
||||
from httplib import HTTPException
|
||||
except ImportError:
|
||||
from configparser import ConfigParser
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from urllib.request import urlopen
|
||||
from http.client import HTTPException
|
||||
|
||||
try:
|
||||
basestring
|
||||
|
@ -95,12 +97,20 @@ def format_string(string, getter, error=False):
|
|||
|
||||
|
||||
def pre_worker(url):
    """Resolve an iTunes page URL to the feed URL it advertises.

    URLs under ``http://itunes.apple.com/`` or ``https://itunes.apple.com/``
    that end in ``/id<digits>`` (optionally followed by a query string) are
    looked up through ``https://itunes.apple.com/lookup?id=<digits>`` and the
    ``feedUrl`` of the first result is returned.

    Returns the feed URL, or None when ``url`` is not an iTunes URL, carries
    no id, or the lookup response holds no result with a ``feedUrl``.

    Errors raised while opening or reading the lookup URL (IOError,
    HTTPException, ...) are not handled here and propagate to the caller.
    """
    # str.startswith accepts a tuple of prefixes, so one call covers both schemes
    if not url.startswith(('http://itunes.apple.com/', 'https://itunes.apple.com/')):
        return None

    # raw string: '\?' in a plain literal is an invalid escape sequence
    match = re.search(r'/id([0-9]+)(\?.*)?$', url)
    if not match:
        return None

    redirect = 'https://itunes.apple.com/lookup?id=%s' % match.group(1)

    con = crawler.custom_handler(basic=True).open(redirect, timeout=4)
    try:
        data = con.read()
    finally:
        # assumes the opener returns a urllib-style response exposing close();
        # try/finally rather than `with` keeps Python 2 support
        con.close()

    # an id that is unknown (or is not a podcast) yields an empty result list
    # or a result without 'feedUrl'; report "no rewrite" instead of crashing
    results = json.loads(data.decode('utf-8', 'replace')).get('results') or []
    if not results:
        return None

    return results[0].get('feedUrl')
|
||||
|
||||
|
|
|
@ -7,7 +7,6 @@ import threading
|
|||
|
||||
from fnmatch import fnmatch
|
||||
import re
|
||||
import json
|
||||
|
||||
import lxml.etree
|
||||
import lxml.html
|
||||
|
@ -335,7 +334,7 @@ def FeedFetch(url, options):
|
|||
if isinstance(url, bytes):
|
||||
url = url.decode()
|
||||
|
||||
# do some useful facebook work
|
||||
# allow for code execution for feedify
|
||||
pre = feedify.pre_worker(url)
|
||||
if pre:
|
||||
url = pre
|
||||
|
@ -357,12 +356,7 @@ def FeedFetch(url, options):
|
|||
|
||||
contenttype = con.info().get('Content-Type', '').split(';')[0]
|
||||
|
||||
if url.startswith('https://itunes.apple.com/lookup?id='):
|
||||
link = json.loads(xml.decode('utf-8', 'replace'))['results'][0]['feedUrl']
|
||||
log('itunes redirect: %s' % link)
|
||||
return FeedFetch(link, options)
|
||||
|
||||
elif re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
|
||||
if re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
|
||||
rss = feeds.parse(xml)
|
||||
|
||||
elif feedify.supported(url):
|
||||
|
|
Loading…
Reference in New Issue