]>
Commit | Line | Data |
---|---|---|
9e1e67fc PH |
1 | from __future__ import unicode_literals |
2 | ||
d7e66d39 | 3 | import re |
d7e66d39 JMF |
4 | |
5 | from .common import InfoExtractor | |
1cc79574 | 6 | from ..compat import ( |
d7e66d39 | 7 | compat_urlparse, |
4b7b839f | 8 | compat_urllib_parse, |
1cc79574 PH |
9 | ) |
10 | from ..utils import ( | |
d7e66d39 | 11 | xpath_with_ns, |
d7e66d39 JMF |
12 | ) |
13 | ||
14 | ||
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for videos served by video.internetvideoarchive.net.

    The extraction is a two-step process: the player's "flash configuration"
    XML is downloaded first, and the playlist URL it contains is then
    rewritten to an MRSS playlist that lists the available media files.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        'info_dict': {
            'id': '452693',
            'ext': 'mp4',
            'title': 'SKYFALL',
            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            'duration': 152,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL for the given query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Return a normalized, URL-encoded query string.

        Only the ``publishedid`` and ``customerid`` arguments of *query* are
        kept (first value of each); ``playerid`` and ``videokbrate`` are then
        forced to fixed values.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict(
            (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse.urlencode(cleaned_dic)

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by *url*.

        Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail``,
        ``description`` and ``duration``. The last three are ``None`` when
        the playlist does not provide them.

        Raises KeyError if *url* has no ``publishedid`` query argument or if
        a ``media:content`` element lacks ``url``, ``width`` or ``bitrate``.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration = self._download_xml(
            url, video_id, 'Downloading flash configuration')
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests)
        file_url = re.sub(
            r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
        info = self._download_xml(
            file_url, video_id, 'Downloading video info')
        item = info.find('channel/item')

        def _bp(p):
            # Expand the "media:" / "jwplayer:" prefixes used in the paths
            # below into their namespace URIs.
            return xpath_with_ns(
                p,
                {
                    'media': 'http://search.yahoo.com/mrss/',
                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
                }
            )

        formats = []
        # The duration is carried by the media:content elements. Track it
        # explicitly instead of relying on the loop variable still being
        # bound after the loop; the last element that provides one wins.
        duration = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            f_url = attr['url']
            width = int(attr['width'])
            bitrate = int(attr['bitrate'])
            format_id = '%d-%dk' % (width, bitrate)
            formats.append({
                'format_id': format_id,
                'url': f_url,
                'width': width,
                'tbr': bitrate,
            })
            if attr.get('duration') is not None:
                duration = int(attr['duration'])

        self._sort_formats(formats)

        # Thumbnail and description are optional metadata: a missing element
        # yields None instead of aborting the extraction.
        thumbnail_el = item.find(_bp('media:thumbnail'))
        thumbnail = None
        if thumbnail_el is not None:
            thumbnail = thumbnail_el.attrib.get('url')

        description_el = item.find('description')
        description = None
        if description_el is not None:
            description = description_el.text

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
        }