]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/internetvideoarchive.py
PEP8: applied even more rules
[yt-dlp.git] / youtube_dl / extractor / internetvideoarchive.py
CommitLineData
9e1e67fc
PH
1from __future__ import unicode_literals
2
d7e66d39 3import re
d7e66d39
JMF
4
5from .common import InfoExtractor
6from ..utils import (
7 compat_urlparse,
4b7b839f 8 compat_urllib_parse,
d7e66d39 9 xpath_with_ns,
d7e66d39
JMF
10)
11
12
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net Flash player URLs.

    The player URL's query string is forwarded to the
    ``flashconfiguration.aspx`` endpoint, whose XML response points at a
    playlist.  That playlist is requested in its MRSS flavour and the
    ``media:content`` entries found there become the formats.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        'info_dict': {
            'id': '452693',
            'ext': 'mp4',
            'title': 'SKYFALL',
            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            'duration': 149,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL for the given raw query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Rebuild a playlist query string with only the parameters we want.

        Only ``publishedid`` and ``customerid`` are carried over from
        *query* (first value of each); ``playerid`` and ``videokbrate`` are
        always overridden.  Returns the url-encoded result.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse.urlencode(cleaned_dic)

    def _real_extract(self, url):
        """Download the player configuration and MRSS playlist for *url*.

        Returns an info dict with ``id``, ``title``, ``formats``,
        ``thumbnail``, ``description`` and ``duration``.  The last three are
        ``None`` when the playlist does not provide them.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration = self._download_xml(
            url, video_id, 'Downloading flash configuration')
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests).  A callable replacement is used
        # so the rebuilt query is inserted literally (no backslash expansion).
        file_url = re.sub(
            r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
        info = self._download_xml(
            file_url, video_id, 'Downloading video info')
        item = info.find('channel/item')

        def _bp(p):
            return xpath_with_ns(
                p,
                {
                    'media': 'http://search.yahoo.com/mrss/',
                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
                }
            )

        formats = []
        # Collected explicitly instead of reading the loop variable after the
        # loop has finished; the last rendition declaring a duration wins,
        # which matches what the leaked variable used to yield.
        duration = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            f_url = attr['url']
            width = int(attr['width'])
            bitrate = int(attr['bitrate'])
            format_id = '%d-%dk' % (width, bitrate)
            formats.append({
                'format_id': format_id,
                'url': f_url,
                'width': width,
                'tbr': bitrate,
            })
            if attr.get('duration'):
                duration = int(attr['duration'])

        self._sort_formats(formats)

        # Element.find() returns None for a missing node; compare against
        # None explicitly because an Element without children is falsy.
        thumbnail_el = item.find(_bp('media:thumbnail'))
        thumbnail = thumbnail_el.attrib.get('url') if thumbnail_el is not None else None
        description_el = item.find('description')
        description = description_el.text if description_el is not None else None

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
        }