]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/internetvideoarchive.py
Merge pull request #8061 from dstftw/introduce-chapter-and-series-fields
[yt-dlp.git] / youtube_dl / extractor / internetvideoarchive.py
CommitLineData
9e1e67fc
PH
1from __future__ import unicode_literals
2
d7e66d39 3import re
d7e66d39
JMF
4
5from .common import InfoExtractor
1cc79574 6from ..compat import (
d7e66d39 7 compat_urlparse,
4b7b839f 8 compat_urllib_parse,
1cc79574
PH
9)
10from ..utils import (
d7e66d39 11 xpath_with_ns,
d7e66d39
JMF
12)
13
14
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for Internet Video Archive flash player configuration URLs.

    The flash configuration XML points at a playlist; that playlist is
    requested in its MRSS form and every ``media:content`` entry of the
    first channel item becomes one format.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        'info_dict': {
            'id': '452693',
            'ext': 'mp4',
            'title': 'SKYFALL',
            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            'duration': 152,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL carrying the given query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Return a re-encoded query string restricted to the needed arguments.

        Only ``publishedid`` and ``customerid`` are kept from ``query`` (the
        first value of each); ``playerid`` and ``videokbrate`` are then forced
        to fixed values.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse.urlencode(cleaned_dic)

    def _real_extract(self, url):
        """Build the info dict (id, title, formats, optional metadata) for ``url``.

        Raises ``KeyError`` when the URL query has no ``publishedid`` or when
        a ``media:content`` entry lacks ``url``, ``width`` or ``bitrate``.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration = self._download_xml(
            url, video_id, 'Downloading flash configuration')
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests)
        file_url = re.sub(
            r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
        info = self._download_xml(
            file_url, video_id, 'Downloading video info')
        item = info.find('channel/item')

        def _bp(p):
            # Expand the ``media:`` / ``jwplayer:`` prefixes used in the paths below.
            return xpath_with_ns(
                p,
                {
                    'media': 'http://search.yahoo.com/mrss/',
                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
                }
            )

        formats = []
        # The duration is only exposed as an attribute of the media:content
        # entries.  Track it explicitly rather than reading the loop variable
        # after the loop: the last value seen wins (as before), but an entry
        # without the attribute no longer aborts the whole extraction.
        duration = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            width = int(attr['width'])
            bitrate = int(attr['bitrate'])
            formats.append({
                'format_id': '%d-%dk' % (width, bitrate),
                'url': attr['url'],
                'width': width,
                'tbr': bitrate,
            })
            if attr.get('duration') is not None:
                duration = int(attr['duration'])

        self._sort_formats(formats)

        # Thumbnail and description are optional metadata: report None when
        # the feed omits them instead of failing on a missing element.
        thumbnail_el = item.find(_bp('media:thumbnail'))
        description_el = item.find('description')

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': thumbnail_el.attrib.get('url') if thumbnail_el is not None else None,
            'description': description_el.text if description_el is not None else None,
            'duration': duration,
        }