]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/internetvideoarchive.py
[youtube] Update test description field
[yt-dlp.git] / youtube_dl / extractor / internetvideoarchive.py
CommitLineData
d7e66d39 1import re
d7e66d39
JMF
2
3from .common import InfoExtractor
4from ..utils import (
5 compat_urlparse,
4b7b839f 6 compat_urllib_parse,
d7e66d39 7 xpath_with_ns,
d7e66d39
JMF
8)
9
10
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net flash player URLs.

    The player URL's query string is used to fetch a "flash configuration"
    XML document; the playlist URL found in it is rewritten to the MRSS
    variant, from which the formats and metadata are read.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        u'file': u'452693.mp4',
        u'info_dict': {
            u'title': u'SKYFALL',
            u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            u'duration': 153,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL for the given query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Return a re-encoded query string reduced to the needed arguments.

        Only 'publishedid' and 'customerid' are kept from *query* (first
        value of each); 'playerid' and 'videokbrate' are always overridden.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict(
            (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse.urlencode(cleaned_dic)

    def _real_extract(self, url):
        """Download the configuration and MRSS playlist and build the info dict.

        Returns a dict with 'id', 'title' and 'formats', plus 'thumbnail',
        'description' and 'duration', which are None when the playlist
        does not provide them.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration = self._download_xml(
            url, video_id, u'Downloading flash configuration')
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests)
        file_url = re.sub(
            r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
        info = self._download_xml(
            file_url, video_id, u'Downloading video info')
        item = info.find('channel/item')

        def _bp(p):
            # Expand the 'media:' / 'jwplayer:' prefixes used in the paths
            return xpath_with_ns(
                p,
                {'media': 'http://search.yahoo.com/mrss/',
                 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})

        formats = []
        # Tracked explicitly instead of reading the loop variable after the
        # loop, which is unbound when there is no media:content element.
        duration = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            width = int(attr['width'])
            bitrate = int(attr['bitrate'])
            formats.append({
                'format_id': '%d-%dk' % (width, bitrate),
                'url': attr['url'],
                'width': width,
                'tbr': bitrate,
            })
            # The last element carrying a duration wins, as before
            if attr.get('duration'):
                duration = int(attr['duration'])

        self._sort_formats(formats)

        # Thumbnail and description are optional metadata: a missing element
        # yields None instead of an AttributeError.
        thumbnail_el = item.find(_bp('media:thumbnail'))
        thumbnail = None
        if thumbnail_el is not None:
            thumbnail = thumbnail_el.attrib.get('url')

        description_el = item.find('description')
        description = None
        if description_el is not None:
            description = description_el.text

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
        }