]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/internetvideoarchive.py
[tbs] update tokenizer url(fixes #15395)
[yt-dlp.git] / youtube_dl / extractor / internetvideoarchive.py
CommitLineData
9e1e67fc
PH
1from __future__ import unicode_literals
2
d7e66d39 3from .common import InfoExtractor
1cc79574 4from ..compat import (
c05025fd 5 compat_parse_qs,
d7e66d39 6 compat_urlparse,
1cc79574
PH
7)
8from ..utils import (
c05025fd
YCH
9 determine_ext,
10 int_or_none,
11 xpath_text,
d7e66d39
JMF
12)
13
14
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net player configurations.

    Two kinds of configuration URL are handled, both identified by the
    ``publishedid`` query parameter:

    * URLs containing ``/player/`` are downloaded as JSON; the first
      playlist entry supplies title, description, thumbnail and sources.
    * All other matching URLs are downloaded as XML, which supplies a
      single file URL and a thumbnail.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'KICK-ASS 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the JSON player configuration URL for the query string *query*."""
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    @staticmethod
    def _build_xml_url(query):
        """Return the XML flash configuration URL for the query string *query*."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    def _real_extract(self, url):
        """Build the info dict for the configuration at *url*.

        The video id is the first ``publishedid`` value of the URL's query
        string; a ``KeyError`` is raised if that parameter is absent.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail`` and ``description``.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_parse_qs(query)
        video_id = query_dic['publishedid'][0]

        if '/player/' in url:
            configuration = self._download_json(url, video_id)

            # There are multiple videos in the playlist while only the first
            # one matches the video played in browsers
            video_info = configuration['playlist'][0]
            title = video_info['title']

            formats = []
            for source in video_info['sources']:
                file_url = source['file']
                if determine_ext(file_url) == 'm3u8':
                    m3u8_formats = self._extract_m3u8_formats(
                        file_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False)
                    # NOTE(review): the HDS/DASH manifests are derived from
                    # the first HLS variant URL, so they are only attempted
                    # when HLS extraction produced something - confirm this
                    # nesting against upstream.
                    if m3u8_formats:
                        formats.extend(m3u8_formats)
                        file_url = m3u8_formats[0]['url']
                        formats.extend(self._extract_f4m_formats(
                            file_url.replace('.m3u8', '.f4m'),
                            video_id, f4m_id='hds', fatal=False))
                        formats.extend(self._extract_mpd_formats(
                            file_url.replace('.m3u8', '.mpd'),
                            video_id, mpd_id='dash', fatal=False))
                else:
                    a_format = {
                        'url': file_url,
                    }

                    label = source.get('label')
                    if label and label.endswith(' kbs'):
                        tbr = int_or_none(label[:-4])
                        # int_or_none() yields None for a non-numeric label;
                        # formatting None with %d would raise TypeError, so
                        # the bitrate fields are only set when parsing worked.
                        if tbr is not None:
                            a_format.update({
                                'tbr': tbr,
                                'format_id': 'http-%d' % tbr,
                            })
                    # NOTE(review): sources without a usable bitrate label are
                    # still offered, as they carry a valid URL - confirm that
                    # upstream did not intend to drop them.
                    formats.append(a_format)

            self._sort_formats(formats)

            description = video_info.get('description')
            thumbnail = video_info.get('image')
        else:
            configuration = self._download_xml(url, video_id)
            formats = [{
                'url': xpath_text(configuration, './file', 'file URL', fatal=True),
            }]
            thumbnail = xpath_text(configuration, './image', 'thumbnail')
            # The XML configuration is not queried for a title, so a
            # placeholder based on the video id is used instead.
            title = 'InternetVideoArchive video %s' % video_id
            description = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
        }