]>
Commit | Line | Data |
---|---|---|
29825140 RA |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | xpath_text, | |
9 | int_or_none, | |
10 | determine_ext, | |
11 | parse_duration, | |
12 | xpath_attr, | |
13 | update_url_query, | |
b8079a40 | 14 | compat_urlparse, |
29825140 RA |
15 | ) |
16 | ||
17 | ||
class TurnerBaseIE(InfoExtractor):
    """Base extractor providing shared parsing of a video description XML."""

    def _extract_cvp_info(self, data_src, video_id, path_data=None):
        """Download the XML document at ``data_src`` and build an info dict.

        Arguments:
        data_src  -- URL of the XML document describing the video.
        video_id  -- identifier used for the downloads; the id returned in
                     the result is re-derived from the document's root
                     ``id`` attribute.
        path_data -- optional dict describing how to resolve non-absolute
                     file URLs.  Keys read here are ``'secure'`` (a dict
                     with ``'media_src'`` and ``'tokenizer_src'``), a file
                     extension, and ``'default'`` (dicts with
                     ``'media_src'``).  Defaults to an empty mapping.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``subtitles``, ``thumbnails``, ``description``, ``duration``,
        ``timestamp``, ``upload_date``, ``series``, ``season_number`` and
        ``episode_number``.

        NOTE(review): ``headline`` is looked up with ``fatal=True``, so a
        document without it presumably aborts extraction -- confirm against
        ``xpath_text``.
        """
        # A None default avoids sharing one mutable dict between calls.
        if path_data is None:
            path_data = {}

        video_data = self._download_xml(data_src, video_id)
        # Keep only the last path component of the id, without its suffix.
        video_id = video_data.attrib['id'].split('/')[-1].split('.')[0]
        title = xpath_text(video_data, 'headline', fatal=True)

        tokens = {}  # secure path pattern -> auth token, reused across files
        seen_urls = set()  # final URLs already handled, to skip duplicates
        formats = []
        # Matches "<width>x<height>" optionally followed by "_<bitrate>".
        rex = re.compile(r'''(?x)
            (?P<width>[0-9]+)x(?P<height>[0-9]+)
            (?:_(?P<bitrate>[0-9]+))?
        ''')
        for video_file in video_data.findall('files/file'):
            # An empty <file/> element has text None; treat it as no URL.
            video_url = (video_file.text or '').strip()
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if video_url.startswith('/mp4:protected/'):
                # TODO Correct extraction for these files.  The intended
                # approach was to take the RTMP base from 'akamai/src',
                # request a token from path_data['protected']['tokenizer_src']
                # (passing path, videoId and 'akamai/aifp'), and append the
                # token to the RTMP URL.  Until then these files are skipped.
                continue
            elif video_url.startswith('/secure/'):
                secure_path_data = path_data.get('secure')
                if not secure_path_data:
                    continue
                video_url = secure_path_data['media_src'] + video_url
                # The token is requested for the containing directory
                # followed by a '*' wildcard, so it can be cached per
                # directory.
                secure_path = self._search_regex(
                    r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
                token = tokens.get(secure_path)
                if not token:
                    auth = self._download_xml(
                        secure_path_data['tokenizer_src'], video_id, query={
                            'path': secure_path,
                            'videoId': video_id,
                        })
                    token = xpath_text(auth, 'token')
                    if not token:
                        continue
                    tokens[secure_path] = token
                video_url = video_url + '?hdnea=' + token
            elif not re.match('https?://', video_url):
                # Relative URL: prefix it with the media_src configured for
                # this extension, falling back to the 'default' entry.
                base_path_data = path_data.get(ext, path_data.get('default', {}))
                media_src = base_path_data.get('media_src')
                if not media_src:
                    continue
                video_url = media_src + video_url
            if video_url in seen_urls:
                continue
            seen_urls.add(video_url)
            # The bitrate attribute may be absent; format_id is then None.
            format_id = video_file.get('bitrate')
            if ext == 'smil':
                formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
                if m3u8_formats:
                    # Sometimes final URLs inside m3u8 are unsigned, let's fix this
                    # ourselves
                    qs = compat_urlparse.urlparse(video_url).query
                    if qs:
                        query = compat_urlparse.parse_qs(qs)
                        for m3u8_format in m3u8_formats:
                            m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
                            m3u8_format['extra_param_to_segment_url'] = qs
                    formats.extend(m3u8_formats)
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(video_url, {'hdcore': '3.7.0'}),
                    video_id, f4m_id=format_id, fatal=False))
            else:
                f = {
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                }
                # Look for dimensions in the bitrate label and the URL.
                mobj = rex.search((format_id or '') + video_url)
                if mobj:
                    f.update({
                        'width': int(mobj.group('width')),
                        'height': int(mobj.group('height')),
                        'tbr': int_or_none(mobj.group('bitrate')),
                    })
                elif format_id:
                    if format_id.isdigit():
                        f['tbr'] = int(format_id)
                    else:
                        mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
                        if mobj:
                            if mobj.group(1) == 'audio':
                                f.update({
                                    'vcodec': 'none',
                                    'ext': 'm4a',
                                })
                            else:
                                f['tbr'] = int(mobj.group(1))
                formats.append(f)
        self._sort_formats(formats)

        subtitles = {}
        for source in video_data.findall('closedCaptions/source'):
            for track in source.findall('track'):
                track_url = track.get('url')
                if not track_url:
                    continue
                lang = track.get('lang') or track.get('label') or 'en'
                subtitles.setdefault(lang, []).append({
                    'url': track_url,
                    # Unknown source formats map to None.
                    'ext': {
                        'scc': 'scc',
                        'webvtt': 'vtt',
                        'smptett': 'tt',
                    }.get(source.get('format'))
                })

        thumbnails = [{
            'id': image.get('cut'),
            'url': image.text,
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.findall('images/image')]

        # The timestamp is deliberately not read for cnn.com sources.
        # NOTE(review): the reason is not visible here -- confirm.
        timestamp = None
        if 'cnn.com' not in data_src:
            timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'description': xpath_text(video_data, 'description'),
            'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
            'timestamp': timestamp,
            'upload_date': xpath_attr(video_data, 'metas', 'version'),
            'series': xpath_text(video_data, 'showTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
        }