]>
Commit | Line | Data |
---|---|---|
cf1eb451 JMF |
1 | from __future__ import unicode_literals |
2 | ||
3 | import base64 | |
4 | import io | |
5 | import itertools | |
6 | import os | |
cf1eb451 JMF |
7 | import time |
8 | import xml.etree.ElementTree as etree | |
9 | ||
10 | from .common import FileDownloader | |
11 | from .http import HttpFD | |
12 | from ..utils import ( | |
b53466e1 PH |
13 | struct_pack, |
14 | struct_unpack, | |
cf1eb451 JMF |
15 | compat_urlparse, |
16 | format_bytes, | |
17 | encodeFilename, | |
18 | sanitize_open, | |
b509a4b1 | 19 | xpath_text, |
cf1eb451 JMF |
20 | ) |
21 | ||
22 | ||
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html

    The reader is an in-memory byte stream; every read_* helper consumes
    bytes from the current position.
    """

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Consume 8 bytes and return them as a big-endian unsigned integer."""
        return struct_unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Consume 4 bytes and return them as a big-endian unsigned integer."""
        return struct_unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Consume 1 byte and return it as an unsigned integer."""
        return struct_unpack('!B', self.read(1))[0]

    def read_string(self):
        """
        Consume a NUL-terminated byte string and return it without the
        terminator.

        If the stream ends before a NUL byte is found, the bytes collected
        so far are returned instead of looping forever.
        """
        chars = []
        while True:
            char = self.read(1)
            # read() yields b'' at end of stream; without this check a
            # truncated (unterminated) string would never leave the loop.
            if not char or char == b'\x00':
                break
            chars.append(char)
        return b''.join(chars)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)

        box_size is the total size announced by the box (header included),
        box_type is the 4-byte type tag and box_data is the payload that
        follows the header.
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            # A 32-bit size of 1 means the real size follows as a 64-bit
            # integer, which makes the header 16 bytes long instead of 8.
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read(real_size - header_end)

    def read_asrt(self):
        """
        Parse the payload of an 'asrt' box.

        Returns a dict whose 'segment_run' entry is a list of
        (first_segment, fragments_per_segment) tuples.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for i in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for i in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """
        Parse the payload of an 'afrt' box.

        Returns a dict whose 'fragments' entry is a list of dicts with the
        keys 'first', 'ts', 'duration' and 'discontinuity_indicator'.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for i in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for i in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The discontinuity byte is only present in the stream when the
            # entry's duration is zero.
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """
        Parse the payload of an 'abst' (bootstrap info) box.

        The leading header fields are consumed and discarded; only the
        nested tables are kept.  Returns a dict with 'segments' (one
        read_asrt() result per nested 'asrt' box) and 'fragments' (one
        read_afrt() result per nested 'afrt' box).
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()

        self.read_string()  # MovieIdentifier
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for i in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        for i in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for i in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            # Each nested box is parsed by its own reader over its payload
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for i in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """Read one box, require it to be 'abst' and return its parsed content."""
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
168 | ||
169 | ||
def read_bootstrap_info(bootstrap_bytes):
    """Parse raw bootstrap bytes and return the dict built by FlvReader.read_bootstrap_info()."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
172 | ||
173 | ||
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    # I've only found videos with one segment, so only the first entry of
    # the first segment run table is used and the segment number is always 1.
    first_run = boot_info['segments'][0]['segment_run'][0]
    fragment_total = first_run[1]
    fragment_runs = boot_info['fragments'][0]['fragments']
    start_number = fragment_runs[0]['first']
    # Fragment numbers are consecutive, starting at the first announced one
    return [
        (1, number)
        for number in range(start_number, start_number + fragment_total)
    ]
186 | ||
187 | ||
def write_flv_header(stream, metadata):
    """
    Writes the FLV header and the metadata to stream

    stream is a writable binary stream and metadata is the raw payload
    (bytes) of the script-data tag written right after the header.
    """
    # FLV header: signature and version 1
    stream.write(b'FLV\x01')
    # Type flags (0x05: audio and video present)
    stream.write(b'\x05')
    # Offset of the file body, i.e. the 9-byte length of this header
    stream.write(b'\x00\x00\x00\x09')
    # FLV File body: PreviousTagSize0 is always 0
    stream.write(b'\x00\x00\x00\x00')
    # FLVTAG
    # Script data
    stream.write(b'\x12')
    # Size of the metadata with 3 bytes
    stream.write(struct_pack('!L', len(metadata))[1:])
    # Timestamp (3 bytes), TimestampExtended (1 byte) and StreamID (3 bytes)
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # PreviousTagSize: the 11-byte tag header plus the payload.  This used to
    # be the constant 0x173 copied from AdobeHDS.php output
    # (https://github.com/K-S-V/Scripts), which is only right for a 360-byte
    # metadata block.
    stream.write(struct_pack('!L', 11 + len(metadata)))
206 | ||
207 | ||
208 | def _add_ns(prop): | |
209 | return '{http://ns.adobe.com/f4m/1.0}%s' % prop | |
210 | ||
211 | ||
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen() discards everything it is given."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and do nothing with them."""
        return None
215 | ||
216 | ||
class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    def real_download(self, filename, info_dict):
        """
        Download the stream described by the f4m manifest at info_dict['url']
        and write it to filename as a single FLV file.

        If info_dict has a 'tbr' value, the media entry with that bitrate is
        used; otherwise the one with the highest bitrate is picked.  Every
        fragment is fetched into its own temporary file and the payload of
        its 'mdat' box is appended to the output.

        Returns True on success and False as soon as one fragment download
        fails.
        """
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        # Fragments are fetched with a silent HTTP downloader; progress is
        # reported by this class through the hook registered below.
        http_dl = HttpQuietDownloader(self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'test': self.params.get('test', False),
            })

        doc = etree.fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        if requested_bitrate is None:
            # get the best format
            formats = sorted(formats, key=lambda f: f[0])
            rate, media = formats[-1]
        else:
            rate, media = list(filter(
                lambda f: int(f[0]) == requested_bitrate, formats))[0]

        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
        if bootstrap_node.text is None:
            # The bootstrap info is not inlined, fetch it from its own url
            bootstrap_url = compat_urlparse.urljoin(
                base_url, bootstrap_node.attrib['url'])
            bootstrap = self.ydl.urlopen(bootstrap_url).read()
        else:
            bootstrap = base64.b64decode(bootstrap_node.text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
        boot_info = read_bootstrap_info(bootstrap)

        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
        # For some akamai manifests we'll need to add a query to the fragment url
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
        write_flv_header(dest_stream, metadata)

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            # The total size is extrapolated by assuming every remaining
            # fragment is as big as the current one.
            frag_total_bytes = status.get('total_bytes', 0)
            estimated_size = (state['downloaded_bytes'] +
                (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                  frag_total_bytes)
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                                 status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
        # The output stream is closed in the finally clause so that it is not
        # left open when a fragment fails to download or an exception escapes
        # the loop.
        try:
            for (seg_i, frag_i) in fragments_list:
                name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                url = base_url + name
                if akamai_pv:
                    url += '?' + akamai_pv.strip(';')
                frag_filename = '%s-%s' % (tmpfilename, name)
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
                with open(frag_filename, 'rb') as down:
                    down_data = down.read()
                    reader = FlvReader(down_data)
                    # Skip boxes until the 'mdat' one, whose payload is
                    # appended to the output file.
                    while True:
                        _, box_type, box_data = reader.read_box_info()
                        if box_type == b'mdat':
                            dest_stream.write(box_data)
                            break
                frags_filenames.append(frag_filename)
        finally:
            dest_stream.close()

        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
        # The per-fragment files are only removed once the whole download
        # succeeded.
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True