]>
Commit | Line | Data |
---|---|---|
95d8f7ea S |
1 | from __future__ import division, unicode_literals |
2 | ||
3 | import os | |
4 | import time | |
ea0c2f21 | 5 | import json |
95d8f7ea S |
6 | |
7 | from .common import FileDownloader | |
8 | from .http import HttpFD | |
9 | from ..utils import ( | |
2e99cd30 | 10 | error_to_compat_str, |
95d8f7ea S |
11 | encodeFilename, |
12 | sanitize_open, | |
69035555 | 13 | sanitized_Request, |
95d8f7ea S |
14 | ) |
15 | ||
16 | ||
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen() output is discarded."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and print nothing."""
20 | ||
21 | ||
class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Available options:

    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
                        and hlsnative only)
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
                        finished

    For each incomplete fragment download youtube-dl keeps on disk a special
    bookkeeping file with download state and metadata (in the future such files
    will be used for any incomplete download handled by youtube-dl). This file
    is used to properly handle resuming, check download file consistency and
    detect potential errors. The file has a .ytdl extension and is a standard
    JSON file of the following format:

    extractor:
        Dictionary of extractor related data. TBD.

    downloader:
        Dictionary of downloader related data. May contain the following data:
            current_fragment:
                Dictionary with current (being downloaded) fragment data:
                index:  0-based index of current fragment among all fragments
            fragment_count:
                Total count of fragments

    This feature is experimental and the file format may change in the future.
    """

    def report_retry_fragment(self, err, frag_index, count, retries):
        """Print a message saying that fragment frag_index is being retried.

        err is the error that triggered the retry, count is the current
        attempt number and retries is the retry limit (rendered through
        self.format_retries).
        """
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))

    def report_skip_fragment(self, frag_index):
        """Print a message saying that fragment frag_index is being skipped."""
        self.to_screen('[download] Skipping fragment %d...' % frag_index)

    def _prepare_url(self, info_dict, url):
        """Return url wrapped in a request carrying info_dict's HTTP headers.

        When info_dict has no (or empty) 'http_headers', the plain url is
        returned unchanged.
        """
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx):
        """Run _prepare_frag_download and then _start_frag_download on ctx."""
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    @staticmethod
    def __do_ytdl_file(ctx):
        """Tell whether a .ytdl bookkeeping file should be used for ctx.

        It is not used for live downloads or when the temporary file name
        is '-'.
        """
        return not ctx['live'] and ctx['tmpfilename'] != '-'

    def _read_ytdl_file(self, ctx):
        """Load the saved fragment index from the .ytdl file into ctx.

        On success ctx['fragment_index'] is set to the stored index. If the
        file cannot be read or parsed, ctx['ytdl_corrupt'] is set to True
        instead of raising, so that the caller can decide how to recover.
        """
        assert 'ytdl_corrupt' not in ctx
        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
        try:
            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
        except Exception:
            # Any failure here (bad JSON, missing keys, read error) is
            # treated as a corrupt bookkeeping file
            ctx['ytdl_corrupt'] = True
        finally:
            stream.close()

    def _write_ytdl_file(self, ctx):
        """Store the current fragment index (and count, if known) in the .ytdl file."""
        # Build the payload before opening the file so that a missing ctx key
        # does not leave behind an opened (and truncated) bookkeeping file
        downloader = {
            'current_fragment': {
                'index': ctx['fragment_index'],
            },
        }
        if ctx.get('fragment_count') is not None:
            downloader['fragment_count'] = ctx['fragment_count']
        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
        try:
            frag_index_stream.write(json.dumps({'downloader': downloader}))
        finally:
            # Close the stream even if the write fails
            frag_index_stream.close()

    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
        """Download a single fragment and return its content.

        The fragment is fetched with ctx['dl'] into a file named
        '<tmpfilename>-Frag<fragment_index>'. Explicit headers take
        precedence over info_dict's 'http_headers'.

        Returns a (success, content) tuple: (False, None) when the download
        failed, otherwise (True, <bytes read from the fragment file>). On
        success the actual fragment file name is stored in
        ctx['fragment_filename_sanitized'].
        """
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        success = ctx['dl'].download(fragment_filename, {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
        })
        if not success:
            return False, None
        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
        ctx['fragment_filename_sanitized'] = frag_sanitized
        try:
            frag_content = down.read()
        finally:
            # Do not leak the file handle if reading fails
            down.close()
        return True, frag_content

    def _append_fragment(self, ctx, frag_content):
        """Append frag_content to the destination stream.

        Regardless of whether the write succeeds, the .ytdl file is updated
        (when in use), the fragment file is removed unless the
        'keep_fragments' option is set, and
        ctx['fragment_filename_sanitized'] is dropped.
        """
        try:
            ctx['dest_stream'].write(frag_content)
            ctx['dest_stream'].flush()
        finally:
            if self.__do_ytdl_file(ctx):
                self._write_ytdl_file(ctx)
            if not self.params.get('keep_fragments', False):
                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
        """Set up ctx for a fragmented download, resuming if possible.

        Reports the total fragment count and destination, creates the quiet
        HTTP downloader used for individual fragments, determines how much
        of the temporary file already exists, reconciles that with the
        .ytdl bookkeeping file and finally opens the destination stream.

        Adds 'live' (if missing), 'tmpfilename', 'fragment_index', 'dl',
        'dest_stream' and 'complete_frags_downloaded_bytes' to ctx.
        """
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                # A saved non-zero fragment index with no data on disk means
                # the bookkeeping file and the partial download disagree
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
                if is_corrupt or is_inconsistent:
                    message = (
                        '.ytdl file is corrupt' if is_corrupt else
                        'Inconsistent state of incomplete fragment download')
                    self.report_warning(
                        '%s. Restarting from the beginning...' % message)
                    ctx['fragment_index'] = resume_len = 0
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
            # Total complete fragments downloaded so far in bytes
            'complete_frags_downloaded_bytes': resume_len,
        })

    def _start_frag_download(self, ctx):
        """Install the per-fragment progress hook and return the start time.

        Stores 'started' and 'prev_frag_downloaded_bytes' in ctx and
        registers a hook on ctx['dl'] that folds the progress of each
        individual fragment into an overall progress state passed to
        self._hook_progress.
        """
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
            'fragment_index': ctx['fragment_index'],
            'fragment_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }

        start = time.time()
        ctx.update({
            'started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })

        def frag_progress_hook(s):
            """Translate a single fragment's progress into overall progress."""
            if s['status'] not in ('downloading', 'finished'):
                return

            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            if not ctx['live']:
                # Extrapolate the total size from the average size of the
                # fragments seen so far
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
                    (state['fragment_index'] + 1) * total_frags)
                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
                # Only add the bytes not yet accounted for by earlier
                # 'downloading' invocations for this fragment
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size,
                        state['downloaded_bytes'])
                # Fall back to the last known speed when the current
                # invocation does not provide one
                state['speed'] = s.get('speed') or ctx.get('speed')
                ctx['speed'] = state['speed']
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        """Finalize the download and report the 'finished' status.

        Closes the destination stream, removes the .ytdl file (when in use
        and present), renames the temporary file to the final file name
        unless the temporary file name is '-', and passes the final
        progress state to self._hook_progress.
        """
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                os.remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':
            # No file on disk to measure, rely on the byte counter
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        else:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
            'total_bytes': downloaded_bytes,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })