]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
release 2013.12.23
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
26e63931 6import collections
c1c9a79c 7import errno
8222d8de 8import io
8694c600 9import json
8222d8de 10import os
dca08720 11import platform
8222d8de
JMF
12import re
13import shutil
dca08720 14import subprocess
8222d8de
JMF
15import socket
16import sys
17import time
18import traceback
19
1e5b9a95
PH
20if os.name == 'nt':
21 import ctypes
22
ce02ed60 23from .utils import (
dca08720 24 compat_cookiejar,
ce02ed60 25 compat_http_client,
ce02ed60
PH
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
02dbf93f 36 format_bytes,
525ef922 37 formatSeconds,
1c088fa8 38 get_term_width,
ce02ed60 39 locked_file,
dca08720 40 make_HTTPS_handler,
ce02ed60
PH
41 MaxDownloadsReached,
42 PostProcessingError,
dca08720 43 platform_name,
ce02ed60
PH
44 preferredencoding,
45 SameFileError,
46 sanitize_filename,
47 subtitles_filename,
48 takewhile_inclusive,
49 UnavailableVideoError,
29eb5174 50 url_basename,
ce02ed60
PH
51 write_json_file,
52 write_string,
dca08720 53 YoutubeDLHandler,
ce02ed60 54)
023fa8c4 55from .extractor import get_info_extractor, gen_extractors
8222d8de 56from .FileDownloader import FileDownloader
dca08720 57from .version import __version__
8222d8de
JMF
58
59
60class YoutubeDL(object):
61 """YoutubeDL class.
62
63 YoutubeDL objects are the ones responsible of downloading the
64 actual video file and writing it to disk if the user has requested
65 it, among some other tasks. In most cases there should be one per
66 program. As, given a video URL, the downloader doesn't know how to
67 extract all the needed information, task that InfoExtractors do, it
68 has to pass the URL to one of them.
69
70 For this, YoutubeDL objects have a method that allows
71 InfoExtractors to be registered in a given order. When it is passed
72 a URL, the YoutubeDL object handles it to the first InfoExtractor it
73 finds that reports being able to handle it. The InfoExtractor extracts
74 all the information about the video or videos the URL refers to, and
75 YoutubeDL process the extracted information, possibly using a File
76 Downloader to download the video.
77
78 YoutubeDL objects accept a lot of parameters. In order not to saturate
79 the object constructor with arguments, it receives a dictionary of
80 options instead. These options are available through the params
81 attribute for the InfoExtractors to use. The YoutubeDL also
82 registers itself as the downloader in charge for the InfoExtractors
83 that are added to it, so this is a "mutual registration".
84
85 Available options:
86
87 username: Username for authentication purposes.
88 password: Password for authentication purposes.
c6c19746 89 videopassword: Password for accessing a video.
8222d8de
JMF
90 usenetrc: Use netrc for authentication instead.
91 verbose: Print additional info to stdout.
92 quiet: Do not print messages to stdout.
93 forceurl: Force printing final URL.
94 forcetitle: Force printing title.
95 forceid: Force printing ID.
96 forcethumbnail: Force printing thumbnail URL.
97 forcedescription: Force printing description.
98 forcefilename: Force printing final filename.
525ef922 99 forceduration: Force printing duration.
8694c600 100 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
101 simulate: Do not download the video files.
102 format: Video format code.
103 format_limit: Highest quality format to try.
104 outtmpl: Template for output names.
105 restrictfilenames: Do not allow "&" and spaces in file names
106 ignoreerrors: Do not stop on download errors.
107 nooverwrites: Prevent overwriting files.
108 playliststart: Playlist item to start at.
109 playlistend: Playlist item to end at.
110 matchtitle: Download only matching titles.
111 rejecttitle: Reject downloads for matching titles.
8bf9319e 112 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
113 logtostderr: Log messages to stderr instead of stdout.
114 writedescription: Write the video description to a .description file
115 writeinfojson: Write the video description to a .info.json file
1fb07d10 116 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
117 writethumbnail: Write the thumbnail image to a file
118 writesubtitles: Write the video subtitles to a file
b004821f 119 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 120 allsubtitles: Downloads all the subtitles of the video
0b7f3118 121 (requires writesubtitles or writeautomaticsub)
8222d8de 122 listsubtitles: Lists all available subtitles for the video
b98a6b2f 123 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 124 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
125 keepvideo: Keep the video file after post-processing
126 daterange: A DateRange object, download only if the upload_date is in the range.
127 skip_download: Skip the actual download of the video file
c35f9e72 128 cachedir: Location of the cache files in the filesystem.
c3c88a26 129 None to disable filesystem cache.
47192f92 130 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
131 age_limit: An integer representing the user's age in years.
132 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
133 min_views: An integer representing the minimum view count the video
134 must have in order to not be skipped.
135 Videos without view count information are always
136 downloaded. None for no limit.
137 max_views: An integer representing the maximum view count.
138 Videos that are more popular than that are not
139 downloaded.
140 Videos without view count information are always
141 downloaded. None for no limit.
142 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
143 Videos already present in the file are not downloaded
144 again.
dca08720 145 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8
PH
146 nocheckcertificate:Do not verify SSL certificates
147 proxy: URL of the proxy server to use
e344693b 148 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
149 bidi_workaround: Work around buggy terminals without bidirectional text
150 support, using fribidi
fe7e0c98 151
8222d8de
JMF
152 The following parameters are not used by YoutubeDL itself, they are used by
153 the FileDownloader:
154 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
155 noresizebuffer, retries, continuedl, noprogress, consoletitle
156 """
157
158 params = None
159 _ies = []
160 _pps = []
161 _download_retcode = None
162 _num_downloads = None
163 _screen_file = None
164
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    None (the default) is treated as an empty dictionary.
    """
    # Normalise the options first: everything below reads them through
    # self.params, so that the default params=None does not crash.
    self.params = {} if params is None else params
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    if self.params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout
    self._err_file = sys.stderr

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            # fribidi writes to the slave end of the pty; the processed
            # lines are read back from the master end.
            self._fribidi = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args,
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            self._fribidi_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround
            if ose.errno == errno.ENOENT:
                self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
214
8222d8de
JMF
215 def add_info_extractor(self, ie):
216 """Add an InfoExtractor object to the end of the list."""
217 self._ies.append(ie)
56c73665 218 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
219 ie.set_downloader(self)
220
56c73665
JMF
221 def get_info_extractor(self, ie_key):
222 """
223 Get an instance of an IE with name ie_key, it will try to get one from
224 the _ies list, if there's no instance it will create a new one and add
225 it to the extractor list.
226 """
227 ie = self._ies_instances.get(ie_key)
228 if ie is None:
229 ie = get_info_extractor(ie_key)()
230 self.add_info_extractor(ie)
231 return ie
232
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor yielded by gen_extractors(), in order.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
239
8222d8de
JMF
240 def add_post_processor(self, pp):
241 """Add a PostProcessor object to the end of the chain."""
242 self._pps.append(pp)
243 pp.set_downloader(self)
244
1c088fa8
PH
245 def _bidi_workaround(self, message):
246 if not hasattr(self, '_fribidi_channel'):
247 return message
248
249 assert type(message) == type(u'')
250 line_count = message.count(u'\n') + 1
251 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
252 self._fribidi.stdin.flush()
253 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
254 for _ in range(line_count))
255 return res[:-len(u'\n')]
256
8222d8de 257 def to_screen(self, message, skip_eol=False):
0783b09b
PH
258 """Print message to stdout if not in quiet mode."""
259 return self.to_stdout(message, skip_eol, check_quiet=True)
260
261 def to_stdout(self, message, skip_eol=False, check_quiet=False):
8222d8de 262 """Print message to stdout if not in quiet mode."""
8bf9319e 263 if self.params.get('logger'):
43afe285 264 self.params['logger'].debug(message)
0783b09b 265 elif not check_quiet or not self.params.get('quiet', False):
1c088fa8 266 message = self._bidi_workaround(message)
8222d8de
JMF
267 terminator = [u'\n', u''][skip_eol]
268 output = message + terminator
1c088fa8 269
7459e3a2 270 write_string(output, self._screen_file)
8222d8de
JMF
271
272 def to_stderr(self, message):
273 """Print message to stderr."""
274 assert type(message) == type(u'')
8bf9319e 275 if self.params.get('logger'):
43afe285
IB
276 self.params['logger'].error(message)
277 else:
1c088fa8 278 message = self._bidi_workaround(message)
43afe285 279 output = message + u'\n'
0783b09b 280 write_string(output, self._err_file)
8222d8de 281
1e5b9a95
PH
282 def to_console_title(self, message):
283 if not self.params.get('consoletitle', False):
284 return
285 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
286 # c_wchar_p() might not be necessary if `message` is
287 # already of type unicode()
288 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
289 elif 'TERM' in os.environ:
749febf4 290 write_string(u'\033]0;%s\007' % message, self._screen_file)
1e5b9a95 291
bdde425c
PH
292 def save_console_title(self):
293 if not self.params.get('consoletitle', False):
294 return
295 if 'TERM' in os.environ:
efd6c574
JMF
296 # Save the title on stack
297 write_string(u'\033[22;0t', self._screen_file)
bdde425c
PH
298
299 def restore_console_title(self):
300 if not self.params.get('consoletitle', False):
301 return
302 if 'TERM' in os.environ:
efd6c574
JMF
303 # Restore the title from stack
304 write_string(u'\033[23;0t', self._screen_file)
bdde425c
PH
305
306 def __enter__(self):
307 self.save_console_title()
308 return self
309
310 def __exit__(self, *args):
311 self.restore_console_title()
dca08720
PH
312
313 if self.params.get('cookiefile') is not None:
314 self.cookiejar.save()
bdde425c 315
8222d8de
JMF
316 def trouble(self, message=None, tb=None):
317 """Determine action to take when a download problem appears.
318
319 Depending on if the downloader has been configured to ignore
320 download errors or not, this method may throw an exception or
321 not when errors are found, after printing the message.
322
323 tb, if given, is additional traceback information.
324 """
325 if message is not None:
326 self.to_stderr(message)
327 if self.params.get('verbose'):
328 if tb is None:
329 if sys.exc_info()[0]: # if .trouble has been called from an except block
330 tb = u''
331 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
332 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
333 tb += compat_str(traceback.format_exc())
334 else:
335 tb_data = traceback.format_list(traceback.extract_stack())
336 tb = u''.join(tb_data)
337 self.to_stderr(tb)
338 if not self.params.get('ignoreerrors', False):
339 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
340 exc_info = sys.exc_info()[1].exc_info
341 else:
342 exc_info = sys.exc_info()
343 raise DownloadError(message, exc_info)
344 self._download_retcode = 1
345
346 def report_warning(self, message):
347 '''
348 Print the message to stderr, it will be prefixed with 'WARNING:'
349 If stderr is a tty file the 'WARNING:' will be colored
350 '''
0783b09b 351 if self._err_file.isatty() and os.name != 'nt':
fe7e0c98 352 _msg_header = u'\033[0;33mWARNING:\033[0m'
8222d8de 353 else:
fe7e0c98
JMF
354 _msg_header = u'WARNING:'
355 warning_message = u'%s %s' % (_msg_header, message)
8222d8de
JMF
356 self.to_stderr(warning_message)
357
358 def report_error(self, message, tb=None):
359 '''
360 Do the same as trouble, but prefixes the message with 'ERROR:', colored
361 in red if stderr is a tty file.
362 '''
0783b09b 363 if self._err_file.isatty() and os.name != 'nt':
8222d8de
JMF
364 _msg_header = u'\033[0;31mERROR:\033[0m'
365 else:
366 _msg_header = u'ERROR:'
367 error_message = u'%s %s' % (_msg_header, message)
368 self.trouble(error_message, tb)
369
8222d8de
JMF
370 def report_file_already_downloaded(self, file_name):
371 """Report file has already been fully downloaded."""
372 try:
373 self.to_screen(u'[download] %s has already been downloaded' % file_name)
ce02ed60 374 except UnicodeEncodeError:
8222d8de
JMF
375 self.to_screen(u'[download] The file has already been downloaded')
376
377 def increment_downloads(self):
378 """Increment the ordinal that assigns a number to each file."""
379 self._num_downloads += 1
380
def prepare_filename(self, info_dict):
    """Build the output filename by filling the 'outtmpl' option with the
    sanitized fields of info_dict.

    Fields that are missing from the template dictionary expand to u'NA'.
    Returns None, after reporting an error, if expanding the template
    raises ValueError.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = (u'%0' + str(digits) + u'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')
        # Fields whose value is None are left out, so they fall back to u'NA'
        cleaned = collections.defaultdict(lambda: u'NA')
        for key, value in fields.items():
            if value is None:
                continue
            cleaned[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))

        template = os.path.expanduser(self.params['outtmpl'])
        return template % cleaned
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
410
411 def _match_entry(self, info_dict):
412 """ Returns None iff the file should be downloaded """
413
5fe18bdb 414 video_title = info_dict.get('title', info_dict.get('id', u'video'))
7012b23c
PH
415 if 'title' in info_dict:
416 # This can happen when we're just evaluating the playlist
417 title = info_dict['title']
418 matchtitle = self.params.get('matchtitle', False)
419 if matchtitle:
420 if not re.search(matchtitle, title, re.IGNORECASE):
5fe18bdb 421 return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
422 rejecttitle = self.params.get('rejecttitle', False)
423 if rejecttitle:
424 if re.search(rejecttitle, title, re.IGNORECASE):
425 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
426 date = info_dict.get('upload_date', None)
427 if date is not None:
428 dateRange = self.params.get('daterange', DateRange())
429 if date not in dateRange:
5fe18bdb
PH
430 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
431 view_count = info_dict.get('view_count', None)
432 if view_count is not None:
433 min_views = self.params.get('min_views')
434 if min_views is not None and view_count < min_views:
435 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
436 max_views = self.params.get('max_views')
437 if max_views is not None and view_count > max_views:
438 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
8dbe9899
PH
439 age_limit = self.params.get('age_limit')
440 if age_limit is not None:
cfadd183 441 if age_limit < info_dict.get('age_limit', 0):
8dbe9899 442 return u'Skipping "' + title + '" because it is age restricted'
c1c9a79c 443 if self.in_download_archive(info_dict):
5fe18bdb 444 return u'%s has already been recorded in archive' % video_title
8222d8de 445 return None
fe7e0c98 446
b6c45014
JMF
447 @staticmethod
448 def add_extra_info(info_dict, extra_info):
449 '''Set the keys from extra_info in info dict if they are missing'''
450 for key, value in extra_info.items():
451 info_dict.setdefault(key, value)
452
7fc3fa05
PH
453 def extract_info(self, url, download=True, ie_key=None, extra_info={},
454 process=True):
8222d8de
JMF
455 '''
456 Returns a list with a dictionary for each video we find.
457 If 'download', also downloads the videos.
458 extra_info is a dict containing the extra values to add to each result
459 '''
fe7e0c98 460
8222d8de 461 if ie_key:
56c73665 462 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
463 else:
464 ies = self._ies
465
466 for ie in ies:
467 if not ie.suitable(url):
468 continue
469
470 if not ie.working():
471 self.report_warning(u'The program functionality for this site has been marked as broken, '
472 u'and will probably not work.')
473
474 try:
475 ie_result = ie.extract(url)
476 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
477 break
478 if isinstance(ie_result, list):
479 # Backwards compatibility: old IE result format
8222d8de
JMF
480 ie_result = {
481 '_type': 'compat_list',
482 'entries': ie_result,
483 }
9103bbc5
JMF
484 self.add_extra_info(ie_result,
485 {
486 'extractor': ie.IE_NAME,
be97abc2 487 'webpage_url': url,
29eb5174 488 'webpage_url_basename': url_basename(url),
be97abc2 489 'extractor_key': ie.ie_key(),
9103bbc5 490 })
7fc3fa05
PH
491 if process:
492 return self.process_ie_result(ie_result, download, extra_info)
493 else:
494 return ie_result
8222d8de
JMF
495 except ExtractorError as de: # An error we somewhat expected
496 self.report_error(compat_str(de), de.format_traceback())
497 break
498 except Exception as e:
499 if self.params.get('ignoreerrors', False):
500 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
501 break
502 else:
503 raise
504 else:
505 self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 506
8222d8de
JMF
507 def process_ie_result(self, ie_result, download=True, extra_info={}):
508 """
509 Take the result of the ie(may be modified) and resolve all unresolved
510 references (URLs, playlist items).
511
512 It will also download the videos if 'download'.
513 Returns the resolved ie_result.
514 """
515
516 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
517 if result_type == 'video':
b6c45014 518 self.add_extra_info(ie_result, extra_info)
feee2ecf 519 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
520 elif result_type == 'url':
521 # We have to add extra_info to the results because it may be
522 # contained in a playlist
523 return self.extract_info(ie_result['url'],
524 download,
525 ie_key=ie_result.get('ie_key'),
526 extra_info=extra_info)
7fc3fa05
PH
527 elif result_type == 'url_transparent':
528 # Use the information from the embedding page
529 info = self.extract_info(
530 ie_result['url'], ie_key=ie_result.get('ie_key'),
531 extra_info=extra_info, download=False, process=False)
532
533 def make_result(embedded_info):
534 new_result = ie_result.copy()
535 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
536 'entries', 'urlhandle', 'ie_key', 'duration',
ef4fd848
PH
537 'subtitles', 'annotations', 'format',
538 'thumbnail', 'thumbnails'):
7fc3fa05
PH
539 if f in new_result:
540 del new_result[f]
541 if f in embedded_info:
542 new_result[f] = embedded_info[f]
543 return new_result
544 new_result = make_result(info)
545
546 assert new_result.get('_type') != 'url_transparent'
547 if new_result.get('_type') == 'compat_list':
548 new_result['entries'] = [
549 make_result(e) for e in new_result['entries']]
550
551 return self.process_ie_result(
552 new_result, download=download, extra_info=extra_info)
8222d8de
JMF
553 elif result_type == 'playlist':
554 # We process each entry in the playlist
555 playlist = ie_result.get('title', None) or ie_result.get('id', None)
fe7e0c98 556 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
557
558 playlist_results = []
559
560 n_all_entries = len(ie_result['entries'])
561 playliststart = self.params.get('playliststart', 1) - 1
a19fd00c
PH
562 playlistend = self.params.get('playlistend', None)
563 # For backwards compatibility, interpret -1 as whole list
8222d8de 564 if playlistend == -1:
a19fd00c 565 playlistend = None
8222d8de 566
a19fd00c 567 entries = ie_result['entries'][playliststart:playlistend]
8222d8de
JMF
568 n_entries = len(entries)
569
a19fd00c
PH
570 self.to_screen(
571 u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
8222d8de
JMF
572 (ie_result['extractor'], playlist, n_all_entries, n_entries))
573
fe7e0c98
JMF
574 for i, entry in enumerate(entries, 1):
575 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
8222d8de 576 extra = {
fe7e0c98
JMF
577 'playlist': playlist,
578 'playlist_index': i + playliststart,
b6c45014 579 'extractor': ie_result['extractor'],
9103bbc5 580 'webpage_url': ie_result['webpage_url'],
29eb5174 581 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 582 'extractor_key': ie_result['extractor_key'],
fe7e0c98 583 }
7012b23c
PH
584
585 reason = self._match_entry(entry)
586 if reason is not None:
587 self.to_screen(u'[download] ' + reason)
588 continue
589
8222d8de
JMF
590 entry_result = self.process_ie_result(entry,
591 download=download,
592 extra_info=extra)
593 playlist_results.append(entry_result)
594 ie_result['entries'] = playlist_results
595 return ie_result
596 elif result_type == 'compat_list':
597 def _fixup(r):
b6c45014 598 self.add_extra_info(r,
9103bbc5
JMF
599 {
600 'extractor': ie_result['extractor'],
601 'webpage_url': ie_result['webpage_url'],
29eb5174 602 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 603 'extractor_key': ie_result['extractor_key'],
9103bbc5 604 })
8222d8de
JMF
605 return r
606 ie_result['entries'] = [
b6c45014 607 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
608 for r in ie_result['entries']
609 ]
610 return ie_result
611 else:
612 raise Exception('Invalid result type: %s' % result_type)
613
a9c58ad9
JMF
614 def select_format(self, format_spec, available_formats):
615 if format_spec == 'best' or format_spec is None:
616 return available_formats[-1]
617 elif format_spec == 'worst':
618 return available_formats[0]
619 else:
49e86983
JMF
620 extensions = [u'mp4', u'flv', u'webm', u'3gp']
621 if format_spec in extensions:
622 filter_f = lambda f: f['ext'] == format_spec
623 else:
624 filter_f = lambda f: f['format_id'] == format_spec
fe7e0c98 625 matches = list(filter(filter_f, available_formats))
a9c58ad9
JMF
626 if matches:
627 return matches[-1]
628 return None
629
dd82ffea
JMF
630 def process_video_result(self, info_dict, download=True):
631 assert info_dict.get('_type', 'video') == 'video'
632
633 if 'playlist' not in info_dict:
634 # It isn't part of a playlist
635 info_dict['playlist'] = None
636 info_dict['playlist_index'] = None
637
6ff000b8 638 # This extractors handle format selection themselves
a7685f3b 639 if info_dict['extractor'] in [u'youtube', u'Youku']:
12893efe
JMF
640 if download:
641 self.process_info(info_dict)
6ff000b8
JMF
642 return info_dict
643
dd82ffea
JMF
644 # We now pick which formats have to be downloaded
645 if info_dict.get('formats') is None:
646 # There's only one format available
647 formats = [info_dict]
648 else:
649 formats = info_dict['formats']
650
651 # We check that all the formats have the format and format_id fields
652 for (i, format) in enumerate(formats):
dd82ffea 653 if format.get('format_id') is None:
8016c922 654 format['format_id'] = compat_str(i)
8c51aa65
JMF
655 if format.get('format') is None:
656 format['format'] = u'{id} - {res}{note}'.format(
657 id=format['format_id'],
658 res=self.format_resolution(format),
71934988 659 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 660 )
c1002e96
PH
661 # Automatically determine file extension if missing
662 if 'ext' not in format:
663 format['ext'] = determine_ext(format['url'])
dd82ffea
JMF
664
665 if self.params.get('listformats', None):
666 self.list_formats(info_dict)
667 return
668
99e206d5
JMF
669 format_limit = self.params.get('format_limit', None)
670 if format_limit:
f4d96df0
PH
671 formats = list(takewhile_inclusive(
672 lambda f: f['format_id'] != format_limit, formats
673 ))
e028d0d1
JMF
674 if self.params.get('prefer_free_formats'):
675 def _free_formats_key(f):
676 try:
677 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
678 except ValueError:
679 ext_ord = -1
680 # We only compare the extension if they have the same height and width
681 return (f.get('height'), f.get('width'), ext_ord)
682 formats = sorted(formats, key=_free_formats_key)
99e206d5 683
dd82ffea 684 req_format = self.params.get('format', 'best')
a9c58ad9
JMF
685 if req_format is None:
686 req_format = 'best'
dd82ffea 687 formats_to_download = []
dd82ffea 688 # The -1 is for supporting YoutubeIE
a9c58ad9 689 if req_format in ('-1', 'all'):
dd82ffea
JMF
690 formats_to_download = formats
691 else:
a9c58ad9 692 # We can accept formats requestd in the format: 34/5/best, we pick
416a5efc 693 # the first that is available, starting from left
dd82ffea
JMF
694 req_formats = req_format.split('/')
695 for rf in req_formats:
a9c58ad9
JMF
696 selected_format = self.select_format(rf, formats)
697 if selected_format is not None:
698 formats_to_download = [selected_format]
dd82ffea
JMF
699 break
700 if not formats_to_download:
78a3a9f8
PH
701 raise ExtractorError(u'requested format not available',
702 expected=True)
dd82ffea
JMF
703
704 if download:
705 if len(formats_to_download) > 1:
706 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
707 for format in formats_to_download:
708 new_info = dict(info_dict)
709 new_info.update(format)
710 self.process_info(new_info)
711 # We update the info dict with the best quality format (backwards compatibility)
712 info_dict.update(formats_to_download[-1])
713 return info_dict
714
8222d8de
JMF
715 def process_info(self, info_dict):
716 """Process a single resolved IE result."""
717
718 assert info_dict.get('_type', 'video') == 'video'
719 #We increment the download the download count here to match the previous behaviour.
720 self.increment_downloads()
721
722 info_dict['fulltitle'] = info_dict['title']
723 if len(info_dict['title']) > 200:
724 info_dict['title'] = info_dict['title'][:197] + u'...'
725
726 # Keep for backwards compatibility
727 info_dict['stitle'] = info_dict['title']
728
729 if not 'format' in info_dict:
730 info_dict['format'] = info_dict['ext']
731
732 reason = self._match_entry(info_dict)
733 if reason is not None:
734 self.to_screen(u'[download] ' + reason)
735 return
736
737 max_downloads = self.params.get('max_downloads')
738 if max_downloads is not None:
739 if self._num_downloads > int(max_downloads):
740 raise MaxDownloadsReached()
741
742 filename = self.prepare_filename(info_dict)
743
744 # Forced printings
745 if self.params.get('forcetitle', False):
0783b09b 746 self.to_stdout(info_dict['fulltitle'])
8222d8de 747 if self.params.get('forceid', False):
0783b09b 748 self.to_stdout(info_dict['id'])
8222d8de 749 if self.params.get('forceurl', False):
edde6c56 750 # For RTMP URLs, also include the playpath
0783b09b 751 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
216d71d0 752 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 753 self.to_stdout(info_dict['thumbnail'])
216d71d0 754 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 755 self.to_stdout(info_dict['description'])
8222d8de 756 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 757 self.to_stdout(filename)
525ef922
PH
758 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
759 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 760 if self.params.get('forceformat', False):
0783b09b 761 self.to_stdout(info_dict['format'])
9d153818 762 if self.params.get('forcejson', False):
a0d96c98 763 info_dict['_filename'] = filename
0783b09b 764 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
765
766 # Do nothing else if in simulate mode
767 if self.params.get('simulate', False):
768 return
769
770 if filename is None:
771 return
772
773 try:
774 dn = os.path.dirname(encodeFilename(filename))
775 if dn != '' and not os.path.exists(dn):
776 os.makedirs(dn)
777 except (OSError, IOError) as err:
778 self.report_error(u'unable to create directory ' + compat_str(err))
779 return
780
781 if self.params.get('writedescription', False):
7b6fefc9
PH
782 descfn = filename + u'.description'
783 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
784 self.to_screen(u'[info] Video description is already present')
785 else:
786 try:
787 self.to_screen(u'[info] Writing video description to: ' + descfn)
788 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
789 descfile.write(info_dict['description'])
790 except (KeyError, TypeError):
791 self.report_warning(u'There\'s no description to write.')
792 except (OSError, IOError):
793 self.report_error(u'Cannot write description file ' + descfn)
794 return
8222d8de 795
1fb07d10 796 if self.params.get('writeannotations', False):
7b6fefc9
PH
797 annofn = filename + u'.annotations.xml'
798 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
799 self.to_screen(u'[info] Video annotations are already present')
800 else:
801 try:
802 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
803 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
804 annofile.write(info_dict['annotations'])
805 except (KeyError, TypeError):
806 self.report_warning(u'There are no annotations to write.')
807 except (OSError, IOError):
808 self.report_error(u'Cannot write annotations file: ' + annofn)
809 return
1fb07d10 810
c4a91be7 811 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 812 self.params.get('writeautomaticsub')])
c4a91be7 813
fe7e0c98 814 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
8222d8de
JMF
815 # subtitles download errors are already managed as troubles in relevant IE
816 # that way it will silently go on when used with unsupporting IE
8222d8de 817 subtitles = info_dict['subtitles']
ca715127 818 sub_format = self.params.get('subtitlesformat', 'srt')
5d51a883
JMF
819 for sub_lang in subtitles.keys():
820 sub = subtitles[sub_lang]
6804038d
JMF
821 if sub is None:
822 continue
8222d8de 823 try:
d4051a8e 824 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9
PH
825 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
826 self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
827 else:
828 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
829 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
830 subfile.write(sub)
8222d8de
JMF
831 except (OSError, IOError):
832 self.report_error(u'Cannot write subtitles file ' + descfn)
833 return
834
8222d8de 835 if self.params.get('writeinfojson', False):
9771cceb 836 infofn = os.path.splitext(filename)[0] + u'.info.json'
7b6fefc9
PH
837 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
838 self.to_screen(u'[info] Video description metadata is already present')
839 else:
840 self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
841 try:
842 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
843 write_json_file(json_info_dict, encodeFilename(infofn))
844 except (OSError, IOError):
845 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
846 return
8222d8de
JMF
847
848 if self.params.get('writethumbnail', False):
d8269e1d 849 if info_dict.get('thumbnail') is not None:
cbdbb766 850 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
357ddadb 851 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
0a9ce268 852 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
7b6fefc9
PH
853 self.to_screen(u'[%s] %s: Thumbnail is already present' %
854 (info_dict['extractor'], info_dict['id']))
855 else:
856 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
857 (info_dict['extractor'], info_dict['id']))
858 try:
859 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
860 with open(thumb_filename, 'wb') as thumbf:
861 shutil.copyfileobj(uf, thumbf)
862 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
863 (info_dict['extractor'], info_dict['id'], thumb_filename))
864 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
865 self.report_warning(u'Unable to download thumbnail "%s": %s' %
866 (info_dict['thumbnail'], compat_str(err)))
8222d8de
JMF
867
868 if not self.params.get('skip_download', False):
869 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
870 success = True
871 else:
872 try:
873 success = self.fd._do_download(filename, info_dict)
8222d8de
JMF
874 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
875 self.report_error(u'unable to download video data: %s' % str(err))
876 return
c40c6aaa
JMF
877 except (OSError, IOError) as err:
878 raise UnavailableVideoError(err)
8222d8de
JMF
879 except (ContentTooShortError, ) as err:
880 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
881 return
882
883 if success:
884 try:
885 self.post_process(filename, info_dict)
886 except (PostProcessingError) as err:
887 self.report_error(u'postprocessing: %s' % str(err))
888 return
889
c1c9a79c
PH
890 self.record_download_archive(info_dict)
891
8222d8de
JMF
def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    Raises SameFileError when more than one URL is given, the 'outtmpl'
    option contains no '%' field and 'max_downloads' is not 1 (all the
    videos would be written to the same file name).
    MaxDownloadsReached is announced on screen and then re-raised;
    UnavailableVideoError is reported and the loop moves to the next URL.
    """
    several_urls = len(url_list) > 1
    # 'outtmpl' is only looked up when there really are several URLs
    if several_urls and '%' not in self.params['outtmpl']:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(self.params['outtmpl'])

    for video_url in url_list:
        try:
            # extract_info() also downloads the videos
            self.extract_info(video_url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
910
def download_with_info_file(self, info_filename):
    """Download the video described by a previously written info JSON file.

    The file is read as UTF-8 JSON and handed to process_ie_result() with
    download=True.  If that raises DownloadError and the info carries a
    'webpage_url', a warning is printed and the download is retried from
    that URL; without a 'webpage_url' the error is re-raised.
    Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.loads(info_file.read())

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            # Nothing to fall back to
            raise
        self.report_warning(u'The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 924
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    A copy of ie_info, extended with 'filepath' = filename, is passed to
    the first postprocessor in self._pps.  Each postprocessor's run()
    returns a (keep_video_wish, new_info) pair; the returned info is
    passed on to the next postprocessor so that changes made by one step
    (for example a new 'filepath') are visible to the following ones even
    when the step returns a fresh dict instead of mutating its argument.

    A PostProcessingError raised by a postprocessor is reported and the
    remaining postprocessors still run.  When the combined wish is False
    and the 'keepvideo' option is not set, the originally downloaded
    file (filename) is removed.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue

        # Chain the result: the next postprocessor works on what this one
        # produced (previously the returned dict was silently dropped).
        if new_info is not None:
            info = new_info

        if keep_video_wish is None:
            continue
        if keep_video_wish:
            # Any explicit wish to keep the file wins
            keep_video = keep_video_wish
        elif keep_video is None:
            # No clear decision yet, let IE decide
            keep_video = keep_video_wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c 947
5db07df6
PH
948 def _make_archive_id(self, info_dict):
949 # Future-proof against any change in case
950 # and backwards compatibility with prior versions
d31209a1 951 extractor = info_dict.get('extractor_key')
7012b23c
PH
952 if extractor is None:
953 if 'id' in info_dict:
954 extractor = info_dict.get('ie_key') # key in a playlist
955 if extractor is None:
5db07df6
PH
956 return None # Incomplete video information
957 return extractor.lower() + u' ' + info_dict['id']
958
def in_download_archive(self, info_dict):
    """Tell whether the video in info_dict is listed in the download archive.

    Returns False when no 'download_archive' option is set, when no
    archive id can be built for info_dict, or when the archive file does
    not exist yet.  Any other IOError while reading is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            # One id per line; surrounding whitespace is ignored
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
977
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' option is not set.  The id
    comes from _make_archive_id() and is asserted to be non-empty before
    it is written, followed by a newline, in append mode.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + u'\n')
dd82ffea 986
8c51aa65 987 @staticmethod
8abeeb94 988 def format_resolution(format, default='unknown'):
fb04e403
PH
989 if format.get('vcodec') == 'none':
990 return 'audio only'
57dd9a8f
PH
991 if format.get('_resolution') is not None:
992 return format['_resolution']
8c51aa65
JMF
993 if format.get('height') is not None:
994 if format.get('width') is not None:
995 res = u'%sx%s' % (format['width'], format['height'])
996 else:
997 res = u'%sp' % format['height']
998 else:
8abeeb94 999 res = default
8c51aa65
JMF
1000 return res
1001
def list_formats(self, info_dict):
    """Print a table of the formats available for a video via to_screen().

    The formats come from info_dict['formats']; when that key is absent
    info_dict itself is treated as the only format.  Each row shows the
    format id, extension, resolution (from self.format_resolution) and a
    free-text note.  With more than one format the first row is tagged
    '(worst)' and the last '(best)'.  An empty formats list prints the
    header alone instead of failing.
    """
    def format_note(fdict):
        # Note column: explicit note, video codec/bitrate, audio
        # codec/bitrate and file size, comma separated.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One table row; the id column is padded to idlen + 1 characters
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1, format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # The id column is at least as wide as its header.  Folding the header
    # width into a single max() keeps an empty formats list from raising
    # ValueError.
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Separate the tag from the note only when there is a note
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    # The header is rendered through line() as a pseudo-format so that it
    # lines up with the rows.
    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
dca08720
PH
1051
def urlopen(self, req):
    """Open req through the opener stored in self._opener and return its result."""
    opener = self._opener
    return opener.open(req)
1055
def print_debug_header(self):
    """Write debugging information about the environment.

    Does nothing unless the 'verbose' option is set.  Otherwise writes,
    through write_string(): the youtube-dl version, the short git HEAD of
    the directory containing this file (when `git rev-parse` yields a
    hexadecimal id), the Python version and platform name, and the proxy
    map collected from every handler of self._opener that has a 'proxies'
    attribute.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git lookup is best effort (git missing, not a checkout, ...).
        # Only ordinary errors are swallowed: KeyboardInterrupt and
        # SystemExit must still propagate.
        try:
            # sys.exc_clear() only exists on Python 2
            sys.exc_clear()
        except Exception:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1082
def _setup_opener(self):
    """Build the URL opener used for requests and install it globally.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' options.  Sets self.cookiejar and self._opener,
    installs the opener through compat_urllib_request.install_opener()
    and sets the default socket timeout (600 when no timeout is given).
    """
    raw_timeout = self.params.get('socket_timeout')
    if raw_timeout is None:
        timeout = 600
    else:
        timeout = float(raw_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        # No explicit option: use the proxies reported by getproxies()
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty string yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)