]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[udemy] Initial support for free courses (#1617)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
8694c600 10import json
8222d8de 11import os
dca08720 12import platform
8222d8de
JMF
13import re
14import shutil
dca08720 15import subprocess
8222d8de
JMF
16import socket
17import sys
18import time
19import traceback
20
1e5b9a95
PH
21if os.name == 'nt':
22 import ctypes
23
ce02ed60 24from .utils import (
dca08720 25 compat_cookiejar,
ce02ed60 26 compat_http_client,
ce02ed60
PH
27 compat_str,
28 compat_urllib_error,
29 compat_urllib_request,
30 ContentTooShortError,
31 date_from_str,
32 DateRange,
33 determine_ext,
34 DownloadError,
35 encodeFilename,
36 ExtractorError,
02dbf93f 37 format_bytes,
525ef922 38 formatSeconds,
1c088fa8 39 get_term_width,
ce02ed60 40 locked_file,
dca08720 41 make_HTTPS_handler,
ce02ed60 42 MaxDownloadsReached,
b7ab0590 43 PagedList,
ce02ed60 44 PostProcessingError,
dca08720 45 platform_name,
ce02ed60
PH
46 preferredencoding,
47 SameFileError,
48 sanitize_filename,
49 subtitles_filename,
50 takewhile_inclusive,
51 UnavailableVideoError,
29eb5174 52 url_basename,
ce02ed60
PH
53 write_json_file,
54 write_string,
dca08720 55 YoutubeDLHandler,
6350728b 56 prepend_extension,
ce02ed60 57)
023fa8c4 58from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 59from .downloader import get_suitable_downloader
56327689 60from .postprocessor import FFmpegMergerPP
dca08720 61from .version import __version__
8222d8de
JMF
62
63
64class YoutubeDL(object):
65 """YoutubeDL class.
66
67 YoutubeDL objects are the ones responsible for downloading the
68 actual video file and writing it to disk if the user has requested
69 it, among some other tasks. In most cases there should be one per
70 program. As, given a video URL, the downloader doesn't know how to
71 extract all the needed information, task that InfoExtractors do, it
72 has to pass the URL to one of them.
73
74 For this, YoutubeDL objects have a method that allows
75 InfoExtractors to be registered in a given order. When it is passed
76 a URL, the YoutubeDL object hands it to the first InfoExtractor it
77 finds that reports being able to handle it. The InfoExtractor extracts
78 all the information about the video or videos the URL refers to, and
79 YoutubeDL processes the extracted information, possibly using a File
80 Downloader to download the video.
81
82 YoutubeDL objects accept a lot of parameters. In order not to saturate
83 the object constructor with arguments, it receives a dictionary of
84 options instead. These options are available through the params
85 attribute for the InfoExtractors to use. The YoutubeDL also
86 registers itself as the downloader in charge for the InfoExtractors
87 that are added to it, so this is a "mutual registration".
88
89 Available options:
90
91 username: Username for authentication purposes.
92 password: Password for authentication purposes.
c6c19746 93 videopassword: Password for accessing a video.
8222d8de
JMF
94 usenetrc: Use netrc for authentication instead.
95 verbose: Print additional info to stdout.
96 quiet: Do not print messages to stdout.
97 forceurl: Force printing final URL.
98 forcetitle: Force printing title.
99 forceid: Force printing ID.
100 forcethumbnail: Force printing thumbnail URL.
101 forcedescription: Force printing description.
102 forcefilename: Force printing final filename.
525ef922 103 forceduration: Force printing duration.
8694c600 104 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
105 simulate: Do not download the video files.
106 format: Video format code.
107 format_limit: Highest quality format to try.
108 outtmpl: Template for output names.
109 restrictfilenames: Do not allow "&" and spaces in file names
110 ignoreerrors: Do not stop on download errors.
111 nooverwrites: Prevent overwriting files.
112 playliststart: Playlist item to start at.
113 playlistend: Playlist item to end at.
114 matchtitle: Download only matching titles.
115 rejecttitle: Reject downloads for matching titles.
8bf9319e 116 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
117 logtostderr: Log messages to stderr instead of stdout.
118 writedescription: Write the video description to a .description file
119 writeinfojson: Write the video description to a .info.json file
1fb07d10 120 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
121 writethumbnail: Write the thumbnail image to a file
122 writesubtitles: Write the video subtitles to a file
b004821f 123 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 124 allsubtitles: Downloads all the subtitles of the video
0b7f3118 125 (requires writesubtitles or writeautomaticsub)
8222d8de 126 listsubtitles: Lists all available subtitles for the video
b98a6b2f 127 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 128 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
129 keepvideo: Keep the video file after post-processing
130 daterange: A DateRange object, download only if the upload_date is in the range.
131 skip_download: Skip the actual download of the video file
c35f9e72 132 cachedir: Location of the cache files in the filesystem.
c3c88a26 133 None to disable filesystem cache.
47192f92 134 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
135 age_limit: An integer representing the user's age in years.
136 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
137 min_views: An integer representing the minimum view count the video
138 must have in order to not be skipped.
139 Videos without view count information are always
140 downloaded. None for no limit.
141 max_views: An integer representing the maximum view count.
142 Videos that are more popular than that are not
143 downloaded.
144 Videos without view count information are always
145 downloaded. None for no limit.
146 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
147 Videos already present in the file are not downloaded
148 again.
dca08720 149 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8
PH
150 nocheckcertificate:Do not verify SSL certificates
151 proxy: URL of the proxy server to use
e344693b 152 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
153 bidi_workaround: Work around buggy terminals without bidirectional text
154 support, using fribidi
a0ddb8a2 155 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 156 include_ads: Download ads as well
04b4d394
PH
157 default_search: Prepend this string if an input url is not valid.
158 'auto' for elaborate guessing
fe7e0c98 159
8222d8de
JMF
160 The following parameters are not used by YoutubeDL itself, they are used by
161 the FileDownloader:
162 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
163 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
164
165 The following options are used by the post processors:
166 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
167 otherwise prefer avconv.
8222d8de
JMF
168 """
169
170 params = None
171 _ies = []
172 _pps = []
173 _download_retcode = None
174 _num_downloads = None
175 _screen_file = None
176
def __init__(self, params=None):
    """Create a FileDownloader object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.  The dictionary is stored as
    self.params (not copied) and may be modified: 'restrictfilenames' is
    forced to True when the file system encoding is ASCII on Python 3.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Any truthy 'logtostderr' redirects screen output to stderr; a missing
    # or None value keeps stdout (indexing a list with None would raise).
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = params

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv; fall back to fribidi if it cannot be started.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # ENOENT: neither helper executable could be found.
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    # .get() rather than [] so that a params dict without the
    # 'restrictfilenames' key (e.g. params=None) does not raise KeyError.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
232
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and make this object its downloader."""
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
238
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If none is registered yet, a new instance is created from the class
    returned by the module-level get_info_extractor(), registered through
    add_info_extractor() and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
250
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor yielded by gen_extractors(), in the order
    they are produced.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
257
8222d8de
JMF
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and make this object its
    downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
262
933605d7
JMF
def add_progress_hook(self, ph):
    """Remember ph as a progress hook (currently only used by the file
    downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 266
def _bidi_workaround(self, message):
    """Pass message through the external bidi helper process.

    When no helper was set up (no _output_channel attribute) the message
    is returned unchanged.  Otherwise the message plus a newline is
    written UTF-8 encoded to the helper's stdin, one output line is read
    back per input line, and the result is returned without its final
    newline.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert type(message) == type('')
    expected_lines = message.count('\n') + 1

    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    received = []
    for _ in range(expected_lines):
        received.append(self._output_channel.readline().decode('utf-8'))
    return ''.join(received)[:-len('\n')]
1c088fa8 279
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout() with the quiet check enabled, so
    nothing is printed in quiet mode."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
283
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Write message to the screen file, or to the configured logger.

    If a 'logger' param is set, the message goes to its debug() method
    and nothing is written.  Otherwise the message is written (after the
    bidi workaround) unless check_quiet is true and the 'quiet' param is
    set.  A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    text = self._bidi_workaround(message)
    line_end = ['\n', ''][skip_eol]
    write_string(text + line_end, self._screen_file)
8222d8de
JMF
294
def to_stderr(self, message):
    """Write message plus a newline to the error file, or pass it to the
    error() method of the configured logger instead."""
    assert type(message) == type('')
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = self._bidi_workaround(message)
    write_string(text + '\n', self._err_file)
8222d8de 304
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message.

    Does nothing unless the 'consoletitle' param is set.  On Windows
    with a console window the Win32 SetConsoleTitleW call is used;
    otherwise, when TERM is in the environment, an escape sequence is
    written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32 = ctypes.windll.kernel32
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 314
bdde425c
PH
def save_console_title(self):
    """Write the escape sequence that pushes the current title on the
    terminal's title stack, if 'consoletitle' is set and TERM is in the
    environment."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        write_string('\033[22;0t', self._screen_file)
bdde425c
PH
321
def restore_console_title(self):
    """Write the escape sequence that pops the title from the terminal's
    title stack, if 'consoletitle' is set and TERM is in the
    environment."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        write_string('\033[23;0t', self._screen_file)
bdde425c
PH
328
def __enter__(self):
    """Context-manager entry: save the console title and hand back this
    object."""
    self.save_console_title()
    entered = self
    return entered
332
def __exit__(self, *args):
    """Context-manager exit: restore the console title and, when a
    'cookiefile' param is set, save the cookie jar.  Returns None, so
    exceptions are not suppressed."""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
bdde425c 338
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    The message (if any) is printed to stderr.  In verbose mode a
    traceback is printed as well: tb if given, otherwise one built from
    the exception currently being handled, or from the current call
    stack when no exception is active.

    Unless the 'ignoreerrors' param is set, a DownloadError carrying the
    message and the relevant exc_info is raised.  Otherwise the download
    return code is set to 1 and the method returns normally.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:
                # .trouble has been called from an except block
                active = sys.exc_info()[1]
                tb = ''
                # Exceptions that wrap another one expose it as .exc_info
                if hasattr(active, 'exc_info') and active.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*active.exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
368
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' param the message goes to its
    warning() method; otherwise it is printed to stderr prefixed with
    'WARNING:', which is colored when stderr is a tty (except on
    Windows).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return

    colorize = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
383
def report_error(self, message, tb=None):
    '''
    Prefix message with 'ERROR:' (in red when stderr is a tty, except on
    Windows) and pass it, together with tb, to trouble().
    '''
    colorize = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
395
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 402
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' param with the values of info_dict plus a few
    computed fields ('epoch', 'autonumber', zero-padded 'playlist_index'
    and, when missing, 'resolution').  Every non-None value is passed
    through sanitize_filename(); fields that are missing or None expand
    to 'NA'.

    Returns the filename, or None after reporting an error if the
    template expansion raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
        if template_dict.get('resolution') is None:
            width = template_dict.get('width')
            height = template_dict.get('height')
            if width and height:
                template_dict['resolution'] = '%dx%d' % (width, height)
            elif height:
                template_dict['resolution'] = '%sp' % height
            elif width:
                # Resolution is written WIDTHxHEIGHT, so an unknown height
                # goes after the 'x' (was '?x%d', which put the width in
                # the height position).
                template_dict['resolution'] = '%dx?' % width

        restricted = self.params.get('restrictfilenames')

        def sanitize(k, v):
            # The 'id' field gets the id-specific sanitizing rules.
            return sanitize_filename(
                compat_str(v), restricted=restricted, is_id=(k == 'id'))

        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Unknown template fields expand to 'NA' instead of raising KeyError
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        return tmpl % template_dict
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
439
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a human-readable string explaining why the entry
    is skipped.  The checks, in order: 'matchtitle' / 'rejecttitle'
    patterns (only when the entry has a title), 'daterange',
    'min_views' / 'max_views', 'age_limit' and the download archive.
    """

    # Best available label for messages; entries that are only being
    # evaluated as part of a playlist may have no title yet.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange')
        if dateRange is None:
            # No range configured: fall back to the default DateRange()
            dateRange = DateRange()
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # Use video_title: the local `title` only exists when the
            # entry has a 'title' key, so it raised NameError otherwise.
            return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
fe7e0c98 475
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info it does not already
    have; existing keys keep their value'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
481
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    '''
    Extract the information for url using the first suitable extractor.

    If ie_key is given, only that extractor is tried; otherwise the
    registered extractors are tried in order.
    If 'process' is true, the extractor result is passed on to
    process_ie_result() (which also downloads the videos if 'download')
    and its return value is returned; otherwise the raw extractor
    result is returned.
    extra_info is a dict containing the extra values to add to each result
    (it is only passed through here, never modified).

    Returns None when the extractor returns None, when an error has been
    reported, or when no extractor is suitable for url.
    '''

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Only the first suitable extractor is used: every path below
        # either returns, breaks out of the loop or raises.
        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            # Record where the result came from, without overriding
            # values the extractor already set.
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'webpage_url_basename': url_basename(url),
                    'extractor_key': ie.ie_key(),
                })
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            # Must reach the caller; do not let the generic handler
            # below report it as an error.
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop finished without a break: no extractor was suitable.
        self.report_error('no suitable InfoExtractor: %s' % url)
fe7e0c98 537
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    The '_type' field of ie_result (default 'video') selects the handling:
    'video' is passed to process_video_result(), 'url' and
    'url_transparent' are resolved through extract_info(), 'playlist' and
    'compat_list' have each of their entries processed recursively.

    It will also download the videos if 'download'.
    extra_info holds values that are added to the resulting video
    dictionaries when they are missing there.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        # NOTE(review): extract_info() can return None (e.g. after a
        # reported error); make_result() below would then fail on
        # `f in embedded_info` -- confirm whether that can happen here.
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Start from the embedding page's result, but take the
            # media-specific fields listed below only from the embedded
            # result (dropping them when the embedded result lacks them).
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is 1-based; convert it to a 0-based slice index
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            # Entries that are not a list must be a PagedList, which
            # provides the requested slice through getslice().
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            # Values inherited by the entry; 'playlist_index' is the
            # 1-based position within the whole playlist, not the slice.
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Skip entries that are filtered out; they are left out of
            # the returned 'entries' as well.
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Let each entry inherit the extractor information of the
            # list it came from, where the entry does not set it itself.
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
652
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick one entry of available_formats according to format_spec.

    'best' (or None) selects the last format and 'worst' the first.
    'bestaudio' / 'worstaudio' select the last / first format whose
    'vcodec' is 'none'.  One of the known extensions selects the last
    format with that 'ext'; anything else is matched against
    'format_id', again taking the last match.

    Returns the selected format dictionary, or None if nothing matches.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec == 'bestaudio' or format_spec == 'worstaudio':
        audio_only = [
            fmt for fmt in available_formats
            if fmt.get('vcodec') == 'none']
        if not audio_only:
            return None
        if format_spec == 'bestaudio':
            return audio_only[-1]
        return audio_only[0]

    if format_spec in ['mp4', 'flv', 'webm', '3gp']:
        field = 'ext'
    else:
        field = 'format_id'
    chosen = None
    for fmt in available_formats:
        # Keep overwriting so that the last matching format wins
        if fmt[field] == format_spec:
            chosen = fmt
    return chosen
680
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Fill in defaults for a single video result and select its formats.

    Normalizes info_dict in place (playlist fields, 'display_id',
    'upload_date', per-format 'format_id' / 'format' / 'ext'), chooses
    the format(s) requested through the 'format' param and, if
    'download', hands one copy of info_dict per chosen format to
    process_info().

    Returns info_dict updated with the last chosen format, or None when
    the 'listformats' param is set (the formats are only listed).
    Raises ExtractorError if there are no formats or the requested
    format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive the YYYYMMDD upload date (UTC) from the timestamp if needed
    if info_dict.get('upload_date') is None and info_dict.get('upload_timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['upload_timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if format.get('format_id') is None:
            # Fall back to the position in the list as identifier
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # presumably keeps the formats up to and including the one whose
        # format_id equals format_limit -- verify against
        # takewhile_inclusive in utils
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            if re.match(r'.+?\+.+?', rf) is not None:
                # Two formats have been requested like '137+139'
                # NOTE(review): a spec with more than one '+' makes this
                # unpacking raise ValueError -- confirm that is intended.
                format_1, format_2 = rf.split('+')
                formats_info = (self.select_format(format_1, formats),
                                self.select_format(format_2, formats))
                if all(formats_info):
                    # Synthetic format describing both parts; it takes
                    # the extension of the first one.
                    selected_format = {
                        'requested_formats': formats_info,
                        'format': rf,
                        'ext': formats_info[0]['ext'],
                    }
                else:
                    selected_format = None
            else:
                selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            # Each format is processed on its own copy of the video info
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
789
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Normalises a few fields of ``info_dict`` in place (``fulltitle``,
    ``title``, ``stitle``, ``format``), prints whatever the ``force*``
    params request and, unless the ``simulate`` param is set, writes the
    requested side files (description, annotations, subtitles, info JSON,
    thumbnail), downloads the media, runs the postprocessors and finally
    records the entry in the download archive.

    Most failures are reported through ``self.report_error`` or
    ``self.report_warning`` and stop the processing of this entry by
    returning ``None``.

    Raises:
        MaxDownloadsReached: ``self._num_downloads`` has already reached
            the ``max_downloads`` param.
        UnavailableVideoError: the download raised ``OSError``/``IOError``.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around; 'title' itself is capped at 200
    # characters (197 + '...').
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None reason means the entry is rejected; only report it.
    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    # Make sure the target directory exists before writing anything.
    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a non-text value: nothing to write.
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            # Computed before the try block so the error handler below can
            # always name the file that failed.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    # Encode the name like every other file written here.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal: warn and carry on.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The file is already there; treat it as a finished download.
            success = True
        else:
            def dl(name, info):
                # Pick a downloader for this format and attach the hooks.
                fd = get_suitable_downloader(info)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                return fd.download(name, info)

            try:
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger._get_executable():
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    # Read back by post_process() through the info dict.
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except PostProcessingError as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
993
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Refuses to start (SameFileError) when several URLs would all be
    written to one fixed output name, i.e. the 'outtmpl' param contains no
    '%' field and 'max_downloads' is not 1.  Returns
    self._download_retcode once every URL has been handed to
    extract_info().
    """
    if len(url_list) > 1:
        if '%' not in self.params['outtmpl']:
            if self.params.get('max_downloads') != 1:
                raise SameFileError(self.params['outtmpl'])

    for single_url in url_list:
        try:
            # extract_info() also downloads the videos
            self.extract_info(single_url)
        except UnavailableVideoError:
            # Report and move on to the next URL.
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
1012
def download_with_info_file(self, info_filename):
    """Download using an info dict previously saved as a JSON file.

    The file is read as UTF-8 JSON and passed to process_ie_result().  If
    that raises DownloadError and the info carries a 'webpage_url', the
    download is retried from that URL; otherwise the error is re-raised.
    Returns the retry's result, or self._download_retcode.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            # Nothing to fall back on.
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1026
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Works on a copy of ie_info with 'filepath' set to filename.  The
    postprocessors listed under ie_info['__postprocessors'] (if any) run
    first, followed by those in self._pps.  Each one returns a
    (keep_video_wish, info) pair; the original file is deleted only when
    the combined wish is False and the 'keepvideo' param is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    chain = list(per_video_pps) if per_video_pps is not None else []
    chain += list(self._pps)

    keep_video = None
    for postprocessor in chain:
        try:
            wish, _ = postprocessor.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        else:
            if wish is None:
                # This postprocessor has no opinion.
                continue
            if wish:
                keep_video = wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1053
5db07df6
PH
1054 def _make_archive_id(self, info_dict):
1055 # Future-proof against any change in case
1056 # and backwards compatibility with prior versions
d31209a1 1057 extractor = info_dict.get('extractor_key')
7012b23c
PH
1058 if extractor is None:
1059 if 'id' in info_dict:
1060 extractor = info_dict.get('ie_key') # key in a playlist
1061 if extractor is None:
5db07df6 1062 return None # Incomplete video information
6febd1c1 1063 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1064
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' param is set, when the info
    is too incomplete to build an archive id, or when the archive file
    does not exist yet.  Any other IOError is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    found = False
    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            found = any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far.
        if ioe.errno != errno.ENOENT:
            raise
    return found
1083
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1092
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short, human-readable resolution for a format dict.

    Checked in this order:
      * 'audio only' when 'vcodec' is the string 'none';
      * the 'resolution' field when it is set;
      * '<width>x<height>' when both dimensions are known;
      * '<height>p' when only the height is known;
      * '<width>x?' when only the width is known;
      * ``default`` otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Width goes on the left of the 'x', as in the WxH form above; the
        # unknown height is the '?'.
        return '%dx?' % width
    return default
1109
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    One row per entry of info_dict['formats'] (or info_dict itself when
    that key is absent): format code, extension, resolution and a
    free-form note.  With more than one format, the first row is tagged
    '(worst)' and the last '(best)'.
    """
    def _joined(acc, piece):
        # Append piece, comma-separated from whatever is already there.
        return acc + (', ' if acc else '') + piece

    def format_note(fdict):
        note = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            note += '(unsupported) '
        if fdict.get('format_note') is not None:
            note += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            note += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            note = _joined(note, '%s container' % fdict['container'])

        vcodec = fdict.get('vcodec')
        vbr = fdict.get('vbr')
        abr = fdict.get('abr')
        if vcodec is not None and vcodec != 'none':
            note = _joined(note, vcodec)
            if vbr is not None:
                note += '@'
        elif vbr is not None and abr is not None:
            note += 'video@'
        if vbr is not None:
            note += '%4dk' % vbr

        acodec = fdict.get('acodec')
        if acodec is not None:
            if acodec == 'none':
                note = _joined(note, 'video only')
            else:
                note = _joined(note, '%-5s' % acodec)
        elif abr is not None:
            note = _joined(note, 'audio')
        if abr is not None:
            note += '@%3dk' % abr
        if fdict.get('asr') is not None:
            note += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            note = _joined(note, format_bytes(fdict['filesize']))
        return note

    def line(format, idlen=20):
        # The first column is one character wider than the longest id.
        template = '%-' + compat_str(idlen + 1) + 's%-10s%-12s%s'
        columns = (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )
        return template % columns

    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    idlen = max(len('format code'), longest_id)
    rows = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        worst_gap = ' ' if format_note(formats[0]) else ''
        rows[0] += worst_gap + '(worst)'
        best_gap = ' ' if format_note(formats[-1]) else ''
        rows[-1] += best_gap + '(best)'

    # The header is rendered through line() so it shares the column widths.
    header_fields = {
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}
    header_line = line(header_fields, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
dca08720
PH
1176
def urlopen(self, req):
    """ Start an HTTP download """
    # Every request goes through the shared opener with the stored timeout.
    opener = self._opener
    return opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1180
def print_debug_header(self):
    """Write debugging information when the 'verbose' param is set.

    Emits, through write_string(): the youtube-dl version, the short git
    HEAD hash when `git rev-parse` succeeds in this file's directory, the
    Python version with the platform name, and the proxy map collected
    from the opener's handlers.  Does nothing when 'verbose' is unset.
    """
    if not self.params.get('verbose'):
        return
    write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() is not available on this interpreter.
            pass
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1207
def _setup_opener(self):
    """Build the URL opener stored in self._opener.

    Reads the 'socket_timeout', 'cookiefile', 'proxy',
    'debug_printtraffic' and 'nocheckcertificate' params, and also sets
    self._socket_timeout and self.cookiejar.
    """
    requested_timeout = self.params.get('socket_timeout')
    if requested_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(requested_timeout)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the jar when the cookie file is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An explicitly empty 'proxy' param yields an empty proxy map.
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 0
    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener