2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
28 compat_urllib_request
,
48 UnavailableVideoError
,
53 from .extractor
import get_info_extractor
, gen_extractors
54 from .FileDownloader
import FileDownloader
55 from .version
import __version__
58 class YoutubeDL(object):
61 YoutubeDL objects are the ones responsible of downloading the
62 actual video file and writing it to disk if the user has requested
63 it, among some other tasks. In most cases there should be one per
64 program. As, given a video URL, the downloader doesn't know how to
65 extract all the needed information, task that InfoExtractors do, it
66 has to pass the URL to one of them.
68 For this, YoutubeDL objects have a method that allows
69 InfoExtractors to be registered in a given order. When it is passed
70 a URL, the YoutubeDL object handles it to the first InfoExtractor it
71 finds that reports being able to handle it. The InfoExtractor extracts
72 all the information about the video or videos the URL refers to, and
73 YoutubeDL process the extracted information, possibly using a File
74 Downloader to download the video.
76 YoutubeDL objects accept a lot of parameters. In order not to saturate
77 the object constructor with arguments, it receives a dictionary of
78 options instead. These options are available through the params
79 attribute for the InfoExtractors to use. The YoutubeDL also
80 registers itself as the downloader in charge for the InfoExtractors
81 that are added to it, so this is a "mutual registration".
85 username: Username for authentication purposes.
86 password: Password for authentication purposes.
87 videopassword: Password for accessing a video.
88 usenetrc: Use netrc for authentication instead.
89 verbose: Print additional info to stdout.
90 quiet: Do not print messages to stdout.
91 forceurl: Force printing final URL.
92 forcetitle: Force printing title.
93 forceid: Force printing ID.
94 forcethumbnail: Force printing thumbnail URL.
95 forcedescription: Force printing description.
96 forcefilename: Force printing final filename.
97 forcejson: Force printing info_dict as JSON.
98 simulate: Do not download the video files.
99 format: Video format code.
100 format_limit: Highest quality format to try.
101 outtmpl: Template for output names.
102 restrictfilenames: Do not allow "&" and spaces in file names
103 ignoreerrors: Do not stop on download errors.
104 nooverwrites: Prevent overwriting files.
105 playliststart: Playlist item to start at.
106 playlistend: Playlist item to end at.
107 matchtitle: Download only matching titles.
108 rejecttitle: Reject downloads for matching titles.
109 logger: Log messages to a logging.Logger instance.
110 logtostderr: Log messages to stderr instead of stdout.
111 writedescription: Write the video description to a .description file
112 writeinfojson: Write the video description to a .info.json file
113 writeannotations: Write the video annotations to a .annotations.xml file
114 writethumbnail: Write the thumbnail image to a file
115 writesubtitles: Write the video subtitles to a file
116 writeautomaticsub: Write the automatic subtitles to a file
117 allsubtitles: Downloads all the subtitles of the video
118 (requires writesubtitles or writeautomaticsub)
119 listsubtitles: Lists all available subtitles for the video
120 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
121 subtitleslangs: List of languages of the subtitles to download
122 keepvideo: Keep the video file after post-processing
123 daterange: A DateRange object, download only if the upload_date is in the range.
124 skip_download: Skip the actual download of the video file
125 cachedir: Location of the cache files in the filesystem.
126 None to disable filesystem cache.
127 noplaylist: Download single video instead of a playlist if in doubt.
128 age_limit: An integer representing the user's age in years.
129 Unsuitable videos for the given age are skipped.
130 download_archive: File name of a file where all downloads are recorded.
131 Videos already present in the file are not downloaded
133 cookiefile: File name where cookies should be read from and dumped to.
134 nocheckcertificate:Do not verify SSL certificates
135 proxy: URL of the proxy server to use
136 socket_timeout: Time to wait for unresponsive hosts, in seconds
137 bidi_workaround: Work around buggy terminals without bidirectional text
138 support, using fribidi
140 The following parameters are not used by YoutubeDL itself, they are used by
142 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
143 noresizebuffer, retries, continuedl, noprogress, consoletitle
149 _download_retcode
= None
150 _num_downloads
= None
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    None is treated as an empty dictionary.
    """
    # Resolve the options before anything else: every check below, and
    # report_warning() through to_stderr(), reads self.params. Reading the
    # raw argument instead crashed when params was None.
    self.params = {} if params is None else params

    # NOTE(review): _ies and _pps are the plain extractor / post-processor
    # registries; confirm against the methods that consume them.
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    if self.params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout
    self._err_file = sys.stderr

    if self.params.get('bidi_workaround', False):
        try:
            # pty is imported lazily: it is only needed for this workaround.
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            # fribidi writes to the slave end of the pty; the converted
            # text is read back from the master end in _bidi_workaround().
            self._fribidi = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args,
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            self._fribidi_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is downgraded to a warning.
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
203 def add_info_extractor(self
, ie
):
204 """Add an InfoExtractor object to the end of the list."""
206 self
._ies
_instances
[ie
.ie_key()] = ie
207 ie
.set_downloader(self
)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key.

    An instance already registered under ie_key is reused. If there is
    none, a new one is created through the module-level
    get_info_extractor() factory, registered with add_info_extractor()
    and returned.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in order.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
228 def add_post_processor(self
, pp
):
229 """Add a PostProcessor object to the end of the chain."""
231 pp
.set_downloader(self
)
233 def _bidi_workaround(self
, message
):
234 if not hasattr(self
, '_fribidi_channel'):
237 assert type(message
) == type(u
'')
238 line_count
= message
.count(u
'\n') + 1
239 self
._fribidi
.stdin
.write((message
+ u
'\n').encode('utf-8'))
240 self
._fribidi
.stdin
.flush()
241 res
= u
''.join(self
._fribidi
_channel
.readline().decode('utf-8')
242 for _
in range(line_count
))
243 return res
[:-len(u
'\n')]
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout(), honouring the 'quiet' option.

    Thin wrapper that always requests the quiet check; whatever
    to_stdout() returns is passed back unchanged.
    """
    emit = self.to_stdout
    return emit(message, skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the configured logger or to the screen file.

    When a 'logger' option is set, the message goes to its debug()
    method and nothing is written. Otherwise the message is written to
    self._screen_file, unless check_quiet is true and the 'quiet' option
    is set. A newline is appended unless skip_eol is true.
    """
    log = self.params.get('logger')
    if log:
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    shaped = self._bidi_workaround(message)
    line_end = (u'\n', u'')[skip_eol]
    write_string(shaped + line_end, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr, or hand it to the configured logger.

    When a 'logger' option is set, the message goes to its error()
    method only. Otherwise it is passed through _bidi_workaround() and
    written to self._err_file followed by a newline.
    """
    assert isinstance(message, type(u''))
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
    else:
        message = self._bidi_workaround(message)
        output = message + u'\n'
        write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the console window title to message.

    Does nothing unless the 'consoletitle' option is set. On Windows
    with a console window, the title is set through kernel32
    SetConsoleTitleW. Otherwise, when TERM is in the environment, an
    OSC 0 escape sequence is written to self._screen_file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Ask the terminal to push its current title onto its title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is in
    the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Ask the terminal to pop the title saved by save_console_title().

    Does nothing unless the 'consoletitle' option is set and TERM is in
    the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
295 self
.save_console_title()
298 def __exit__(self
, *args
):
299 self
.restore_console_title()
301 if self
.params
.get('cookiefile') is not None:
302 self
.cookiejar
.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information. It is only
    printed when the 'verbose' option is set; when it is not given, a
    traceback is built from the exception being handled, or from the
    current call stack if there is none.

    Raises DownloadError unless the 'ignoreerrors' option is set, in
    which case the download return code is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)

    current = sys.exc_info()
    # An exception may carry, in its own exc_info attribute, the
    # exc_info of the error that caused it; prefer that when populated.
    nested = None
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        nested = current[1].exc_info

    if self.params.get('verbose'):
        if tb is None:
            if current[0]:  # if .trouble has been called from an except block
                tb = u''
                if nested is not None:
                    tb += u''.join(traceback.format_exception(*nested))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = nested if nested is not None else current
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    (not on Windows, where the plain prefix is always used).
    '''
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file (not on Windows, where the plain
    prefix is always used).
    '''
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_writedescription(self, descfn):
    """ Announce the path the video description is being written to """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """ Announce the path the subtitles are being written to """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """ Announce the path of the JSON metadata file """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_writeannotations(self, annofn):
    """ Announce the path the annotations are being written to """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name when printing the
    name raises UnicodeEncodeError.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance by one the counter that numbers each downloaded file."""
    self._num_downloads = self._num_downloads + 1
385 def prepare_filename(self
, info_dict
):
386 """Generate the output filename."""
388 template_dict
= dict(info_dict
)
390 template_dict
['epoch'] = int(time
.time())
391 autonumber_size
= self
.params
.get('autonumber_size')
392 if autonumber_size
is None:
394 autonumber_templ
= u
'%0' + str(autonumber_size
) + u
'd'
395 template_dict
['autonumber'] = autonumber_templ
% self
._num
_downloads
396 if template_dict
.get('playlist_index') is not None:
397 template_dict
['playlist_index'] = u
'%05d' % template_dict
['playlist_index']
399 sanitize
= lambda k
, v
: sanitize_filename(
401 restricted
=self
.params
.get('restrictfilenames'),
403 template_dict
= dict((k
, sanitize(k
, v
))
404 for k
, v
in template_dict
.items()
406 template_dict
= collections
.defaultdict(lambda: u
'NA', template_dict
)
408 tmpl
= os
.path
.expanduser(self
.params
['outtmpl'])
409 filename
= tmpl
% template_dict
411 except ValueError as err
:
412 self
.report_error(u
'Error in output template: ' + str(err
) + u
' (encoding: ' + repr(preferredencoding()) + ')')
415 def _match_entry(self
, info_dict
):
416 """ Returns None iff the file should be downloaded """
418 if 'title' in info_dict
:
419 # This can happen when we're just evaluating the playlist
420 title
= info_dict
['title']
421 matchtitle
= self
.params
.get('matchtitle', False)
423 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
424 return u
'[download] "' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
425 rejecttitle
= self
.params
.get('rejecttitle', False)
427 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
428 return u
'"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
429 date
= info_dict
.get('upload_date', None)
431 dateRange
= self
.params
.get('daterange', DateRange())
432 if date
not in dateRange
:
433 return u
'[download] %s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
434 age_limit
= self
.params
.get('age_limit')
435 if age_limit
is not None:
436 if age_limit
< info_dict
.get('age_limit', 0):
437 return u
'Skipping "' + title
+ '" because it is age restricted'
438 if self
.in_download_archive(info_dict
):
439 return (u
'%s has already been recorded in archive'
440 % info_dict
.get('title', info_dict
.get('id', u
'video')))
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every extra_info entry whose key it lacks'''
    for name in extra_info:
        if name not in info_dict:
            info_dict[name] = extra_info[name]
449 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={},
452 Returns a list with a dictionary for each video we find.
453 If 'download', also downloads the videos.
454 extra_info is a dict containing the extra values to add to each result
458 ies
= [self
.get_info_extractor(ie_key
)]
463 if not ie
.suitable(url
):
467 self
.report_warning(u
'The program functionality for this site has been marked as broken, '
468 u
'and will probably not work.')
471 ie_result
= ie
.extract(url
)
472 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
474 if isinstance(ie_result
, list):
475 # Backwards compatibility: old IE result format
477 '_type': 'compat_list',
478 'entries': ie_result
,
480 self
.add_extra_info(ie_result
,
482 'extractor': ie
.IE_NAME
,
484 'extractor_key': ie
.ie_key(),
487 return self
.process_ie_result(ie_result
, download
, extra_info
)
490 except ExtractorError
as de
: # An error we somewhat expected
491 self
.report_error(compat_str(de
), de
.format_traceback())
493 except Exception as e
:
494 if self
.params
.get('ignoreerrors', False):
495 self
.report_error(compat_str(e
), tb
=compat_str(traceback
.format_exc()))
500 self
.report_error(u
'no suitable InfoExtractor: %s' % url
)
502 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
504 Take the result of the ie(may be modified) and resolve all unresolved
505 references (URLs, playlist items).
507 It will also download the videos if 'download'.
508 Returns the resolved ie_result.
511 result_type
= ie_result
.get('_type', 'video') # If not given we suppose it's a video, support the default old system
512 if result_type
== 'video':
513 self
.add_extra_info(ie_result
, extra_info
)
514 return self
.process_video_result(ie_result
, download
=download
)
515 elif result_type
== 'url':
516 # We have to add extra_info to the results because it may be
517 # contained in a playlist
518 return self
.extract_info(ie_result
['url'],
520 ie_key
=ie_result
.get('ie_key'),
521 extra_info
=extra_info
)
522 elif result_type
== 'url_transparent':
523 # Use the information from the embedding page
524 info
= self
.extract_info(
525 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
526 extra_info
=extra_info
, download
=False, process
=False)
528 def make_result(embedded_info
):
529 new_result
= ie_result
.copy()
530 for f
in ('_type', 'url', 'ext', 'player_url', 'formats',
531 'entries', 'urlhandle', 'ie_key', 'duration',
532 'subtitles', 'annotations', 'format',
533 'thumbnail', 'thumbnails'):
536 if f
in embedded_info
:
537 new_result
[f
] = embedded_info
[f
]
539 new_result
= make_result(info
)
541 assert new_result
.get('_type') != 'url_transparent'
542 if new_result
.get('_type') == 'compat_list':
543 new_result
['entries'] = [
544 make_result(e
) for e
in new_result
['entries']]
546 return self
.process_ie_result(
547 new_result
, download
=download
, extra_info
=extra_info
)
548 elif result_type
== 'playlist':
549 # We process each entry in the playlist
550 playlist
= ie_result
.get('title', None) or ie_result
.get('id', None)
551 self
.to_screen(u
'[download] Downloading playlist: %s' % playlist
)
553 playlist_results
= []
555 n_all_entries
= len(ie_result
['entries'])
556 playliststart
= self
.params
.get('playliststart', 1) - 1
557 playlistend
= self
.params
.get('playlistend', -1)
559 if playlistend
== -1:
560 entries
= ie_result
['entries'][playliststart
:]
562 entries
= ie_result
['entries'][playliststart
:playlistend
]
564 n_entries
= len(entries
)
566 self
.to_screen(u
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
567 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
569 for i
, entry
in enumerate(entries
, 1):
570 self
.to_screen(u
'[download] Downloading video #%s of %s' % (i
, n_entries
))
572 'playlist': playlist
,
573 'playlist_index': i
+ playliststart
,
574 'extractor': ie_result
['extractor'],
575 'webpage_url': ie_result
['webpage_url'],
576 'extractor_key': ie_result
['extractor_key'],
579 reason
= self
._match
_entry
(entry
)
580 if reason
is not None:
581 self
.to_screen(u
'[download] ' + reason
)
584 entry_result
= self
.process_ie_result(entry
,
587 playlist_results
.append(entry_result
)
588 ie_result
['entries'] = playlist_results
590 elif result_type
== 'compat_list':
592 self
.add_extra_info(r
,
594 'extractor': ie_result
['extractor'],
595 'webpage_url': ie_result
['webpage_url'],
596 'extractor_key': ie_result
['extractor_key'],
599 ie_result
['entries'] = [
600 self
.process_ie_result(_fixup(r
), download
, extra_info
)
601 for r
in ie_result
['entries']
605 raise Exception('Invalid result type: %s' % result_type
)
def select_format(self, format_spec, available_formats):
    """Pick one format dictionary from available_formats.

    available_formats is ordered from worst to best. format_spec may be
    'best' (or None), 'worst', one of the known file extensions, or a
    format_id. For an extension or a format_id the last (best) matching
    entry is returned. Returns None when nothing matches or when
    available_formats is empty.
    """
    if not available_formats:
        # Nothing to choose from; let the caller report it instead of
        # failing with an IndexError below.
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    else:
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        else:
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    return None
623 def process_video_result(self
, info_dict
, download
=True):
624 assert info_dict
.get('_type', 'video') == 'video'
626 if 'playlist' not in info_dict
:
627 # It isn't part of a playlist
628 info_dict
['playlist'] = None
629 info_dict
['playlist_index'] = None
631 # These extractors handle format selection themselves
632 if info_dict
['extractor'] in [u
'youtube', u
'Youku']:
634 self
.process_info(info_dict
)
637 # We now pick which formats have to be downloaded
638 if info_dict
.get('formats') is None:
639 # There's only one format available
640 formats
= [info_dict
]
642 formats
= info_dict
['formats']
644 # We check that all the formats have the format and format_id fields
645 for (i
, format
) in enumerate(formats
):
646 if format
.get('format_id') is None:
647 format
['format_id'] = compat_str(i
)
648 if format
.get('format') is None:
649 format
['format'] = u
'{id} - {res}{note}'.format(
650 id=format
['format_id'],
651 res
=self
.format_resolution(format
),
652 note
=u
' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
654 # Automatically determine file extension if missing
655 if 'ext' not in format
:
656 format
['ext'] = determine_ext(format
['url'])
658 if self
.params
.get('listformats', None):
659 self
.list_formats(info_dict
)
662 format_limit
= self
.params
.get('format_limit', None)
664 formats
= list(takewhile_inclusive(
665 lambda f
: f
['format_id'] != format_limit
, formats
667 if self
.params
.get('prefer_free_formats'):
668 def _free_formats_key(f
):
670 ext_ord
= [u
'flv', u
'mp4', u
'webm'].index(f
['ext'])
673 # We only compare the extension if they have the same height and width
674 return (f
.get('height'), f
.get('width'), ext_ord
)
675 formats
= sorted(formats
, key
=_free_formats_key
)
677 req_format
= self
.params
.get('format', 'best')
678 if req_format
is None:
680 formats_to_download
= []
681 # The -1 is for supporting YoutubeIE
682 if req_format
in ('-1', 'all'):
683 formats_to_download
= formats
685 # We can accept formats requested in the format: 34/5/best, we pick
686 # the first that is available, starting from left
687 req_formats
= req_format
.split('/')
688 for rf
in req_formats
:
689 selected_format
= self
.select_format(rf
, formats
)
690 if selected_format
is not None:
691 formats_to_download
= [selected_format
]
693 if not formats_to_download
:
694 raise ExtractorError(u
'requested format not available',
698 if len(formats_to_download
) > 1:
699 self
.to_screen(u
'[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
700 for format
in formats_to_download
:
701 new_info
= dict(info_dict
)
702 new_info
.update(format
)
703 self
.process_info(new_info
)
704 # We update the info dict with the best quality format (backwards compatibility)
705 info_dict
.update(formats_to_download
[-1])
708 def process_info(self
, info_dict
):
709 """Process a single resolved IE result."""
711 assert info_dict
.get('_type', 'video') == 'video'
712 #We increment the download count here to match the previous behaviour.
713 self
.increment_downloads()
715 info_dict
['fulltitle'] = info_dict
['title']
716 if len(info_dict
['title']) > 200:
717 info_dict
['title'] = info_dict
['title'][:197] + u
'...'
719 # Keep for backwards compatibility
720 info_dict
['stitle'] = info_dict
['title']
722 if not 'format' in info_dict
:
723 info_dict
['format'] = info_dict
['ext']
725 reason
= self
._match
_entry
(info_dict
)
726 if reason
is not None:
727 self
.to_screen(u
'[download] ' + reason
)
730 max_downloads
= self
.params
.get('max_downloads')
731 if max_downloads
is not None:
732 if self
._num
_downloads
> int(max_downloads
):
733 raise MaxDownloadsReached()
735 filename
= self
.prepare_filename(info_dict
)
738 if self
.params
.get('forcetitle', False):
739 self
.to_stdout(info_dict
['fulltitle'])
740 if self
.params
.get('forceid', False):
741 self
.to_stdout(info_dict
['id'])
742 if self
.params
.get('forceurl', False):
743 # For RTMP URLs, also include the playpath
744 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', u
''))
745 if self
.params
.get('forcethumbnail', False) and info_dict
.get('thumbnail') is not None:
746 self
.to_stdout(info_dict
['thumbnail'])
747 if self
.params
.get('forcedescription', False) and info_dict
.get('description') is not None:
748 self
.to_stdout(info_dict
['description'])
749 if self
.params
.get('forcefilename', False) and filename
is not None:
750 self
.to_stdout(filename
)
751 if self
.params
.get('forceformat', False):
752 self
.to_stdout(info_dict
['format'])
753 if self
.params
.get('forcejson', False):
754 info_dict
['_filename'] = filename
755 self
.to_stdout(json
.dumps(info_dict
))
757 # Do nothing else if in simulate mode
758 if self
.params
.get('simulate', False):
765 dn
= os
.path
.dirname(encodeFilename(filename
))
766 if dn
!= '' and not os
.path
.exists(dn
):
768 except (OSError, IOError) as err
:
769 self
.report_error(u
'unable to create directory ' + compat_str(err
))
772 if self
.params
.get('writedescription', False):
774 descfn
= filename
+ u
'.description'
775 self
.report_writedescription(descfn
)
776 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
777 descfile
.write(info_dict
['description'])
778 except (KeyError, TypeError):
779 self
.report_warning(u
'There\'s no description to write.')
780 except (OSError, IOError):
781 self
.report_error(u
'Cannot write description file ' + descfn
)
784 if self
.params
.get('writeannotations', False):
786 annofn
= filename
+ u
'.annotations.xml'
787 self
.report_writeannotations(annofn
)
788 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
789 annofile
.write(info_dict
['annotations'])
790 except (KeyError, TypeError):
791 self
.report_warning(u
'There are no annotations to write.')
792 except (OSError, IOError):
793 self
.report_error(u
'Cannot write annotations file: ' + annofn
)
796 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
797 self
.params
.get('writeautomaticsub')])
799 if subtitles_are_requested
and 'subtitles' in info_dict
and info_dict
['subtitles']:
800 # subtitles download errors are already managed as troubles in relevant IE
801 # that way it will silently go on when used with unsupporting IE
802 subtitles
= info_dict
['subtitles']
803 sub_format
= self
.params
.get('subtitlesformat', 'srt')
804 for sub_lang
in subtitles
.keys():
805 sub
= subtitles
[sub_lang
]
809 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
)
810 self
.report_writesubtitles(sub_filename
)
811 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8') as subfile
:
813 except (OSError, IOError):
814 self
.report_error(u
'Cannot write subtitles file ' + descfn
)
817 if self
.params
.get('writeinfojson', False):
818 infofn
= os
.path
.splitext(filename
)[0] + u
'.info.json'
819 self
.report_writeinfojson(infofn
)
821 json_info_dict
= dict((k
, v
) for k
, v
in info_dict
.items() if not k
in ['urlhandle'])
822 write_json_file(json_info_dict
, encodeFilename(infofn
))
823 except (OSError, IOError):
824 self
.report_error(u
'Cannot write metadata to JSON file ' + infofn
)
827 if self
.params
.get('writethumbnail', False):
828 if info_dict
.get('thumbnail') is not None:
829 thumb_format
= determine_ext(info_dict
['thumbnail'], u
'jpg')
830 thumb_filename
= os
.path
.splitext(filename
)[0] + u
'.' + thumb_format
831 self
.to_screen(u
'[%s] %s: Downloading thumbnail ...' %
832 (info_dict
['extractor'], info_dict
['id']))
834 uf
= compat_urllib_request
.urlopen(info_dict
['thumbnail'])
835 with open(thumb_filename
, 'wb') as thumbf
:
836 shutil
.copyfileobj(uf
, thumbf
)
837 self
.to_screen(u
'[%s] %s: Writing thumbnail to: %s' %
838 (info_dict
['extractor'], info_dict
['id'], thumb_filename
))
839 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
840 self
.report_warning(u
'Unable to download thumbnail "%s": %s' %
841 (info_dict
['thumbnail'], compat_str(err
)))
843 if not self
.params
.get('skip_download', False):
844 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(filename
)):
848 success
= self
.fd
._do
_download
(filename
, info_dict
)
849 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
850 self
.report_error(u
'unable to download video data: %s' % str(err
))
852 except (OSError, IOError) as err
:
853 raise UnavailableVideoError(err
)
854 except (ContentTooShortError
, ) as err
:
855 self
.report_error(u
'content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
860 self
.post_process(filename
, info_dict
)
861 except (PostProcessingError
) as err
:
862 self
.report_error(u
'postprocessing: %s' % str(err
))
865 self
.record_download_archive(info_dict
)
867 def download(self
, url_list
):
868 """Download a given list of URLs."""
869 if (len(url_list
) > 1 and
870 '%' not in self
.params
['outtmpl']
871 and self
.params
.get('max_downloads') != 1):
872 raise SameFileError(self
.params
['outtmpl'])
876 #It also downloads the videos
877 self
.extract_info(url
)
878 except UnavailableVideoError
:
879 self
.report_error(u
'unable to download video')
880 except MaxDownloadsReached
:
881 self
.to_screen(u
'[info] Maximum number of downloaded files reached.')
884 return self
._download
_retcode
886 def download_with_info_file(self
, info_filename
):
887 with io
.open(info_filename
, 'r', encoding
='utf-8') as f
:
890 self
.process_ie_result(info
, download
=True)
891 except DownloadError
:
892 webpage_url
= info
.get('webpage_url')
893 if webpage_url
is not None:
894 self
.report_warning(u
'The info failed to download, trying with "%s"' % webpage_url
)
895 return self
.download([webpage_url
])
898 return self
._download
_retcode
900 def post_process(self
, filename
, ie_info
):
901 """Run all the postprocessors on the given file."""
903 info
['filepath'] = filename
907 keep_video_wish
, new_info
= pp
.run(info
)
908 if keep_video_wish
is not None:
910 keep_video
= keep_video_wish
911 elif keep_video
is None:
912 # No clear decision yet, let IE decide
913 keep_video
= keep_video_wish
914 except PostProcessingError
as e
:
915 self
.report_error(e
.msg
)
916 if keep_video
is False and not self
.params
.get('keepvideo', False):
918 self
.to_screen(u
'Deleting original file %s (pass -k to keep)' % filename
)
919 os
.remove(encodeFilename(filename
))
920 except (IOError, OSError):
921 self
.report_warning(u
'Unable to remove downloaded video file')
923 def _make_archive_id(self
, info_dict
):
924 # Future-proof against any change in case
925 # and backwards compatibility with prior versions
926 extractor
= info_dict
.get('extractor_key')
927 if extractor
is None:
928 if 'id' in info_dict
:
929 extractor
= info_dict
.get('ie_key') # key in a playlist
930 if extractor
is None:
931 return None # Incomplete video information
932 return extractor
.lower() + u
' ' + info_dict
['id']
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    Returns False when no 'download_archive' option is set, when
    info_dict is too incomplete to build an archive id, or when the
    archive file does not exist yet. Other IOErrors are re-raised.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' option is set.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # An id is required to write a line; None here would otherwise fail
    # on the concatenation below.
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution description for a format mapping.

    format  -- format mapping; the keys read are 'vcodec',
               '_resolution', 'height' and 'width'.
    default -- value returned when no height is known.

    Precedence: a 'vcodec' of 'none' marks an audio-only format, then an
    explicit '_resolution' is used verbatim, then 'WIDTHxHEIGHT' when
    both dimensions are known, then 'HEIGHTp', and finally default.

    This is a static method because it takes no instance and is invoked
    as self.format_resolution(fmt) with a single argument.
    """
    if format.get('vcodec') == 'none':
        # NOTE(review): label reconstructed; confirm the exact wording.
        return 'audio only'
    if format.get('_resolution') is not None:
        return format['_resolution']
    if format.get('height') is None:
        return default
    if format.get('width') is not None:
        return u'%sx%s' % (format['width'], format['height'])
    return u'%sp' % format['height']
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    The formats are taken from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format.  One row is
    printed per format (format code, extension, resolution, note).  With
    more than one format, the first row is tagged '(worst)' and the last
    one '(best)'.  The table is emitted with a single to_screen() call.
    """
    def format_note(fdict):
        """Build the free-text note column for one format mapping."""
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        """Render one table row; idlen is the width of the id column."""
        return ((u'%-' + u'%d' % (idlen + 1) + u's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # The id column is at least as wide as its header.  Folding the
    # header width into the same list keeps max() from failing when the
    # list of formats is empty.
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """Open req through this object's opener and return the result."""
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write debugging information when the 'verbose' param is set.

    Emits, through write_string: the youtube-dl version, the short git
    HEAD hash when this file sits inside a git checkout, the Python
    version and platform name, and the merged proxy map of all handlers
    of the opener.  Returns None; does nothing when not verbose.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may not be installed or its output may
        # not be decodable.  The header must never abort the program,
        # but KeyboardInterrupt/SystemExit are allowed to propagate.
        pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener used by this object and install it globally.

    Params read:
      socket_timeout      -- default socket timeout in seconds
                             (600 when unset).
      cookiefile          -- path of a cookie file; when unset an
                             in-memory cookie jar is used.
      proxy               -- proxy URL used for both http and https; an
                             empty string disables proxies; when unset
                             the proxies reported by getproxies() apply.
      nocheckcertificate  -- passed on to make_HTTPS_handler.

    Side effects: sets self.cookiejar and self._opener, installs the
    opener as the global default and sets the global default socket
    timeout.
    """
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load cookies from a file that is actually readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)