]> jfr.im git - yt-dlp.git/blobdiff - youtube_dlc/YoutubeDL.py
Update to ytdl-2021.01.03
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
index 0724f4dbcdee2c0323a8a6b123c67aff9a31f40a..715eaa7dc49ee8f126627969683d684a24687fde 100644 (file)
@@ -51,6 +51,9 @@
     DEFAULT_OUTTMPL,
     determine_ext,
     determine_protocol,
+    DOT_DESKTOP_LINK_TEMPLATE,
+    DOT_URL_LINK_TEMPLATE,
+    DOT_WEBLOC_LINK_TEMPLATE,
     DownloadError,
     encode_compat_str,
     encodeFilename,
     expand_path,
     ExtractorError,
     format_bytes,
+    format_field,
     formatSeconds,
     GeoRestrictedError,
     int_or_none,
+    iri_to_uri,
     ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
@@ -84,6 +89,7 @@
     std_headers,
     str_or_none,
     subtitles_filename,
+    to_high_limit_path,
     UnavailableVideoError,
     url_basename,
     version_tuple,
@@ -161,10 +167,17 @@ class YoutubeDL(object):
     forcejson:         Force printing info_dict as JSON.
     dump_single_json:  Force printing the info_dict of the whole playlist
                        (or video) as a single JSON line.
+    force_write_download_archive: Force writing download archive regardless of
+                       'skip_download' or 'simulate'.
     simulate:          Do not download the video files.
-    format:            Video format code. See options.py for more information.
+    format:            Video format code. see "FORMAT SELECTION" for more details.
+    format_sort:       How to sort the video formats. see "Sorting Formats" for more details.
+    format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
+    allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
+    allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
     outtmpl:           Template for output names.
-    restrictfilenames: Do not allow "&" and spaces in file names
+    restrictfilenames: Do not allow "&" and spaces in file names.
+    trim_file_name:    Limit length of filename (extension excluded).
     ignoreerrors:      Do not stop on download errors.
     force_generic_extractor: Force downloader to use the generic extractor
     nooverwrites:      Prevent overwriting files.
@@ -182,6 +195,11 @@ class YoutubeDL(object):
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
     write_all_thumbnails:  Write all thumbnail formats to files
+    writelink:         Write an internet shortcut file, depending on the
+                       current platform (.url/.webloc/.desktop)
+    writeurllink:      Write a Windows internet shortcut file (.url)
+    writewebloclink:   Write a macOS internet shortcut file (.webloc)
+    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
@@ -209,6 +227,8 @@ class YoutubeDL(object):
     download_archive:  File name of a file where all downloads are recorded.
                        Videos already present in the file are not downloaded
                        again.
+    break_on_existing: Stop the download process after attempting to download a file that's
+                       in the archive.
     cookiefile:        File name where cookies should be read from and dumped to.
     nocheckcertificate:Do not verify SSL certificates
     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
@@ -359,6 +379,22 @@ def __init__(self, params=None, auto_init=True):
         }
         self.params.update(params)
         self.cache = Cache(self)
+        self.archive = set()
+
+        """Preload the archive, if any is specified"""
+        def preload_download_archive(self):
+            fn = self.params.get('download_archive')
+            if fn is None:
+                return False
+            try:
+                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+                    for line in archive_file:
+                        self.archive.add(line.strip())
+            except IOError as ioe:
+                if ioe.errno != errno.ENOENT:
+                    raise
+                return False
+            return True
 
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
@@ -367,6 +403,11 @@ def check_deprecated(param, option, suggestion):
                 return True
             return False
 
+        if self.params.get('verbose'):
+            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
+
+        preload_download_archive(self)
+
         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
             if self.params.get('geo_verification_proxy') is None:
                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
@@ -711,6 +752,16 @@ def prepare_filename(self, info_dict):
             # title "Hello $PATH", we don't want `$PATH` to be expanded.
             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 
+            # https://github.com/blackjack4494/youtube-dlc/issues/85
+            trim_file_name = self.params.get('trim_file_name', False)
+            if trim_file_name:
+                fn_groups = filename.rsplit('.')
+                ext = fn_groups[-1]
+                sub_ext = ''
+                if len(fn_groups) > 2:
+                    sub_ext = fn_groups[-2]
+                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
+
             # Temporary fix for #4787
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
@@ -722,7 +773,7 @@ def prepare_filename(self, info_dict):
             return None
 
     def _match_entry(self, info_dict, incomplete):
-        """ Returns None iff the file should be downloaded """
+        """ Returns None if the file should be downloaded """
 
         video_title = info_dict.get('title', info_dict.get('id', 'video'))
         if 'title' in info_dict:
@@ -769,7 +820,7 @@ def add_extra_info(info_dict, extra_info):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
-    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                      process=True, force_generic_extractor=False):
         '''
         Returns a list with a dictionary for each video we find.
@@ -789,26 +840,30 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
             if not ie.suitable(url):
                 continue
 
-            ie = self.get_info_extractor(ie.ie_key())
+            ie_key = ie.ie_key()
+            ie = self.get_info_extractor(ie_key)
             if not ie.working():
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
 
             try:
-                ie_result = ie.extract(url)
-                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
-                    break
-                if isinstance(ie_result, list):
-                    # Backwards compatibility: old IE result format
-                    ie_result = {
-                        '_type': 'compat_list',
-                        'entries': ie_result,
-                    }
-                self.add_default_extra_info(ie_result, ie, url)
-                if process:
-                    return self.process_ie_result(ie_result, download, extra_info)
-                else:
-                    return ie_result
+                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+            except (AssertionError, IndexError, AttributeError):
+                temp_id = None
+            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+                self.to_screen("[%s] %s: has already been recorded in archive" % (
+                               ie_key, temp_id))
+                break
+
+            return self.__extract_info(url, ie, download, extra_info, process, info_dict)
+
+        else:
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+    def __handle_extraction_exceptions(func):
+        def wrapper(self, *args, **kwargs):
+            try:
+                return func(self, *args, **kwargs)
             except GeoRestrictedError as e:
                 msg = e.msg
                 if e.countries:
@@ -816,20 +871,38 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
                         map(ISO3166Utils.short2full, e.countries))
                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                 self.report_error(msg)
-                break
             except ExtractorError as e:  # An error we somewhat expected
                 self.report_error(compat_str(e), e.format_traceback())
-                break
             except MaxDownloadsReached:
                 raise
             except Exception as e:
                 if self.params.get('ignoreerrors', False):
                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
-                    break
                 else:
                     raise
+        return wrapper
+
+    @__handle_extraction_exceptions
+    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
+        ie_result = ie.extract(url)
+        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
+            return
+        if isinstance(ie_result, list):
+            # Backwards compatibility: old IE result format
+            ie_result = {
+                '_type': 'compat_list',
+                'entries': ie_result,
+            }
+        if info_dict:
+            if info_dict.get('id'):
+                ie_result['id'] = info_dict['id']
+            if info_dict.get('title'):
+                ie_result['title'] = info_dict['title']
+        self.add_default_extra_info(ie_result, ie, url)
+        if process:
+            return self.process_ie_result(ie_result, download, extra_info)
         else:
-            self.report_error('no suitable InfoExtractor for URL %s' % url)
+            return ie_result
 
     def add_default_extra_info(self, ie_result, ie, url):
         self.add_extra_info(ie_result, {
@@ -866,7 +939,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
             # We have to add extra_info to the results because it may be
             # contained in a playlist
             return self.extract_info(ie_result['url'],
-                                     download,
+                                     download, info_dict=ie_result,
                                      ie_key=ie_result.get('ie_key'),
                                      extra_info=extra_info)
         elif result_type == 'url_transparent':
@@ -1001,12 +1074,15 @@ def report_download(num_entries):
 
                 reason = self._match_entry(entry, incomplete=True)
                 if reason is not None:
-                    self.to_screen('[download] ' + reason)
-                    continue
+                    if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
+                        print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
+                        break
+                    else:
+                        self.to_screen('[download] ' + reason)
+                        continue
 
-                entry_result = self.process_ie_result(entry,
-                                                      download=download,
-                                                      extra_info=extra)
+                entry_result = self.__process_iterable_entry(entry, download, extra)
+                # TODO: skip failed (empty) entries?
                 playlist_results.append(entry_result)
             ie_result['entries'] = playlist_results
             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@@ -1035,6 +1111,11 @@ def _fixup(r):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    @__handle_extraction_exceptions
+    def __process_iterable_entry(self, entry, download, extra_info):
+        return self.process_ie_result(
+            entry, download=download, extra_info=extra_info)
+
     def _build_format_filter(self, filter_spec):
         " Returns a function to filter the formats according to the filter_spec "
 
@@ -1074,7 +1155,7 @@ def _build_format_filter(self, filter_spec):
                 '*=': lambda attr, value: value in attr,
             }
             str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+                \s*(?P<key>[a-zA-Z0-9._-]+)
                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                 \s*(?P<value>[a-zA-Z0-9._-]+)
                 \s*$
@@ -1135,6 +1216,9 @@ def syntax_error(note, start):
         GROUP = 'GROUP'
         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 
+        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
+                                  'video': self.params.get('allow_multiple_video_streams', True)}
+
         def _parse_filter(tokens):
             filter_parts = []
             for type, string, start, _, _ in tokens:
@@ -1217,11 +1301,13 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins
                         group = _parse_format_selection(tokens, inside_group=True)
                         current_selector = FormatSelector(GROUP, group, [])
                     elif string == '+':
-                        video_selector = current_selector
-                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
-                        if not video_selector or not audio_selector:
-                            raise syntax_error('"+" must be between two format selectors', start)
-                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+                        if not current_selector:
+                            raise syntax_error('Unexpected "+"', start)
+                        selector_1 = current_selector
+                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
+                        if not selector_2:
+                            raise syntax_error('Expected a selector', start)
+                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                     else:
                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                 elif type == tokenize.ENDMARKER:
@@ -1231,7 +1317,7 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins
             return selectors
 
         def _build_selector_function(selector):
-            if isinstance(selector, list):
+            if isinstance(selector, list):  # ,
                 fs = [_build_selector_function(s) for s in selector]
 
                 def selector_function(ctx):
@@ -1239,9 +1325,11 @@ def selector_function(ctx):
                         for format in f(ctx):
                             yield format
                 return selector_function
-            elif selector.type == GROUP:
+
+            elif selector.type == GROUP:  # ()
                 selector_function = _build_selector_function(selector.selector)
-            elif selector.type == PICKFIRST:
+
+            elif selector.type == PICKFIRST:  # /
                 fs = [_build_selector_function(s) for s in selector.selector]
 
                 def selector_function(ctx):
@@ -1250,103 +1338,119 @@ def selector_function(ctx):
                         if picked_formats:
                             return picked_formats
                     return []
-            elif selector.type == SINGLE:
-                format_spec = selector.selector
 
-                def selector_function(ctx):
-                    formats = list(ctx['formats'])
-                    if not formats:
-                        return
-                    if format_spec == 'all':
-                        for f in formats:
-                            yield f
-                    elif format_spec in ['best', 'worst', None]:
-                        format_idx = 0 if format_spec == 'worst' else -1
-                        audiovideo_formats = [
-                            f for f in formats
-                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
-                        if audiovideo_formats:
-                            yield audiovideo_formats[format_idx]
-                        # for extractors with incomplete formats (audio only (soundcloud)
-                        # or video only (imgur)) we will fallback to best/worst
-                        # {video,audio}-only format
-                        elif ctx['incomplete_formats']:
-                            yield formats[format_idx]
-                    elif format_spec == 'bestaudio':
-                        audio_formats = [
-                            f for f in formats
-                            if f.get('vcodec') == 'none']
-                        if audio_formats:
-                            yield audio_formats[-1]
-                    elif format_spec == 'worstaudio':
-                        audio_formats = [
-                            f for f in formats
-                            if f.get('vcodec') == 'none']
-                        if audio_formats:
-                            yield audio_formats[0]
-                    elif format_spec == 'bestvideo':
-                        video_formats = [
-                            f for f in formats
-                            if f.get('acodec') == 'none']
-                        if video_formats:
-                            yield video_formats[-1]
-                    elif format_spec == 'worstvideo':
-                        video_formats = [
-                            f for f in formats
-                            if f.get('acodec') == 'none']
-                        if video_formats:
-                            yield video_formats[0]
+            elif selector.type == SINGLE:  # atom
+                format_spec = selector.selector if selector.selector is not None else 'best'
+
+                if format_spec == 'all':
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if formats:
+                            for f in formats:
+                                yield f
+
+                else:
+                    format_fallback = False
+                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
+                    if format_spec_obj is not None:
+                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
+                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
+                        not_format_type = 'v' if format_type == 'a' else 'a'
+                        format_modified = format_spec_obj.group(3) is not None
+
+                        format_fallback = not format_type and not format_modified  # for b, w
+                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
+                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
+                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
+                                    if format_type  # bv, ba, wv, wa
+                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+                                    if not format_modified  # b, w
+                                    else None)  # b*, w*
                     else:
-                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
-                        if format_spec in extensions:
-                            filter_f = lambda f: f['ext'] == format_spec
-                        else:
-                            filter_f = lambda f: f['format_id'] == format_spec
-                        matches = list(filter(filter_f, formats))
+                        format_idx = -1
+                        filter_f = ((lambda f: f.get('ext') == format_spec)
+                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
+                                    else (lambda f: f.get('format_id') == format_spec))  # id
+
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if not formats:
+                            return
+                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                         if matches:
-                            yield matches[-1]
-            elif selector.type == MERGE:
-                def _merge(formats_info):
-                    format_1, format_2 = [f['format_id'] for f in formats_info]
-                    # The first format must contain the video and the
-                    # second the audio
-                    if formats_info[0].get('vcodec') == 'none':
-                        self.report_error('The first format must '
-                                          'contain the video, try using '
-                                          '"-f %s+%s"' % (format_2, format_1))
-                        return
-                    # Formats must be opposite (video+audio)
-                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
-                        self.report_error(
-                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
-                            % (format_1, format_2))
-                        return
-                    output_ext = (
-                        formats_info[0]['ext']
-                        if self.params.get('merge_output_format') is None
-                        else self.params['merge_output_format'])
-                    return {
+                            yield matches[format_idx]
+                        elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
+                            # for extractors with incomplete formats (audio only (soundcloud)
+                            # or video only (imgur)) best/worst will fallback to
+                            # best/worst {video,audio}-only format
+                            yield formats[format_idx]
+
+            elif selector.type == MERGE:        # +
+                def _merge(formats_pair):
+                    format_1, format_2 = formats_pair
+
+                    formats_info = []
+                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
+                    formats_info.extend(format_2.get('requested_formats', (format_2,)))
+
+                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+                        get_no_more = {"video": False, "audio": False}
+                        for (i, fmt_info) in enumerate(formats_info):
+                            for aud_vid in ["audio", "video"]:
+                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+                                    if get_no_more[aud_vid]:
+                                        formats_info.pop(i)
+                                    get_no_more[aud_vid] = True
+
+                    if len(formats_info) == 1:
+                        return formats_info[0]
+
+                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
+                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
+
+                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
+                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
+
+                    output_ext = self.params.get('merge_output_format')
+                    if not output_ext:
+                        if the_only_video:
+                            output_ext = the_only_video['ext']
+                        elif the_only_audio and not video_fmts:
+                            output_ext = the_only_audio['ext']
+                        else:
+                            output_ext = 'mkv'
+
+                    new_dict = {
                         'requested_formats': formats_info,
-                        'format': '%s+%s' % (formats_info[0].get('format'),
-                                             formats_info[1].get('format')),
-                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
-                                                formats_info[1].get('format_id')),
-                        'width': formats_info[0].get('width'),
-                        'height': formats_info[0].get('height'),
-                        'resolution': formats_info[0].get('resolution'),
-                        'fps': formats_info[0].get('fps'),
-                        'vcodec': formats_info[0].get('vcodec'),
-                        'vbr': formats_info[0].get('vbr'),
-                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
-                        'acodec': formats_info[1].get('acodec'),
-                        'abr': formats_info[1].get('abr'),
+                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
+                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                         'ext': output_ext,
                     }
-                video_selector, audio_selector = map(_build_selector_function, selector.selector)
+
+                    if the_only_video:
+                        new_dict.update({
+                            'width': the_only_video.get('width'),
+                            'height': the_only_video.get('height'),
+                            'resolution': the_only_video.get('resolution'),
+                            'fps': the_only_video.get('fps'),
+                            'vcodec': the_only_video.get('vcodec'),
+                            'vbr': the_only_video.get('vbr'),
+                            'stretched_ratio': the_only_video.get('stretched_ratio'),
+                        })
+
+                    if the_only_audio:
+                        new_dict.update({
+                            'acodec': the_only_audio.get('acodec'),
+                            'abr': the_only_audio.get('abr'),
+                        })
+
+                    return new_dict
+
+                selector_1, selector_2 = map(_build_selector_function, selector.selector)
 
                 def selector_function(ctx):
                     for pair in itertools.product(
-                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                         yield _merge(pair)
 
             filters = [self._build_format_filter(f) for f in selector.filters]
@@ -1601,7 +1705,7 @@ def is_wellformed(f):
         if req_format is None:
             req_format = self._default_format_spec(info_dict, download=download)
             if self.params.get('verbose'):
-                self.to_stdout('[debug] Default format spec: %s' % req_format)
+                self._write_string('[debug] Default format spec: %s\n' % req_format)
 
         format_selector = self.build_format_selector(req_format)
 
@@ -1637,6 +1741,7 @@ def is_wellformed(f):
                                  expected=True)
 
         if download:
+            self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
             if len(formats_to_download) > 1:
                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
             for format in formats_to_download:
@@ -1754,8 +1859,11 @@ def process_info(self, info_dict):
         # Forced printings
         self.__forced_printings(info_dict, filename, incomplete=False)
 
-        # Do nothing else if in simulate mode
         if self.params.get('simulate', False):
+            if self.params.get('force_write_download_archive', False):
+                self.record_download_archive(info_dict)
+
+            # Do nothing else if in simulate mode
             return
 
         if filename is None:
@@ -1806,13 +1914,13 @@ def ensure_dir_exists(path):
                     self.report_error('Cannot write annotations file: ' + annofn)
                     return
 
-        def dl(name, info):
+        def dl(name, info, subtitle=False):
             fd = get_suitable_downloader(info, self.params)(self, self.params)
             for ph in self._progress_hooks:
                 fd.add_progress_hook(ph)
             if self.params.get('verbose'):
-                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
-            return fd.download(name, info)
+                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
+            return fd.download(name, info, subtitle)
 
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
@@ -1821,12 +1929,14 @@ def dl(name, info):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
+            # ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
+                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                     if sub_info.get('data') is not None:
                         try:
                             # Use newline='' to prevent conversion of newline characters
@@ -1838,11 +1948,17 @@ def dl(name, info):
                             return
                     else:
                         try:
-                            dl(sub_filename, sub_info)
-                        except (ExtractorError, IOError, OSError, ValueError,
-                                compat_urllib_error.URLError,
-                                compat_http_client.HTTPException,
-                                socket.error) as err:
+                            dl(sub_filename, sub_info, subtitle=True)
+                            '''
+                            if self.params.get('sleep_interval_subtitles', False):
+                                dl(sub_filename, sub_info)
+                            else:
+                                sub_data = ie._request_webpage(
+                                    sub_info['url'], info_dict['id'], note=False).read()
+                                with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                                    subfile.write(sub_data)
+                            '''
+                        except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
                                                 (sub_lang, error_to_compat_str(err)))
                             continue
@@ -1860,9 +1976,9 @@ def dl(name, info):
                     info_dict.setdefault('__postprocessors', [])
                     # info_dict['__postprocessors'].append(subconv)
                 if os.path.exists(encodeFilename(afilename)):
-                        self.to_screen(
-                            '[download] %s has already been downloaded and '
-                            'converted' % afilename)
+                    self.to_screen(
+                        '[download] %s has already been downloaded and '
+                        'converted' % afilename)
                 else:
                     try:
                         self.post_process(filename, info_dict)
@@ -1884,6 +2000,57 @@ def dl(name, info):
 
         self._write_thumbnails(info_dict, filename)
 
+        # Write internet shortcut files
+        url_link = webloc_link = desktop_link = False
+        if self.params.get('writelink', False):
+            if sys.platform == "darwin":  # macOS.
+                webloc_link = True
+            elif sys.platform.startswith("linux"):
+                desktop_link = True
+            else:  # if sys.platform in ['win32', 'cygwin']:
+                url_link = True
+        if self.params.get('writeurllink', False):
+            url_link = True
+        if self.params.get('writewebloclink', False):
+            webloc_link = True
+        if self.params.get('writedesktoplink', False):
+            desktop_link = True
+
+        if url_link or webloc_link or desktop_link:
+            if 'webpage_url' not in info_dict:
+                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
+                return
+            ascii_url = iri_to_uri(info_dict['webpage_url'])
+
+        def _write_link_file(extension, template, newline, embed_filename):
+            linkfn = replace_extension(filename, extension, info_dict.get('ext'))
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
+                self.to_screen('[info] Internet shortcut is already present')
+            else:
+                try:
+                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
+                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
+                        template_vars = {'url': ascii_url}
+                        if embed_filename:
+                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
+                        linkfile.write(template % template_vars)
+                except (OSError, IOError):
+                    self.report_error('Cannot write internet shortcut ' + linkfn)
+                    return False
+            return True
+
+        if url_link:
+            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
+                return
+        if webloc_link:
+            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
+                return
+        if desktop_link:
+            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
+                return
+
+        # Download
+        must_record_download_archive = False
         if not self.params.get('skip_download', False):
             try:
                 if info_dict.get('requested_formats') is not None:
@@ -1899,17 +2066,21 @@ def dl(name, info):
                         postprocessors = [merger]
 
                     def compatible_formats(formats):
-                        video, audio = formats
+                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
+                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
+                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
+                        if len(video_formats) > 2 or len(audio_formats) > 2:
+                            return False
+
                         # Check extension
-                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
-                        if video_ext and audio_ext:
-                            COMPATIBLE_EXTS = (
-                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
-                                ('webm')
-                            )
-                            for exts in COMPATIBLE_EXTS:
-                                if video_ext in exts and audio_ext in exts:
-                                    return True
+                        exts = set(format.get('ext') for format in formats)
+                        COMPATIBLE_EXTS = (
+                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
+                            set(('webm',)),
+                        )
+                        for ext_sets in COMPATIBLE_EXTS:
+                            if ext_sets.issuperset(exts):
+                                return True
                         # TODO: Check acodec/vcodec
                         return False
 
@@ -1939,13 +2110,16 @@ def compatible_formats(formats):
                             if not ensure_dir_exists(fname):
                                 return
                             downloaded.append(fname)
-                            partial_success = dl(fname, new_info)
+                            partial_success, real_download = dl(fname, new_info)
                             success = success and partial_success
                         info_dict['__postprocessors'] = postprocessors
                         info_dict['__files_to_merge'] = downloaded
+                        # Even if there were no downloads, it is being merged only now
+                        info_dict['__real_download'] = True
                 else:
                     # Just a single file
-                    success = dl(filename, info_dict)
+                    success, real_download = dl(filename, info_dict)
+                    info_dict['__real_download'] = real_download
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                 return
@@ -2023,7 +2197,10 @@ def compatible_formats(formats):
                 except (PostProcessingError) as err:
                     self.report_error('postprocessing: %s' % str(err))
                     return
-                self.record_download_archive(info_dict)
+                must_record_download_archive = True
+
+        if must_record_download_archive or self.params.get('force_write_download_archive', False):
+            self.record_download_archive(info_dict)
 
     def download(self, url_list):
         """Download a given list of URLs."""
@@ -2088,7 +2265,7 @@ def post_process(self, filename, ie_info):
             except PostProcessingError as e:
                 self.report_error(e.msg)
             if files_to_delete and not self.params.get('keepvideo', False):
-                for old_filename in files_to_delete:
+                for old_filename in set(files_to_delete):
                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                     try:
                         os.remove(encodeFilename(old_filename))
@@ -2124,15 +2301,7 @@ def in_download_archive(self, info_dict):
         if not vid_id:
             return False  # Incomplete video information
 
-        try:
-            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
-                for line in archive_file:
-                    if line.strip() == vid_id:
-                        return True
-        except IOError as ioe:
-            if ioe.errno != errno.ENOENT:
-                raise
-        return False
+        return vid_id in self.archive
 
     def record_download_archive(self, info_dict):
         fn = self.params.get('download_archive')
@@ -2142,6 +2311,7 @@ def record_download_archive(self, info_dict):
         assert vid_id
         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
             archive_file.write(vid_id + '\n')
+        self.archive.add(vid_id)
 
     @staticmethod
     def format_resolution(format, default='unknown'):
@@ -2216,19 +2386,62 @@ def _format_note(self, fdict):
             res += '~' + format_bytes(fdict['filesize_approx'])
         return res
 
+    def _format_note_table(self, f):
+        def join_fields(*vargs):
+            return ', '.join((val for val in vargs if val != ''))
+
+        return join_fields(
+            'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
+            format_field(f, 'language', '[%s]'),
+            format_field(f, 'format_note'),
+            format_field(f, 'container', ignore=(None, f.get('ext'))),
+            format_field(f, 'asr', '%5dHz'))
+
     def list_formats(self, info_dict):
         formats = info_dict.get('formats', [info_dict])
-        table = [
-            [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
-            for f in formats
-            if f.get('preference') is None or f['preference'] >= -1000]
-        if len(formats) > 1:
-            table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
-
-        header_line = ['format code', 'extension', 'resolution', 'note']
+        new_format = self.params.get('listformats_table', False)
+        if new_format:
+            table = [
+                [
+                    format_field(f, 'format_id'),
+                    format_field(f, 'ext'),
+                    self.format_resolution(f),
+                    format_field(f, 'fps', '%d'),
+                    '|',
+                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
+                    format_field(f, 'tbr', '%4dk'),
+                    f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
+                    '|',
+                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
+                    format_field(f, 'vbr', '%4dk'),
+                    format_field(f, 'acodec', default='unknown').replace('none', ''),
+                    format_field(f, 'abr', '%3dk'),
+                    format_field(f, 'asr', '%5dHz'),
+                    self._format_note_table(f)]
+                for f in formats
+                if f.get('preference') is None or f['preference'] >= -1000]
+            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
+                           '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
+        else:
+            table = [
+                [
+                    format_field(f, 'format_id'),
+                    format_field(f, 'ext'),
+                    self.format_resolution(f),
+                    self._format_note(f)]
+                for f in formats
+                if f.get('preference') is None or f['preference'] >= -1000]
+            header_line = ['format code', 'extension', 'resolution', 'note']
+
+        # if len(formats) > 1:
+        #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
         self.to_screen(
-            '[info] Available formats for %s:\n%s' %
-            (info_dict['id'], render_table(header_line, table)))
+            '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
+                header_line,
+                table,
+                delim=new_format,
+                extraGap=(0 if new_format else 1),
+                hideEmpty=new_format)))
 
     def list_thumbnails(self, info_dict):
         thumbnails = info_dict.get('thumbnails')
@@ -2422,7 +2635,7 @@ def _write_thumbnails(self, info_dict, filename):
             thumb_ext = determine_ext(t['url'], 'jpg')
             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
 
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %