Format selector `mergeall` to download and merge all formats

author pukkandan <redacted>

Sat, 10 Apr 2021 14:40:30 +0000 (20:10 +0530)

committer pukkandan <redacted>

Sat, 10 Apr 2021 15:27:27 +0000 (20:57 +0530)
author pukkandan <redacted>
Sat, 10 Apr 2021 14:40:30 +0000 (20:10 +0530)
committer pukkandan <redacted>
Sat, 10 Apr 2021 15:27:27 +0000 (20:57 +0530)
diff --git a/README.md b/README.md

index 5b9e4edd5e73750bd6563d1f6ea566b3109a4fb8..67c3c4923f4b244e0a084096dcbb65467838bd84 100644 (file)
--- a/README.md
+++ b/README.md
@@ -979,8 +979,9 @@ # FORMAT SELECTION
  You can also use special names to select particular edge case formats:
  
   - `all`: Select all formats
- - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio.
- - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio.
+ - `mergeall`: Select and merge all formats (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
+ - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio
+ - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio
   - `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
   - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
   - `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]`
@@ -1094,10 +1095,17 @@ # For this case, an output template should be used since
  # by default, bestvideo and bestaudio will have the same file name.
  $ yt-dlp -f 'bv,ba' -o '%(title)s.f%(format_id)s.%(ext)s'
  
+# Download and merge the best format that has a video stream,
+# and all audio-only formats into one file
+$ yt-dlp -f 'bv*+mergeall[vcodec=none]' --audio-multistreams
+
+# Download and merge the best format that has a video stream,
+# and the best 2 audio-only formats into one file
+$ yt-dlp -f 'bv*+ba+ba.2' --audio-multistreams
  
  
  # The following examples show the old method (without -S) of format selection
-# and how to use -S to achieve a similar but better result
+# and how to use -S to achieve a similar but (generally) better result
  
  # Download the worst video available (old method)
  $ yt-dlp -f 'wv*+wa/w'
@@ -1178,7 +1186,7 @@ # or the worst video (still preferring framerate greater than 30) if there is no
  $ yt-dlp -f '((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)'
  
  # Download the video with the largest resolution no better than 720p,
-# or the video with the smallest resolution available  if there is no such video,
+# or the video with the smallest resolution available if there is no such video,
  # preferring larger framerate for formats with the same resolution
  $ yt-dlp -S 'res:720,fps'
  
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index a77e1fe5e281a1e4664a6fc358668eb5de8cc724..249274fb60ab10236a89598076b843781161e750 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1541,6 +1541,66 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins
                  selectors.append(current_selector)
              return selectors
  
+        def _merge(formats_pair):
+            format_1, format_2 = formats_pair
+
+            formats_info = []
+            formats_info.extend(format_1.get('requested_formats', (format_1,)))
+            formats_info.extend(format_2.get('requested_formats', (format_2,)))
+
+            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+                get_no_more = {"video": False, "audio": False}
+                for (i, fmt_info) in enumerate(formats_info):
+                    for aud_vid in ["audio", "video"]:
+                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+                            if get_no_more[aud_vid]:
+                                formats_info.pop(i)
+                            get_no_more[aud_vid] = True
+
+            if len(formats_info) == 1:
+                return formats_info[0]
+
+            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
+            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
+
+            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
+            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
+
+            output_ext = self.params.get('merge_output_format')
+            if not output_ext:
+                if the_only_video:
+                    output_ext = the_only_video['ext']
+                elif the_only_audio and not video_fmts:
+                    output_ext = the_only_audio['ext']
+                else:
+                    output_ext = 'mkv'
+
+            new_dict = {
+                'requested_formats': formats_info,
+                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
+                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
+                'ext': output_ext,
+            }
+
+            if the_only_video:
+                new_dict.update({
+                    'width': the_only_video.get('width'),
+                    'height': the_only_video.get('height'),
+                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
+                    'fps': the_only_video.get('fps'),
+                    'vcodec': the_only_video.get('vcodec'),
+                    'vbr': the_only_video.get('vbr'),
+                    'stretched_ratio': the_only_video.get('stretched_ratio'),
+                })
+
+            if the_only_audio:
+                new_dict.update({
+                    'acodec': the_only_audio.get('acodec'),
+                    'abr': the_only_audio.get('abr'),
+                })
+
+            return new_dict
+
          def _build_selector_function(selector):
              if isinstance(selector, list):  # ,
                  fs = [_build_selector_function(s) for s in selector]
@@ -1565,14 +1625,22 @@ def selector_function(ctx):
                      return []
  
              elif selector.type == SINGLE:  # atom
-                format_spec = selector.selector if selector.selector is not None else 'best'
+                format_spec = (selector.selector if selector.selector is not None else 'best').lower()
  
+                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                  if format_spec == 'all':
                      def selector_function(ctx):
                          formats = list(ctx['formats'])
                          if formats:
                              for f in formats:
                                  yield f
+                elif format_spec == 'mergeall':
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        merged_format = formats[0]
+                        for f in formats[1:]:
+                            merged_format = _merge((merged_format, f))
+                        yield merged_format
  
                  else:
                      format_fallback = False
@@ -1618,66 +1686,6 @@ def selector_function(ctx):
                                  yield formats[format_idx]
  
              elif selector.type == MERGE:        # +
-                def _merge(formats_pair):
-                    format_1, format_2 = formats_pair
-
-                    formats_info = []
-                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
-                    formats_info.extend(format_2.get('requested_formats', (format_2,)))
-
-                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
-                        get_no_more = {"video": False, "audio": False}
-                        for (i, fmt_info) in enumerate(formats_info):
-                            for aud_vid in ["audio", "video"]:
-                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
-                                    if get_no_more[aud_vid]:
-                                        formats_info.pop(i)
-                                    get_no_more[aud_vid] = True
-
-                    if len(formats_info) == 1:
-                        return formats_info[0]
-
-                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
-                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
-
-                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
-                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
-
-                    output_ext = self.params.get('merge_output_format')
-                    if not output_ext:
-                        if the_only_video:
-                            output_ext = the_only_video['ext']
-                        elif the_only_audio and not video_fmts:
-                            output_ext = the_only_audio['ext']
-                        else:
-                            output_ext = 'mkv'
-
-                    new_dict = {
-                        'requested_formats': formats_info,
-                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
-                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
-                        'ext': output_ext,
-                    }
-
-                    if the_only_video:
-                        new_dict.update({
-                            'width': the_only_video.get('width'),
-                            'height': the_only_video.get('height'),
-                            'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
-                            'fps': the_only_video.get('fps'),
-                            'vcodec': the_only_video.get('vcodec'),
-                            'vbr': the_only_video.get('vbr'),
-                            'stretched_ratio': the_only_video.get('stretched_ratio'),
-                        })
-
-                    if the_only_audio:
-                        new_dict.update({
-                            'acodec': the_only_audio.get('acodec'),
-                            'abr': the_only_audio.get('abr'),
-                        })
-
-                    return new_dict
-
                  selector_1, selector_2 = map(_build_selector_function, selector.selector)
  
                  def selector_function(ctx):
author	pukkandan <redacted>
	Sat, 10 Apr 2021 14:40:30 +0000 (20:10 +0530)
committer	pukkandan <redacted>
	Sat, 10 Apr 2021 15:27:27 +0000 (20:57 +0530)
README.md		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history