]> jfr.im git - yt-dlp.git/commitdiff
[cleanup] Minor cleanup
authorpukkandan <redacted>
Fri, 19 Nov 2021 00:06:28 +0000 (05:36 +0530)
committerpukkandan <redacted>
Fri, 19 Nov 2021 00:06:28 +0000 (05:36 +0530)
Closes #1696, Closes #1673

12 files changed:
CONTRIBUTING.md
Changelog.md
README.md
test/test_youtube_signature.py
yt_dlp/YoutubeDL.py
yt_dlp/__init__.py
yt_dlp/extractor/francetv.py
yt_dlp/extractor/funimation.py
yt_dlp/extractor/linkedin.py
yt_dlp/extractor/pbs.py
yt_dlp/extractor/tenplay.py
yt_dlp/extractor/youtube.py

index cd22afed98b989054304b790fe91fc455c86c968..8a0178d94480a9eb95a27ddf93ffe96516a39b58 100644 (file)
@@ -209,7 +209,7 @@ ## Adding support for a new site
     ```
 1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py).
 1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
-1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the purticular test is disabled from running.
+1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
 1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
 
index 5ac2aa6157466dfcd1c6216c3e6381c114dadf8a..7bb8c7888f1a2d1f58ad230cdfcb16d1fc92cee6 100644 (file)
@@ -40,7 +40,7 @@ ### 2021.11.10
 * [fragment] Fix progress display in fragmented downloads
 * [downloader/ffmpeg] Fix vtt download with ffmpeg
 * [ffmpeg] Detect presence of setts and libavformat version
-* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
+* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
 * [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal)
 * [FormatSort] `eac3` is better than `ac3`
 * [FormatSort] Fix some fields' defaults
index 96f5d7ecb21ed28d475993e6bca83d67a39aeace..1a5f84cc98cb281ef100aff6fffd6d71dcade425 100644 (file)
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ # NEW FEATURES
 
 * **New playlist extractors**: bilibili categories, eroprofile albums, hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series
 
-* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo
+* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo
 
 * **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN
 
@@ -136,7 +136,7 @@ ### Differences in default behavior
 * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
 * `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
 * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files
-* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this
+* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this
 * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
 * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
 * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
@@ -196,7 +196,7 @@ ### With [PIP](https://pypi.org/project/pip)
 
 If you want to be on the cutting edge, you can also install the master branch with:
 ```
-python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip
+python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip
 ```
 
 Note that on some systems, you may need to use `py` or `python` instead of `python3`
@@ -793,7 +793,7 @@ ## Post-Processing Options:
     --audio-format FORMAT            Specify audio format to convert the audio
                                      to when -x is used. Currently supported
                                      formats are: best (default) or one of
-                                     best|aac|flac|mp3|m4a|opus|vorbis|wav
+                                     best|aac|flac|mp3|m4a|opus|vorbis|wav|alac
     --audio-quality QUALITY          Specify ffmpeg audio quality, insert a
                                      value between 0 (best) and 10 (worst) for
                                      VBR or a specific bitrate like 128K
@@ -844,15 +844,20 @@ ## Post-Processing Options:
     --no-embed-subs                  Do not embed subtitles (default)
     --embed-thumbnail                Embed thumbnail in the video as cover art
     --no-embed-thumbnail             Do not embed thumbnail (default)
-    --embed-metadata                 Embed metadata to the video file. Also adds
-                                     chapters to file unless --no-add-chapters
-                                     is used (Alias: --add-metadata)
+    --embed-metadata                 Embed metadata to the video file. Also
+                                     embeds chapters/infojson if present unless
+                                     --no-embed-chapters/--no-embed-info-json
+                                     are used (Alias: --add-metadata)
     --no-embed-metadata              Do not add metadata to file (default)
                                      (Alias: --no-add-metadata)
     --embed-chapters                 Add chapter markers to the video file
                                      (Alias: --add-chapters)
     --no-embed-chapters              Do not add chapter markers (default)
                                      (Alias: --no-add-chapters)
+    --embed-info-json                Embed the infojson as an attachment to
+                                     mkv/mka video files
+    --no-embed-info-json             Do not embed the infojson as an attachment
+                                     to the video file
     --parse-metadata FROM:TO         Parse additional metadata like title/artist
                                      from other fields; see "MODIFYING METADATA"
                                      for details
@@ -1210,11 +1215,14 @@ #### Output template examples
 Note that on Windows you need to use double quotes instead of single.
 
 ```bash
+$ yt-dlp --get-filename -o 'test video.%(ext)s' BaW_jenozKc
+test video.webm    # Literal name with correct extension
+
 $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
-youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+youtube-dl test video ''_ä↭𝕐.webm    # All kinds of weird characters
 
 $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
-youtube-dl_test_video_.mp4          # A simple file name
+youtube-dl_test_video_.webm    # Restricted file name
 
 # Download YouTube playlist videos in separate directory indexed by video order in a playlist
 $ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
index 60d8eabf5ce40bd427d4741ee966d9be6725ca18..df4c36047337bcb2faf2accfb92f923deaddd42a 100644 (file)
         'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
         'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
     ),
+    (
+        'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
+        'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
+    ),
 ]
 
 
index 197ec11e6cfff8509ce755179b2079c22c46dd65..e078e62ef64def7412df8bfc2c0e611e6f1d16e7 100644 (file)
@@ -528,7 +528,6 @@ def __init__(self, params=None, auto_init=True):
         self.cache = Cache(self)
 
         windows_enable_vt_mode()
-        # FIXME: This will break if we ever print color to stdout
         self._allow_colors = {
             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
@@ -2012,10 +2011,10 @@ def selector_function(ctx):
                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                 if format_spec == 'all':
                     def selector_function(ctx):
-                        yield from _check_formats(ctx['formats'])
+                        yield from _check_formats(ctx['formats'][::-1])
                 elif format_spec == 'mergeall':
                     def selector_function(ctx):
-                        formats = list(_check_formats(ctx['formats']))
+                        formats = list(_check_formats(ctx['formats'][::-1]))
                         if not formats:
                             return
                         merged_format = formats[-1]
@@ -3163,7 +3162,7 @@ def format_resolution(format, default='unknown'):
             return 'images'
         else:
             return default
-        return f'{res} images' if is_images else res
+        return f'img {res}' if is_images else res
 
     def _format_note(self, fdict):
         res = ''
index 63b9b6e2f9f729161f71a344be0b059ef3710a7d..7960d3b039b6e476d084d83f18c2810ff8576eae 100644 (file)
@@ -378,8 +378,6 @@ def metadataparser_actions(f):
         opts.sponsorblock_remove = set()
     sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
 
-    if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None:
-        opts.addchapters = True
     opts.remove_chapters = opts.remove_chapters or []
 
     if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False:
@@ -400,40 +398,32 @@ def metadataparser_actions(f):
         opts.remuxvideo = False
 
     if opts.allow_unplayable_formats:
-        if opts.extractaudio:
-            report_conflict('--allow-unplayable-formats', '--extract-audio')
-            opts.extractaudio = False
-        if opts.remuxvideo:
-            report_conflict('--allow-unplayable-formats', '--remux-video')
-            opts.remuxvideo = False
-        if opts.recodevideo:
-            report_conflict('--allow-unplayable-formats', '--recode-video')
-            opts.recodevideo = False
-        if opts.addmetadata:
-            report_conflict('--allow-unplayable-formats', '--add-metadata')
-            opts.addmetadata = False
-        if opts.embedsubtitles:
-            report_conflict('--allow-unplayable-formats', '--embed-subs')
-            opts.embedsubtitles = False
-        if opts.embedthumbnail:
-            report_conflict('--allow-unplayable-formats', '--embed-thumbnail')
-            opts.embedthumbnail = False
-        if opts.xattrs:
-            report_conflict('--allow-unplayable-formats', '--xattrs')
-            opts.xattrs = False
-        if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'):
-            report_conflict('--allow-unplayable-formats', '--fixup')
+        def report_unplayable_conflict(opt_name, arg, default=False, allowed=None):
+            val = getattr(opts, opt_name)
+            if (not allowed and val) or not allowed(val):
+                report_conflict('--allow-unplayable-formats', arg)
+                setattr(opts, opt_name, default)
+
+        report_unplayable_conflict('extractaudio', '--extract-audio')
+        report_unplayable_conflict('remuxvideo', '--remux-video')
+        report_unplayable_conflict('recodevideo', '--recode-video')
+        report_unplayable_conflict('addmetadata', '--embed-metadata')
+        report_unplayable_conflict('addchapters', '--embed-chapters')
+        report_unplayable_conflict('embed_infojson', '--embed-info-json')
+        opts.embed_infojson = False
+        report_unplayable_conflict('embedsubtitles', '--embed-subs')
+        report_unplayable_conflict('embedthumbnail', '--embed-thumbnail')
+        report_unplayable_conflict('xattrs', '--xattrs')
+        report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore'))
         opts.fixup = 'never'
-        if opts.remove_chapters:
-            report_conflict('--allow-unplayable-formats', '--remove-chapters')
-            opts.remove_chapters = []
-        if opts.sponsorblock_remove:
-            report_conflict('--allow-unplayable-formats', '--sponsorblock-remove')
-            opts.sponsorblock_remove = set()
-        if opts.sponskrub:
-            report_conflict('--allow-unplayable-formats', '--sponskrub')
+        report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[])
+        report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set())
+        report_unplayable_conflict('sponskrub', '--sponskrub', default=set())
         opts.sponskrub = False
 
+    if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None:
+        opts.addchapters = True
+
     # PostProcessors
     postprocessors = list(opts.add_postprocessors)
     if sponsorblock_query:
index 3bbab69e61e96ceeb342c23c16dd106a7381950c..bc5ef4df97ff49cd1611fe9bf9446149a849e34e 100644 (file)
@@ -185,7 +185,7 @@ def _extract_video(self, video_id, catalogue=None):
                 'vcodec': 'none',
                 'ext': 'mhtml',
                 'protocol': 'mhtml',
-                'url': 'about:dummy',
+                'url': 'about:invalid',
                 'fragments': [{
                     'path': sheet,
                     # XXX: not entirely accurate; each spritesheet seems to be
index 42711083e8a0b2bed666577f2ada1d9c74639bc1..96dad2ca3451fcfcb2f4428ff549fa25b566d61c 100644 (file)
@@ -276,7 +276,7 @@ def _real_extract(self, url):
     def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
         if isinstance(episode, str):
             webpage = self._download_webpage(
-                f'https://www.funimation.com/player/{experience_id}', display_id,
+                f'https://www.funimation.com/player/{experience_id}/', display_id,
                 fatal=False, note=f'Downloading player webpage for {format_name}')
             episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)
 
index 9255b33012b6a7bcd06356bc9e2fcebdfde79c5d..bd76ae1664a3dd7b241b17aae073d50b71a8d4b2 100644 (file)
@@ -109,7 +109,7 @@ def _real_extract(self, url):
         description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
         like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
         creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
-        
+
         sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
         formats = [{
             'url': source['src'],
index 0eabf9beee8306c1f46fdbd4a7b88c0ea5ce4b5b..ffaa6bf929f0c9a1cc2c8d849c3cbf8e819dd85c 100644 (file)
@@ -193,7 +193,7 @@ class PBSIE(InfoExtractor):
            # Article with embedded player (or direct video)
            (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
-           (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
+           (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)
         )
     ''' % '|'.join(list(zip(*_STATIONS))[0])
 
index c810cfd0d5192c784353ec67b4d720c759cbc3a5..5b3222ecf8119f2ae82625eb5935ba0d3155f4e2 100644 (file)
@@ -58,7 +58,7 @@ def _get_bearer_token(self, video_id):
             'email': username,
             'password': password,
         }))
-        return "Bearer " + data['jwt']['accessToken']
+        return 'Bearer ' + data['jwt']['accessToken']
 
     def _real_extract(self, url):
         content_id = self._match_id(url)
index 41e7fce101699b09f2a7fb6b2f2ed21a85cee5bf..1fbdcd98b6e93e28498d1006008bd70d2ab547cc 100644 (file)
@@ -508,9 +508,9 @@ def _extract_visitor_data(*args):
         Extracts visitorData from an API response or ytcfg
         Appears to be used to track session state
         """
-        return traverse_obj(
-            args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
-            expected_type=compat_str, get_all=False)
+        return get_first(
+            args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
+            expected_type=str)
 
     @property
     def is_authenticated(self):
@@ -1674,7 +1674,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # shorts
             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
             'only_matching': True,
-        },
+        }, {
+            'note': 'Storyboards',
+            'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
+            'info_dict': {
+                'id': '5KLPxDtMqe8',
+                'ext': 'mhtml',
+                'format_id': 'sb0',
+                'title': 'Your Brain is Plastic',
+                'uploader_id': 'scishow',
+                'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
+                'upload_date': '20140324',
+                'uploader': 'SciShow',
+            }, 'params': {'format': 'mhtml', 'skip_download': True}
+        }
     ]
 
     @classmethod
@@ -1920,9 +1933,9 @@ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=F
         return sts
 
     def _mark_watched(self, video_id, player_responses):
-        playback_url = traverse_obj(
-            player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
-            expected_type=url_or_none, get_all=False)
+        playback_url = get_first(
+            player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
+            expected_type=url_or_none)
         if not playback_url:
             self.report_warning('Unable to mark watched')
             return