jfr.im git - yt-dlp.git/commitdiff
[cleanup] Misc fixes
author pukkandan <redacted>
Fri, 3 Jun 2022 15:59:03 +0000 (21:29 +0530)
committer pukkandan <redacted>
Fri, 3 Jun 2022 16:15:35 +0000 (21:45 +0530)
Cherry-picks from: #3498, #3947
Related: #3949, https://github.com/yt-dlp/yt-dlp/issues/1839#issuecomment-1140313836
Authored by: pukkandan, flashdagger, gamer191

14 files changed:
.github/ISSUE_TEMPLATE/3_site_feature_request.yml
.github/ISSUE_TEMPLATE/5_feature_request.yml
.github/ISSUE_TEMPLATE/6_question.yml
.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml
.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml
.github/ISSUE_TEMPLATE_tmpl/6_question.yml
README.md
pyinst.py
yt_dlp/YoutubeDL.py
yt_dlp/downloader/fragment.py
yt_dlp/extractor/common.py
yt_dlp/extractor/youtube.py
yt_dlp/postprocessor/ffmpeg.py
yt_dlp/utils.py

index 7a81cede61ccbe21d2099a3dbd0f6f37b92f1918..b4bb2c839f83c028107d8dc2f05d092eb5cdd867 100644 (file)
@@ -9,7 +9,7 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting a site feature request
+        - label: I'm requesting a site-specific feature
           required: true
         - label: I've verified that I'm running yt-dlp version **2022.05.18** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
index 08115f7995efa07c691ad40de05b28f642fa3fb7..5bfcbb6cb4a5e7172de0b7be7d818d336c3be137 100644 (file)
@@ -9,7 +9,7 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting a feature request
+        - label: I'm requesting a feature unrelated to a specific site
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
index 030d2cfe7812ce1eccf091b7c90cde93016e2eb8..8b434aef06e9ef657d030abdcc41887dc30749b6 100644 (file)
@@ -9,13 +9,13 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm asking a question and **not** reporting a bug/feature request
+        - label: I'm asking a question and **not** reporting a bug or requesting a feature
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
-        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones. DO NOT post duplicates
           required: true
-        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
           required: true
   - type: textarea
     id: question
index a175b92c3154984aa56e79a83b1fec52d99f9048..77e9d3469cb3270db4a3624517fe95495cd80d3d 100644 (file)
@@ -9,7 +9,7 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting a site feature request
+        - label: I'm requesting a site-specific feature
           required: true
         - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
index 1f33f09dc94d1a192128d05157de85b9a848a454..4686c1dff321dd2ab6cac04da16c464dd0476966 100644 (file)
@@ -9,7 +9,7 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting a feature request
+        - label: I'm requesting a feature unrelated to a specific site
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
index 030d2cfe7812ce1eccf091b7c90cde93016e2eb8..8b434aef06e9ef657d030abdcc41887dc30749b6 100644 (file)
@@ -9,13 +9,13 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm asking a question and **not** reporting a bug/feature request
+        - label: I'm asking a question and **not** reporting a bug or requesting a feature
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
-        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones. DO NOT post duplicates
           required: true
-        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
           required: true
   - type: textarea
     id: question
index 912a42f72a6de07e975e3615525cceda0af8854b..87986e4c3aac3143ee8937e67628bdaa39791e5d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1783,7 +1783,7 @@ # EMBEDDING YT-DLP
     ydl.download(URLS)
 ```
 
-Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181).
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L180).
 
 **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)
 
index de3504b35544b5261563fdfe57f57ca7e83bfad6..292f5d71923f9dd034d579fe1ef8f355e5f6cfaf 100644 (file)
--- a/pyinst.py
+++ b/pyinst.py
@@ -105,7 +105,7 @@ def pycryptodome_module():
 
 
 def set_version_info(exe, version):
-    if OS_NAME == 'Windows':
+    if OS_NAME == 'win32':
         windows_set_version(exe, version)
 
 
index 5aae25707fcff229d7f591ac8ecc81bbfab64acb..e71e85d2e5b6d96e8c4cb3f5703e7b0ad41f0bde 100644 (file)
@@ -195,13 +195,6 @@ class YoutubeDL:
                        For compatibility, a single list is also accepted
     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                        a list of tuples with (template, filename)
-    forceurl:          Force printing final URL. (Deprecated)
-    forcetitle:        Force printing title. (Deprecated)
-    forceid:           Force printing ID. (Deprecated)
-    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
-    forcedescription:  Force printing description. (Deprecated)
-    forcefilename:     Force printing final filename. (Deprecated)
-    forceduration:     Force printing duration. (Deprecated)
     forcejson:         Force printing info_dict as JSON.
     dump_single_json:  Force printing the info_dict of the whole playlist
                        (or video) as a single JSON line.
@@ -278,9 +271,6 @@ class YoutubeDL:
     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
-    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
-                       Downloads all the subtitles of the video
-                       (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download (can be regex).
@@ -334,7 +324,6 @@ class YoutubeDL:
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fribidi
     debug_printtraffic:Print out sent and received HTTP traffic
-    include_ads:       Download ads as well (deprecated)
     default_search:    Prepend this string if an input url is not valid.
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
@@ -350,10 +339,6 @@ class YoutubeDL:
                        * when: When to run the postprocessor. Allowed values are
                                the entries of utils.POSTPROCESS_WHEN
                                Assumed to be 'post_process' if not given
-    post_hooks:        Deprecated - Register a custom postprocessor instead
-                       A list of functions that get called as the final step
-                       for each video file, after all postprocessors have been
-                       called. The filename will be passed as the only argument.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
                        * status: One of "downloading", "error", or "finished".
@@ -398,8 +383,6 @@ class YoutubeDL:
                        - "detect_or_warn": check whether we can do anything
                                            about it, warn otherwise (default)
     source_address:    Client-side IP address to bind to.
-    call_home:         Boolean, true iff we are allowed to contact the
-                       yt-dlp servers for debugging. (BROKEN)
     sleep_interval_requests: Number of seconds to sleep between requests
                        during extraction
     sleep_interval:    Number of seconds to sleep before each download when
@@ -440,11 +423,6 @@ class YoutubeDL:
                        external downloader to use for it. The allowed protocols
                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                        Set the value to 'native' to use the native downloader
-    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
-                       or {'m3u8': 'ffmpeg'} instead.
-                       Use the native HLS downloader instead of ffmpeg/avconv
-                       if True, otherwise use ffmpeg/avconv if False, otherwise
-                       use downloader suggested by extractor if None.
     compat_opts:       Compatibility options. See "Differences in default behavior".
                        The following options do not work when used through the API:
                        filename, abort-on-error, multistreams, no-live-chat, format-sort
@@ -466,8 +444,6 @@ class YoutubeDL:
     external_downloader_args, concurrent_fragment_downloads.
 
     The following options are used by the post processors:
-    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
-                       otherwise prefer ffmpeg. (avconv support is deprecated)
     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                        to the binary or its containing directory.
     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
@@ -487,12 +463,48 @@ class YoutubeDL:
                        See "EXTRACTOR ARGUMENTS" for details.
                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
-    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+
+    The following options are deprecated and may be removed in the future:
+
+    forceurl:          - Use forceprint
+                       Force printing final URL.
+    forcetitle:        - Use forceprint
+                       Force printing title.
+    forceid:           - Use forceprint
+                       Force printing ID.
+    forcethumbnail:    - Use forceprint
+                       Force printing thumbnail URL.
+    forcedescription:  - Use forceprint
+                       Force printing description.
+    forcefilename:     - Use forceprint
+                       Force printing final filename.
+    forceduration:     - Use forceprint
+                       Force printing duration.
+    allsubtitles:      - Use subtitleslangs = ['all']
+                       Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
+    include_ads:       - Doesn't work
+                       Download ads as well
+    call_home:         - Not implemented
+                       Boolean, true iff we are allowed to contact the
+                       yt-dlp servers for debugging.
+    post_hooks:        - Register a custom postprocessor
+                       A list of functions that get called as the final step
+                       for each video file, after all postprocessors have been
+                       called. The filename will be passed as the only argument.
+    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+                       Use the native HLS downloader instead of ffmpeg/avconv
+                       if True, otherwise use ffmpeg/avconv if False, otherwise
+                       use downloader suggested by extractor if None.
+    prefer_ffmpeg:     - avconv support is deprecated
+                       If False, use avconv instead of ffmpeg if both are available,
+                       otherwise prefer ffmpeg.
+    youtube_include_dash_manifest: - Use extractor_args
                        If True (default), DASH manifests and related
                        data will be downloaded and processed by extractor.
                        You can reduce network I/O by disabling it if you don't
                        care about DASH. (only for youtube)
-    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+    youtube_include_hls_manifest: - Use extractor_args
                        If True (default), HLS manifests and related
                        data will be downloaded and processed by extractor.
                        You can reduce network I/O by disabling it if you don't
index 493849001f384ba4540a4581fcd8f976527bb66d..d94cb49569393bc6b6ef2e04d1fcce118ce96246 100644 (file)
@@ -496,12 +496,20 @@ def _download_fragment(fragment):
 
             self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
             with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
-                for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
-                    ctx['fragment_filename_sanitized'] = frag_filename
-                    ctx['fragment_index'] = frag_index
-                    result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
-                    if not result:
-                        return False
+                try:
+                    for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+                        ctx.update({
+                            'fragment_filename_sanitized': frag_filename,
+                            'fragment_index': frag_index,
+                        })
+                        if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
+                            return False
+                except KeyboardInterrupt:
+                    self._finish_multiline_status()
+                    self.report_error(
+                        'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False)
+                    pool.shutdown(wait=False)
+                    raise
         else:
             for fragment in fragments:
                 if not interrupt_trigger[0]:
index c1a160e82ca845c5f225a4ee6cbdf4ba1d720e64..2e62660c75afb6c2d9d5fca0e027131c18e7a9a5 100644 (file)
@@ -786,7 +786,8 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
                 self.report_warning(errmsg)
                 return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
+                                 encoding=None, data=None, headers={}, query={}, expected_status=None):
         """
         Return a tuple (page content as string, URL handle).
 
@@ -943,7 +944,7 @@ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True,
                 except ValueError:
                     raise e
         except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % video_id
+            errmsg = f'{video_id}: Failed to parse JSON'
             if fatal:
                 raise ExtractorError(errmsg, cause=ve)
             else:
index c9bdd309d89503ab6907bd0b8295def6ca79922f..8b2332dc11408e11e6a1ec55bb920b45523f1c87 100644 (file)
@@ -15,7 +15,7 @@
 import traceback
 
 from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import functools
+from ..compat import functools  # isort: split
 from ..compat import (
     compat_chr,
     compat_HTTPError,
@@ -483,6 +483,11 @@ def extract_yt_initial_data(self, item_id, webpage, fatal=True):
         if data:
             return self._parse_json(data, item_id, fatal=fatal)
 
+    def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
+        return self._parse_json(self._search_regex(
+            (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
+             regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True)
+
     @staticmethod
     def _extract_session_index(*data):
         """
@@ -2733,54 +2738,38 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
         chapter_title = lambda chapter: self._get_text(chapter, 'title')
 
-        return next((
-            filter(None, (
-                self._extract_chapters(
-                    traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
-                    chapter_time, chapter_title, duration)
-                for contents in content_list
-            ))), [])
+        return next(filter(None, (
+            self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+                                   chapter_time, chapter_title, duration)
+            for contents in content_list)), [])
 
-    @staticmethod
-    def _extract_chapters_from_description(description, duration):
-        chapters = [{'start_time': 0}]
-        for timestamp, title in re.findall(
-                r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''):
-            start = parse_duration(timestamp)
-            if start and title and chapters[-1]['start_time'] < start < duration:
-                chapters[-1]['end_time'] = start
-                chapters.append({
-                    'start_time': start,
-                    'title': title,
-                })
-        chapters[-1]['end_time'] = duration
-        return chapters[1:]
-
-    def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
-        chapters = []
-        last_chapter = {'start_time': 0}
-        for idx, chapter in enumerate(chapter_list or []):
-            title = chapter_title(chapter)
-            start_time = chapter_time(chapter)
-            if start_time is None:
-                continue
-            last_chapter['end_time'] = start_time
-            if start_time < last_chapter['start_time']:
-                if idx == 1:
-                    chapters.pop()
-                    self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
-                else:
-                    self.report_warning(f'Invalid start time for chapter "{title}"')
-                    continue
-            last_chapter = {'start_time': start_time, 'title': title}
-            chapters.append(last_chapter)
-        last_chapter['end_time'] = duration
-        return chapters
+    def _extract_chapters_from_description(self, description, duration):
+        return self._extract_chapters(
+            re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
+            chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
+            duration=duration, strict=False)
 
-    def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
-        return self._parse_json(self._search_regex(
-            (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
-             regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True)
+    def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
+        if not duration:
+            return
+        chapter_list = [{
+            'start_time': chapter_time(chapter),
+            'title': chapter_title(chapter),
+        } for chapter in chapter_list or []]
+        if not strict:
+            chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+        chapters = [{'start_time': 0, 'title': '<Untitled>'}]
+        for idx, chapter in enumerate(chapter_list):
+            if chapter['start_time'] is None or not chapter['title']:
+                self.report_warning(f'Incomplete chapter {idx}')
+            elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+                chapters[-1]['end_time'] = chapter['start_time']
+                chapters.append(chapter)
+            else:
+                self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
+        chapters[-1]['end_time'] = duration
+        return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
 
     def _extract_comment(self, comment_renderer, parent=None):
         comment_id = comment_renderer.get('commentId')
@@ -3663,7 +3652,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
 
         # Youtube Music Auto-generated description
         if video_description:
-            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
+            mobj = re.search(
+                r'''(?xs)
+                    (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
+                    (?P<album>[^\n]+)
+                    (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
+                    (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
+                    (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
+                    .+\nAuto-generated\ by\ YouTube\.\s*$
+                ''', video_description)
             if mobj:
                 release_year = mobj.group('release_year')
                 release_date = mobj.group('release_date')
index 2a456e567256b659f9504c3e63747cc10ca1e1e3..dad8b7f8f1db616ce58c98958b5daef9ac52df5d 100644 (file)
@@ -776,7 +776,7 @@ def add(meta_list, info_list=None):
         for key, value in info.items():
             mobj = re.fullmatch(meta_regex, key)
             if value is not None and mobj:
-                metadata[mobj.group('i') or 'common'][mobj.group('key')] = value
+                metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '')
 
         # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags
         yield ('-write_id3v1', '1')
index b0300b724b0cbee9cd8eac42fc21d9a92103c978..00721eb4673c7ab67cbd63e41c4c34fb6bd5c177 100644 (file)
@@ -1936,7 +1936,7 @@ def intlist_to_bytes(xs):
 
 
 class LockingUnsupportedError(OSError):
-    msg = 'File locking is not supported on this platform'
+    msg = 'File locking is not supported'
 
     def __init__(self):
         super().__init__(self.msg)
@@ -2061,8 +2061,11 @@ def __enter__(self):
             try:
                 self.f.truncate()
             except OSError as e:
-                if e.errno != 29:  # Illegal seek, expected when self.f is a FIFO
-                    raise e
+                if e.errno not in (
+                    errno.ESPIPE,  # Illegal seek - expected for FIFO
+                    errno.EINVAL,  # Invalid argument - expected for /dev/null
+                ):
+                    raise
         return self
 
     def unlock(self):