[cleanup] Minor fixes

author pukkandan <redacted>

Wed, 18 May 2022 03:34:30 +0000 (09:04 +0530)

committer pukkandan <redacted>

Wed, 18 May 2022 03:34:30 +0000 (09:04 +0530)
author pukkandan <redacted>
Wed, 18 May 2022 03:34:30 +0000 (09:04 +0530)
committer pukkandan <redacted>
Wed, 18 May 2022 03:34:30 +0000 (09:04 +0530)
diff --git a/Changelog.md b/Changelog.md

index 3fb6260b8642cbbec79791f9cc254e5ab20b4b9a..52ea03367683df3a39dc559cac95275daade1a1f 100644 (file)
--- a/Changelog.md
+++ b/Changelog.md
@@ -785,7 +785,7 @@ ### 2021.10.22
  * [build] Improvements
      * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev)
      * Release windows exe built with `py2exe`
-    * Enable lazy-extractors in releases. 
+    * Enable lazy-extractors in releases
          * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
      * Clean up error reporting in update
      * Refactor `pyinst.py`, misc cleanup and improve docs
@@ -1393,7 +1393,7 @@ ### 2021.06.23
  * [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz)
  * [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao)
  * [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz)
-* [funimation] Extract subtitles 
+* [funimation] Extract subtitles
  * [pornhub] Extract `cast`
  * [hotstar] Use server time for authentication instead of local time
  * [EmbedThumbnail] Fix for already downloaded thumbnail
@@ -1489,7 +1489,7 @@ ### 2021.06.01
  
  ### 2021.05.20
  
-* **Youtube improvements**: 
+* **Youtube improvements**:
      * Support youtube music `MP`, `VL` and `browse` pages
      * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
      * Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen)
@@ -2031,7 +2031,7 @@ ### 2021.01.05
  * **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details
  * **Format Selection:** See [Format Selection](README.md#format-selection) for details
      * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
-    * Changed video format sorting to show video only files and video+audio files together.
+    * Changed video format sorting to show video only files and video+audio files together
      * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
      * Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively
  * Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py

index 8c481bc2d4664e585c78d0aa25bb2da5f56cd14e..cd1985c8eead45313e9348bbdbf82ac6232a544a 100644 (file)
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,6 +1,6 @@
  #!/usr/bin/env python3
-import os
  import optparse
+import os
  import sys
  from inspect import getsource
  
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 31624f181a4b0d84c1dabeae3093e44e13a7fc66..ba08f6a7d39f71b044cdf75cfb2a70cee83eca4e 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1924,7 +1924,7 @@ def can_merge():
              and download
              and (
                  not can_merge()
-                or info_dict.get('is_live', False)
+                or info_dict.get('is_live') and not self.params.get('live_from_start')
                  or self.outtmpl_dict['default'] == '-'))
          compat = (
              prefer_best
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py

index 0a8bf37b6501f23308652c9b85ac21f966a73029..8f890b34a6aee948ecbbb69ca509941944f527d7 100644 (file)
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -869,6 +869,7 @@ def main(argv=None):
  
  
  from .extractor import gen_extractors, list_extractors
+
  __all__ = [
      'main',
      'YoutubeDL',
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index ebeca4395166f934104a48c942d6d1acb981302d..6a451c20bba2c8e65ae9f483d7a8ff16ea5e1342 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1343,7 +1343,7 @@ def _og_search_url(self, html, **kargs):
          return self._og_search_property('url', html, **kargs)
  
      def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
-        return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+        return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
  
      def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
          name = variadic(name)
@@ -1509,8 +1509,9 @@ def extract_video_object(e):
                  'url': url_or_none(e.get('contentUrl')),
                  'title': unescapeHTML(e.get('name')),
                  'description': unescapeHTML(e.get('description')),
-                'thumbnails': [{'url': url_or_none(url)}
-                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
+                'thumbnails': [{'url': url}
+                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+                               if url_or_none(url)],
                  'duration': parse_duration(e.get('duration')),
                  'timestamp': unified_timestamp(e.get('uploadDate')),
                  # author can be an instance of 'Organization' or 'Person' types.
@@ -2803,13 +2804,18 @@ def extract_Initialization(source):
                      mime_type = representation_attrib['mimeType']
                      content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
  
-                    codecs = parse_codecs(representation_attrib.get('codecs', ''))
+                    codec_str = representation_attrib.get('codecs', '')
+                    # Some kind of binary subtitle found in some youtube livestreams
+                    if mime_type == 'application/x-rawcc':
+                        codecs = {'scodec': codec_str}
+                    else:
+                        codecs = parse_codecs(codec_str)
                      if content_type not in ('video', 'audio', 'text'):
                          if mime_type == 'image/jpeg':
                              content_type = mime_type
-                        elif codecs['vcodec'] != 'none':
+                        elif codecs.get('vcodec', 'none') != 'none':
                              content_type = 'video'
-                        elif codecs['acodec'] != 'none':
+                        elif codecs.get('acodec', 'none') != 'none':
                              content_type = 'audio'
                          elif codecs.get('scodec', 'none') != 'none':
                              content_type = 'text'
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py

index 225677b00ec51bdcf7416157fbfa778ad8626dd7..54b136ec7d8d861feec2310fc44ac1d59c1f8133 100644 (file)
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -10,7 +10,6 @@
      WebSocketsWrapper,
      js_to_json,
      sanitized_Request,
-    std_headers,
      traverse_obj,
      update_url_query,
      urlencode_postdata,
@@ -207,7 +206,7 @@ def _real_extract(self, url):
              'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
              'Origin': 'https://live.fc2.com',
              'Accept': '*/*',
-            'User-Agent': std_headers['User-Agent'],
+            'User-Agent': self.get_param('http_headers')['User-Agent'],
          })
  
          self.write_debug('[debug] Sending HLS server request')
diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py

index e4570a03ae2bdf3ded795217bd19a6efbf9ed04a..feab79138a754b1c3837ac7d18cec7d89f164841 100644 (file)
--- a/yt_dlp/extractor/voicy.py
+++ b/yt_dlp/extractor/voicy.py
@@ -1,3 +1,5 @@
+import itertools
+
  from .common import InfoExtractor
  from ..compat import compat_str
  from ..utils import (
@@ -9,8 +11,6 @@
      unsmuggle_url,
  )
  
-import itertools
-
  
  class VoicyBaseIE(InfoExtractor):
      def _extract_from_playlist_data(self, value):
@@ -105,7 +105,7 @@ class VoicyChannelIE(VoicyBaseIE):
  
      @classmethod
      def suitable(cls, url):
-        return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
+        return not VoicyIE.suitable(url) and super().suitable(url)
  
      def _entries(self, channel_id):
          pager = ''
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 3b0e6750c379c5461c37b08e5f5e7e6dafbf0453..bcdb7d55b61488c2dda9d22307c13a9fdac3673b 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -714,7 +714,9 @@ def sanitize_path(s, force=False):
  def sanitize_url(url):
      # Prepend protocol-less URLs with `http:` scheme in order to mitigate
      # the number of unwanted failures due to missing protocol
-    if url.startswith('//'):
+    if url is None:
+        return
+    elif url.startswith('//'):
          return 'http:%s' % url
      # Fix some common typos seen so far
      COMMON_TYPOS = (
author	pukkandan <redacted>
	Wed, 18 May 2022 03:34:30 +0000 (09:04 +0530)
committer	pukkandan <redacted>
	Wed, 18 May 2022 03:34:30 +0000 (09:04 +0530)
Changelog.md		patch | blob | blame | history
devscripts/make_lazy_extractors.py		patch | blob | blame | history
yt_dlp/YoutubeDL.py		patch | blob | blame | history
yt_dlp/__init__.py		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/extractor/fc2.py		patch | blob | blame | history
yt_dlp/extractor/voicy.py		patch | blob | blame | history
yt_dlp/utils.py		patch | blob | blame | history