Update to ytdl-commit-9f6c03

author pukkandan <redacted>
Sat, 17 Apr 2021 03:02:33 +0000 (08:32 +0530)
committer pukkandan <redacted>
Sat, 17 Apr 2021 03:10:31 +0000 (08:40 +0530)
diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py

index 345debcf0c1c48363092d24fcc5809d76dfbba5a..1285ed65e5bbea10d9b57cad861767543fa8fee4 100644 (file)
--- a/yt_dlp/extractor/cbsnews.py
+++ b/yt_dlp/extractor/cbsnews.py
@@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
      def _real_extract(self, url):
          item = self._parse_json(zlib.decompress(compat_b64decode(
              compat_urllib_parse_unquote(self._match_id(url))),
-            -zlib.MAX_WBITS), None)['video']['items'][0]
+            -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
          return self._extract_video_info(item['mpxRefId'], 'cbsnews')
  
  
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py

index 865cda7618d1d910ef79ce17011e0f3700aa228a..cdfbefcd431e85ffc5b72151a666b55f704228e7 100644 (file)
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -122,6 +122,26 @@ class LBRYIE(LBRYBaseIE):
              'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
              'vcodec': 'none',
          }
+    }, {
+        # HLS
+        'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
+        'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
+        'info_dict': {
+            'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
+            'ext': 'mp4',
+            'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
+            'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
+            'timestamp': 1618254123,
+            'upload_date': '20210412',
+            'release_timestamp': 1618254002,
+            'release_date': '20210412',
+            'tags': list,
+            'duration': 554,
+            'channel': 'Gardening In Canada',
+            'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
+            'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
+            'formats': 'mincount:3',
+        }
      }, {
          'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
          'only_matching': True,
@@ -168,10 +188,18 @@ def _real_extract(self, url):
          streaming_url = self._call_api_proxy(
              'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
          info = self._parse_stream(result, url)
+        urlh = self._request_webpage(
+            streaming_url, display_id, note='Downloading streaming redirect url info')
+        if determine_ext(urlh.geturl()) == 'm3u8':
+            info['formats'] = self._extract_m3u8_formats(
+                urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+            self._sort_formats(info['formats'])
+        else:
+            info['url'] = streaming_url
          info.update({
              'id': claim_id,
              'title': title,
-            'url': streaming_url,
          })
          return info
  
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py

index abd08bc285c3c107bd548ddf0ec1c86fe77ecd9a..2d63855df55bc4137644beaf688f384acddea333 100644 (file)
--- a/yt_dlp/extractor/pluralsight.py
+++ b/yt_dlp/extractor/pluralsight.py
@@ -393,7 +393,7 @@ def guess_allowed_qualities():
                  # To somewhat reduce the probability of these consequences
                  # we will sleep random amount of time before each call to ViewClip.
                  self._sleep(
-                    random.randint(2, 5), display_id,
+                    random.randint(5, 10), display_id,
                      '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
  
                  if not viewclip:
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 4c9da101f5a09739296e41c620a86072b49cb75d..940f3def2f6a8cdd80eaa3d33d26931a9578ac86 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -53,6 +53,10 @@
  )
  
  
+def parse_qs(url):
+    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+
+
  class YoutubeBaseInfoExtractor(InfoExtractor):
      """Provide base functions for Youtube extractors"""
      _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@@ -438,14 +442,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          r'(?:(?:www|dev)\.)?invidio\.us',
          # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
          r'(?:www\.)?invidious\.pussthecat\.org',
-        r'(?:www\.)?invidious\.048596\.xyz',
          r'(?:www\.)?invidious\.zee\.li',
-        r'(?:www\.)?vid\.puffyan\.us',
          r'(?:(?:www|au)\.)?ytprivate\.com',
          r'(?:www\.)?invidious\.namazso\.eu',
          r'(?:www\.)?invidious\.ethibox\.fr',
-        r'(?:www\.)?inv\.skyn3t\.in',
-        r'(?:www\.)?invidious\.himiko\.cloud',
          r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
          r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
          r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
@@ -454,25 +454,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          r'(?:(?:www|no)\.)?invidiou\.sh',
          r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
          r'(?:www\.)?invidious\.kabi\.tk',
-        r'(?:www\.)?invidious\.13ad\.de',
          r'(?:www\.)?invidious\.mastodon\.host',
          r'(?:www\.)?invidious\.zapashcanon\.fr',
          r'(?:www\.)?invidious\.kavin\.rocks',
+        r'(?:www\.)?invidious\.tinfoil-hat\.net',
+        r'(?:www\.)?invidious\.himiko\.cloud',
+        r'(?:www\.)?invidious\.reallyancient\.tech',
          r'(?:www\.)?invidious\.tube',
          r'(?:www\.)?invidiou\.site',
          r'(?:www\.)?invidious\.site',
          r'(?:www\.)?invidious\.xyz',
          r'(?:www\.)?invidious\.nixnet\.xyz',
+        r'(?:www\.)?invidious\.048596\.xyz',
          r'(?:www\.)?invidious\.drycat\.fr',
+        r'(?:www\.)?inv\.skyn3t\.in',
          r'(?:www\.)?tube\.poal\.co',
          r'(?:www\.)?tube\.connect\.cafe',
          r'(?:www\.)?vid\.wxzm\.sx',
          r'(?:www\.)?vid\.mint\.lgbt',
+        r'(?:www\.)?vid\.puffyan\.us',
          r'(?:www\.)?yewtu\.be',
          r'(?:www\.)?yt\.elukerio\.org',
          r'(?:www\.)?yt\.lelux\.fi',
          r'(?:www\.)?invidious\.ggc-project\.de',
          r'(?:www\.)?yt\.maisputain\.ovh',
+        r'(?:www\.)?ytprivate\.com',
+        r'(?:www\.)?invidious\.13ad\.de',
          r'(?:www\.)?invidious\.toot\.koeln',
          r'(?:www\.)?invidious\.fdn\.fr',
          r'(?:www\.)?watch\.nettohikari\.com',
@@ -515,16 +522,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                           |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                           )
                       )?                                                       # all until now is optional -> you can pass the naked ID
-                     (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
-                     (?!.*?\blist=
-                        (?:
-                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
-                            WL                                                # WL are handled by the watch later IE
-                        )
-                     )
+                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                       (?(1).+)?                                                # if we found the ID, everything can follow
                       $""" % {
-        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
          'invidious': '|'.join(_INVIDIOUS_SITES),
      }
      _PLAYER_INFO_RE = (
@@ -1009,6 +1009,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              },
              'skip': 'This video does not exist.',
          },
+        {
+            # Video with incomplete 'yt:stretch=16:'
+            'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
+            'only_matching': True,
+        },
          {
              # Video licensed under Creative Commons
              'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
@@ -1304,6 +1309,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          },
      ]
  
+    @classmethod
+    def suitable(cls, url):
+        qs = parse_qs(url)
+        if qs.get('list', [None])[0]:
+            return False
+        return super(YoutubeIE, cls).suitable(url)
+
      def __init__(self, *args, **kwargs):
          super(YoutubeIE, self).__init__(*args, **kwargs)
          self._code_cache = {}
@@ -2079,15 +2091,16 @@ def feed_entry(name):
                  for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
          for keyword in keywords:
              if keyword.startswith('yt:stretch='):
-                stretch_ratio = map(
-                    lambda x: int_or_none(x, default=0),
-                    keyword.split('=')[1].split(':'))
-                w, h = (list(stretch_ratio) + [0])[:2]
-                if w > 0 and h > 0:
-                    ratio = w / h
-                    for f in formats:
-                        if f.get('vcodec') != 'none':
-                            f['stretched_ratio'] = ratio
+                mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
+                if mobj:
+                    # NB: float is intentional for forcing float division
+                    w, h = (float(v) for v in mobj.groups())
+                    if w > 0 and h > 0:
+                        ratio = w / h
+                        for f in formats:
+                            if f.get('vcodec') != 'none':
+                                f['stretched_ratio'] = ratio
+                        break
  
          thumbnails = []
          for container in (video_details, microformat):
@@ -2484,6 +2497,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
              'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
              'uploader': 'Игорь Клейнер',
          },
+    }, {
+        # playlists, series
+        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': 'UCYO_jab_esuFRV4b17AJtAw',
+            'title': '3Blue1Brown - Playlists',
+            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+        },
      }, {
          # playlists, singlepage
          'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
@@ -2790,6 +2812,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
              'title': '#cctv9',
          },
          'playlist_mincount': 350,
+    }, {
+        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
+        'only_matching': True,
      }]
  
      @classmethod
@@ -2813,14 +2838,16 @@ def _extract_channel_id(self, webpage):
      @staticmethod
      def _extract_basic_item_renderer(item):
          # Modified from _extract_grid_item_renderer
-        known_renderers = (
-            'playlistRenderer', 'videoRenderer', 'channelRenderer',
-            'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
+        known_basic_renderers = (
+            'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
          )
          for key, renderer in item.items():
-            if key not in known_renderers:
+            if not isinstance(renderer, dict):
                  continue
-            return renderer
+            elif key in known_basic_renderers:
+                return renderer
+            elif key.startswith('grid') and key.endswith('Renderer'):
+                return renderer
  
      def _grid_entries(self, grid_renderer):
          for item in grid_renderer['items']:
@@ -2830,7 +2857,8 @@ def _grid_entries(self, grid_renderer):
              if not isinstance(renderer, dict):
                  continue
              title = try_get(
-                renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+                renderer, (lambda x: x['title']['runs'][0]['text'],
+                           lambda x: x['title']['simpleText']), compat_str)
              # playlist
              playlist_id = renderer.get('playlistId')
              if playlist_id:
@@ -2838,10 +2866,12 @@ def _grid_entries(self, grid_renderer):
                      'https://www.youtube.com/playlist?list=%s' % playlist_id,
                      ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                      video_title=title)
+                continue
              # video
              video_id = renderer.get('videoId')
              if video_id:
                  yield self._extract_video(renderer)
+                continue
              # channel
              channel_id = renderer.get('channelId')
              if channel_id:
@@ -2850,6 +2880,17 @@ def _grid_entries(self, grid_renderer):
                  yield self.url_result(
                      'https://www.youtube.com/channel/%s' % channel_id,
                      ie=YoutubeTabIE.ie_key(), video_title=title)
+                continue
+            # generic endpoint URL support
+            ep_url = urljoin('https://www.youtube.com/', try_get(
+                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
+                compat_str))
+            if ep_url:
+                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
+                    if ie.suitable(ep_url):
+                        yield self.url_result(
+                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
+                        break
  
      def _shelf_entries_from_content(self, shelf_renderer):
          content = shelf_renderer.get('content')
@@ -3444,7 +3485,7 @@ def _real_extract(self, url):
              url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')
  
          # Handle both video/playlist URLs
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        qs = parse_qs(url)
          video_id = qs.get('v', [None])[0]
          playlist_id = qs.get('list', [None])[0]
  
@@ -3550,12 +3591,16 @@ class YoutubePlaylistIE(InfoExtractor):
  
      @classmethod
      def suitable(cls, url):
-        return False if YoutubeTabIE.suitable(url) else super(
-            YoutubePlaylistIE, cls).suitable(url)
+        if YoutubeTabIE.suitable(url):
+            return False
+        qs = parse_qs(url)
+        if qs.get('v', [None])[0]:
+            return False
+        return super(YoutubePlaylistIE, cls).suitable(url)
  
      def _real_extract(self, url):
          playlist_id = self._match_id(url)
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        qs = parse_qs(url)
          if not qs:
              qs = {'list': playlist_id}
          return self.url_result(
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 3ba2a1ec8dbcf8232277701c228325e698f7b0ac..3e566285f7c50f99f8ec7daf7dfe1323921d1930 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -40,6 +40,7 @@
  from .compat import (
      compat_HTMLParseError,
      compat_HTMLParser,
+    compat_HTTPError,
      compat_basestring,
      compat_chr,
      compat_cookiejar,
@@ -2925,12 +2926,60 @@ def http_response(self, request, response):
  
  
  class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
-    if sys.version_info[0] < 3:
-        def redirect_request(self, req, fp, code, msg, headers, newurl):
-            # On python 2 urlh.geturl() may sometimes return redirect URL
-            # as byte string instead of unicode. This workaround allows
-            # to force it always return unicode.
-            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+    """YoutubeDL redirect handler
+
+    The code is based on HTTPRedirectHandler implementation from CPython [1].
+
+    This redirect handler solves two issues:
+     - ensures redirect URL is always unicode under python 2
+     - introduces support for experimental HTTP response status code
+       308 Permanent Redirect [2] used by some sites [3]
+
+    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
+    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
+    3. https://github.com/ytdl-org/youtube-dl/issues/28768
+    """
+
+    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        """Return a Request or None in response to a redirect.
+
+        This is called by the http_error_30x methods when a
+        redirection response is received.  If a redirection should
+        take place, return a new Request to allow http_error_30x to
+        perform the redirect.  Otherwise, raise HTTPError if no-one
+        else should try to handle this url.  Return None if you can't
+        but another Handler might.
+        """
+        m = req.get_method()
+        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+                 or code in (301, 302, 303) and m == "POST")):
+            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+        # Strictly (according to RFC 2616), 301 or 302 in response to
+        # a POST MUST NOT cause a redirection without confirmation
+        # from the user (of urllib.request, in this case).  In practice,
+        # essentially all clients do redirect in this case, so we do
+        # the same.
+
+        # On python 2 urlh.geturl() may sometimes return redirect URL
+        # as byte string instead of unicode. This workaround allows
+        # to force it always return unicode.
+        if sys.version_info[0] < 3:
+            newurl = compat_str(newurl)
+
+        # Be conciliant with URIs containing a space.  This is mainly
+        # redundant with the more complete encoding done in http_error_302(),
+        # but it is kept for compatibility with other callers.
+        newurl = newurl.replace(' ', '%20')
+
+        CONTENT_HEADERS = ("content-length", "content-type")
+        # NB: don't use dict comprehension for python 2.6 compatibility
+        newheaders = dict((k, v) for k, v in req.headers.items()
+                          if k.lower() not in CONTENT_HEADERS)
+        return compat_urllib_request.Request(
+            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
+            unverifiable=True)
  
  
  def extract_timezone(date_str):
author	pukkandan <redacted>
	Sat, 17 Apr 2021 03:02:33 +0000 (08:32 +0530)
committer	pukkandan <redacted>
	Sat, 17 Apr 2021 03:10:31 +0000 (08:40 +0530)
yt_dlp/extractor/cbsnews.py		patch \| blob \| blame \| history
yt_dlp/extractor/lbry.py		patch \| blob \| blame \| history
yt_dlp/extractor/pluralsight.py		patch \| blob \| blame \| history
yt_dlp/extractor/youtube.py		patch \| blob \| blame \| history
yt_dlp/utils.py		patch \| blob \| blame \| history