Update to ytdl-commit-a726009

author pukkandan <redacted>
Thu, 6 May 2021 16:01:20 +0000 (21:31 +0530)
committer pukkandan <redacted>
Thu, 6 May 2021 16:01:20 +0000 (21:31 +0530)
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml

index d7c35d76355ee058a8fdf75c2ca0c72e61929672..ea3d5ca412c757e51f41ad480d3123b8525200fc 100644 (file)
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -41,11 +41,18 @@ jobs:
      - name: Install Jython
        if: ${{ matrix.python-impl == 'jython' }}
        run: |
-        wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
+        wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
          java -jar jython-installer.jar -s -d "$HOME/jython"
          echo "$HOME/jython/bin" >> $GITHUB_PATH
      - name: Install nose
+      if: ${{ matrix.python-impl != 'jython' }}
        run: pip install nose
+    - name: Install nose (Jython)
+      if: ${{ matrix.python-impl == 'jython' }}
+      # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+      run: |
+        wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
+        pip install nose-1.3.7-py2-none-any.whl
      - name: Run tests
        continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
        env:
diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml

index d0edc80d61a7f9e49f942fdc999eb088d05a1d26..6c8ddb25faeca63861991f731c2374ff1edc1e8b 100644 (file)
--- a/.github/workflows/download.yml
+++ b/.github/workflows/download.yml
@@ -41,11 +41,18 @@ jobs:
      - name: Install Jython
        if: ${{ matrix.python-impl == 'jython' }}
        run: |
-        wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
+        wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
          java -jar jython-installer.jar -s -d "$HOME/jython"
          echo "$HOME/jython/bin" >> $GITHUB_PATH
      - name: Install nose
+      if: ${{ matrix.python-impl != 'jython' }}
        run: pip install nose
+    - name: Install nose (Jython)
+      if: ${{ matrix.python-impl == 'jython' }}
+      # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+      run: |
+        wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
+        pip install nose-1.3.7-py2-none-any.whl
      - name: Run tests
        continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
        env:
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 33fcc73228e1dcac90237a5d872a1d82fda69263..e5079a8590a37cb32d4c50a491db2e5b45f1477b 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1056,11 +1056,20 @@ def add_extra_info(info_dict, extra_info):
  
      def extract_info(self, url, download=True, ie_key=None, extra_info={},
                       process=True, force_generic_extractor=False):
-        '''
-        Returns a list with a dictionary for each video we find.
-        If 'download', also downloads the videos.
-        extra_info is a dict containing the extra values to add to each result
-        '''
+        """
+        Return a list with a dictionary for each video extracted.
+
+        Arguments:
+        url -- URL to extract
+
+        Keyword arguments:
+        download -- whether to download videos during extraction
+        ie_key -- extractor key hint
+        extra_info -- dictionary containing the extra values to add to each result
+        process -- whether to resolve all unresolved references (URLs, playlist items),
+            must be True for download to work.
+        force_generic_extractor -- force using the generic extractor
+        """
  
          if not ie_key and force_generic_extractor:
              ie_key = 'Generic'
diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py

index 1b4362144a180da10ba52c06c856a498802f7527..e1b391937169208331233bbda6a75d9427a4228f 100644 (file)
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -133,6 +133,8 @@ def _real_extract(self, url):
              'age_limit': 18 if need_confirm_age else 0,
          }
  
+        info = self._search_json_ld(webpage, video_id, default={})
+
          # Source: https://www.cda.pl/js/player.js?t=1606154898
          def decrypt_file(a):
              for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
@@ -197,7 +199,7 @@ def extract_format(page, version):
                  handler = self._download_webpage
  
              webpage = handler(
-                self._BASE_URL + href, video_id,
+                urljoin(self._BASE_URL, href), video_id,
                  'Downloading %s version information' % resolution, fatal=False)
              if not webpage:
                  # Manually report warning because empty page is returned when
@@ -209,6 +211,4 @@ def extract_format(page, version):
  
          self._sort_formats(formats)
  
-        info = self._search_json_ld(webpage, video_id, default={})
-
          return merge_dicts(info_dict, info)
diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py

index b1c02ca2bb2922ff260c87bcd881d52b5dff487b..be7ad1202bde2ef4df97b4d667458025c5c1679b 100644 (file)
--- a/yt_dlp/extractor/dispeak.py
+++ b/yt_dlp/extractor/dispeak.py
@@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
          # From http://www.gdcvault.com/play/1013700/Advanced-Material
          'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
          'only_matching': True,
+    }, {
+        # From https://gdcvault.com/play/1016624, empty speakerVideo
+        'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
+        'info_dict': {
+            'id': '201210-822101_1349794556671DDDD',
+            'ext': 'flv',
+            'title': 'Pre-launch - Preparing to Take the Plunge',
+        },
+    }, {
+        # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
+        'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
+        'only_matching': True,
      }]
  
      def _parse_mp4(self, metadata):
@@ -85,25 +97,19 @@ def _parse_flv(self, metadata):
                  'quality': 1,
                  'format_id': audio.get('code'),
              })
-        slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(slide_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'slide deck video',
-            'quality': -2,
-            'format_id': 'slides',
-            'acodec': 'none',
-        })
-        speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(speaker_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'speaker video',
-            'quality': -1,
-            'format_id': 'speaker',
-        })
+        for video_key, format_id, preference in (
+                ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
+            video_path = xpath_text(metadata, './%sVideo' % video_key)
+            if not video_path:
+                continue
+            formats.append({
+                'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+                'play_path': remove_end(video_path, '.flv'),
+                'ext': 'flv',
+                'format_note': '%s video' % video_key,
+                'quality': preference,
+                'format_id': format_id,
+            })
          return formats
  
      def _real_extract(self, url):
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py

index 79f9c74a32b77b0170aa04914e86b2565e9946a4..b835ca72ce126ddceaf8c5f97811dd5fe6e1b925 100644 (file)
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -151,7 +151,6 @@
      BleacherReportIE,
      BleacherReportCMSIE,
  )
-from .blinkx import BlinkxIE
  from .bloomberg import BloombergIE
  from .bokecc import BokeCCIE
  from .bongacams import BongaCamsIE
diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py

index e57e165fc94e7d375d9d5fc7039bf36a3664ad88..ee8a22f9df5c12342951b55318d51dea3cc84e7a 100644 (file)
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@@ -402,6 +402,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
      }, {
          'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
          'only_matching': True,
+    }, {
+        # "<figure id=" pattern (#28792)
+        'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -419,8 +423,7 @@ def _real_extract(self, url):
              (r'player\.load[^;]+src:\s*["\']([^"\']+)',
               r'id-video=([^@]+@[^"]+)',
               r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
-             r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
-             r'<figure[^>]+id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
+             r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
              webpage, 'video id')
  
          return self._make_url_result(video_id)
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py

index 8bbedca269233b2ba4bdd02febf7d8e63007feb4..d8f1e169af25fb09d3dec558c6cae78a3ac84b42 100644 (file)
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -16,7 +16,7 @@
  
  
  class FunimationIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
  
      _NETRC_MACHINE = 'funimation'
      _TOKEN = None
@@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
      }, {
          'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
          'only_matching': True,
+    }, {
+        # with lang code
+        'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
+        'only_matching': True,
      }]
  
      def _login(self):
diff --git a/yt_dlp/extractor/gdcvault.py b/yt_dlp/extractor/gdcvault.py

index a248a170db5959844f127d724915ef588a7fda39..acc6478b86930f2ebd417d1f053003a8957e3f16 100644 (file)
--- a/yt_dlp/extractor/gdcvault.py
+++ b/yt_dlp/extractor/gdcvault.py
@@ -5,7 +5,10 @@
  from .common import InfoExtractor
  from .kaltura import KalturaIE
  from ..utils import (
+    HEADRequest,
+    remove_start,
      sanitized_Request,
+    smuggle_url,
      urlencode_postdata,
  )
  
@@ -100,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
                  'format': 'mp4-408',
              },
          },
+        {
+            # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+            'url': 'https://www.gdcvault.com/play/1025699',
+            'info_dict': {
+                'id': '0_zagynv0a',
+                'ext': 'mp4',
+                'title': 'Tech Toolbox',
+                'upload_date': '20190408',
+                'uploader_id': 'joe@blazestreaming.com',
+                'timestamp': 1554764629,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # HTML5 video
+            'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
+            'only_matching': True,
+        },
      ]
  
      def _login(self, webpage_url, display_id):
@@ -120,38 +143,78 @@ def _login(self, webpage_url, display_id):
          request = sanitized_Request(login_url, urlencode_postdata(login_form))
          request.add_header('Content-Type', 'application/x-www-form-urlencoded')
          self._download_webpage(request, display_id, 'Logging in')
-        webpage = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+        start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
          self._download_webpage(logout_url, display_id, 'Logging out')
  
-        return webpage
+        return start_page
  
      def _real_extract(self, url):
          video_id, name = re.match(self._VALID_URL, url).groups()
          display_id = name or video_id
  
-        webpage = self._download_webpage(url, display_id)
-
-        title = self._html_search_regex(
-            r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
-            webpage, 'title')
-
-        PLAYER_REGEX = r'<iframe src=\"(?P<manifest_url>.*?)\".*?</iframe>'
-        manifest_url = self._html_search_regex(
-            PLAYER_REGEX, webpage, 'manifest_url')
-
-        partner_id = self._search_regex(
-            r'/p(?:artner_id)?/(\d+)', manifest_url, 'partner id',
-            default='1670711')
+        webpage_url = 'http://www.gdcvault.com/play/' + video_id
+        start_page = self._download_webpage(webpage_url, display_id)
+
+        direct_url = self._search_regex(
+            r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
+            start_page, 'url', default=None)
+        if direct_url:
+            title = self._html_search_regex(
+                r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
+                start_page, 'title')
+            video_url = 'http://www.gdcvault.com' + direct_url
+            # resolve the url so that we can detect the correct extension
+            video_url = self._request_webpage(
+                HEADRequest(video_url), video_id).geturl()
+
+            return {
+                'id': video_id,
+                'display_id': display_id,
+                'url': video_url,
+                'title': title,
+            }
  
-        kaltura_id = self._search_regex(
-            r'entry_id=(?P<id>(?:[^&])+)', manifest_url,
-            'kaltura id', group='id')
+        embed_url = KalturaIE._extract_url(start_page)
+        if embed_url:
+            embed_url = smuggle_url(embed_url, {'source_url': url})
+            ie_key = 'Kaltura'
+        else:
+            PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
+
+            xml_root = self._html_search_regex(
+                PLAYER_REGEX, start_page, 'xml root', default=None)
+            if xml_root is None:
+                # Probably need to authenticate
+                login_res = self._login(webpage_url, display_id)
+                if login_res is None:
+                    self.report_warning('Could not login.')
+                else:
+                    start_page = login_res
+                    # Grab the url from the authenticated page
+                    xml_root = self._html_search_regex(
+                        PLAYER_REGEX, start_page, 'xml root')
+
+            xml_name = self._html_search_regex(
+                r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
+                start_page, 'xml filename', default=None)
+            if not xml_name:
+                info = self._parse_html5_media_entries(url, start_page, video_id)[0]
+                info.update({
+                    'title': remove_start(self._search_regex(
+                        r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
+                        'title', default=None) or self._og_search_title(
+                        start_page, default=None), 'GDC Vault - '),
+                    'id': video_id,
+                    'display_id': display_id,
+                })
+                return info
+            embed_url = '%s/xml/%s' % (xml_root, xml_name)
+            ie_key = 'DigitallySpeaking'
  
          return {
              '_type': 'url_transparent',
-            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
-            'ie_key': KalturaIE.ie_key(),
              'id': video_id,
              'display_id': display_id,
-            'title': title,
+            'url': embed_url,
+            'ie_key': ie_key,
          }
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py

index c8097249eeeeb094e43f30a04b09e33f13815f53..f109160819124d930a76f742ba58cd0bd7ca7ccf 100644 (file)
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -120,7 +120,7 @@ def _extract_url(webpage):
      def _extract_urls(webpage):
          # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
          finditer = (
-            re.finditer(
+            list(re.finditer(
                  r"""(?xs)
                      kWidget\.(?:thumb)?[Ee]mbed\(
                      \{.*?
@@ -128,8 +128,8 @@ def _extract_urls(webpage):
                          (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
                          (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
                          (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
-                """, webpage)
-            or re.finditer(
+                """, webpage))
+            or list(re.finditer(
                  r'''(?xs)
                      (?P<q1>["'])
                          (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@@ -142,16 +142,16 @@ def _extract_urls(webpage):
                          \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
                      )
                      (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
-                ''', webpage)
-            or re.finditer(
+                ''', webpage))
+            or list(re.finditer(
                  r'''(?xs)
-                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
                        (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                        (?:(?!(?P=q1)).)*
                        [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
                        (?:(?!(?P=q1)).)*
                      (?P=q1)
-                ''', webpage)
+                ''', webpage))
          )
          urls = []
          for mobj in finditer:
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py

index 4bca6f053c981f702f4708815134af955cea9305..2ece5aac4afd83ff2f96f06042be11c7a56ac2d4 100644 (file)
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -15,33 +15,39 @@
  
  
  class MedalTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
      _TESTS = [{
-        'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
+        'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
          'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
          'info_dict': {
-            'id': '34934644',
+            'id': '2mA60jWAGQCBH',
              'ext': 'mp4',
              'title': 'Quad Cold',
              'description': 'Medal,https://medal.tv/desktop/',
              'uploader': 'MowgliSB',
              'timestamp': 1603165266,
              'upload_date': '20201020',
-            'uploader_id': 10619174,
+            'uploader_id': '10619174',
          }
      }, {
-        'url': 'https://medal.tv/clips/36787208',
+        'url': 'https://medal.tv/clips/2um24TWdty0NA',
          'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
          'info_dict': {
-            'id': '36787208',
+            'id': '2um24TWdty0NA',
              'ext': 'mp4',
              'title': 'u tk me i tk u bigger',
              'description': 'Medal,https://medal.tv/desktop/',
              'uploader': 'Mimicc',
              'timestamp': 1605580939,
              'upload_date': '20201117',
-            'uploader_id': 5156321,
+            'uploader_id': '5156321',
          }
+    }, {
+        'url': 'https://medal.tv/clips/37rMeFpryCC-9',
+        'only_matching': True,
+    }, {
+        'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py

index 5b377ea838add3a5bc046b58590906eb74995455..4b6284a8d77df8d0b0f49a20370331cb8402c5ee 100644 (file)
--- a/yt_dlp/extractor/svt.py
+++ b/yt_dlp/extractor/svt.py
@@ -146,7 +146,7 @@ class SVTPlayIE(SVTPlayBaseIE):
                          )
                          (?P<svt_id>[^/?#&]+)|
                          https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
-                        (?:.*?modalId=(?P<modal_id>[\da-zA-Z-]+))?
+                        (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
                      )
                      '''
      _TESTS = [{
@@ -177,6 +177,9 @@ class SVTPlayIE(SVTPlayBaseIE):
      }, {
          'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
          'only_matching': True,
+    }, {
+        'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
+        'only_matching': True,
      }, {
          # geo restricted to Sweden
          'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@@ -259,7 +262,7 @@ def _real_extract(self, url):
          if not svt_id:
              svt_id = self._search_regex(
                  (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
-                 r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\bmodalId=([\da-zA-Z-]+)' % re.escape(video_id),
+                 r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\b(?:modalId|id)=([\da-zA-Z-]+)' % re.escape(video_id),
                   r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
                   r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
                   r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py

index 8bda9348d723073b894d2d77b6556b51d89dad80..8bd5fd6401fa801d836a59ddfaec035cc7425ef1 100644 (file)
--- a/yt_dlp/extractor/tv2dk.py
+++ b/yt_dlp/extractor/tv2dk.py
@@ -74,6 +74,12 @@ def _real_extract(self, url):
          webpage = self._download_webpage(url, video_id)
  
          entries = []
+
+        def add_entry(partner_id, kaltura_id):
+            entries.append(self.url_result(
+                'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
+                video_id=kaltura_id))
+
          for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
              video = extract_attributes(video_el)
              kaltura_id = video.get('data-entryid')
@@ -82,9 +88,14 @@ def _real_extract(self, url):
              partner_id = video.get('data-partnerid')
              if not partner_id:
                  continue
-            entries.append(self.url_result(
-                'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
-                video_id=kaltura_id))
+            add_entry(partner_id, kaltura_id)
+        if not entries:
+            kaltura_id = self._search_regex(
+                r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
+            partner_id = self._search_regex(
+                (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
+                'partner id')
+            add_entry(partner_id, kaltura_id)
          return self.playlist_result(entries)
  
  
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py

index a54f49319a41ece1864c132dce53683102b626ea..a4a30b1e63e7fd56fad8ef5c15347797924ee24c 100644 (file)
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -9,7 +9,6 @@
      int_or_none,
      remove_start,
      smuggle_url,
-    strip_or_none,
      try_get,
  )
  
@@ -45,32 +44,18 @@ def _real_extract(self, url):
              query={'token': self._TOKEN})['main']
          p_id = main['publisher_id']
          service = remove_start(main['service'], 'ts_')
-        info = {
+
+        r_id = main['reference_id']
+        if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
+            r_id = 'ref:' + r_id
+        bc_url = smuggle_url(
+            self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
+            {'geo_countries': ['JP']})
+
+        return {
              '_type': 'url_transparent',
              'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
              'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
+            'url': bc_url,
+            'ie_key': 'BrightcoveNew',
          }
-
-        if service == 'cx':
-            title = main['title']
-            subtitle = strip_or_none(main.get('subtitle'))
-            if subtitle:
-                title += ' - ' + subtitle
-            info.update({
-                'title': title,
-                'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
-                'ie_key': 'FujiTVFODPlus7',
-            })
-        else:
-            r_id = main['reference_id']
-            if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
-                r_id = 'ref:' + r_id
-            bc_url = smuggle_url(
-                self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
-                {'geo_countries': ['JP']})
-            info.update({
-                'url': bc_url,
-                'ie_key': 'BrightcoveNew',
-            })
-
-        return info
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py

index 63c11bd4795529cb5402cba1994db6790e22ea2c..ae79ec6e003013fafeadfc99bacd9d19f0eb0782 100644 (file)
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -19,6 +19,7 @@
      strip_or_none,
      unified_timestamp,
      update_url_query,
+    url_or_none,
      xpath_text,
  )
  
@@ -52,6 +53,9 @@ def _extract_variant_formats(self, variant, video_id):
              return [f], {}
  
      def _extract_formats_from_vmap_url(self, vmap_url, video_id):
+        vmap_url = url_or_none(vmap_url)
+        if not vmap_url:
+            return []
          vmap_data = self._download_xml(vmap_url, video_id)
          formats = []
          subtitles = {}
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py

index cbd5d1cbbb331cbd14777e70950b9c3c2a41175c..df9efa9faed512c0a62a3c0762fd935bc91f2dab 100644 (file)
--- a/yt_dlp/extractor/xfileshare.py
+++ b/yt_dlp/extractor/xfileshare.py
@@ -58,6 +58,7 @@ class XFileShareIE(InfoExtractor):
          (r'vidlocker\.xyz', 'VidLocker'),
          (r'vidshare\.tv', 'VidShare'),
          (r'vup\.to', 'VUp'),
+        (r'wolfstream\.tv', 'WolfStream'),
          (r'xvideosharing\.com', 'XVideoSharing'),
      )
  
@@ -82,6 +83,9 @@ class XFileShareIE(InfoExtractor):
      }, {
          'url': 'https://aparat.cam/n4d6dh0wvlpr',
          'only_matching': True,
+    }, {
+        'url': 'https://wolfstream.tv/nthme29v9u2x',
+        'only_matching': True,
      }]
  
      @staticmethod
diff --git a/yt_dlp/extractor/xtube.py b/yt_dlp/extractor/xtube.py

index 98d2adb995b933f964bb619df9897b73d46ce21d..682e45bef3f820756ae6d53f443c9ccd7bd09a5f 100644 (file)
--- a/yt_dlp/extractor/xtube.py
+++ b/yt_dlp/extractor/xtube.py
@@ -11,6 +11,7 @@
      parse_duration,
      sanitized_Request,
      str_to_int,
+    url_or_none,
  )
  
  
@@ -71,10 +72,10 @@ def _real_extract(self, url):
                  'Cookie': 'age_verified=1; cookiesAccepted=1',
              })
  
-        title, thumbnail, duration = [None] * 3
+        title, thumbnail, duration, sources, media_definition = [None] * 5
  
          config = self._parse_json(self._search_regex(
-            r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
+            r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
              default='{}'), video_id, transform_source=js_to_json, fatal=False)
          if config:
              config = config.get('mainRoll')
@@ -83,20 +84,52 @@ def _real_extract(self, url):
                  thumbnail = config.get('poster')
                  duration = int_or_none(config.get('duration'))
                  sources = config.get('sources') or config.get('format')
+                media_definition = config.get('mediaDefinition')
  
-        if not isinstance(sources, dict):
+        if not isinstance(sources, dict) and not media_definition:
              sources = self._parse_json(self._search_regex(
                  r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
                  webpage, 'sources', group='sources'), video_id,
                  transform_source=js_to_json)
  
          formats = []
-        for format_id, format_url in sources.items():
-            formats.append({
-                'url': format_url,
-                'format_id': format_id,
-                'height': int_or_none(format_id),
-            })
+        format_urls = set()
+
+        if isinstance(sources, dict):
+            for format_id, format_url in sources.items():
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                if format_url in format_urls:
+                    continue
+                format_urls.add(format_url)
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'height': int_or_none(format_id),
+                })
+
+        if isinstance(media_definition, list):
+            for media in media_definition:
+                video_url = url_or_none(media.get('videoUrl'))
+                if not video_url:
+                    continue
+                if video_url in format_urls:
+                    continue
+                format_urls.add(video_url)
+                format_id = media.get('format')
+                if format_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif format_id == 'mp4':
+                    height = int_or_none(media.get('quality'))
+                    formats.append({
+                        'url': video_url,
+                        'format_id': '%s-%d' % (format_id, height) if height else format_id,
+                        'height': height,
+                    })
+
          self._remove_duplicate_formats(formats)
          self._sort_formats(formats)
author	pukkandan <redacted>
	Thu, 6 May 2021 16:01:20 +0000 (21:31 +0530)
committer	pukkandan <redacted>
	Thu, 6 May 2021 16:01:20 +0000 (21:31 +0530)
.github/workflows/core.yml		patch | blob | blame | history
.github/workflows/download.yml		patch | blob | blame | history
yt_dlp/YoutubeDL.py		patch | blob | blame | history
yt_dlp/extractor/cda.py		patch | blob | blame | history
yt_dlp/extractor/dispeak.py		patch | blob | blame | history
yt_dlp/extractor/extractors.py		patch | blob | blame | history
yt_dlp/extractor/francetv.py		patch | blob | blame | history
yt_dlp/extractor/funimation.py		patch | blob | blame | history
yt_dlp/extractor/gdcvault.py		patch | blob | blame | history
yt_dlp/extractor/kaltura.py		patch | blob | blame | history
yt_dlp/extractor/medaltv.py		patch | blob | blame | history
yt_dlp/extractor/svt.py		patch | blob | blame | history
yt_dlp/extractor/tv2dk.py		patch | blob | blame | history
yt_dlp/extractor/tver.py		patch | blob | blame | history
yt_dlp/extractor/twitter.py		patch | blob | blame | history
yt_dlp/extractor/xfileshare.py		patch | blob | blame | history
yt_dlp/extractor/xtube.py		patch | blob | blame | history