[extractors] Use new framework for existing embeds (#4307)

[yt-dlp.git] / yt_dlp / extractor / videa.py
diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py

index ab2c15cdec154bf7d760c588d114eed106221b60..fa16da28b49eb6f194d9a9fcd9e9ffc4f3c37f9d 100644 (file)
--- a/yt_dlp/extractor/videa.py
+++ b/yt_dlp/extractor/videa.py
@@ -1,25 +1,20 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import random
-import re
  import string
+import struct
  
  from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_ord
  from ..utils import (
      ExtractorError,
      int_or_none,
      mimetype2ext,
      parse_codecs,
+    parse_qs,
      update_url_query,
+    urljoin,
      xpath_element,
      xpath_text,
  )
-from ..compat import (
-    compat_b64decode,
-    compat_ord,
-    compat_struct_pack,
-)
  
  
  class VideaIE(InfoExtractor):
@@ -33,6 +28,7 @@ class VideaIE(InfoExtractor):
                          )
                          (?P<id>[^?#&]+)
                      '''
+    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
      _TESTS = [{
          'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
          'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
@@ -45,10 +41,24 @@ class VideaIE(InfoExtractor):
          },
      }, {
          'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
-        'only_matching': True,
+        'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
+        'info_dict': {
+            'id': 'jAHDWfWSJH5XuFhH',
+            'ext': 'mp4',
+            'title': 'Supercars előzés',
+            'thumbnail': r're:^https?://.*',
+            'duration': 64,
+        },
      }, {
          'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
-        'only_matching': True,
+        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+        'info_dict': {
+            'id': '8YfIAjxwWGwT8HVQ',
+            'ext': 'mp4',
+            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+            'thumbnail': r're:^https?://.*',
+            'duration': 21,
+        },
      }, {
          'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
          'only_matching': True,
@@ -64,12 +74,6 @@ class VideaIE(InfoExtractor):
      }]
      _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
  
-    @staticmethod
-    def _extract_urls(webpage):
-        return [url for _, url in re.findall(
-            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
-            webpage)]
-
      @staticmethod
      def rc4(cipher_text, key):
          res = b''
@@ -89,15 +93,22 @@ def rc4(cipher_text, key):
              j = (j + S[i]) % 256
              S[i], S[j] = S[j], S[i]
              k = S[(S[i] + S[j]) % 256]
-            res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
+            res += struct.pack('B', k ^ compat_ord(cipher_text[m]))
  
          return res.decode()
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-        query = {'v': video_id}
-        player_page = self._download_webpage(
-            'https://videa.hu/player', video_id, query=query)
+        video_page = self._download_webpage(url, video_id)
+
+        if 'videa.hu/player' in url:
+            player_url = url
+            player_page = video_page
+        else:
+            player_url = self._search_regex(
+                r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
+            player_url = urljoin(url, player_url)
+            player_page = self._download_webpage(player_url, video_id)
  
          nonce = self._search_regex(
              r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
@@ -107,6 +118,7 @@ def _real_extract(self, url):
          for i in range(0, 32):
              result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
  
+        query = parse_qs(player_url)
          random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
          query['_s'] = random_seed
          query['_t'] = result[:16]
@@ -121,13 +133,13 @@ def _real_extract(self, url):
                  compat_b64decode(b64_info), key), video_id)
  
          video = xpath_element(info, './video', 'video')
-        if not video:
+        if video is None:
              raise ExtractorError(xpath_element(
                  info, './error', fatal=True), expected=True)
          sources = xpath_element(
              info, './video_sources', 'sources', fatal=True)
          hash_values = xpath_element(
-            info, './hash_values', 'hash values', fatal=True)
+            info, './hash_values', 'hash values', fatal=False)
  
          title = xpath_text(video, './title', fatal=True)
  
@@ -136,15 +148,16 @@ def _real_extract(self, url):
              source_url = source.text
              source_name = source.get('name')
              source_exp = source.get('exp')
-            if not (source_url and source_name and source_exp):
+            if not (source_url and source_name):
                  continue
-            hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
-            if not hash_value:
-                continue
-            source_url = update_url_query(source_url, {
-                'md5': hash_value,
-                'expires': source_exp,
-            })
+            hash_value = (
+                xpath_text(hash_values, 'hash_value_' + source_name)
+                if hash_values is not None else None)
+            if hash_value and source_exp:
+                source_url = update_url_query(source_url, {
+                    'md5': hash_value,
+                    'expires': source_exp,
+                })
              f = parse_codecs(source.get('codecs'))
              f.update({
                  'url': self._proto_relative_url(source_url),