jfr.im git - yt-dlp.git/commitdiff
[Biqle] Fix extractor (#2731)
author: Bricio <redacted>
Fri, 18 Feb 2022 16:02:14 +0000 (13:02 -0300)
committer: GitHub <redacted>
Fri, 18 Feb 2022 16:02:14 +0000 (08:02 -0800)
Closes #193
Authored by: Bricio

yt_dlp/extractor/biqle.py

index 17ebbb25766bb500e6401f55b6105c37fcfd25f5..2b57bade3dff83b0906c0faaa1af060343a36384 100644 (file)
@@ -3,27 +3,28 @@
 
 from .common import InfoExtractor
 from .vk import VKIE
-from ..compat import (
-    compat_b64decode,
-    compat_urllib_parse_unquote,
+from ..compat import compat_b64decode
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    traverse_obj,
+    unified_timestamp,
 )
-from ..utils import int_or_none
 
 
 class BIQLEIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
     _TESTS = [{
-        # Youtube embed
-        'url': 'https://biqle.ru/watch/-115995369_456239081',
-        'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
+        'url': 'https://biqle.ru/watch/-2000421746_85421746',
+        'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
         'info_dict': {
-            'id': '8v4f-avW-VI',
+            'id': '-2000421746_85421746',
             'ext': 'mp4',
-            'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
-            'description': 'Passe-Partout',
-            'uploader_id': 'mrsimpsonstef3',
-            'uploader': 'Phanolito',
-            'upload_date': '20120822',
+            'title': 'Forsaken By Hope Studio Clip',
+            'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
+            'upload_date': '19700101',
+            'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
+            'timestamp': 0,
         },
     }, {
         'url': 'http://biqle.org/watch/-44781847_168547604',
@@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor):
             'id': '-44781847_168547604',
             'ext': 'mp4',
             'title': 'Ребенок в шоке от автоматической мойки',
+            'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
             'timestamp': 1396633454,
-            'uploader': 'Dmitry Kotov',
             'upload_date': '20140404',
-            'uploader_id': '47850140',
+            'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
         },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        embed_url = self._proto_relative_url(self._search_regex(
-            r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
-            webpage, 'embed url'))
+
+        title = self._html_search_meta('name', webpage, 'Title', fatal=False)
+        timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
+        description = self._html_search_meta('description', webpage, 'Description', default=None)
+
+        global_embed_url = self._search_regex(
+            r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
+            webpage, 'global Embed url')
+        hash = self._search_regex(
+            r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
+
+        embed_url = global_embed_url + hash
+
         if VKIE.suitable(embed_url):
             return self.url_result(embed_url, VKIE.ie_key(), video_id)
 
         embed_page = self._download_webpage(
-            embed_url, video_id, headers={'Referer': url})
-        video_ext = self._get_cookies(embed_url).get('video_ext')
-        if video_ext:
-            video_ext = compat_urllib_parse_unquote(video_ext.value)
-        if not video_ext:
-            video_ext = compat_b64decode(self._search_regex(
-                r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
-                embed_page, 'video_ext')).decode()
-        video_id, sig, _, access_token = video_ext.split(':')
+            embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
+
+        glob_params = self._parse_json(self._search_regex(
+            r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
+            embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
+        host_name = compat_b64decode(glob_params['server'][::-1]).decode()
+
         item = self._download_json(
-            'https://api.vk.com/method/video.get', video_id,
-            headers={'User-Agent': 'okhttp/3.4.1'}, query={
-                'access_token': access_token,
-                'sig': sig,
-                'v': 5.44,
+            f'https://{host_name}/method/video.get/{video_id}', video_id,
+            headers={'Referer': url}, query={
+                'token': glob_params['video']['access_token'],
                 'videos': video_id,
+                'ckey': glob_params['c_key'],
+                'credentials': glob_params['video']['credentials'],
             })['response']['items'][0]
-        title = item['title']
 
         formats = []
         for f_id, f_url in item.get('files', {}).items():
             if f_id == 'external':
                 return self.url_result(f_url)
             ext, height = f_id.split('_')
-            formats.append({
-                'format_id': height + 'p',
-                'url': f_url,
-                'height': int_or_none(height),
-                'ext': ext,
-            })
+            height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
+            if height_extra_key:
+                formats.append({
+                    'format_id': f'{height}p',
+                    'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
+                    'height': int_or_none(height),
+                    'ext': ext,
+                })
         self._sort_formats(formats)
 
         thumbnails = []
@@ -96,10 +106,9 @@ def _real_extract(self, url):
             'title': title,
             'formats': formats,
             'comment_count': int_or_none(item.get('comments')),
-            'description': item.get('description'),
+            'description': description,
             'duration': int_or_none(item.get('duration')),
             'thumbnails': thumbnails,
-            'timestamp': int_or_none(item.get('date')),
-            'uploader': item.get('owner_id'),
+            'timestamp': timestamp,
             'view_count': int_or_none(item.get('views')),
         }