jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/picarto.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / picarto.py
index 1abda865d1df14968cbb642c5ad5b390a70c0882..726fe414257158afc52fece4aa7d6b7e9881ff47 100644 (file)
@@ -1,22 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import time
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    js_to_json,
-    try_get,
-    update_url_query,
-    urlencode_postdata,
+    str_or_none,
+    traverse_obj,
 )
 
 
 class PicartoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
+    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
     _TEST = {
         'url': 'https://picarto.tv/Setz',
         'info_dict': {
@@ -24,76 +17,56 @@ class PicartoIE(InfoExtractor):
             'ext': 'mp4',
             'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'timestamp': int,
-            'is_live': True
+            'is_live': True,
         },
         'skip': 'Stream is offline',
     }
 
     @classmethod
     def suitable(cls, url):
-        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
+        return False if PicartoVodIE.suitable(url) else super().suitable(url)
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel_id = mobj.group('id')
-
-        metadata = self._download_json(
-            'https://api.picarto.tv/v1/channel/name/' + channel_id,
-            channel_id)
-
-        if metadata.get('online') is False:
+        channel_id = self._match_id(url)
+
+        data = self._download_json(
+            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
+                'query': '''{
+  channel(name: "%s") {
+    adult
+    id
+    online
+    stream_name
+    title
+  }
+  getLoadBalancerUrl(channel_name: "%s") {
+    url
+  }
+}''' % (channel_id, channel_id),  # noqa: UP031
+            })['data']
+        metadata = data['channel']
+
+        if metadata.get('online') == 0:
             raise ExtractorError('Stream is offline', expected=True)
+        title = metadata['title']
 
         cdn_data = self._download_json(
-            'https://picarto.tv/process/channel', channel_id,
-            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
-            note='Downloading load balancing info')
-
-        token = mobj.group('token') or 'public'
-        params = {
-            'con': int(time.time() * 1000),
-            'token': token,
-        }
+            data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+            channel_id, 'Downloading load balancing info')
 
-        prefered_edge = cdn_data.get('preferedEdge')
         formats = []
-
-        for edge in cdn_data['edges']:
-            edge_ep = edge.get('ep')
-            if not edge_ep or not isinstance(edge_ep, compat_str):
+        for source in (cdn_data.get('source') or []):
+            source_url = source.get('url')
+            if not source_url:
                 continue
-            edge_id = edge.get('id')
-            for tech in cdn_data['techs']:
-                tech_label = tech.get('label')
-                tech_type = tech.get('type')
-                preference = 0
-                if edge_id == prefered_edge:
-                    preference += 1
-                format_id = []
-                if edge_id:
-                    format_id.append(edge_id)
-                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
-                    format_id.append('hls')
-                    formats.extend(self._extract_m3u8_formats(
-                        update_url_query(
-                            'https://%s/hls/%s/index.m3u8'
-                            % (edge_ep, channel_id), params),
-                        channel_id, 'mp4', quality=preference,
-                        m3u8_id='-'.join(format_id), fatal=False))
-                    continue
-                elif tech_type == 'video/mp4' or tech_label == 'MP4':
-                    format_id.append('mp4')
-                    formats.append({
-                        'url': update_url_query(
-                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
-                            params),
-                        'format_id': '-'.join(format_id),
-                        'quality': preference,
-                    })
-                else:
-                    # rtmp format does not seem to work
-                    continue
-        self._sort_formats(formats)
+            source_type = source.get('type')
+            if source_type == 'html5/application/vnd.apple.mpegurl':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif source_type == 'html5/video/mp4':
+                formats.append({
+                    'url': source_url,
+                })
 
         mature = metadata.get('adult')
         if mature is None:
@@ -103,18 +76,18 @@ def _real_extract(self, url):
 
         return {
             'id': channel_id,
-            'title': self._live_title(metadata.get('title') or channel_id),
+            'title': title.strip(),
             'is_live': True,
-            'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
             'channel': channel_id,
-            'channel_url': 'https://picarto.tv/%s' % channel_id,
+            'channel_id': metadata.get('id'),
+            'channel_url': f'https://picarto.tv/{channel_id}',
             'age_limit': age_limit,
             'formats': formats,
         }
 
 
 class PicartoVodIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
         'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
@@ -122,7 +95,19 @@ class PicartoVodIE(InfoExtractor):
             'id': 'ArtofZod_2017.12.12.00.13.23.flv',
             'ext': 'mp4',
             'title': 'ArtofZod_2017.12.12.00.13.23.flv',
-            'thumbnail': r're:^https?://.*\.jpg'
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'skip': 'The VOD does not exist',
+    }, {
+        'url': 'https://picarto.tv/ArtofZod/videos/772650',
+        'md5': '00067a0889f1f6869cc512e3e79c521b',
+        'info_dict': {
+            'id': '772650',
+            'ext': 'mp4',
+            'title': 'Art of Zod - Drawing and Painting',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'channel': 'ArtofZod',
+            'age_limit': 18,
         },
     }, {
         'url': 'https://picarto.tv/videopopout/Plague',
@@ -132,22 +117,36 @@ class PicartoVodIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-
-        vod_info = self._parse_json(
-            self._search_regex(
-                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
-                video_id),
-            video_id, transform_source=js_to_json)
+        data = self._download_json(
+            'https://ptvintern.picarto.tv/ptvapi', video_id, query={
+                'query': f'''{{
+  video(id: "{video_id}") {{
+    id
+    title
+    adult
+    file_name
+    video_recording_image_url
+    channel {{
+      name
+    }}
+  }}
+}}''',
+            })['data']['video']
+
+        file_name = data['file_name']
+        netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc
 
         formats = self._extract_m3u8_formats(
-            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
-        self._sort_formats(formats)
+            f'https://{netloc}/stream/hls/{file_name}/index.m3u8', video_id, 'mp4', m3u8_id='hls')
 
         return {
             'id': video_id,
-            'title': video_id,
-            'thumbnail': vod_info.get('vodThumb'),
+            **traverse_obj(data, {
+                'id': ('id', {str_or_none}),
+                'title': ('title', {str}),
+                'thumbnail': 'video_recording_image_url',
+                'channel': ('channel', 'name', {str}),
+                'age_limit': ('adult', {lambda x: 18 if x else 0}),
+            }),
             'formats': formats,
         }