yt_dlp/extractor/picarto.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     ExtractorError,
   4     js_to_json,
   5 )
   6
   7
   8 class PicartoIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
  10     _TEST = {
  11         'url': 'https://picarto.tv/Setz',
  12         'info_dict': {
  13             'id': 'Setz',
  14             'ext': 'mp4',
  15             'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  16             'timestamp': int,
  17             'is_live': True
  18         },
  19         'skip': 'Stream is offline',
  20     }
  21
  22     @classmethod
  23     def suitable(cls, url):
  24         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
  25
  26     def _real_extract(self, url):
  27         channel_id = self._match_id(url)
  28
  29         data = self._download_json(
  30             'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
  31                 'query': '''{
  32   channel(name: "%s") {
  33     adult
  34     id
  35     online
  36     stream_name
  37     title
  38   }
  39   getLoadBalancerUrl(channel_name: "%s") {
  40     url
  41   }
  42 }''' % (channel_id, channel_id),
  43             })['data']
  44         metadata = data['channel']
  45
  46         if metadata.get('online') == 0:
  47             raise ExtractorError('Stream is offline', expected=True)
  48         title = metadata['title']
  49
  50         cdn_data = self._download_json(
  51             data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
  52             channel_id, 'Downloading load balancing info')
  53
  54         formats = []
  55         for source in (cdn_data.get('source') or []):
  56             source_url = source.get('url')
  57             if not source_url:
  58                 continue
  59             source_type = source.get('type')
  60             if source_type == 'html5/application/vnd.apple.mpegurl':
  61                 formats.extend(self._extract_m3u8_formats(
  62                     source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
  63             elif source_type == 'html5/video/mp4':
  64                 formats.append({
  65                     'url': source_url,
  66                 })
  67
  68         mature = metadata.get('adult')
  69         if mature is None:
  70             age_limit = None
  71         else:
  72             age_limit = 18 if mature is True else 0
  73
  74         return {
  75             'id': channel_id,
  76             'title': title.strip(),
  77             'is_live': True,
  78             'channel': channel_id,
  79             'channel_id': metadata.get('id'),
  80             'channel_url': 'https://picarto.tv/%s' % channel_id,
  81             'age_limit': age_limit,
  82             'formats': formats,
  83         }
  84
  85
  86 class PicartoVodIE(InfoExtractor):
  87     _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
  88     _TESTS = [{
  89         'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
  90         'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
  91         'info_dict': {
  92             'id': 'ArtofZod_2017.12.12.00.13.23.flv',
  93             'ext': 'mp4',
  94             'title': 'ArtofZod_2017.12.12.00.13.23.flv',
  95             'thumbnail': r're:^https?://.*\.jpg'
  96         },
  97     }, {
  98         'url': 'https://picarto.tv/videopopout/Plague',
  99         'only_matching': True,
 100     }]
 101
 102     def _real_extract(self, url):
 103         video_id = self._match_id(url)
 104
 105         webpage = self._download_webpage(url, video_id)
 106
 107         vod_info = self._parse_json(
 108             self._search_regex(
 109                 r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
 110                 'vod player'),
 111             video_id, transform_source=js_to_json)
 112
 113         formats = self._extract_m3u8_formats(
 114             vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
 115             m3u8_id='hls')
 116
 117         return {
 118             'id': video_id,
 119             'title': video_id,
 120             'thumbnail': vod_info.get('vodThumb'),
 121             'formats': formats,
 122         }