yt_dlp/extractor/picarto.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     js_to_json,
   8 )
   9
  10
  11 class PicartoIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
  13     _TEST = {
  14         'url': 'https://picarto.tv/Setz',
  15         'info_dict': {
  16             'id': 'Setz',
  17             'ext': 'mp4',
  18             'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  19             'timestamp': int,
  20             'is_live': True
  21         },
  22         'skip': 'Stream is offline',
  23     }
  24
  25     @classmethod
  26     def suitable(cls, url):
  27         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
  28
  29     def _real_extract(self, url):
  30         channel_id = self._match_id(url)
  31
  32         data = self._download_json(
  33             'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
  34                 'query': '''{
  35   channel(name: "%s") {
  36     adult
  37     id
  38     online
  39     stream_name
  40     title
  41   }
  42   getLoadBalancerUrl(channel_name: "%s") {
  43     url
  44   }
  45 }''' % (channel_id, channel_id),
  46             })['data']
  47         metadata = data['channel']
  48
  49         if metadata.get('online') == 0:
  50             raise ExtractorError('Stream is offline', expected=True)
  51         title = metadata['title']
  52
  53         cdn_data = self._download_json(
  54             data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
  55             channel_id, 'Downloading load balancing info')
  56
  57         formats = []
  58         for source in (cdn_data.get('source') or []):
  59             source_url = source.get('url')
  60             if not source_url:
  61                 continue
  62             source_type = source.get('type')
  63             if source_type == 'html5/application/vnd.apple.mpegurl':
  64                 formats.extend(self._extract_m3u8_formats(
  65                     source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
  66             elif source_type == 'html5/video/mp4':
  67                 formats.append({
  68                     'url': source_url,
  69                 })
  70         self._sort_formats(formats)
  71
  72         mature = metadata.get('adult')
  73         if mature is None:
  74             age_limit = None
  75         else:
  76             age_limit = 18 if mature is True else 0
  77
  78         return {
  79             'id': channel_id,
  80             'title': title.strip(),
  81             'is_live': True,
  82             'channel': channel_id,
  83             'channel_id': metadata.get('id'),
  84             'channel_url': 'https://picarto.tv/%s' % channel_id,
  85             'age_limit': age_limit,
  86             'formats': formats,
  87         }
  88
  89
  90 class PicartoVodIE(InfoExtractor):
  91     _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
  92     _TESTS = [{
  93         'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
  94         'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
  95         'info_dict': {
  96             'id': 'ArtofZod_2017.12.12.00.13.23.flv',
  97             'ext': 'mp4',
  98             'title': 'ArtofZod_2017.12.12.00.13.23.flv',
  99             'thumbnail': r're:^https?://.*\.jpg'
 100         },
 101     }, {
 102         'url': 'https://picarto.tv/videopopout/Plague',
 103         'only_matching': True,
 104     }]
 105
 106     def _real_extract(self, url):
 107         video_id = self._match_id(url)
 108
 109         webpage = self._download_webpage(url, video_id)
 110
 111         vod_info = self._parse_json(
 112             self._search_regex(
 113                 r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
 114                 'vod player'),
 115             video_id, transform_source=js_to_json)
 116
 117         formats = self._extract_m3u8_formats(
 118             vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
 119             m3u8_id='hls')
 120         self._sort_formats(formats)
 121
 122         return {
 123             'id': video_id,
 124             'title': video_id,
 125             'thumbnail': vod_info.get('vodThumb'),
 126             'formats': formats,
 127         }