yt_dlp/extractor/gronkh.py

   1 import functools
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     OnDemandPagedList,
   6     traverse_obj,
   7     unified_strdate,
   8 )
   9
  10
  11 class GronkhIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?streams?/(?P<id>\d+)'
  13
  14     _TESTS = [{
  15         'url': 'https://gronkh.tv/streams/657',
  16         'info_dict': {
  17             'id': '657',
  18             'ext': 'mp4',
  19             'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
  20             'view_count': int,
  21             'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
  22             'upload_date': '20221111'
  23         },
  24         'params': {'skip_download': True}
  25     }, {
  26         'url': 'https://gronkh.tv/stream/536',
  27         'info_dict': {
  28             'id': '536',
  29             'ext': 'mp4',
  30             'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD  #FREiAB1830  !FF7 !horde !archiv',
  31             'view_count': int,
  32             'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
  33             'upload_date': '20211001'
  34         },
  35         'params': {'skip_download': True}
  36     }, {
  37         'url': 'https://gronkh.tv/watch/stream/546',
  38         'only_matching': True,
  39     }]
  40
  41     def _real_extract(self, url):
  42         id = self._match_id(url)
  43         data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={id}', id)
  44         m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={id}', id)['playlist_url']
  45         formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
  46         if data_json.get('vtt_url'):
  47             subtitles.setdefault('en', []).append({
  48                 'url': data_json['vtt_url'],
  49                 'ext': 'vtt',
  50             })
  51         return {
  52             'id': id,
  53             'title': data_json.get('title'),
  54             'view_count': data_json.get('views'),
  55             'thumbnail': data_json.get('preview_url'),
  56             'upload_date': unified_strdate(data_json.get('created_at')),
  57             'formats': formats,
  58             'subtitles': subtitles,
  59         }
  60
  61
  62 class GronkhFeedIE(InfoExtractor):
  63     _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)'
  64     IE_NAME = 'gronkh:feed'
  65
  66     _TESTS = [{
  67         'url': 'https://gronkh.tv/feed',
  68         'info_dict': {
  69             'id': 'feed',
  70         },
  71         'playlist_count': 16,
  72     }, {
  73         'url': 'https://gronkh.tv',
  74         'only_matching': True,
  75     }]
  76
  77     def _entries(self):
  78         for type_ in ('recent', 'views'):
  79             info = self._download_json(
  80                 f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON')
  81             for item in traverse_obj(info, ('discovery', ...)) or []:
  82                 yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title'))
  83
  84     def _real_extract(self, url):
  85         return self.playlist_result(self._entries(), 'feed')
  86
  87
  88 class GronkhVodsIE(InfoExtractor):
  89     _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)'
  90     IE_NAME = 'gronkh:vods'
  91
  92     _TESTS = [{
  93         'url': 'https://gronkh.tv/vods/streams',
  94         'info_dict': {
  95             'id': 'vods',
  96         },
  97         'playlist_mincount': 150,
  98     }]
  99     _PER_PAGE = 25
 100
 101     def _fetch_page(self, page):
 102         items = traverse_obj(self._download_json(
 103             'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE},
 104             note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...))
 105         for item in items or []:
 106             yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title'))
 107
 108     def _real_extract(self, url):
 109         entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE)
 110         return self.playlist_result(entries, 'vods')