yt_dlp/extractor/islamchannel.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import traverse_obj, urljoin
   5
   6
   7 class IslamChannelIE(InfoExtractor):
   8     _VALID_URL = r'https?://watch\.islamchannel\.tv/watch/(?P<id>\d+)'
   9     _TESTS = [{
  10         'url': 'https://watch.islamchannel.tv/watch/38604310',
  11         'info_dict': {
  12             'id': '38604310',
  13             'title': 'Omar - Young Omar',
  14             'description': 'md5:5cc7ddecef064ea7afe52eb5e0e33b55',
  15             'thumbnail': r're:https?://.+',
  16             'ext': 'mp4',
  17         },
  18     }]
  19
  20     def _real_extract(self, url):
  21         video_id = self._match_id(url)
  22         webpage = self._download_webpage(url, video_id)
  23
  24         thumbnail = self._search_regex(
  25             r'data-poster="([^"]+)"', webpage, 'data poster', fatal=False) or \
  26             self._html_search_meta(('og:image', 'twitter:image'), webpage)
  27
  28         headers = {
  29             'Token': self._search_regex(r'data-token="([^"]+)"', webpage, 'data token'),
  30             'Token-Expiry': self._search_regex(r'data-expiry="([^"]+)"', webpage, 'data expiry'),
  31             'Uvid': video_id,
  32         }
  33         show_stream = self._download_json(
  34             f'https://v2-streams-elb.simplestreamcdn.com/api/show/stream/{video_id}', video_id,
  35             query={
  36                 'key': self._search_regex(r'data-key="([^"]+)"', webpage, 'data key'),
  37                 'platform': 'chrome',
  38             }, headers=headers)
  39         # TODO: show_stream['stream'] and show_stream['drm'] may contain something interesting
  40         streams = self._download_json(
  41             traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id,
  42             headers=headers)
  43         formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4')
  44
  45         return {
  46             'id': video_id,
  47             'title': self._html_search_meta(('og:title', 'twitter:title'), webpage),
  48             'description': self._html_search_meta(('og:description', 'twitter:description', 'description'), webpage),
  49             'formats': formats,
  50             'subtitles': subs,
  51             'thumbnails': [{
  52                 'id': 'unscaled',
  53                 'url': thumbnail.split('?')[0],
  54                 'ext': 'jpg',
  55                 'preference': 2,
  56             }, {
  57                 'id': 'orig',
  58                 'url': thumbnail,
  59                 'ext': 'jpg',
  60                 'preference': 1,
  61             }] if thumbnail else None,
  62         }
  63
  64
  65 class IslamChannelSeriesIE(InfoExtractor):
  66     _VALID_URL = r'https?://watch\.islamchannel\.tv/series/(?P<id>[a-f\d-]+)'
  67     _TESTS = [{
  68         'url': 'https://watch.islamchannel.tv/series/a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
  69         'info_dict': {
  70             'id': 'a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
  71         },
  72         'playlist_mincount': 31,
  73     }]
  74
  75     def _real_extract(self, url):
  76         pl_id = self._match_id(url)
  77         webpage = self._download_webpage(url, pl_id)
  78
  79         return self.playlist_from_matches(
  80             re.finditer(r'<a\s+href="(/watch/\d+)"[^>]+?data-video-type="show">', webpage),
  81             pl_id, getter=lambda x: urljoin(url, x.group(1)), ie=IslamChannelIE)