yt_dlp/extractor/openrec.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     traverse_obj,
   8     try_get,
   9     unified_strdate
  10 )
  11 from ..compat import compat_str
  12
  13
  14 class OpenRecIE(InfoExtractor):
  15     IE_NAME = 'openrec'
  16     _VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
  17     _TESTS = [{
  18         'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
  19         'only_matching': True,
  20     }, {
  21         'url': 'https://www.openrec.tv/live/wez93eqvjzl',
  22         'only_matching': True,
  23     }]
  24
  25     def _real_extract(self, url):
  26         video_id = self._match_id(url)
  27         webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)
  28
  29         window_stores = self._parse_json(
  30             self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
  31         movie_store = traverse_obj(
  32             window_stores,
  33             ('v8', 'state', 'movie'),
  34             ('v8', 'movie'),
  35             expected_type=dict)
  36         if not movie_store:
  37             raise ExtractorError('Failed to extract live info')
  38
  39         title = movie_store.get('title')
  40         description = movie_store.get('introduction')
  41         thumbnail = movie_store.get('thumbnailUrl')
  42
  43         channel_user = movie_store.get('channel', {}).get('user')
  44         uploader = try_get(channel_user, lambda x: x['name'], compat_str)
  45         uploader_id = try_get(channel_user, lambda x: x['id'], compat_str)
  46
  47         timestamp = traverse_obj(movie_store, ('startedAt', 'time'), expected_type=int)
  48
  49         m3u8_playlists = movie_store.get('media')
  50         formats = []
  51         for (name, m3u8_url) in m3u8_playlists.items():
  52             if not m3u8_url:
  53                 continue
  54             formats.extend(self._extract_m3u8_formats(
  55                 m3u8_url, video_id, ext='mp4', entry_protocol='m3u8',
  56                 m3u8_id='hls-%s' % name, live=True))
  57
  58         self._sort_formats(formats)
  59
  60         return {
  61             'id': video_id,
  62             'title': title,
  63             'description': description,
  64             'thumbnail': thumbnail,
  65             'formats': formats,
  66             'uploader': uploader,
  67             'uploader_id': uploader_id,
  68             'timestamp': timestamp,
  69             'is_live': True,
  70         }
  71
  72
  73 class OpenRecCaptureIE(InfoExtractor):
  74     IE_NAME = 'openrec:capture'
  75     _VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
  76     _TESTS = [{
  77         'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
  78         'only_matching': True,
  79     }, {
  80         'url': 'https://www.openrec.tv/capture/mldjr82p7qk',
  81         'info_dict': {
  82             'id': 'mldjr82p7qk',
  83             'title': 'たいじの恥ずかしい英語力',
  84             'uploader': 'たいちゃんねる',
  85             'uploader_id': 'Yaritaiji',
  86             'upload_date': '20210803',
  87         },
  88     }]
  89
  90     def _real_extract(self, url):
  91         video_id = self._match_id(url)
  92         webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)
  93
  94         window_stores = self._parse_json(
  95             self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
  96         movie_store = window_stores.get('movie')
  97
  98         capture_data = window_stores.get('capture')
  99         if not capture_data:
 100             raise ExtractorError('Cannot extract title')
 101         title = capture_data.get('title')
 102         thumbnail = capture_data.get('thumbnailUrl')
 103         upload_date = unified_strdate(capture_data.get('createdAt'))
 104
 105         channel_info = movie_store.get('channel') or {}
 106         uploader = channel_info.get('name')
 107         uploader_id = channel_info.get('id')
 108
 109         m3u8_url = capture_data.get('source')
 110         if not m3u8_url:
 111             raise ExtractorError('Cannot extract m3u8 url')
 112         formats = self._extract_m3u8_formats(
 113             m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
 114             m3u8_id='hls')
 115
 116         self._sort_formats(formats)
 117
 118         return {
 119             'id': video_id,
 120             'title': title,
 121             'thumbnail': thumbnail,
 122             'formats': formats,
 123             'uploader': uploader,
 124             'uploader_id': uploader_id,
 125             'upload_date': upload_date,
 126         }