]>
Commit | Line | Data |
---|---|---|
9997eee4 THD |
1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | ExtractorError, | |
6e6beffd | 4 | get_first, |
b8eeced2 | 5 | int_or_none, |
9997eee4 | 6 | traverse_obj, |
265e586d | 7 | try_get, |
b8eeced2 | 8 | unified_strdate, |
6e6beffd | 9 | unified_timestamp, |
9997eee4 THD |
10 | ) |
11 | from ..compat import compat_str | |
12 | ||
13 | ||
class OpenRecBaseIE(InfoExtractor):
    """Common plumbing shared by the openrec.tv extractors.

    Provides helpers to read the ``window.pageStore`` JSON object embedded in
    a page, to expand a media mapping into m3u8 formats, and to assemble the
    info dict for pages that carry a movie store.
    """

    # Sent with every m3u8 request and returned to the caller as 'http_headers'
    _M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}

    def _extract_pagestore(self, webpage, video_id):
        """Return the ``window.pageStore`` object found in *webpage*, parsed as JSON."""
        raw_store = self._search_regex(
            r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore')
        return self._parse_json(raw_store, video_id)

    def _expand_media(self, video_id, media):
        """Yield the m3u8 formats of every entry in *media* that has a URL.

        *media* is walked with ``.items()``; each key becomes the ``m3u8_id``
        of the formats extracted from its value. A falsy *media* yields nothing,
        and entries whose URL is falsy are skipped.
        """
        if not media:
            return
        for label, playlist_url in media.items():
            if playlist_url:
                yield from self._extract_m3u8_formats(
                    playlist_url, video_id, ext='mp4', m3u8_id=label,
                    headers=self._M3U8_HEADERS)

    def _extract_movie(self, webpage, video_id, name, is_live):
        """Build the info dict for a page that embeds a movie store.

        *name* is only used in the error message. *is_live* is passed through
        to the result unless the formats had to be fetched from the detail
        API, in which case the result is reported as not live.

        Raises ExtractorError when none of the known movie stores is present
        in the page store.
        """
        page_store = self._extract_pagestore(webpage, video_id)

        # The page store may carry the movie data in three places. They mostly
        # duplicate each other but differ slightly, so all of them are kept and
        # each field is taken from the first store that has it.
        store_paths = (
            ('v8', 'state', 'movie'),
            ('v8', 'movie'),
            'movieStore',
        )
        movie_stores = [
            traverse_obj(page_store, path, expected_type=dict)
            for path in store_paths
        ]
        if not any(movie_stores):
            raise ExtractorError(f'Failed to extract {name} info')

        formats = list(self._expand_media(video_id, get_first(movie_stores, 'media')))
        if not formats:
            # archived livestreams or subscriber-only videos:
            # ask the detail API, authenticating with the site cookies if any
            cookies = self._get_cookies('https://www.openrec.tv/')

            def cookie_value(cookie_name):
                # None when the cookie is not set
                return try_get(cookies, lambda jar: jar.get(cookie_name).value)

            api_headers = {
                'Origin': 'https://www.openrec.tv',
                'Referer': 'https://www.openrec.tv/',
                'access-token': cookie_value('access_token'),
                'uuid': cookie_value('uuid'),
            }
            detail = self._download_json(
                f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id,
                headers=api_headers)
            new_media = traverse_obj(detail, ('data', 'items', ..., 'media'), get_all=False)
            formats = list(self._expand_media(video_id, new_media))
            is_live = False

        # Prefer the millisecond value under publishedAt.time; otherwise parse
        # publishedAt itself as a date string
        timestamp = int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000)
        if not timestamp:
            timestamp = unified_timestamp(get_first(movie_stores, 'publishedAt'))

        return {
            'id': video_id,
            'title': get_first(movie_stores, 'title'),
            'description': get_first(movie_stores, 'introduction'),
            'thumbnail': get_first(movie_stores, 'thumbnailUrl'),
            'formats': formats,
            'uploader': get_first(movie_stores, ('channel', 'user', 'name')),
            'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
            'timestamp': timestamp,
            'is_live': is_live,
            'http_headers': self._M3U8_HEADERS,
        }
67 | ||
68 | ||
b8eeced2 LTHD |
class OpenRecIE(OpenRecBaseIE):
    """Extractor for openrec.tv ``/live/<id>`` pages."""

    IE_NAME = 'openrec'
    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
        'only_matching': True,
    }, {
        'url': 'https://www.openrec.tv/live/wez93eqvjzl',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the live page for *url* and extract it as a live movie."""
        video_id = self._match_id(url)
        live_page = self._download_webpage(
            f'https://www.openrec.tv/live/{video_id}', video_id)
        # The page is assumed live; _extract_movie clears the flag when it has
        # to fall back to the detail API for formats
        return self._extract_movie(live_page, video_id, 'live', True)
85 | ||
86 | ||
class OpenRecCaptureIE(OpenRecBaseIE):
    """Extractor for openrec.tv ``/capture/<id>`` pages."""

    IE_NAME = 'openrec:capture'
    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
        'only_matching': True,
    }, {
        'url': 'https://www.openrec.tv/capture/mldjr82p7qk',
        'info_dict': {
            'id': 'mldjr82p7qk',
            'title': 'たいじの恥ずかしい英語力',
            'uploader': 'たいちゃんねる',
            'uploader_id': 'Yaritaiji',
            'upload_date': '20210803',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a capture page.

        The capture's own data (source, title, thumbnail, creation date) is
        read from the ``capture`` entry of the page store; uploader details
        come from the ``movie`` entry, which may be absent.

        Raises ExtractorError when the page store has no usable ``capture``
        entry.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.openrec.tv/capture/{video_id}', video_id)

        window_stores = self._extract_pagestore(webpage, video_id)
        movie_store = window_stores.get('movie')

        # Only accept a dict here so that a malformed entry is reported as an
        # extraction failure instead of crashing on `.get` below
        capture_data = traverse_obj(window_stores, 'capture', expected_type=dict)
        if not capture_data:
            # The whole capture entry is missing, not just the title
            raise ExtractorError('Failed to extract capture info')

        formats = self._extract_m3u8_formats(
            capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)

        return {
            'id': video_id,
            'title': capture_data.get('title'),
            'thumbnail': capture_data.get('thumbnailUrl'),
            'formats': formats,
            # timestamp and uploader fields come from the movie store,
            # upload_date from the capture itself
            'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)),
            'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
            'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
            'upload_date': unified_strdate(capture_data.get('createdAt')),
            'http_headers': self._M3U8_HEADERS,
        }
b8eeced2 LTHD |
129 | |
130 | ||
class OpenRecMovieIE(OpenRecBaseIE):
    """Extractor for openrec.tv ``/movie/<id>`` pages."""

    IE_NAME = 'openrec:movie'
    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/movie/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.openrec.tv/movie/nqz5xl5km8v',
        'info_dict': {
            'id': 'nqz5xl5km8v',
            'title': '限定コミュニティ(Discord)参加方法ご説明動画',
            'description': 'md5:ebd563e5f5b060cda2f02bf26b14d87f',
            'thumbnail': r're:https://.+',
            'uploader': 'タイキとカズヒロ',
            'uploader_id': 'taiki_to_kazuhiro',
            'timestamp': 1638856800,
        },
    }]

    def _real_extract(self, url):
        """Download the movie page for *url* and extract it as a non-live movie."""
        video_id = self._match_id(url)
        movie_page = self._download_webpage(
            f'https://www.openrec.tv/movie/{video_id}', video_id)
        return self._extract_movie(movie_page, video_id, 'movie', False)