]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/fourzerostudio.py
[ie/box] Fix formats extraction (#8649)
[yt-dlp.git] / yt_dlp / extractor / fourzerostudio.py
CommitLineData
0bea4fd8 1from .common import InfoExtractor
8072ef2b 2from ..utils import traverse_obj, unified_timestamp
0bea4fd8
L
3
4
class FourZeroStudioArchiveIE(InfoExtractor):
    """Extractor for archived broadcasts hosted on 0000.studio.

    Metadata is read from the page's NUXT payload (its ``ssrRefs`` entries,
    selected by ``__typename``); the media itself is taken from the HLS
    playlist referenced by the broadcast's ``archiveUrl``.
    """
    _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
    IE_NAME = '0000studio:archive'
    _TESTS = [{
        'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
        'info_dict': {
            'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
            'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
            'timestamp': 1653802534,
            'release_timestamp': 1653796604,
            'thumbnails': 'count:1',
            'comments': 'count:7',
            'uploader': '『中崎雄心』の執務室。',
            'uploader_id': 'mumeijiten',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for one archived broadcast URL.

        Raises KeyError when the broadcast entry (or its ``archiveUrl``)
        is absent from the NUXT data, since no formats can be produced.
        """
        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
        webpage = self._download_webpage(url, video_id)
        nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)

        # Fall back to an empty dict so that a missing broadcast entry fails on
        # the mandatory 'archiveUrl' lookup (KeyError) instead of raising a
        # TypeError from subscripting None, and so the optional .get() lookups
        # below never operate on None.
        pcb = traverse_obj(
            nuxt_data,
            ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'),
            get_all=False) or {}
        # Internal user id of the channel owner; compared against each
        # comment's poster id to flag comments written by the uploader.
        uploader_internal_id = traverse_obj(nuxt_data, (
            'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)

        # 'archiveUrl' is mandatory: without it there is nothing to download.
        formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')

        return {
            'id': video_id,
            'title': pcb.get('title'),
            'age_limit': 18 if pcb.get('isAdult') else None,
            'timestamp': unified_timestamp(pcb.get('finishTime')),
            'release_timestamp': unified_timestamp(pcb.get('createdAt')),
            'thumbnails': [{
                'url': pcb['thumbnailUrl'],
                'ext': 'png',
            }] if pcb.get('thumbnailUrl') else None,
            'formats': formats,
            'subtitles': subs,
            'comments': [{
                'author': c.get('username'),
                'author_id': c.get('postedUserId'),
                'author_thumbnail': c.get('userThumbnailUrl'),
                'id': c.get('id'),
                'text': c.get('body'),
                'timestamp': unified_timestamp(c.get('createdAt')),
                'like_count': c.get('likeCount'),
                'is_favorited': c.get('isLikedByOwner'),
                'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
            } for c in traverse_obj(nuxt_data, (
                'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []],
            'uploader_id': uploader_id,
            'uploader': traverse_obj(nuxt_data, (
                'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
        }
61
62
class FourZeroStudioClipIE(InfoExtractor):
    """Extractor for archive clips hosted on 0000.studio.

    Formats come from the page's HTML5 media element when one offering mp4
    is present; otherwise they are built from the ``mediaFiles`` list in the
    clip's NUXT entry. Remaining metadata is read from the NUXT payload.
    """
    _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
    IE_NAME = '0000studio:clip'
    _TESTS = [{
        'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
        'info_dict': {
            'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
            'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
            'timestamp': 1652109105,
            'like_count': 1,
            'uploader': 'ソエジマケイタ',
            'uploader_id': 'soeji',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for one archive-clip URL."""
        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
        webpage = self._download_webpage(url, video_id)
        nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)

        # Fall back to an empty dict: every field read from the clip entry is
        # optional metadata (or a fallback source), so a missing entry must not
        # raise AttributeError when the media element already supplied formats.
        clip_info = traverse_obj(
            nuxt_data,
            ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'),
            get_all=False) or {}

        # Pick the first HTML5 media entry that offers at least one mp4 format.
        info = next((
            m for m in self._parse_html5_media_entries(url, webpage, video_id)
            if 'mp4' in traverse_obj(m, ('formats', ..., 'ext'))
        ), None)
        if not info:
            self.report_warning('Failed to find a desired media element. Falling back to using NUXT data.')
            # The loop variable is deliberately not called `url`, so it cannot
            # be confused with the page URL passed to this method.
            info = {
                'formats': [{
                    'ext': 'mp4',
                    'url': media_url,
                } for media_url in clip_info.get('mediaFiles') or [] if media_url],
            }
        return {
            **info,
            'id': video_id,
            'title': clip_info.get('clipComment'),
            'timestamp': unified_timestamp(clip_info.get('createdAt')),
            'like_count': clip_info.get('likeCount'),
            'uploader_id': uploader_id,
            'uploader': traverse_obj(nuxt_data, (
                'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
        }