jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mixch.py
[ie/mixch] Extract comments (#9860)
[yt-dlp.git] / yt_dlp / extractor / mixch.py
1 from .common import InfoExtractor
2 from ..networking.exceptions import HTTPError
3 from ..utils import (
4 ExtractorError,
5 UserNotLive,
6 int_or_none,
7 str_or_none,
8 url_or_none,
9 )
10 from ..utils.traversal import traverse_obj
11
12
# Extractor for a user's current live broadcast on mixch.tv (URLs of the form /u/<numeric id>).
class MixchIE(InfoExtractor):
    IE_NAME = 'mixch'
    _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mixch.tv/u/16943797/live',
        'skip': 'don\'t know if this live persists',
        'info_dict': {
            'id': '16943797',
            'ext': 'mp4',
            'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
            'comment_count': int,
            'view_count': int,
            'timestamp': 1714726805,
            'uploader': 'Ent.View K-news🎶💕',
            'uploader_id': '16943797',
            'live_status': 'is_live',
            'upload_date': '20240503',
        },
    }, {
        'url': 'https://mixch.tv/u/16137876/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the live stream of the user named in *url*.

        Raises UserNotLive when the API response carries no non-empty
        ``liveInfo`` mapping.
        """
        user_id = self._match_id(url)
        live_data = self._download_json(
            f'https://mixch.tv/api-web/users/{user_id}/live', user_id)

        live_info = traverse_obj(live_data, ('liveInfo', {dict}))
        if not live_info:
            raise UserNotLive(video_id=user_id)

        # Optional metadata: keys whose value is missing or of the wrong type are dropped.
        metadata = traverse_obj(live_data, {
            'title': ('liveInfo', 'title', {str}),
            'comment_count': ('liveInfo', 'comments', {int_or_none}),
            'view_count': ('liveInfo', 'visitor', {int_or_none}),
            'timestamp': ('liveInfo', 'created', {int_or_none}),
            'uploader': ('broadcasterInfo', 'name', {str}),
        })

        # The HLS URL is mandatory, hence the plain subscript (a KeyError surfaces if absent).
        hls_format = {
            'format_id': 'hls',
            'url': live_info['hls'],
            'ext': 'mp4',
            'protocol': 'm3u8',
        }

        return {
            'id': user_id,
            'uploader_id': user_id,
            **metadata,
            'formats': [hls_format],
            'is_live': True,
            '__post_extractor': self.extract_comments(user_id),
        }

    def _get_comments(self, video_id):
        """Yield one comment dict per entry of the live's messages endpoint."""
        messages = self._download_json(
            f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
            note='Downloading comments', errnote='Failed to download comments')

        comment_fields = {
            'author': ('name', {str}),
            'author_id': ('user_id', {str_or_none}),
            # An empty message_id string is normalised to None.
            'id': ('message_id', {str}, {lambda x: x or None}),
            'text': ('body', {str}),
            'timestamp': ('created', {int}),
        }
        yield from traverse_obj(messages, (..., comment_fields))
73
74
# Extractor for archived videos on mixch.tv (URLs of the form /archive/<numeric id>).
class MixchArchiveIE(InfoExtractor):
    IE_NAME = 'mixch:archive'
    _VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mixch.tv/archive/421',
        'skip': 'paid video, no DRM. expires at Jan 23',
        'info_dict': {
            'id': '421',
            'ext': 'mp4',
            'title': '96NEKO SHOW TIME',
        },
    }, {
        'url': 'https://mixch.tv/archive/1213',
        'skip': 'paid video, no DRM. expires at Dec 31, 2023',
        'info_dict': {
            'id': '1213',
            'ext': 'mp4',
            'title': '【特別トーク番組アーカイブス】Merm4id×燐舞曲 2nd LIVE「VERSUS」',
            'release_date': '20231201',
            'thumbnail': str,
        },
    }, {
        'url': 'https://mixch.tv/archive/1214',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the archive named in *url*.

        An HTTP 401 from the archive API is reported through
        ``raise_login_required()``; other download errors propagate unchanged.
        """
        archive_id = self._match_id(url)
        api_url = f'https://mixch.tv/api-web/archive/{archive_id}'

        try:
            archive = self._download_json(api_url, archive_id)['archive']
        except ExtractorError as err:
            unauthorized = isinstance(err.cause, HTTPError) and err.cause.status == 401
            if unauthorized:
                self.raise_login_required()
            # Not a 401 (or raise_login_required() returned): re-raise the original error.
            raise

        title = traverse_obj(archive, ('title', {str}))
        # 'archiveURL' is mandatory, hence the plain subscript.
        formats = self._extract_m3u8_formats(archive['archiveURL'], archive_id)
        thumbnail = traverse_obj(archive, ('thumbnailURL', {url_or_none}))

        return {
            'id': archive_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }