]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/afreecatv.py
[ie/dropbox] Fix formats extraction (#9627)
[yt-dlp.git] / yt_dlp / extractor / afreecatv.py
CommitLineData
bd4073c5 1import functools
1dbfd787
PR
2import re
3
57cf9b7f 4from .common import InfoExtractor
57cf9b7f 5from ..utils import (
bd4073c5
HTL
6 ExtractorError,
7 OnDemandPagedList,
9073ae64 8 UserNotLive,
b2eeee0c 9 date_from_str,
6b9466de 10 determine_ext,
9073ae64 11 filter_dict,
57cf9b7f 12 int_or_none,
b2eeee0c 13 unified_strdate,
f76ca2dd 14 unified_timestamp,
3052a30d 15 url_or_none,
e51762be 16 urlencode_postdata,
833b644f 17 xpath_text,
57cf9b7f 18)
9073ae64 19from ..utils.traversal import traverse_obj
57cf9b7f
PR
20
21
9073ae64
DHH
class AfreecaTVBaseIE(InfoExtractor):
    """Common base of the AfreecaTV extractors; provides the login step."""

    _NETRC_MACHINE = 'afreecatv'

    def _perform_login(self, username, password):
        """Post the credentials to the AfreecaTV login endpoint.

        Raises an expected ExtractorError unless the ``RESULT`` field of the
        JSON response is 1.
        """
        payload = urlencode_postdata({
            'szWork': 'login',
            'szType': 'json',
            'szUid': username,
            'szPassword': password,
            'isSaveId': 'false',
            'szScriptVar': 'oLoginRet',
            'szAction': '',
        })
        login_result = self._download_json(
            'https://login.afreecatv.com/app/LoginAction.php', None,
            'Logging in', data=payload)

        # Text (or follow-up URL) reported to the user for each RESULT code
        failure_reasons = {
            0: 'The username does not exist or you have entered the wrong password.',
            -1: 'The username does not exist or you have entered the wrong password.',
            -3: 'You have entered your username/password incorrectly.',
            -4: 'Your account has been suspended due to a violation of our terms and policies.',
            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
            -6: 'https://login.afreecatv.com/membership/changeMember.php',
            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
            -12: 'https://member.afreecatv.com/app/user_security.php',
            -32008: 'You have failed to log in. Please contact our Help Center.',
        }

        code = int_or_none(login_result.get('RESULT'))
        if code == 1:
            return

        reason = failure_reasons.get(code, 'You have failed to log in.')
        raise ExtractorError(
            'Unable to login: %s said: %s' % (self.IE_NAME, reason),
            expected=True)
62
63
class AfreecaTVIE(AfreecaTVBaseIE):
    """Extractor for AfreecaTV VODs addressed by their numeric title number."""

    IE_NAME = 'afreecatv'
    IE_DESC = 'afreecatv.com'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
                            (?:
                                /app/(?:index|read_ucc_bbs)\.cgi|
                                /player/[Pp]layer\.(?:swf|html)
                            )\?.*?\bnTitleNo=|
                            vod\.afreecatv\.com/(PLAYER/STATION|player)/
                        )
                        (?P<id>\d+)
                    '''
    _NETRC_MACHINE = 'afreecatv'
    _TESTS = [{
        'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
        'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
        'info_dict': {
            'id': '36164052',
            'ext': 'mp4',
            'title': '데일리 에이프릴 요정들의 시상식!',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
            'upload_date': '20160503',
        },
        'skip': 'Video is gone',
    }, {
        'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
        'info_dict': {
            'id': '36153164',
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': 'd8b7c174568da61d774ef0203159bf97',
            'info_dict': {
                'id': '36153164_1',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
                'upload_date': '20160502',
            },
        }, {
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
            'info_dict': {
                'id': '36153164_2',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
                'upload_date': '20160502',
            },
        }],
        'skip': 'Video is gone',
    }, {
        # non standard key
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
        'info_dict': {
            'id': '20170411_BE689A0E_190960999_1_2_h',
            'ext': 'mp4',
            'title': '혼자사는여자집',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': '♥이슬이',
            'uploader_id': 'dasl8121',
            'upload_date': '20170411',
            'duration': 213,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # adult content
        'url': 'https://vod.afreecatv.com/player/97267690',
        'info_dict': {
            'id': '20180327_27901457_202289533_1',
            'ext': 'mp4',
            'title': '[생]빨개요♥ (part 1)',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': '[SA]서아',
            'uploader_id': 'bjdyrksu',
            'upload_date': '20180327',
            'duration': 3601,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'The VOD does not exist',
    }, {
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
        'only_matching': True,
    }, {
        'url': 'https://vod.afreecatv.com/player/96753363',
        'info_dict': {
            'id': '20230108_9FF5BEE1_244432674_1',
            'ext': 'mp4',
            'uploader_id': 'rlantnghks',
            'uploader': '페이즈으',
            'duration': 10840,
            'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
            'upload_date': '20230108',
            'title': '젠지 페이즈',
        },
        'params': {
            'skip_download': True,
        },
    }]

    @staticmethod
    def parse_video_key(key):
        """Split a ``<8 digits>_<word chars>_<digits>`` key into its parts.

        Returns a dict with 'upload_date' (the 8-digit prefix, as a string)
        and 'part' (the trailing number, as an int), or an empty dict when
        the key does not have that shape.
        """
        video_key = {}
        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
        if m:
            video_key['upload_date'] = m.group('upload_date')
            video_key['part'] = int(m.group('part'))
        return video_key

    def _real_extract(self, url):
        """Extract a VOD as a single video or as a multi_video of its parts.

        Raises ExtractorError when the VOD does not exist, when the server
        returns a flag other than SUCCEED that cannot be retried, or when no
        video element is present in the info XML.
        """
        video_id = self._match_id(url)

        # The station/board numbers do not depend on the retry flags used
        # below, so they are requested once rather than on every attempt.
        data = self._download_json(
            'https://api.m.afreecatv.com/station/video/a/view',
            video_id, headers={'Referer': url}, data=urlencode_postdata({
                'nTitleNo': video_id,
                'nApiLevel': 10,
            }))['data']
        if traverse_obj(data, ('code', {int})) == -6221:
            raise ExtractorError('The VOD does not exist', expected=True)

        partial_view = False
        adult_view = False
        for _ in range(2):
            query = {
                'nTitleNo': video_id,
                'nStationNo': data['station_no'],
                'nBbsNo': data['bbs_no'],
            }
            if partial_view:
                query['partialView'] = 'SKIP_ADULT'
            if adult_view:
                query['adultView'] = 'ADULT_VIEW'
            # Only the progress note is passed positionally: a further
            # positional argument would be taken as the error note.
            video_xml = self._download_xml(
                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
                video_id, 'Downloading video info XML%s'
                % (' (skipping adult)' if partial_view else ''),
                headers={
                    'Referer': url,
                }, query=query)

            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
            if flag == 'SUCCEED':
                break
            if flag == 'PARTIAL_ADULT':
                self.report_warning(
                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
                    'Only content suitable for all ages will be downloaded. '
                    'Provide account credentials if you wish to download restricted content.')
                partial_view = True
                continue
            if flag == 'ADULT':
                if not adult_view:
                    # Retry once with the adult-view parameter set
                    adult_view = True
                    continue
                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
            else:
                error = flag
            raise ExtractorError(
                f'{self.IE_NAME} said: {error}', expected=True)
        else:
            # Both attempts asked for a retry and none reported SUCCEED
            raise ExtractorError('Unable to download video info')

        # The last <video> element is used; an empty result must be checked
        # before indexing so that the error below is actually reachable.
        video_elements = video_xml.findall('./track/video')
        video_element = video_elements[-1] if video_elements else None
        if video_element is None or video_element.text is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)

        video_url = video_element.text.strip()

        title = xpath_text(video_xml, './track/title', 'title', fatal=True)

        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
        duration = int_or_none(xpath_text(
            video_xml, './track/duration', 'duration'))
        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')

        # Fields copied into every part as well as into the top-level result
        common_entry = {
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }
        info = {
            **common_entry,
            'id': video_id,
            'title': title,
            'duration': duration,
        }

        if not video_url:
            # No URL on the <video> element itself: the parts are listed as
            # <file> children and are returned as a multi_video.
            entries = []
            file_elements = video_element.findall('./file')
            one = len(file_elements) == 1
            for file_num, file_element in enumerate(file_elements, start=1):
                file_url = url_or_none(file_element.text)
                if not file_url:
                    continue
                key = file_element.get('key', '')
                upload_date = unified_strdate(self._search_regex(
                    r'^(\d{8})_', key, 'upload date', default=None))
                if upload_date is not None:
                    # sometimes the upload date isn't included in the file name
                    # instead, another random ID is, which may parse as a valid
                    # date but be wildly out of a reasonable range
                    parsed_date = date_from_str(upload_date)
                    if parsed_date.year < 2000 or parsed_date.year >= 2100:
                        upload_date = None
                file_duration = int_or_none(file_element.get('duration'))
                format_id = key if key else f'{video_id}_{file_num}'
                if determine_ext(file_url) == 'm3u8':
                    formats = self._extract_m3u8_formats(
                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id='hls',
                        note=f'Downloading part {file_num} m3u8 information')
                else:
                    formats = [{
                        'url': file_url,
                        'format_id': 'http',
                    }]
                if not formats and not self.get_param('ignore_no_formats'):
                    continue
                entries.append({
                    **common_entry,
                    'id': format_id,
                    'title': title if one else f'{title} (part {file_num})',
                    'upload_date': upload_date,
                    'duration': file_duration,
                    'formats': formats,
                })
            return {
                **info,
                '_type': 'multi_video',
                'entries': entries,
            }

        if determine_ext(video_url) == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(
                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        else:
            app, playpath = video_url.split('mp4:')
            info.update({
                'url': app,
                'ext': 'flv',
                'play_path': 'mp4:' + playpath,
                'rtmp_live': True,  # downloading won't end without this
            })

        return info
f76ca2dd
LR
335
336
class AfreecaTVLiveIE(AfreecaTVBaseIE):
    """Extractor for livestreams on play.afreecatv.com."""

    IE_NAME = 'afreecatv:live'
    IE_DESC = 'afreecatv.com livestreams'
    _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
    _TESTS = [{
        'url': 'https://play.afreecatv.com/pyh3646/237852185',
        'info_dict': {
            'id': '237852185',
            'ext': 'mp4',
            'title': '【 우루과이 오늘은 무슨일이? 】',
            'uploader': '박진우[JINU]',
            'uploader_id': 'pyh3646',
            'timestamp': 1640661495,
            'is_live': True,
        },
        'skip': 'Livestream has ended',
    }, {
        'url': 'https://play.afreecatv.com/pyh3646/237852185',
        'only_matching': True,
    }, {
        'url': 'https://play.afreecatv.com/pyh3646',
        'only_matching': True,
    }]

    _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'

    def _real_extract(self, url):
        """Resolve the current broadcast of a channel and return its HLS formats.

        Raises UserNotLive when no broadcast number is known, and an expected
        ExtractorError when the channel reports a password and none was given.
        """
        mobj = self._match_valid_url(url)
        broadcaster_id = mobj.group('id')
        broadcast_no = mobj.group('bno')

        live_response = self._download_json(
            self._LIVE_API_URL, broadcaster_id,
            data=urlencode_postdata({'bid': broadcaster_id}))
        channel_info = traverse_obj(live_response, ('CHANNEL', {dict})) or {}

        # Values reported by the API take precedence over those from the URL
        broadcaster_id = channel_info.get('BJID') or broadcaster_id
        broadcast_no = channel_info.get('BNO') or broadcast_no
        if not broadcast_no:
            raise UserNotLive(video_id=broadcaster_id)

        password = self.get_param('videopassword')
        password_required = channel_info.get('BPWD') == 'Y'
        if password_required and password is None:
            raise ExtractorError(
                'This livestream is protected by a password, use the --video-password option',
                expected=True)

        # filter_dict is applied so that 'pwd' is not sent when no password is set
        token_form = filter_dict({
            'bno': broadcast_no,
            'stream_type': 'common',
            'type': 'aid',
            'quality': 'master',
            'pwd': password,
        })
        token_response = self._download_json(
            self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
            'Unable to download access token for stream', data=urlencode_postdata(token_form))
        aid = token_response['CHANNEL']['AID']

        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
        stream_info = self._download_json(
            f'{stream_base_url}/broad_stream_assign.html', broadcast_no,
            note='Downloading metadata for stream',
            errnote='Unable to download metadata for stream',
            query={
                # works: gs_cdn_pc_app, gs_cdn_mobile_web, gs_cdn_pc_web
                'return_type': 'gs_cdn_pc_app',
                'broad_key': f'{broadcast_no}-common-master-hls',
            })

        formats = self._extract_m3u8_formats(
            stream_info['view_url'], broadcast_no, 'mp4', m3u8_id='hls',
            query={'aid': aid}, headers={'Referer': url})

        # Non-fatal: only used as a fallback for title/uploader and for the timestamp
        station_response = self._download_json(
            'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
            'Downloading channel metadata', 'Unable to download channel metadata',
            query={'szBjId': broadcaster_id}, fatal=False)
        station_info = traverse_obj(station_response, {dict}) or {}

        title = channel_info.get('TITLE') or station_info.get('station_title')
        uploader = channel_info.get('BJNICK') or station_info.get('station_name')
        return {
            'id': broadcast_no,
            'title': title,
            'uploader': uploader,
            'uploader_id': broadcaster_id,
            'timestamp': unified_timestamp(station_info.get('broad_start')),
            'formats': formats,
            'is_live': True,
            'http_headers': {'Referer': url},
        }
bd4073c5
HTL
416
417
class AfreecaTVUserIE(InfoExtractor):
    """Playlist extractor for the VOD listings of a bj.afreecatv.com user."""

    IE_NAME = 'afreecatv:user'
    _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
    _TESTS = [{
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - review',
        },
        'playlist_count': 218,
    }, {
        'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
        'info_dict': {
            '_type': 'playlist',
            'id': 'parang1995',
            'title': 'parang1995 - highlight',
        },
        'playlist_count': 997,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - all',
        },
        'playlist_count': 221,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - balloonclip',
        },
        'playlist_count': 0,
    }]
    _PER_PAGE = 60

    def _fetch_page(self, user_id, user_type, page):
        """Yield a url_result for each VOD on one listing page.

        ``page`` is zero-based; the API is queried with ``page + 1``.
        """
        page_number = page + 1
        listing = self._download_json(
            f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
            note=f'Downloading {user_type} video page {page_number}',
            query={'page': page_number, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'})
        for item in listing['data']:
            title_no = item['title_no']
            yield self.url_result(
                f'https://vod.afreecatv.com/player/{title_no}/', AfreecaTVIE, title_no)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's VODs of the given type."""
        mobj = self._match_valid_url(url)
        user_id = mobj.group('id')
        # A URL without a listing type covers every VOD of the user
        user_type = mobj.group('slug_type') or 'all'
        page_fetcher = functools.partial(self._fetch_page, user_id, user_type)
        entries = OnDemandPagedList(page_fetcher, self._PER_PAGE)
        return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')