]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/afreecatv.py
[cleanup] Mark some compat variables for removal (#2173)
[yt-dlp.git] / yt_dlp / extractor / afreecatv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import functools
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import (
9 ExtractorError,
10 OnDemandPagedList,
11 date_from_str,
12 determine_ext,
13 int_or_none,
14 qualities,
15 traverse_obj,
16 unified_strdate,
17 unified_timestamp,
18 update_url_query,
19 url_or_none,
20 urlencode_postdata,
21 xpath_text,
22 )
23
24
class AfreecaTVIE(InfoExtractor):
    """Extractor for AfreecaTV VOD pages (single videos and multi-part uploads)."""

    IE_NAME = 'afreecatv'
    IE_DESC = 'afreecatv.com'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
                            (?:
                                /app/(?:index|read_ucc_bbs)\.cgi|
                                /player/[Pp]layer\.(?:swf|html)
                            )\?.*?\bnTitleNo=|
                            vod\.afreecatv\.com/(PLAYER/STATION|player)/
                        )
                        (?P<id>\d+)
                    '''
    _NETRC_MACHINE = 'afreecatv'
    _TESTS = [{
        'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
        'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
        'info_dict': {
            'id': '36164052',
            'ext': 'mp4',
            'title': '데일리 에이프릴 요정들의 시상식!',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
            'upload_date': '20160503',
        },
        'skip': 'Video is gone',
    }, {
        'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
        'info_dict': {
            'id': '36153164',
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': 'd8b7c174568da61d774ef0203159bf97',
            'info_dict': {
                'id': '36153164_1',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
                'upload_date': '20160502',
            },
        }, {
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
            'info_dict': {
                'id': '36153164_2',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
                'upload_date': '20160502',
            },
        }],
        'skip': 'Video is gone',
    }, {
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
        'info_dict': {
            'id': '18650793',
            'ext': 'mp4',
            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': '윈아디',
            'uploader_id': 'badkids',
            'duration': 107,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
        'info_dict': {
            'id': '10481652',
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
            'duration': 6492,
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': 'd8b7c174568da61d774ef0203159bf97',
            'info_dict': {
                'id': '20160502_c4c62b9d_174361386_1',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
                'uploader': 'dailyapril',
                'uploader_id': 'dailyapril',
                'upload_date': '20160502',
                'duration': 3601,
            },
        }, {
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
            'info_dict': {
                'id': '20160502_39e739bb_174361386_2',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
                'uploader': 'dailyapril',
                'uploader_id': 'dailyapril',
                'upload_date': '20160502',
                'duration': 2891,
            },
        }],
        'params': {
            'skip_download': True,
        },
    }, {
        # non standard key
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
        'info_dict': {
            'id': '20170411_BE689A0E_190960999_1_2_h',
            'ext': 'mp4',
            'title': '혼자사는여자집',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': '♥이슬이',
            'uploader_id': 'dasl8121',
            'upload_date': '20170411',
            'duration': 213,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # PARTIAL_ADULT
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
        'info_dict': {
            'id': '20180327_27901457_202289533_1',
            'ext': 'mp4',
            'title': '[생]빨개요♥ (part 1)',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': '[SA]서아',
            'uploader_id': 'bjdyrksu',
            'upload_date': '20180327',
            'duration': 3601,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['adult content'],
    }, {
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
        'only_matching': True,
    }, {
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
        'only_matching': True,
    }, {
        'url': 'http://vod.afreecatv.com/player/15055030',
        'only_matching': True,
    }]

    @staticmethod
    def parse_video_key(key):
        """Split a file key of the form ``<YYYYMMDD>_<word chars>_<part>``.

        Returns a dict with ``upload_date`` (8-digit string) and ``part``
        (int) when the key matches that shape, otherwise an empty dict.
        """
        video_key = {}
        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
        if m:
            video_key['upload_date'] = m.group('upload_date')
            video_key['part'] = int(m.group('part'))
        return video_key

    def _perform_login(self, username, password):
        """Post the login form and raise ExtractorError unless RESULT is 1."""
        login_form = {
            'szWork': 'login',
            'szType': 'json',
            'szUid': username,
            'szPassword': password,
            'isSaveId': 'false',
            'szScriptVar': 'oLoginRet',
            'szAction': '',
        }

        response = self._download_json(
            'https://login.afreecatv.com/app/LoginAction.php', None,
            'Logging in', data=urlencode_postdata(login_form))

        # Result code -> text shown to the user. Several codes map to a URL
        # rather than a sentence (presumably the page the site would send the
        # user to for that code; verify against the site's login script).
        _ERRORS = {
            -4: 'Your account has been suspended due to a violation of our terms and policies.',
            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
            -6: 'https://login.afreecatv.com/membership/changeMember.php',
            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
            -12: 'https://member.afreecatv.com/app/user_security.php',
            0: 'The username does not exist or you have entered the wrong password.',
            -1: 'The username does not exist or you have entered the wrong password.',
            -3: 'You have entered your username/password incorrectly.',
            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
            -32008: 'You have failed to log in. Please contact our Help Center.',
        }

        result = int_or_none(response.get('RESULT'))
        if result != 1:
            error = _ERRORS.get(result, 'You have failed to log in.')
            raise ExtractorError(
                'Unable to login: %s said: %s' % (self.IE_NAME, error),
                expected=True)

    def _real_extract(self, url):
        """Extract a VOD as a single video or as a ``multi_video`` of parts.

        Raises ExtractorError when the page reports the video as deleted,
        when the info API answers with a non-success flag, or when the info
        XML carries no usable ``<video>`` element.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if re.search(r'alert\(["\']This video has been deleted', webpage):
            raise ExtractorError(
                'Video %s has been deleted' % video_id, expected=True)

        station_id = self._search_regex(
            r'nStationNo\s*=\s*(\d+)', webpage, 'station')
        bbs_id = self._search_regex(
            r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
        video_id = self._search_regex(
            r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)

        # At most two requests: the second one retries with the view mode
        # that the first response's flag asked for.
        partial_view = False
        adult_view = False
        for _ in range(2):
            query = {
                'nTitleNo': video_id,
                'nStationNo': station_id,
                'nBbsNo': bbs_id,
            }
            if partial_view:
                query['partialView'] = 'SKIP_ADULT'
            if adult_view:
                query['adultView'] = 'ADULT_VIEW'
            # The fourth positional argument is the error note; the original
            # code passed video_id there by mistake.
            video_xml = self._download_xml(
                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
                video_id,
                'Downloading video info XML%s'
                % (' (skipping adult)' if partial_view else ''),
                'Unable to download video info XML',
                headers={
                    'Referer': url,
                }, query=query)

            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
            if flag == 'SUCCEED':
                break
            if flag == 'PARTIAL_ADULT':
                self.report_warning(
                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
                    'Only content suitable for all ages will be downloaded. '
                    'Provide account credentials if you wish to download restricted content.')
                partial_view = True
                continue
            elif flag == 'ADULT':
                if not adult_view:
                    adult_view = True
                    continue
                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
            else:
                error = flag
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        else:
            # Both attempts ended in `continue` without a SUCCEED flag
            raise ExtractorError('Unable to download video info')

        # Guard the empty case explicitly: indexing [-1] on an empty list
        # would raise IndexError before the "does not exist" check could run.
        video_elements = video_xml.findall('./track/video')
        video_element = video_elements[-1] if video_elements else None
        if video_element is None or video_element.text is None:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        video_url = video_element.text.strip()

        title = xpath_text(video_xml, './track/title', 'title', fatal=True)

        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
        duration = int_or_none(xpath_text(
            video_xml, './track/duration', 'duration'))
        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')

        # Fields shared by the top-level result and every part entry
        common_entry = {
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }

        info = common_entry.copy()
        info.update({
            'id': video_id,
            'title': title,
            'duration': duration,
        })

        if not video_url:
            # No URL text on <video>: its <file> children are the parts
            entries = []
            file_elements = video_element.findall('./file')
            one = len(file_elements) == 1
            for file_num, file_element in enumerate(file_elements, start=1):
                file_url = url_or_none(file_element.text)
                if not file_url:
                    continue
                key = file_element.get('key', '')
                upload_date = unified_strdate(self._search_regex(
                    r'^(\d{8})_', key, 'upload date', default=None))
                if upload_date is not None:
                    # sometimes the upload date isn't included in the file name
                    # instead, another random ID is, which may parse as a valid
                    # date but be wildly out of a reasonable range
                    parsed_date = date_from_str(upload_date)
                    if parsed_date.year < 2000 or parsed_date.year >= 2100:
                        upload_date = None
                file_duration = int_or_none(file_element.get('duration'))
                format_id = key if key else '%s_%s' % (video_id, file_num)
                if determine_ext(file_url) == 'm3u8':
                    formats = self._extract_m3u8_formats(
                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id='hls',
                        note='Downloading part %d m3u8 information' % file_num)
                else:
                    formats = [{
                        'url': file_url,
                        'format_id': 'http',
                    }]
                if not formats and not self.get_param('ignore_no_formats'):
                    continue
                self._sort_formats(formats)
                file_info = common_entry.copy()
                file_info.update({
                    'id': format_id,
                    'title': title if one else '%s (part %d)' % (title, file_num),
                    'upload_date': upload_date,
                    'duration': file_duration,
                    'formats': formats,
                })
                entries.append(file_info)
            entries_info = info.copy()
            entries_info.update({
                '_type': 'multi_video',
                'entries': entries,
            })
            return entries_info

        # Single video: `info` already holds id, title, uploader, uploader_id,
        # duration and thumbnail, so it is reused instead of being rebuilt.
        if determine_ext(video_url) == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(
                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        else:
            # Anything else is split at the 'mp4:' marker into app URL and
            # play path; fail with a clear message when the marker is absent.
            app, marker, playpath = video_url.partition('mp4:')
            if not marker:
                raise ExtractorError(
                    'Unsupported video URL: %s' % video_url)
            info.update({
                'url': app,
                'ext': 'flv',
                'play_path': 'mp4:' + playpath,
                'rtmp_live': True,  # downloading won't end without this
            })

        return info
384
385
class AfreecaTVLiveIE(AfreecaTVIE):
    """Extractor for AfreecaTV live broadcasts on play.afreeca(tv).com."""

    IE_NAME = 'afreecatv:live'
    _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
    _TESTS = [{
        'url': 'https://play.afreecatv.com/pyh3646/237852185',
        'info_dict': {
            'id': '237852185',
            'ext': 'mp4',
            'title': '【 우루과이 오늘은 무슨일이? 】',
            'uploader': '박진우[JINU]',
            'uploader_id': 'pyh3646',
            'timestamp': 1640661495,
            'is_live': True,
        },
        'skip': 'Livestream has ended',
    }, {
        'url': 'http://play.afreeca.com/pyh3646/237852185',
        'only_matching': True,
    }, {
        'url': 'http://play.afreeca.com/pyh3646',
        'only_matching': True,
    }]

    _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'

    _QUALITIES = ('sd', 'hd', 'hd2k', 'original')

    def _real_extract(self, url):
        """Resolve the broadcast, then build one HLS format per quality.

        Raises ExtractorError when no broadcast number can be determined or
        when the channel reports password protection and no video password
        was supplied.
        """
        mobj = self._match_valid_url(url)
        broadcaster_id = mobj.group('id')
        broadcast_no = mobj.group('bno')
        password = self.get_param('videopassword')

        api_response = self._download_json(
            self._LIVE_API_URL, broadcaster_id, fatal=False,
            data=urlencode_postdata({'bid': broadcaster_id})) or {}
        channel_info = api_response.get('CHANNEL') or {}

        # Values from the API take precedence over what the URL provided
        broadcaster_id = channel_info.get('BJID') or broadcaster_id
        broadcast_no = channel_info.get('BNO') or broadcast_no
        if not broadcast_no:
            raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
        if channel_info.get('BPWD') == 'Y' and password is None:
            raise ExtractorError(
                'This livestream is protected by a password, use the --video-password option',
                expected=True)

        # These depend only on channel_info, so look them up once
        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
        return_type = channel_info.get('CDN', 'gcp_cdn')
        password_param = {} if password is None else {'pwd': password}
        quality_key = qualities(self._QUALITIES)

        formats = []
        for quality_str in self._QUALITIES:
            # Key order matches the original request body; 'pwd' goes last
            token_request = {
                'bno': broadcast_no,
                'stream_type': 'common',
                'type': 'aid',
                'quality': quality_str,
                **password_param,
            }
            aid_response = self._download_json(
                self._LIVE_API_URL, broadcast_no, fatal=False,
                data=urlencode_postdata(token_request),
                note=f'Downloading access token for {quality_str} stream',
                errnote=f'Unable to download access token for {quality_str} stream')
            aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
            if not aid:
                # Without a token this quality cannot be played; skip it
                continue

            stream_info = self._download_json(
                f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
                query={
                    'return_type': return_type,
                    'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
                },
                note=f'Downloading metadata for {quality_str} stream',
                errnote=f'Unable to download metadata for {quality_str} stream') or {}

            view_url = stream_info.get('view_url')
            if not view_url:
                continue
            formats.append({
                'format_id': quality_str,
                'url': update_url_query(view_url, {'aid': aid}),
                'ext': 'mp4',
                'protocol': 'm3u8',
                'quality': quality_key(quality_str),
            })

        self._sort_formats(formats)

        # Station status is a fallback for title/uploader and the only
        # source used for the start timestamp
        station_info = self._download_json(
            'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
            query={'szBjId': broadcaster_id}, fatal=False,
            note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}

        return {
            'id': broadcast_no,
            'title': channel_info.get('TITLE') or station_info.get('station_title'),
            'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
            'uploader_id': broadcaster_id,
            'timestamp': unified_timestamp(station_info.get('broad_start')),
            'formats': formats,
            'is_live': True,
        }
486
487
class AfreecaTVUserIE(InfoExtractor):
    """Extractor for a broadcaster's VOD listing on bj.afreeca(tv).com."""

    IE_NAME = 'afreecatv:user'
    _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
    _TESTS = [{
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - review',
        },
        'playlist_count': 218,
    }, {
        'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
        'info_dict': {
            '_type': 'playlist',
            'id': 'parang1995',
            'title': 'parang1995 - highlight',
        },
        'playlist_count': 997,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - all',
        },
        'playlist_count': 221,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - balloonclip',
        },
        'playlist_count': 0,
    }]
    _PER_PAGE = 60

    def _fetch_page(self, user_id, user_type, page):
        """Yield a url_result for every VOD on one listing page.

        ``page`` is incremented by one before it is sent to the API.
        """
        page_number = page + 1
        listing = self._download_json(
            f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
            query={'page': page_number, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
            note=f'Downloading {user_type} video page {page_number}')
        for item in listing['data']:
            title_no = item['title_no']
            yield self.url_result(
                f'https://vod.afreecatv.com/player/{title_no}/', AfreecaTVIE, title_no)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's VODs."""
        mobj = self._match_valid_url(url)
        user_id = mobj.group('id')
        # A bare ".../vods" URL has no slug; it is listed under 'all'
        user_type = mobj.group('slug_type') or 'all'
        page_fetcher = functools.partial(self._fetch_page, user_id, user_type)
        entries = OnDemandPagedList(page_fetcher, self._PER_PAGE)
        return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')