jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mbn.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / mbn.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 unified_strdate,
7 url_or_none,
8 )
9 from ..utils.traversal import traverse_obj
10
11
class MBNIE(InfoExtractor):
    """Extractor for VOD preview pages on mbn.co.kr."""

    IE_DESC = 'mbn.co.kr (매일방송)'
    # Accepts both ".../preview/..." and ".../previewlist/..." pages; the last
    # numeric path segment is captured as the content id.
    _VALID_URL = r'https?://(?:www\.)?mbn\.co\.kr/vod/programContents/preview(?:list)?/\d+/\d+/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://mbn.co.kr/vod/programContents/previewlist/861/5433/1276155',
            'md5': '85e1694e5b247c04d1386b7e3c90fd76',
            'info_dict': {
                'id': '1276155',
                'ext': 'mp4',
                'title': '결국 사로잡힌 권유리, 그녀를 목숨 걸고 구하려는 정일우!',
                'duration': 3891,
                'release_date': '20210703',
                'thumbnail': 'http://img.vod.mbn.co.kr/mbnvod2img/861/2021/07/03/20210703230811_20_861_1276155_360_7_0.jpg',
                'series': '보쌈 - 운명을 훔치다',
                'episode': 'Episode 19',
                'episode_number': 19,
            },
        },
        {
            'url': 'https://www.mbn.co.kr/vod/programContents/previewlist/835/5294/1084744',
            'md5': 'fc65d3aac85e85e0b5056f4ef99cde4a',
            'info_dict': {
                'id': '1084744',
                'ext': 'mp4',
                'title': '김정은♥최원영, 제자리를 찾은 위험한 부부! "결혼은 투쟁이면서, 어려운 방식이야.."',
                'duration': 93,
                'release_date': '20201124',
                'thumbnail': 'http://img.vod.mbn.co.kr/mbnvod2img/835/2020/11/25/20201125000221_21_835_1084744_360_7_0.jpg',
                'series': '나의 위험한 아내',
            },
        },
        {
            'url': 'https://www.mbn.co.kr/vod/programContents/preview/952/6088/1054797?next=1',
            'md5': 'c711103c72aeac8323a5cf1751f10097',
            'info_dict': {
                'id': '1054797',
                'ext': 'mp4',
                'title': '[2차 티저] MBN 주말 미니시리즈 <완벽한 결혼의 정석> l 그녀에게 주어진 두 번째 인생',
                'duration': 65,
                'release_date': '20231028',
                'thumbnail': 'http://img.vod.mbn.co.kr/vod2/952/2023/09/11/20230911130223_22_952_1054797_1080_7.jpg',
                'series': '완벽한 결혼의 정석',
            },
        },
    ]

    def _extract_formats(self, media_info, content_id):
        """Collect HLS formats for every stream URL listed in *media_info*.

        Each URL found under ``movie_list[*].url`` is rewritten to point at
        ``manifest.m3u8``, passed to the stream-auth endpoint, and the body of
        that response is used as the m3u8 URL to extract formats from.
        """
        collected = []
        listed_urls = traverse_obj(media_info, ('movie_list', ..., 'url', {url_or_none}))
        for listed_url in listed_urls:
            # Swap a chunklist/playlist file name (with an optional "_pd<N>"
            # suffix) for "manifest.m3u8" -- presumably the master playlist;
            # TODO confirm against the site.
            stream_url = re.sub(
                r'/(?:chunk|play)list(?:_pd\d+)?\.m3u8', '/manifest.m3u8', listed_url)
            auth_response = self._download_webpage(
                f'https://www.mbn.co.kr/player/mbnStreamAuth_new_vod.mbn?vod_url={stream_url}',
                content_id, note='Fetching authenticated m3u8 url')
            # The response body is used only if it looks like a URL.
            final_url = url_or_none(auth_response)

            collected.extend(
                self._extract_m3u8_formats(final_url, content_id, fatal=False))
        return collected

    def _real_extract(self, url):
        """Return the info dict (id, metadata fields and formats) for *url*."""
        content_id = self._match_id(url)
        webpage = self._download_webpage(url, content_id)

        # The content class code is scraped from the page; '20' is used when
        # the pattern yields nothing.
        scraped_cls_cd = self._search_regex(
            r'"\?content_cls_cd=(\d+)&', webpage, 'content cls cd', fatal=False)
        player_query = {
            'content_cls_cd': scraped_cls_cd or '20',
            'content_id': content_id,
            'relay_type': '1',
        }
        media_info = self._download_json(
            'https://www.mbn.co.kr/player/mbnVodPlayer_2020.mbn', content_id,
            note='Fetching playback data', query=player_query)

        formats = self._extract_formats(media_info, content_id)

        metadata = traverse_obj(media_info, {
            'title': ('movie_title', {str}),
            'duration': ('play_sec', {int_or_none}),
            # Dots are stripped from the broadcast date before it is parsed.
            'release_date': ('bcast_date', {lambda date: date.replace('.', '')}, {unified_strdate}),
            'thumbnail': ('movie_start_Img', {url_or_none}),
            'series': ('prog_nm', {str}),
            'episode_number': ('ad_contentnumber', {int_or_none}),
        })

        return {
            'id': content_id,
            **metadata,
            'formats': formats,
        }