]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/sbscokr.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / sbscokr.py
1 from .common import InfoExtractor
2 from ..utils import (
3 clean_html,
4 int_or_none,
5 parse_iso8601,
6 parse_resolution,
7 url_or_none,
8 )
9 from ..utils.traversal import traverse_obj
10
11
class SBSCoKrIE(InfoExtractor):
    """Extractor for single videos on allvod.sbs.co.kr and programs.sbs.co.kr.

    The video ID taken from the URL is passed to the ``sbs_vodall`` endpoint
    on api.play.sbs.co.kr, which returns both the metadata and the media
    sources.
    """
    IE_NAME = 'sbs.co.kr'
    _VALID_URL = [r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Package)?EndPage\.do\?(?:[^#]+&)?mdaId=(?P<id>\d+)',
                  r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv|kth)/[a-z0-9]+/(?:vod|clip|movie)/\d+/(?P<id>(?:OC)?\d+)']

    _TESTS = [{
        'url': 'https://programs.sbs.co.kr/enter/dongsang2/clip/52007/OC467706746?div=main_pop_clip',
        'md5': 'c3f6d45e1fb5682039d94cda23c36f19',
        'info_dict': {
            'id': 'OC467706746',
            'ext': 'mp4',
            'title': '‘아슬아슬’ 박군♥한영의 새 집 인테리어 대첩♨',
            'description': 'md5:6a71eb1979ee4a94ea380310068ccab4',
            'thumbnail': 'https://img2.sbs.co.kr/ops_clip_img/2023/10/10/34c4c0f9-a9a5-4ff6-a92e-9bb4b5f6fa65915w1280.jpg',
            'release_timestamp': 1696889400,
            'release_date': '20231009',
            'view_count': int,
            'like_count': int,
            'duration': 238,
            'age_limit': 15,
            'series': '동상이몽2_너는 내 운명',
            'episode': '레이디제인, ‘혼전임신설’ ‘3개월’ 앞당긴 결혼식 비하인드 스토리 최초 공개!',
            'episode_number': 311,
        },
    }, {
        'url': 'https://allvod.sbs.co.kr/allvod/vodPackageEndPage.do?mdaId=22000489324&combiId=PA000000284&packageType=A&isFreeYN=',
        'md5': 'bf46b2e89fda7ae7de01f5743cef7236',
        'info_dict': {
            'id': '22000489324',
            'ext': 'mp4',
            'title': '[다시보기] 트롤리 15회',
            'description': 'md5:0e55d74bef1ac55c61ae90c73ac485f4',
            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/02/14/arC1676333794938-1280-720.jpg',
            'release_timestamp': 1676325600,
            'release_date': '20230213',
            'view_count': int,
            'like_count': int,
            'duration': 5931,
            'age_limit': 15,
            'series': '트롤리',
            'episode': '이거 다 거짓말이야',
            'episode_number': 15,
        },
    }, {
        'url': 'https://programs.sbs.co.kr/enter/fourman/vod/69625/22000508948',
        'md5': '41e8ae4cc6c8424f4e4d76661a4becbf',
        'info_dict': {
            'id': '22000508948',
            'ext': 'mp4',
            'title': '[다시보기] 신발 벗고 돌싱포맨 104회',
            'description': 'md5:c6a247383c4dd661e4b956bf4d3b586e',
            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/08/30/2vb1693355446261-1280-720.jpg',
            'release_timestamp': 1693342800,
            'release_date': '20230829',
            'view_count': int,
            'like_count': int,
            'duration': 7036,
            'age_limit': 15,
            'series': '신발 벗고 돌싱포맨',
            'episode': '돌싱포맨 저격수들 등장!',
            'episode_number': 104,
        },
    }]

    def _call_api(self, video_id, rscuse=''):
        """Download the ``sbs_vodall`` JSON document for *video_id*.

        *rscuse* is forwarded verbatim as the ``rscuse`` query parameter and
        is also shown in the progress note; the empty default requests the
        API's default media source.
        """
        return self._download_json(
            f'https://api.play.sbs.co.kr/1.0/sbs_vodall/{video_id}', video_id,
            note=f'Downloading m3u8 information {rscuse}',
            query={
                'platform': 'pcweb',
                'protocol': 'download',
                'absolute_show': 'Y',
                'service': 'program',
                'ssl': 'Y',
                'rscuse': rscuse,
            })

    def _real_extract(self, url):
        """Return the info dict (metadata, formats, subtitles) for *url*."""
        video_id = self._match_id(url)

        details = self._call_api(video_id)
        # Default media source of the first response; may be empty.
        source = traverse_obj(details, ('vod', 'source', 'mediasource', {dict})) or {}

        formats = []
        # Keep list entries that carry either a direct URL or an rscuse code;
        # when none qualify, fall back to the default source alone.
        for stream in traverse_obj(details, (
            'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse'],
        ), default=[source]):
            if not stream.get('mediaurl'):
                rscuse = stream.get('mediarscuse')
                if not rscuse:
                    # Reached via the `default=[source]` fallback when the
                    # response has no usable media source at all: there is
                    # nothing to request, so skip instead of raising KeyError.
                    continue
                # The entry only names a variant; ask the API for its URL.
                new_source = traverse_obj(
                    self._call_api(video_id, rscuse=rscuse),
                    ('vod', 'source', 'mediasource', {dict})) or {}
                # Skip when the API answered with the default source again
                # or did not provide a URL.
                if new_source.get('mediarscuse') == source.get('mediarscuse') or not new_source.get('mediaurl'):
                    continue
                stream = new_source
            formats.append({
                'url': stream['mediaurl'],
                'format_id': stream.get('mediarscuse'),
                'format_note': stream.get('medianame'),
                **parse_resolution(stream.get('quality')),
                # NOTE(review): presumably a larger rscuse code means higher quality - confirm
                'preference': int_or_none(stream.get('mediarscuse')),
            })

        caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none}))

        return {
            'id': video_id,
            **traverse_obj(details, ('vod', {
                'title': ('info', 'title'),
                'duration': ('info', 'duration', {int_or_none}),
                'view_count': ('info', 'viewcount', {int_or_none}),
                'like_count': ('info', 'likecount', {int_or_none}),
                'description': ('info', 'synopsis', {clean_html}),
                'episode': ('info', 'content', ('contenttitle', 'title')),
                'episode_number': ('info', 'content', 'number', {int_or_none}),
                'series': ('info', 'program', 'programtitle'),
                'age_limit': ('info', 'targetage', {int_or_none}),
                'release_timestamp': ('info', 'broaddate', {parse_iso8601}),
                'thumbnail': ('source', 'thumbnail', 'origin', {url_or_none}),
            }), get_all=False),
            'formats': formats,
            'subtitles': {'ko': [{'url': caption_url}]} if caption_url else None,
        }
134
135
class SBSCoKrAllvodProgramIE(InfoExtractor):
    """Playlist extractor for allvod.sbs.co.kr program detail pages.

    Every ``mdaId`` found in the program's detail listing becomes one
    playlist entry that is handed over to ``SBSCoKrIE``.
    """
    IE_NAME = 'sbs.co.kr:allvod_program'
    _VALID_URL = r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Free)?ProgramDetail\.do\?(?:[^#]+&)?pgmId=(?P<id>P?\d+)'

    _TESTS = [{
        'url': 'https://allvod.sbs.co.kr/allvod/vodFreeProgramDetail.do?type=legend&pgmId=22000010159&listOrder=vodCntAsc',
        'info_dict': {
            '_type': 'playlist',
            'id': '22000010159',
        },
        'playlist_count': 18,
    }, {
        'url': 'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId=P460810577',
        'info_dict': {
            '_type': 'playlist',
            'id': 'P460810577',
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        """Build a playlist of all videos listed for the program in *url*."""
        program_id = self._match_id(url)

        # A single request with a large `currentCount` is issued rather than
        # paging through the listing.
        listing = self._download_json(
            'https://allvod.sbs.co.kr/allvod/vodProgramDetail/vodProgramDetailAjax.do',
            program_id, note='Downloading program details',
            query={'pgmId': program_id, 'currentCount': '10000'})

        media_ids = traverse_obj(listing, ('list', ..., 'mdaId'))
        entries = [
            self.url_result(
                f'https://allvod.sbs.co.kr/allvod/vodEndPage.do?mdaId={media_id}', SBSCoKrIE)
            for media_id in media_ids
        ]
        return self.playlist_result(entries, program_id)
170
171
class SBSCoKrProgramsVodIE(InfoExtractor):
    """Extractor for programs.sbs.co.kr ``/vods`` listing pages.

    The program slug from the URL is looked up in the program menu API to
    obtain the program ID, and the result is delegated to
    ``SBSCoKrAllvodProgramIE``.
    """
    IE_NAME = 'sbs.co.kr:programs_vod'
    _VALID_URL = r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv)/(?P<id>[a-z0-9]+)/vods'

    _TESTS = [{
        'url': 'https://programs.sbs.co.kr/culture/morningwide/vods/65007',
        'info_dict': {
            '_type': 'playlist',
            'id': '00000210215',
        },
        'playlist_mincount': 9782,
    }, {
        'url': 'https://programs.sbs.co.kr/enter/dongsang2/vods/52006',
        'info_dict': {
            '_type': 'playlist',
            'id': '22000010476',
        },
        'playlist_mincount': 312,
    }]

    def _real_extract(self, url):
        """Resolve the program slug in *url* and hand off to the allvod program extractor."""
        slug = self._match_id(url)

        menu = self._download_json(
            f'https://static.apis.sbs.co.kr/program-api/1.0/menu/{slug}', slug,
            note='Downloading program menu data')
        # The program ID is mandatory; a missing key raises here.
        program_id = menu['program']['programid']

        allvod_url = f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}'
        return self.url_result(allvod_url, SBSCoKrAllvodProgramIE)
198
199 return self.url_result(
200 f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}', SBSCoKrAllvodProgramIE)