]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/ondemandkorea.py
[ie/TrtWorld] Add extractor (#8701)
[yt-dlp.git] / yt_dlp / extractor / ondemandkorea.py
CommitLineData
05adfd88 1import functools
c031b041 2import re
05adfd88 3import uuid
c031b041 4
a4a554a7 5from .common import InfoExtractor
47c914f9
S
6from ..utils import (
7 ExtractorError,
05adfd88 8 OnDemandPagedList,
9 float_or_none,
10 int_or_none,
11 join_nonempty,
12 parse_age_limit,
13 parse_qs,
14 unified_strdate,
15 url_or_none,
47c914f9 16)
05adfd88 17from ..utils.traversal import traverse_obj
594601f5 18
594601f5 19
class OnDemandKoreaIE(InfoExtractor):
    """Extractor for a single OnDemandKorea VOD page addressed by its ``contentId``."""

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
        'md5': 'e2ff77255d989e3135bde0c5889fbce8',
        'info_dict': {
            'id': '686471',
            'ext': 'mp4',
            'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'duration': 5486.955,
            'release_date': '20220924',
            'series': 'Ask Us Anything',
            'series_id': 11790,
            'episode_number': 351,
            'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
        },
    }, {
        'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
        'md5': '57266c720006962be7ff415b24775caa',
        'info_dict': {
            'id': '1595796',
            'ext': 'mp4',
            'title': 'Breakup Probation, A Week: E08',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'duration': 1586.0,
            'release_date': '20231001',
            'series': 'Breakup Probation, A Week',
            'series_id': 22912,
            'episode_number': 8,
            'episode': 'E08',
        },
    }, {
        'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
        'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
        'info_dict': {
            'id': '369531',
            'ext': 'mp4',
            'release_date': '20220519',
            'duration': 7267.0,
            'title': 'The Outlaws: Main Movie',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'age_limit': 18,
        },
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the playback API for the matched content ID and build the info dict.

        Raises ExtractorError (marked as expected) when the API response has
        no ``result`` object; the API-supplied message is used if present.
        """
        video_id = self._match_id(url)

        # The request is non-fatal and 403/404 are passed as expected statuses,
        # so the response body can be inspected for an error message below.
        data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
            headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
        if not traverse_obj(data, ('result', {dict})):
            msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
            raise ExtractorError(msg or 'Got empty response from playback API', expected=True)

        data = data['result']

        def try_geo_bypass(url):
            # Prefer the URL embedded in the `stream_url` query parameter when
            # it is present and valid; otherwise keep the URL unchanged.
            return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url

        formats = []
        for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
            # Probe for a 1080 variant of any 720 manifest; if it yields
            # formats, use those and skip the original 720 manifest.
            mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', m3u8_url)
            if mod_url != m3u8_url:
                mod_format = self._extract_m3u8_formats(
                    mod_url, video_id, note='Checking for higher quality format',
                    errnote='No higher quality format found', fatal=False)
                if mod_format:
                    formats.extend(mod_format)
                    continue
            formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))

        subtitles = {}
        for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
            # `or` (rather than a .get() default) also covers a language that
            # is present but null/empty, which would otherwise become the key.
            subtitles.setdefault(track.get('language') or 'und', []).append({
                'url': track['url'],
                'ext': track.get('codec'),
                'name': track.get('label'),
            })

        def if_series(key=None):
            # Build a getter that returns obj[key] only when the object's
            # 'kind' is 'series'; in every other case it returns None.
            return lambda obj: obj[key] if key and obj['kind'] == 'series' else None

        return {
            'id': video_id,
            'title': join_nonempty(
                ('episode', 'program', 'title'),
                ('episode', 'title'), from_dict=data, delim=': '),
            **traverse_obj(data, {
                'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
                'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
                # The raw duration is divided by 1000 (scale=1000).
                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
                'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
                'series': ('episode', {if_series(key='program')}, 'title'),
                # NOTE(review): the tests above expect a numeric series_id - confirm
                # whether the framework expects a string here.
                'series_id': ('episode', {if_series(key='program')}, 'id'),
                'episode': ('episode', {if_series(key='title')}),
                'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
            }, get_all=False),
            'formats': formats,
            'subtitles': subtitles,
        }
127
128
class OnDemandKoreaProgramIE(InfoExtractor):
    """Extractor for an OnDemandKorea program page, returned as a paged playlist of episodes."""

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
        'info_dict': {
            'id': 'uskn-news',
        },
        'playlist_mincount': 755,
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
        'info_dict': {
            'id': 'the-land',
        },
        'playlist_count': 52,
    }]

    # Number of episodes requested per API page.
    _PAGE_SIZE = 100

    def _fetch_page(self, display_id, page):
        """Yield a url_result for each episode on one page of the program listing.

        `page` is incremented before use, so the value sent to the API is
        `page + 1`.
        """
        page += 1
        page_data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
            headers={'service-name': 'odk'}, query={
                'page': page,
                'page_size': self._PAGE_SIZE,
            }, note=f'Downloading page {page}', expected_status=404)
        # Skip entries without a usable id instead of raising KeyError from
        # inside the lazily evaluated page generator.
        for episode in traverse_obj(page_data, (
                'result', 'results', lambda _, v: v['id'] is not None)):
            yield self.url_result(
                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
                ie=OnDemandKoreaIE, video_title=episode.get('title'))

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched page by page on demand."""
        display_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, display_id), self._PAGE_SIZE)

        return self.playlist_result(entries, display_id)