]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/odnoklassniki.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / odnoklassniki.py
CommitLineData
4ffbf778 1from .common import InfoExtractor
c9fd5306 2from ..compat import (
1c35b3da 3 compat_etree_fromstring,
c9fd5306
S
4 compat_parse_qs,
5 compat_urllib_parse_unquote,
6 compat_urllib_parse_urlparse,
7)
4ffbf778 8from ..utils import (
1806a754 9 ExtractorError,
d984a98d 10 float_or_none,
4ffbf778
S
11 unified_strdate,
12 int_or_none,
13 qualities,
372744c5 14 unescapeHTML,
a3474aa5 15 urlencode_postdata,
4ffbf778
S
16)
17
18
class OdnoklassnikiIE(InfoExtractor):
    """Extractor for videos hosted on ok.ru / odnoklassniki.ru.

    The desktop page is tried first; if it raises an ExtractorError the
    mobile page is tried as a fallback (see ``_real_extract``).
    """

    _VALID_URL = r'''(?x)
                https?://
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
                        video(?:embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
                    )
                    (?P<id>[\d-]+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
    _TESTS = [{
        'note': 'Coub embedded',
        'url': 'http://ok.ru/video/1484130554189',
        'info_dict': {
            'id': '1keok9',
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
            'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
            'age_limit': 0,
            'uploader_id': 'nevata.s',
            'like_count': int,
            'duration': 8.08,
            'repost_count': int,
        },
    }, {
        'note': 'vk.com embedded',
        'url': 'https://ok.ru/video/3568183087575',
        'info_dict': {
            'id': '-165101755_456243749',
            'ext': 'mp4',
            'uploader_id': '-165101755',
            'duration': 132,
            'timestamp': 1642869935,
            'upload_date': '20220122',
            'thumbnail': str,
            'title': str,
            'uploader': str,
        },
    }, {
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
        'md5': '0b62089b479e06681abaaca9d204f152',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
            'start_time': 5,
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'http://ok.ru/video/64211978996595-1',
        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
        'info_dict': {
            'id': 'V_VztHT5BzY',
            'ext': 'mp4',
            'title': 'Космическая среда от 26 августа 2015',
            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
            'duration': 440,
            'upload_date': '20150826',
            'uploader_id': 'tvroscosmos',
            'uploader': 'Телестудия Роскосмоса',
            'age_limit': 0,
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
        'url': 'http://ok.ru/video/62036049272859-0',
        'info_dict': {
            'id': '62036049272859-0',
            'ext': 'mp4',
            'title': 'МУЗЫКА ДОЖДЯ .',
            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
            'upload_date': '20120106',
            'uploader_id': '473534735899',
            'uploader': 'МARINA D',
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has not been found',
    }, {
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://m.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://mobile.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'https://www.ok.ru/live/484531969818',
        'only_matching': True,
    }, {
        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
        'only_matching': True,
    }, {
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract from the desktop page, falling back to the mobile page.

        If both attempts raise ExtractorError, the desktop error is the one
        that propagates.
        """
        try:
            return self._extract_desktop(url)
        except ExtractorError as desktop_error:
            try:
                return self._extract_mobile(url)
            except ExtractorError:
                # error message of desktop webpage is in English
                raise desktop_error

    def _extract_desktop(self, url):
        """Build the info dict from the desktop page at ok.ru/video/<id>.

        Returns either a URL result / ``url_transparent`` dict delegating to
        another extractor (external player, OPEN_GRAPH or USER_YOUTUBE
        providers) or a full info dict with ``formats``.

        Raises ExtractorError when the page shows an error stub or when no
        title is available for a natively hosted video.
        """
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://ok.ru/video/%s' % video_id, video_id,
            note='Downloading desktop webpage')

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
                webpage, 'player', group='player')),
            video_id)

        # embedded external player
        if player.get('isExternalPlayer') and player.get('url'):
            return self.url_result(player['url'])

        flashvars = player['flashvars']

        # Metadata is either inlined as a JSON string or has to be fetched
        # from metadataUrl with a POST request.
        metadata = flashvars.get('metadata')
        if metadata:
            metadata = self._parse_json(metadata, video_id)
        else:
            data = {}
            st_location = flashvars.get('location')
            if st_location:
                data['st.location'] = st_location
            metadata = self._download_json(
                compat_urllib_parse_unquote(flashvars['metadataUrl']),
                video_id, 'Downloading metadata JSON',
                data=urlencode_postdata(data))

        movie = metadata['movie']

        # Some embedded videos may not contain title in movie dict (e.g.
        # http://ok.ru/video/62036049272859-0) thus we allow missing title
        # here and it's going to be extracted later by an extractor that
        # will process the actual embed.
        provider = metadata.get('provider')
        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

        thumbnail = movie.get('poster')
        duration = int_or_none(movie.get('duration'))

        # `or {}` also covers an explicit JSON null for "author"
        author = metadata.get('author') or {}
        uploader_id = author.get('id')
        uploader = author.get('name')

        upload_date = unified_strdate(self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', default=None))

        age_limit = None
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if adult:
            age_limit = 18 if adult == 'true' else 0

        like_count = int_or_none(metadata.get('likeCount'))

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': age_limit,
            'start_time': start_time,
        }

        # pladform (OPEN_GRAPH) and YouTube embeds are handed over to the
        # extractor matching movie['contentId']
        if provider in ('OPEN_GRAPH', 'USER_YOUTUBE'):
            info.update({
                '_type': 'url_transparent',
                'url': movie['contentId'],
            })
            return info

        # Explicit check instead of `assert`, which is stripped under -O
        if not title:
            raise ExtractorError('Unable to extract video title')

        quality = qualities(('4', '0', '1', '2', '3', '5'))

        # "videos" may be absent (e.g. paid videos); treat that as no
        # direct formats so the paid-video check below is reachable
        formats = [{
            'url': f['url'],
            'ext': 'mp4',
            'format_id': f.get('name'),
        } for f in metadata.get('videos') or [] if f.get('url')]

        m3u8_url = metadata.get('hlsManifestUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        dash_manifest = metadata.get('metadataEmbedded')
        if dash_manifest:
            formats.extend(self._parse_mpd_formats(
                compat_etree_fromstring(dash_manifest), 'mpd'))

        # Rank the formats collected so far by the "type" digit found in
        # their URL; live formats appended below are not ranked this way.
        for fmt in formats:
            fmt_type = self._search_regex(
                r'\btype[/=](\d)', fmt['url'],
                'format type', default=None)
            if fmt_type:
                fmt['quality'] = quality(fmt_type)

        # Live formats
        m3u8_url = metadata.get('hlsMasterPlaylistUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        rtmp_url = metadata.get('rtmpUrl')
        if rtmp_url:
            formats.append({
                'url': rtmp_url,
                'format_id': 'rtmp',
                'ext': 'flv',
            })

        if not formats:
            payment_info = metadata.get('paymentInfo')
            if payment_info:
                self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

        self._sort_formats(formats)

        info['formats'] = formats
        return info

    def _extract_mobile(self, url):
        """Build the info dict from the mobile page at m.ok.ru/video/<id>.

        Raises ExtractorError when the page shows an error block or when the
        embedded JSON carries no video URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://m.ok.ru/video/%s' % video_id, video_id,
            note='Downloading mobile webpage')

        error = self._search_regex(
            r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        json_data = self._search_regex(
            r'data-video="(.+?)"', webpage, 'json data')
        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}

        # Without a source URL the single format would carry url=None;
        # fail here so the caller reports the desktop error instead.
        video_src = json_data.get('videoSrc')
        if not video_src:
            raise ExtractorError('Unable to extract mobile video URL')

        return {
            'id': video_id,
            'title': json_data.get('videoName'),
            'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
            'thumbnail': json_data.get('videoPosterSrc'),
            'formats': [{
                'format_id': 'mobile',
                'url': video_src,
                'ext': 'mp4',
            }]
        }