]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/odnoklassniki.py
[extractor] Standardize `_live_title`
[yt-dlp.git] / yt_dlp / extractor / odnoklassniki.py
CommitLineData
4ffbf778
S
1# coding: utf-8
2from __future__ import unicode_literals
3
416c3ca7
RA
4import re
5
4ffbf778 6from .common import InfoExtractor
c9fd5306 7from ..compat import (
1c35b3da 8 compat_etree_fromstring,
c9fd5306
S
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
4ffbf778 13from ..utils import (
1806a754 14 ExtractorError,
d984a98d 15 float_or_none,
4ffbf778
S
16 unified_strdate,
17 int_or_none,
18 qualities,
372744c5 19 unescapeHTML,
a3474aa5 20 urlencode_postdata,
4ffbf778
S
21)
22
23
class OdnoklassnikiIE(InfoExtractor):
    """Extractor for videos hosted on ok.ru / odnoklassniki.ru.

    The desktop page is tried first; if that fails with an ExtractorError the
    mobile page is used as a fallback (see ``_real_extract``).
    """

    # Accepts desktop, mobile ("m."/"mobile.") and embed/player/live URLs as
    # well as the mobile "dk?...st.mvId=<id>" form.  The id is made of digits
    # and dashes.
    _VALID_URL = r'''(?x)
                https?://
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
                        video(?:embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
                    )
                    (?P<id>[\d-]+)
                '''
    _TESTS = [{
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
        'md5': '0b62089b479e06681abaaca9d204f152',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
            'start_time': 5,
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'http://ok.ru/video/64211978996595-1',
        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
        'info_dict': {
            'id': 'V_VztHT5BzY',
            'ext': 'mp4',
            'title': 'Космическая среда от 26 августа 2015',
            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
            'duration': 440,
            'upload_date': '20150826',
            'uploader_id': 'tvroscosmos',
            'uploader': 'Телестудия Роскосмоса',
            'age_limit': 0,
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
        'url': 'http://ok.ru/video/62036049272859-0',
        'info_dict': {
            'id': '62036049272859-0',
            'ext': 'mp4',
            'title': 'МУЗЫКА ДОЖДЯ .',
            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
            'upload_date': '20120106',
            'uploader_id': '473534735899',
            'uploader': 'МARINA D',
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has not been found',
    }, {
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://m.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://mobile.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'https://www.ok.ru/live/484531969818',
        'only_matching': True,
    }, {
        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
        'only_matching': True,
    }, {
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the src of the first ok.ru ``videoembed`` iframe in *webpage*.

        Both absolute and protocol-relative (``//ok.ru/...``) sources are
        matched.  Returns None when the page contains no such iframe.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _real_extract(self, url):
        """Extract from the desktop page, falling back to the mobile page.

        If both attempts raise ExtractorError, the desktop error is the one
        re-raised.
        """
        try:
            return self._extract_desktop(url)
        except ExtractorError as e:
            try:
                return self._extract_mobile(url)
            except ExtractorError:
                # error message of desktop webpage is in English
                raise e

    def _extract_desktop(self, url):
        """Build the info dict from the desktop player page.

        Returns a ``url_transparent`` result pointing at ``movie['contentId']``
        when the provider is ``USER_YOUTUBE``; otherwise a regular result with
        a ``formats`` list.

        Raises ExtractorError when the page shows an error stub or when no
        title is available for a non-embedded video.
        """
        # Optional "fromTime" query parameter becomes the start offset.
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://ok.ru/video/%s' % video_id, video_id,
            note='Downloading desktop webpage')

        # The page carries its own error text in a "vp_video_stub_txt" element.
        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        # Player configuration is HTML-escaped JSON inside a data-options
        # attribute that mentions this video id.
        player = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
                webpage, 'player', group='player')),
            video_id)

        flashvars = player['flashvars']

        # Metadata is either inlined as a JSON string or has to be fetched
        # from flashvars['metadataUrl'] with a POST request.
        metadata = flashvars.get('metadata')
        if metadata:
            metadata = self._parse_json(metadata, video_id)
        else:
            data = {}
            st_location = flashvars.get('location')
            if st_location:
                data['st.location'] = st_location
            metadata = self._download_json(
                compat_urllib_parse_unquote(flashvars['metadataUrl']),
                video_id, 'Downloading metadata JSON',
                data=urlencode_postdata(data))

        movie = metadata['movie']

        # Some embedded videos may not contain title in movie dict (e.g.
        # http://ok.ru/video/62036049272859-0) thus we allow missing title
        # here and it's going to be extracted later by an extractor that
        # will process the actual embed.
        provider = metadata.get('provider')
        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

        thumbnail = movie.get('poster')
        duration = int_or_none(movie.get('duration'))

        # "or {}" also covers an explicit JSON null for the author field.
        author = metadata.get('author') or {}
        uploader_id = author.get('id')
        uploader = author.get('name')

        upload_date = unified_strdate(self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', default=None))

        # age_limit stays None when the page has no ya:ovs:adult meta tag.
        age_limit = None
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if adult:
            age_limit = 18 if adult == 'true' else 0

        like_count = int_or_none(metadata.get('likeCount'))

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': age_limit,
            'start_time': start_time,
        }

        if provider == 'USER_YOUTUBE':
            # Delegate to whichever extractor handles the embedded content;
            # the fields collected above are kept via url_transparent.
            info.update({
                '_type': 'url_transparent',
                'url': movie['contentId'],
            })
            return info

        # Raise a real error instead of using assert: asserts are stripped
        # under -O and an AssertionError would skip the mobile fallback in
        # _real_extract.
        if not title:
            raise ExtractorError('Unable to extract video title')

        quality = qualities(('4', '0', '1', '2', '3', '5'))

        # Tolerate a missing "videos" list and entries without a URL so that
        # the paid-video / no-formats handling below is reached instead of a
        # KeyError.
        formats = [{
            'url': f['url'],
            'ext': 'mp4',
            'format_id': f.get('name'),
        } for f in metadata.get('videos') or [] if f.get('url')]

        m3u8_url = metadata.get('hlsManifestUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        dash_manifest = metadata.get('metadataEmbedded')
        if dash_manifest:
            formats.extend(self._parse_mpd_formats(
                compat_etree_fromstring(dash_manifest), 'mpd'))

        # Rank the formats gathered so far by the "type" digit found in their
        # URL; the live formats appended below are not ranked this way.
        for fmt in formats:
            fmt_type = self._search_regex(
                r'\btype[/=](\d)', fmt['url'],
                'format type', default=None)
            if fmt_type:
                fmt['quality'] = quality(fmt_type)

        # Live formats
        m3u8_url = metadata.get('hlsMasterPlaylistUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        rtmp_url = metadata.get('rtmpUrl')
        if rtmp_url:
            formats.append({
                'url': rtmp_url,
                'format_id': 'rtmp',
                'ext': 'flv',
            })

        if not formats:
            payment_info = metadata.get('paymentInfo')
            if payment_info:
                self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

        self._sort_formats(formats)

        info['formats'] = formats
        return info

    def _extract_mobile(self, url):
        """Build a minimal info dict from the mobile (m.ok.ru) page.

        Raises ExtractorError when the page shows an error block or when the
        embedded video data carries no source URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://m.ok.ru/video/%s' % video_id, video_id,
            note='Downloading mobile webpage')

        error = self._search_regex(
            r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        # Video details are HTML-escaped JSON in a data-video attribute.
        json_data = self._search_regex(
            r'data-video="(.+?)"', webpage, 'json data')
        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}

        # A format without a URL is unusable downstream; fail here so that
        # _real_extract can report the desktop error instead.
        video_url = json_data.get('videoSrc')
        if not video_url:
            raise ExtractorError('Unable to extract mobile video URL')

        return {
            'id': video_id,
            'title': json_data.get('videoName'),
            'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
            'thumbnail': json_data.get('videoPosterSrc'),
            'formats': [{
                'format_id': 'mobile',
                'url': video_url,
                'ext': 'mp4',
            }]
        }
231 '_type': 'url_transparent',
232 'url': movie['contentId'],
233 })
234 return info
235
8005dc68
S
236 assert title
237 if provider == 'LIVE_TV_APP':
39ca3b5c 238 info['title'] = title
8005dc68 239
1c35b3da 240 quality = qualities(('4', '0', '1', '2', '3', '5'))
88720ed0
S
241
242 formats = [{
243 'url': f['url'],
244 'ext': 'mp4',
245 'format_id': f['name'],
88720ed0 246 } for f in metadata['videos']]
1c35b3da
RA
247
248 m3u8_url = metadata.get('hlsManifestUrl')
249 if m3u8_url:
250 formats.extend(self._extract_m3u8_formats(
251 m3u8_url, video_id, 'mp4', 'm3u8_native',
252 m3u8_id='hls', fatal=False))
253
254 dash_manifest = metadata.get('metadataEmbedded')
255 if dash_manifest:
256 formats.extend(self._parse_mpd_formats(
257 compat_etree_fromstring(dash_manifest), 'mpd'))
258
259 for fmt in formats:
260 fmt_type = self._search_regex(
261 r'\btype[/=](\d)', fmt['url'],
262 'format type', default=None)
263 if fmt_type:
264 fmt['quality'] = quality(fmt_type)
265
8005dc68
S
266 # Live formats
267 m3u8_url = metadata.get('hlsMasterPlaylistUrl')
268 if m3u8_url:
269 formats.extend(self._extract_m3u8_formats(
177877c5 270 m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
8005dc68
S
271 rtmp_url = metadata.get('rtmpUrl')
272 if rtmp_url:
273 formats.append({
274 'url': rtmp_url,
275 'format_id': 'rtmp',
276 'ext': 'flv',
277 })
278
15870747 279 if not formats:
280 payment_info = metadata.get('paymentInfo')
281 if payment_info:
b7da73eb 282 self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
15870747 283
e8dcfa3d 284 self._sort_formats(formats)
88720ed0
S
285
286 info['formats'] = formats
287 return info
d984a98d
THD
288
289 def _extract_mobile(self, url):
290 video_id = self._match_id(url)
291
292 webpage = self._download_webpage(
293 'http://m.ok.ru/video/%s' % video_id, video_id,
294 note='Downloading mobile webpage')
295
296 error = self._search_regex(
297 r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
298 webpage, 'error', default=None)
299 if error:
300 raise ExtractorError(error, expected=True)
301
302 json_data = self._search_regex(
303 r'data-video="(.+?)"', webpage, 'json data')
304 json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
305
306 return {
307 'id': video_id,
308 'title': json_data.get('videoName'),
309 'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
310 'thumbnail': json_data.get('videoPosterSrc'),
311 'formats': [{
312 'format_id': 'mobile',
313 'url': json_data.get('videoSrc'),
314 'ext': 'mp4',
315 }]
316 }