]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/odnoklassniki.py
[divxstage] Remove extractor
[yt-dlp.git] / youtube_dl / extractor / odnoklassniki.py
CommitLineData
4ffbf778
S
1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
b78f5ec4 5from ..compat import compat_urllib_parse_unquote
4ffbf778
S
6from ..utils import (
7 unified_strdate,
8 int_or_none,
9 qualities,
372744c5 10 unescapeHTML,
4ffbf778
S
11)
12
13
class OdnoklassnikiIE(InfoExtractor):
    # Extractor for ok.ru / odnoklassniki.ru video pages and their
    # web-api "moviePlayer" URLs; the numeric (optionally dashed) tail of
    # the URL is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
    _TESTS = [{
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
        'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0',
        'md5': '9676cf86eff5391d35dea675d224e131',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'http://ok.ru/video/64211978996595-1',
        'md5': '5d7475d428845cd2e13bae6f1a992278',
        'info_dict': {
            'id': '64211978996595-1',
            'ext': 'mp4',
            'title': 'Космическая среда от 26 августа 2015',
            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
            'duration': 440,
            'upload_date': '20150826',
            'uploader_id': '750099571',
            'uploader': 'Алина П',
            'age_limit': 0,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The video page is always fetched from its canonical
        ``http://ok.ru/video/<id>`` address, whichever URL form matched.
        Player options embedded in the page give either inline metadata
        JSON or a ``metadataUrl`` to download it from.  When the metadata
        names the provider ``USER_YOUTUBE`` the result is a
        ``url_transparent`` entry pointing at ``movie['contentId']``;
        otherwise a ``formats`` list is built from ``metadata['videos']``.
        """
        video_id = self._match_id(url)

        page_url = 'http://ok.ru/video/%s' % video_id
        webpage = self._download_webpage(page_url, video_id)

        # The player configuration sits in an HTML-escaped ``data-options``
        # attribute; only the attribute whose JSON mentions this video id
        # is accepted.
        options_pattern = (
            r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id)
        raw_options = self._search_regex(
            options_pattern, webpage, 'player', group='player')
        player = self._parse_json(unescapeHTML(raw_options), video_id)

        flashvars = player['flashvars']

        # Prefer metadata shipped inline as a JSON string; fall back to
        # fetching it from the (URL-quoted) metadataUrl.
        inline_metadata = flashvars.get('metadata')
        if not inline_metadata:
            metadata_url = compat_urllib_parse_unquote(flashvars['metadataUrl'])
            metadata = self._download_json(
                metadata_url, video_id, 'Downloading metadata JSON')
        else:
            metadata = self._parse_json(inline_metadata, video_id)

        movie = metadata['movie']
        title = movie['title']
        thumbnail = movie.get('poster')
        duration = int_or_none(movie.get('duration'))

        author = metadata.get('author', {})
        uploader_id = author.get('id')
        uploader = author.get('name')

        raw_upload_date = self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', default=None)
        upload_date = unified_strdate(raw_upload_date)

        # The adult flag is optional: leave age_limit unset when the meta
        # tag is missing or empty, otherwise map 'true' -> 18, else -> 0.
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if not adult:
            age_limit = None
        elif adult == 'true':
            age_limit = 18
        else:
            age_limit = 0

        like_count = int_or_none(metadata.get('likeCount'))

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': age_limit,
        }

        if metadata.get('provider') == 'USER_YOUTUBE':
            # Embedded YouTube video: hand the content id over as the URL
            # while keeping the fields collected above.
            content_url = movie['contentId']
            info['_type'] = 'url_transparent'
            info['url'] = content_url
            return info

        # Quality names listed from worst to best.
        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))

        formats = []
        for video in metadata['videos']:
            formats.append({
                'url': video['url'],
                'ext': 'mp4',
                'format_id': video['name'],
                'quality': quality(video['name']),
            })
        self._sort_formats(formats)

        info['formats'] = formats
        return info