jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/vevo.py
[vevo] Use _match_id
[yt-dlp.git] / youtube_dl / extractor / vevo.py
CommitLineData
45d7bc2f
JMF
1from __future__ import unicode_literals
2
70d1924f 3import re
70d1924f
JMF
4
5from .common import InfoExtractor
5c2266df 6from ..compat import compat_etree_fromstring
1cc79574 7from ..utils import (
70d1924f 8 ExtractorError,
7d3d06a1 9 int_or_none,
5c2266df 10 sanitized_Request,
70d1924f
JMF
11)
12
88bd97e3 13
class VevoIE(InfoExtractor):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE and MySpaceIE)

    Formats are collected from three sources: the rendition XML embedded
    in the AuthenticateVideo JSON response, the HLS streams API (which
    needs an oauth token) and a SMIL manifest.
    """
    _VALID_URL = r'''(?x)
        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           vevo:)
        (?P<id>[^&?#]+)'''

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
        'info_dict': {
            'id': 'GB1101300280',
            'ext': 'mp4',
            'upload_date': '20130624',
            'uploader': 'Hurts',
            'title': 'Somebody to Die For',
            'duration': 230.12,
            'width': 1920,
            'height': 1080,
            # timestamp and upload_date are often incorrect; seem to change randomly
            'timestamp': int,
        }
    }, {
        'note': 'v3 SMIL format',
        'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
        'info_dict': {
            'id': 'USUV71302923',
            'ext': 'mp4',
            'upload_date': '20140219',
            'uploader': 'Cassadee Pope',
            'title': 'I Wish I Could Break Your Heart',
            'duration': 226.101,
            'age_limit': 0,
            'timestamp': int,
        }
    }, {
        'note': 'Age-limited video',
        'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
        'info_dict': {
            'id': 'USRV81300282',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'Tunnel Vision (Explicit)',
            'uploader': 'Justin Timberlake',
            'upload_date': 're:2013070[34]',
            'timestamp': int,
        },
        'params': {
            'skip_download': 'true',
        }
    }]
    _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'

    def _real_initialize(self):
        """Fetch the oauth token later used for the HLS streams API.

        Stores the token in ``self._oauth_token``; it is None when the auth
        page could not be downloaded or contained no token.

        Raises ExtractorError (expected) when the auth page reports that
        the site is unavailable in the user's region.
        """
        req = sanitized_Request(
            'http://www.vevo.com/auth', data=b'')
        webpage = self._download_webpage(
            req, None,
            note='Retrieving oauth token',
            errnote='Unable to retrieve oauth token',
            fatal=False)
        # A non-fatal download failure yields a falsy value instead of text.
        if not webpage:
            self._oauth_token = None
            return

        if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
            raise ExtractorError(
                '%s said: This page is currently unavailable in your region.' % self.IE_NAME,
                expected=True)

        self._oauth_token = self._search_regex(
            r'access_token":\s*"([^"]+)"',
            webpage, 'access token', fatal=False)

    def _formats_from_json(self, video_info):
        """Build format dicts from the newest HTTP rendition list.

        video_info is the 'video' object of the AuthenticateVideo response.
        Among its 'videoVersions' entries with sourceType 2, the one with
        the highest 'version' is used; its 'data' field is an XML document
        whose <rendition> elements describe one format each.

        Raises ExtractorError when no sourceType 2 entry is present.
        """
        last_version = {'version': -1}
        for version in video_info.get('videoVersions') or []:
            # These are the HTTP downloads, other types are for different manifests
            if version['sourceType'] != 2:
                continue
            if version['version'] > last_version['version']:
                last_version = version
        if last_version['version'] == -1:
            raise ExtractorError('Unable to extract last version of the video')

        renditions = compat_etree_fromstring(last_version['data'])
        formats = []
        # Already sorted from worst to best quality
        for rend in renditions.findall('rendition'):
            attr = rend.attrib
            format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
            formats.append({
                'url': attr['url'],
                'format_id': attr['name'],
                'format_note': format_note,
                # NOTE(review): the differing capitalisation of these two
                # attribute names is kept exactly as in the original code.
                'height': int(attr['frameheight']),
                'width': int(attr['frameWidth']),
            })
        return formats

    def _formats_from_smil(self, smil_xml):
        """Build format dicts from the <video> elements of a SMIL document.

        smil_xml is the SMIL manifest as text. Codec names, bitrates and
        frame size are parsed out of each element's 'src' attribute;
        elements whose 'src' does not follow the expected naming scheme are
        skipped.
        """
        formats = []
        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
        els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
        # Join base and path with exactly one slash, whichever side has it.
        base_url = self._SMIL_BASE_URL.rstrip('/')
        for el in els:
            src = el.attrib['src']
            m = re.match(r'''(?xi)
                (?P<ext>[a-z0-9]+):
                (?P<path>
                    [/a-z0-9]+     # The directory and main part of the URL
                    _(?P<cbr>[0-9]+)k
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.[a-z0-9]+  # File extension
                )''', src)
            if not m:
                continue

            format_url = base_url + '/' + m.group('path').lstrip('/')
            formats.append({
                'url': format_url,
                'format_id': 'SMIL_' + m.group('cbr'),
                'vcodec': m.group('vcodec'),
                'acodec': m.group('acodec'),
                'vbr': int(m.group('vbr')),
                'abr': int(m.group('abr')),
                'ext': m.group('ext'),
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })
        return formats

    def _download_api_formats(self, video_id):
        """Return the HLS formats advertised by the streams API.

        Best effort: returns an empty list (instead of raising) when no
        oauth token is available, when the format list cannot be
        downloaded, or when the response holds no usable stream URL.
        """
        if not self._oauth_token:
            self._downloader.report_warning(
                'No oauth token available, skipping API HLS download')
            return []

        api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
            video_id, self._oauth_token)
        api_data = self._download_json(
            api_url, video_id,
            note='Downloading HLS formats',
            errnote='Failed to download HLS format list', fatal=False)
        # A non-fatal failure gives a falsy value (not necessarily None), and
        # an empty or non-list payload has no first entry to read.
        if not api_data or not isinstance(api_data, list):
            return []

        m3u8_url = api_data[0].get('url')
        if not m3u8_url:
            return []
        return self._extract_m3u8_formats(
            m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
            preference=0)

    def _real_extract(self, url):
        """Extract metadata and all available formats for a Vevo video.

        Raises ExtractorError when the service returns no video object; the
        service's own 'statusMessage' is reported if it supplied one.
        """
        video_id = self._match_id(url)

        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        response = self._download_json(json_url, video_id)
        # .get(): a response without a 'video' key must reach the error
        # reporting below rather than die with a KeyError.
        video_info = response.get('video')

        if not video_info:
            if 'statusMessage' in response:
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, response['statusMessage']),
                    expected=True)
            raise ExtractorError('Unable to extract videos')

        formats = self._formats_from_json(video_info)

        # Only a real boolean is trusted; anything else leaves the limit unknown.
        is_explicit = video_info.get('isExplicit')
        if is_explicit is True:
            age_limit = 18
        elif is_explicit is False:
            age_limit = 0
        else:
            age_limit = None

        # Download via HLS API
        formats.extend(self._download_api_formats(video_id))

        # Download SMIL
        smil_blocks = sorted((
            f for f in video_info.get('videoVersions') or []
            if f['sourceType'] == 13),
            key=lambda f: f['version'])
        # Default location; the base URL already ends with a slash, so strip
        # it to avoid a '//' in the resulting path.
        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
            self._SMIL_BASE_URL.rstrip('/'), video_id, video_id.lower())
        if smil_blocks:
            # Prefer the URL named by the highest-versioned SMIL block.
            smil_url_m = self._search_regex(
                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
                default=None)
            if smil_url_m is not None:
                smil_url = smil_url_m
        smil_xml = self._download_webpage(
            smil_url, video_id, 'Downloading SMIL info', fatal=False)
        if smil_xml:
            formats.extend(self._formats_from_smil(smil_xml))

        self._sort_formats(formats)

        # launchDate looks like '/Date(<milliseconds>)/'; the lookup is
        # non-fatal, so the value may legitimately be missing.
        timestamp_ms = int_or_none(self._search_regex(
            r'/Date\((\d+)\)/',
            video_info.get('launchDate') or '', 'launch date', fatal=False))
        timestamp = timestamp_ms // 1000 if timestamp_ms is not None else None

        main_artists = video_info.get('mainArtists')
        uploader = main_artists[0].get('artistName') if main_artists else None

        return {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'thumbnail': video_info.get('imageUrl'),
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': video_info.get('duration'),
            'age_limit': age_limit,
        }