]>
Commit | Line | Data |
---|---|---|
fc287219 PH |
1 | import re |
2 | import socket | |
3 | import xml.etree.ElementTree | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_http_client, | |
8 | compat_str, | |
9 | compat_urllib_error, | |
10 | compat_urllib_request, | |
11 | ||
12 | ExtractorError, | |
13 | ) | |
14 | ||
15 | ||
class MTVIE(InfoExtractor):
    """Information extractor for video pages on mtv.com.

    Matches URLs of the form ``mtv.com/videos/<slug>/<numeric id>/<slug>``
    (scheme and ``www.`` prefix optional) and resolves them to a single
    downloadable rendition via the site's ``mediaGen`` metadata endpoint.
    """

    _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'

    def _real_extract(self, url):
        """Extract the video information for an mtv.com video page.

        Args:
            url: Page URL; must match ``_VALID_URL``.  When the scheme is
                missing, ``http://`` is prepended before downloading.

        Returns:
            A one-element list holding the info dictionary (``id``, ``url``,
            ``uploader``, ``upload_date``, ``title``, ``ext``, ``format``).

        Raises:
            ExtractorError: if the URL is invalid, the page lacks the
                parameters needed to query the metadata endpoint, the
                metadata cannot be downloaded, or no usable rendition is
                described by it.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        if not mobj.group('proto'):
            url = 'http://' + url
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<meta name="mtv_an" content="([^"]+)"/>',
            webpage, u'title')
        # `performer` used to be referenced without ever being assigned,
        # which raised NameError on every extraction.
        # NOTE(review): presumably "mtv_an" carries the artist name, so it is
        # reused for the uploader field -- confirm against a live page.
        performer = video_title

        # Both values are required to build the mediaGen URL below.
        # fatal=False presumably makes the helpers return None on a miss
        # (TODO confirm); the explicit check turns that into a clear
        # ExtractorError instead of a TypeError on string concatenation.
        mtvn_uri = self._html_search_regex(
            r'<meta name="mtvn_uri" content="([^"]+)"/>',
            webpage, u'mtvn_uri', fatal=False)
        content_id = self._search_regex(
            r'MTVN\.Player\.defaultPlaylistId = ([0-9]+);',
            webpage, u'content id', fatal=False)
        if not mtvn_uri or not content_id:
            raise ExtractorError(
                u'Unable to extract the parameters required to request '
                u'the video metadata (mtvn_uri, content id)')

        videogen_url = (
            'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri
            + '&id=' + content_id
            + '&vid=' + video_id
            + '&ref=www.mtvn.com&viewUri=' + mtvn_uri)
        self.report_extraction(video_id)
        request = compat_urllib_request.Request(videogen_url)
        try:
            response = compat_urllib_request.urlopen(request)
            try:
                metadata_xml = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metadata_xml)
        renditions = mdoc.findall('.//rendition')
        if not renditions:
            raise ExtractorError(u'No renditions found in the video metadata')

        # For now, always pick the last rendition listed.
        # NOTE(review): presumably renditions are ordered by ascending
        # quality, making the last one the best -- confirm.
        rendition = renditions[-1]

        try:
            _, _, ext = rendition.attrib['type'].partition('/')
            video_format = (
                ext + '-' + rendition.attrib['width']
                + 'x' + rendition.attrib['height']
                + '_' + rendition.attrib['bitrate'])
        except KeyError:
            raise ExtractorError(u'Invalid rendition field.')

        # find() returns None when the element is absent; report that the
        # same way as a missing attribute rather than via AttributeError.
        src = rendition.find('./src')
        if src is None or not src.text:
            raise ExtractorError(u'Invalid rendition field.')
        video_url = src.text

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': performer,
            'upload_date': None,
            'title': video_title,
            'ext': ext,
            'format': video_format,
        }

        return [info]