]>
Commit | Line | Data |
---|---|---|
67b22dd0 JMF |
1 | import re |
2 | import json | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | clean_html, | |
7 | get_element_by_id, | |
8 | ) | |
9 | ||
10 | ||
class TechTVMITIE(InfoExtractor):
    """Information extractor for videos hosted on techtv.mit.edu.

    Accepts both ``/videos/<id>`` and ``/embeds/<id>`` URLs; only the
    numeric id is used, and both the video page and the embed page are
    fetched for every extraction.
    """
    IE_NAME = u'techtv.mit.edu'
    # The path segment is a plain alternation, so it is non-capturing; the
    # numeric id is the only (named) group callers need.
    _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for a techtv.mit.edu video.

        The media base URL, the list of available bitrates and the
        thumbnail are read from the embed page; the title and description
        are read from the elements with ids ``edit-title`` and
        ``edit-description`` on the video page.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``description`` and ``thumbnail``. ``url`` points at the entry with
        the highest ``bitrate`` value.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
        embed_page = self._download_webpage(
            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
            note=u'Downloading embed page')

        # The dot in "cloudfront.net" is escaped so that it only matches a
        # literal '.', not an arbitrary character.
        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront\.net/)',
                                      embed_page, u'base url')
        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
                                          u'video formats')
        formats = json.loads(formats_json)
        # Ascending (stable) sort by bitrate: the best quality ends up last.
        formats = sorted(formats, key=lambda f: f['bitrate'])
        best_format = formats[-1]

        title = get_element_by_id('edit-title', webpage)
        description = clean_html(
            get_element_by_id('edit-description', webpage))
        # DOTALL because the first "url:" entry may be on a later line than
        # the "playlist:" key.
        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
                                       embed_page, u'thumbnail',
                                       flags=re.DOTALL)

        return {
            'id': video_id,
            'title': title,
            # Format urls carry an 'mp4:' marker that is not part of the
            # path below the base URL, so it is removed.
            'url': base_url + best_format['url'].replace('mp4:', ''),
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }
53 | ||
54 | ||
class MITIE(TechTVMITIE):
    """Information extractor for video.mit.edu watch pages.

    The watch page itself is not parsed for media: the ``src`` of an
    ``<iframe>`` found on it is handed over to the ``TechTVMIT`` extractor.
    """
    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Resolve a watch page to the embedded player URL.

        Returns the result of ``url_result`` for the iframe source, with
        the ``TechTVMIT`` extractor named as the handler.
        """
        # The slug from the URL is used as the identifier in log output.
        slug = re.match(self._VALID_URL, url).group('title')
        watch_page = self._download_webpage(url, slug)
        self.to_screen(
            '%s: Extracting %s url' % (slug, TechTVMITIE.IE_NAME))
        iframe_src = self._search_regex(
            r'<iframe .*?src="(.+?)"', watch_page, u'embed url')
        return self.url_result(iframe_src, ie='TechTVMIT')