]>
Commit | Line | Data |
---|---|---|
67b22dd0 JMF |
1 | import re |
2 | import json | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | clean_html, | |
7 | get_element_by_id, | |
8 | ) | |
9 | ||
10 | ||
class TechTVMITIE(InfoExtractor):
    """Extractor for techtv.mit.edu video and embed pages.

    URLs of the form ``/videos/<id>`` and ``/embeds/<id>`` are accepted; in
    both cases the ``/videos/<id>`` page is the one that gets downloaded and
    parsed.
    """

    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``description`` and ``thumbnail``.  The media URL is the entry with
        the highest ``bitrate`` from the page's ``bitrates`` list, prefixed
        with the base URL taken from ``ipadUrl``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page_url = 'http://techtv.mit.edu/videos/%s' % video_id
        raw_page = self._download_webpage(page_url, video_id)

        # Title/description are looked up on a copy with HTML comments
        # removed (presumably so commented-out markup cannot shadow the
        # real elements -- confirm against the live page).  The player
        # configuration is still read from the untouched page.
        html_comment_re = re.compile(u'<!--.*?-->', re.S)
        clean_page = html_comment_re.sub(u'', raw_page)

        base_url = self._search_regex(
            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, u'base url')
        formats_json = self._search_regex(
            r'bitrates: (\[.+?\])', raw_page, u'video formats')
        # Ascending by bitrate, so the last entry is the best quality one.
        by_bitrate = sorted(
            json.loads(formats_json), key=lambda fmt: fmt['bitrate'])

        title = get_element_by_id('edit-title', clean_page)
        description_html = get_element_by_id('edit-description', clean_page)
        description = clean_html(description_html)
        thumbnail = self._search_regex(
            r'playlist:.*?url: \'(.+?)\'', raw_page, u'thumbnail',
            flags=re.DOTALL)

        # The format's url carries an 'mp4:' marker that is not part of the
        # path appended to the base URL.
        best_path = by_bitrate[-1]['url'].replace('mp4:', '')

        info = {
            'id': video_id,
            'title': title,
            'url': base_url + best_path,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }
        return info
51 | ||
52 | ||
class MITIE(TechTVMITIE):
    """Extractor for video.mit.edu watch pages.

    The watch page is only used to locate the embedded player iframe; the
    iframe URL is then handed over to the ``TechTVMIT`` extractor.
    """

    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the iframe embedded in *url*.

        The ``title`` component of the URL is used as the identifier when
        downloading the page and in the progress message.
        """
        page_title = re.match(self._VALID_URL, url).group('title')
        watch_page = self._download_webpage(url, page_title)

        progress_message = '%s: Extracting %s url' % (
            page_title, TechTVMITIE.IE_NAME)
        self.to_screen(progress_message)

        # First iframe's src attribute is taken as the embed URL.
        iframe_src = self._search_regex(
            r'<iframe .*?src="(.+?)"', watch_page, u'embed url')
        return self.url_result(iframe_src, ie='TechTVMIT')