]>
Commit | Line | Data |
---|---|---|
d21ab292 JMF |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | get_element_by_attribute, | |
6 | clean_html, | |
7 | ) | |
8 | ||
9 | ||
class TechTalksIE(InfoExtractor):
    """Information extractor for talk pages on techtalks.tv.

    A talk page exposes a presenter stream and, on some pages, a second
    stream with the slides.  Both are described relative to the same
    RTMP connection URL found in the page.
    """

    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'

    _TEST = {
        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
        u'playlist': [
            {
                u'file': u'57758.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
                u'file': u'57758-slides.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract the stream description(s) for the talk at *url*.

        Returns a single info dict for the presenter stream when the page
        has no slides link, otherwise a two-element list holding the
        presenter entry followed by the slides entry.
        """
        talk_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, talk_id)

        # Both streams share the connection URL; only the play path differs.
        rtmp_url = self._search_regex(
            r'netConnectionUrl: \'(.*?)\'', webpage, u'rtmp url')
        presenter_path = self._search_regex(
            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
            webpage, u'presenter play path')
        title = clean_html(get_element_by_attribute('class', 'title', webpage))

        video_info = {
            'id': talk_id,
            'title': title,
            'url': rtmp_url,
            'play_path': presenter_path,
            'ext': 'flv',
        }

        slides_match = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if slides_match is None:
            return video_info

        # The slides video: same title/url/ext, its own id and play path.
        slides_info = dict(video_info)
        slides_info.update({
            'id': talk_id + '-slides',
            'play_path': slides_match.group(1),
        })
        return [video_info, slides_info]