]>
Commit | Line | Data |
---|---|---|
bb198c95 PH |
1 | from __future__ import unicode_literals |
2 | ||
cd8b8302 PH |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | ) | |
9 | ||
10 | ||
class TeamcocoIE(InfoExtractor):
    """Information extractor for videos hosted on teamcoco.com.

    Accepts video URLs with a numeric id (``/video/80187/some-title``) as
    well as URLs carrying only a title slug (``/video/some-title``); in the
    latter case the numeric id is scraped from the downloaded web page.
    """

    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>\d*)?/?(?P<url_title>.*)'
    _TESTS = [
        {
            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
            'file': '80187.mp4',
            'md5': '3f7746aa0dc86de18df7539903d399ea',
            'info_dict': {
                'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
            }
        },
        {
            'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
            'file': '19705.mp4',
            'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
            'info_dict': {
                "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
                "title": "Louis C.K. Interview Pt. 1 11/3/11"
            }
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for the teamcoco.com video at ``url``.

        Downloads the video web page, determines the numeric video id,
        fetches the ``cvp/2.0/<id>.xml`` descriptor and turns each of its
        ``files/file`` entries into a format dict.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``thumbnail`` and ``description``.

        Raises ExtractorError if ``url`` does not match ``_VALID_URL``.
        The download/search helpers called here may raise as well; their
        behaviour is defined outside this class.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

        video_id = mobj.group('video_id')
        # The id group is optional: treat both an empty capture and a
        # non-participating group (None) as "id not present in the URL"
        # and fall back to the id embedded in the page markup.
        if not video_id:
            video_id = self._html_search_regex(
                r'<article class="video" data-id="(\d+?)"',
                webpage, 'video id')

        self.report_extraction(video_id)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_xml(data_url, video_id, 'Downloading data webpage')

        # Known format ids ordered from worst to best; the list index is
        # used as the 'quality' value, unknown ids get -1.
        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
        for filed in data.findall('files/file'):
            if filed.attrib.get('playmode') == 'all':
                # it just duplicates one of the entries
                # NOTE(review): 'break' also drops every entry listed
                # after this one - presumably the duplicate is always
                # last in the feed; confirm, otherwise use 'continue'.
                break
            file_url = filed.text
            if not file_url:
                # An empty <file/> element carries nothing to download.
                continue

            # The attribute may be absent; read it once, tolerantly.
            bitrate = filed.attrib.get('bitrate')

            # Prefer the quality tag embedded in the file name
            # (e.g. '..._1000k.mp4'), else fall back to the bitrate.
            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
            if m_format is not None:
                format_id = m_format.group(1)
            else:
                format_id = bitrate
            tbr = int(bitrate) if bitrate and bitrate.isdigit() else None

            try:
                quality = qualities.index(format_id)
            except ValueError:
                quality = -1
            formats.append({
                'url': file_url,
                'ext': 'mp4',
                'tbr': tbr,
                'format_id': format_id,
                'quality': quality,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
        }