]>
Commit | Line | Data |
---|---|---|
bb198c95 PH |
1 | from __future__ import unicode_literals |
2 | ||
cd8b8302 PH |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
cd8b8302 PH |
6 | |
7 | ||
class TeamcocoIE(InfoExtractor):
    """Extractor for video pages on teamcoco.com.

    Matches URLs of the form ``/video/<numeric id>/<slug>`` as well as
    slug-only URLs (``/video/<slug>``).  For slug-only URLs the numeric id
    is read from the ``data-node-id`` attribute in the downloaded page.
    """

    # The numeric id is optional; whatever follows it becomes the display id.
    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
    _TESTS = [
        {
            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
            'file': '80187.mp4',
            'md5': '3f7746aa0dc86de18df7539903d399ea',
            'info_dict': {
                'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
            }
        }, {
            'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
            'file': '19705.mp4',
            'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
            'info_dict': {
                "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
                "title": "Louis C.K. Interview Pt. 1 11/3/11"
            }
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page itself, then the ``cvp/2.0/<id>.xml`` document
        for the video, and turns each ``files/file`` element of that
        document into one format entry.

        Returns a dict with the keys ``id``, ``display_id``, ``formats``,
        ``title``, ``thumbnail`` and ``description``.
        """
        mobj = re.match(self._VALID_URL, url)

        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)

        video_id = mobj.group("video_id")
        if not video_id:
            # Slug-only URL: the numeric id has to come from the page markup.
            video_id = self._html_search_regex(
                r'data-node-id="(\d+?)"',
                webpage, 'video id')

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_xml(
            data_url, display_id, 'Downloading data webpage')

        # Known format ids, worst to best; the list position is the quality.
        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
        for filed in data.findall('files/file'):
            if filed.attrib.get('playmode') == 'all':
                # it just duplicates one of the entries
                break

            file_url = filed.text
            if not file_url:
                # ElementTree reports an element without text as None;
                # there is no URL to offer, so skip it instead of crashing.
                continue

            # Read the attribute once and tolerate its absence: a missing
            # bitrate must not abort extraction of the remaining formats.
            bitrate = filed.attrib.get('bitrate')

            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
            if m_format is not None:
                format_id = m_format.group(1)
            else:
                format_id = bitrate

            if bitrate is not None and bitrate.isdigit():
                tbr = int(bitrate)
            else:
                tbr = None

            try:
                quality = qualities.index(format_id)
            except ValueError:
                # Unknown format ids rank below every known one.
                quality = -1

            formats.append({
                'url': file_url,
                'ext': 'mp4',
                'tbr': tbr,
                'format_id': format_id,
                'quality': quality,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
        }