]>
Commit | Line | Data |
---|---|---|
9c5335a0 | 1 | # -*- coding: utf-8 -*- |
bb198c95 PH |
2 | from __future__ import unicode_literals |
3 | ||
314368c8 | 4 | import base64 |
ac6c358c | 5 | import binascii |
cd8b8302 | 6 | import re |
3a105f7b | 7 | import json |
cd8b8302 PH |
8 | |
9 | from .common import InfoExtractor | |
ce9f47de NJ |
10 | from ..utils import ( |
11 | ExtractorError, | |
12 | qualities, | |
f9f3e3df | 13 | determine_ext, |
ce9f47de | 14 | ) |
ac6c358c | 15 | from ..compat import compat_ord |
cd8b8302 PH |
16 | |
17 | ||
class TeamcocoIE(InfoExtractor):
    """Extractor for video pages under ``teamcoco.com/video/``.

    The numeric video id is optional in the URL; when it is absent it is
    looked up in the downloaded page using ``_VIDEO_ID_REGEXES``.
    """

    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
    _TESTS = [
        {
            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
            'md5': '3f7746aa0dc86de18df7539903d399ea',
            'info_dict': {
                'id': '80187',
                'ext': 'mp4',
                'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
                'duration': 504,
                'age_limit': 0,
            }
        }, {
            'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
            'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
            'info_dict': {
                'id': '19705',
                'ext': 'mp4',
                'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
                'title': 'Louis C.K. Interview Pt. 1 11/3/11',
                'duration': 288,
                'age_limit': 0,
            }
        }, {
            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
            'info_dict': {
                'id': '88748',
                'ext': 'mp4',
                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
                'description': 'md5:15501f23f020e793aeca761205e42c24',
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        }, {
            'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
            'info_dict': {
                'id': '89341',
                'ext': 'mp4',
                'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
                'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        }
    ]
    # Patterns tried against the page to recover the numeric video id when
    # the URL itself does not carry one.
    _VIDEO_ID_REGEXES = (
        r'"eVar42"\s*:\s*(\d+)',
        r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
        r'"id_not"\s*:\s*(\d+)'
    )

    def _real_extract(self, url):
        """Build the info dict for a teamcoco.com video page.

        Downloads the page, resolves the video id (from the URL or, failing
        that, from the page), reassembles the base64 fragments found in the
        page's preload script into a JSON document, and converts that
        document's ``files`` entries into formats.

        Raises ExtractorError if the final URL contains ``src=expired`` or
        if no combination of fragments decodes to a non-empty JSON object.
        """
        mobj = re.match(self._VALID_URL, url)

        display_id = mobj.group('display_id')
        webpage, urlh = self._download_webpage_handle(url, display_id)
        if 'src=expired' in urlh.geturl():
            raise ExtractorError('This video is expired.', expected=True)

        video_id = mobj.group('video_id')
        if not video_id:
            video_id = self._html_search_regex(
                self._VIDEO_ID_REGEXES, webpage, 'video id')

        preload_codes = self._html_search_regex(
            r'(function.+)setTimeout\(function\(\)\{playlist',
            webpage, 'preload codes')
        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
        # The quoted string "init" also matches the fragment pattern and is
        # dropped (first occurrence only); presumably it is a script
        # identifier rather than payload. Guarded so that a page without it
        # does not abort with an uncaught ValueError.
        if 'init' in base64_fragments:
            base64_fragments.remove('init')

        def _check_sequence(cur_fragments):
            # Try every rotation of the fragments and return the first one
            # that base64-decodes to data starting with '{' and parses as
            # JSON; return None when no rotation works.
            if not cur_fragments:
                return None
            for i in range(len(cur_fragments)):
                cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
                try:
                    raw_data = base64.b64decode(cur_sequence)
                    # startswith() is safe on an empty decode result, where
                    # indexing raw_data[0] would raise IndexError.
                    if raw_data.startswith(b'{'):
                        return json.loads(raw_data.decode('utf-8'))
                except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
                    continue
            return None

        def _check_data():
            # Drop each contiguous run [i:j) of fragments in turn (including
            # the empty run) and test what remains.
            for i in range(len(base64_fragments) + 1):
                for j in range(i, len(base64_fragments) + 1):
                    candidate = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
                    if candidate:
                        return candidate
            return None

        self.to_screen('Try to compute possible data sequence. This may take some time.')
        data = _check_data()

        if not data:
            raise ExtractorError(
                'Preload information could not be extracted', expected=True)

        formats = []
        get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
        for filed in data['files']:
            file_url = filed['url']
            file_ext = determine_ext(file_url)
            if file_ext == 'm3u8':
                # compat_urllib_parse.urljoin does not work here
                if file_url.startswith('/'):
                    m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + file_url
                else:
                    m3u8_url = file_url
                m3u8_formats = self._extract_m3u8_formats(
                    m3u8_url, video_id, ext='mp4')
                # Skip formats already collected from another playlist.
                for m3u8_format in m3u8_formats:
                    if m3u8_format not in formats:
                        formats.append(m3u8_format)
            elif file_ext == 'f4m':
                # TODO Correct f4m extraction
                continue
            else:
                if file_url.startswith('/mp4:protected/'):
                    # TODO Correct extraction for these files
                    continue
                # Prefer a quality label embedded in the file name
                # (e.g. "1000k.mp4", "720p.mp4"); fall back to the bitrate.
                m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
                if m_format is not None:
                    format_id = m_format.group(1)
                else:
                    format_id = filed['bitrate']
                tbr = (
                    int(filed['bitrate'])
                    if filed['bitrate'].isdigit()
                    else None)

                formats.append({
                    'url': file_url,
                    'ext': 'mp4',
                    'tbr': tbr,
                    'format_id': format_id,
                    'quality': get_quality(format_id),
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': data['title'],
            # "thumb" may be missing or null in the decoded JSON.
            'thumbnail': (data.get('thumb') or {}).get('href'),
            'description': data.get('teaser'),
            'duration': data.get('duration'),
            'age_limit': self._family_friendly_search(webpage),
        }