]>
Commit | Line | Data |
---|---|---|
ebfe352b JMF |
1 | from __future__ import unicode_literals |
2 | ||
825e0984 PH |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
221ce325 | 6 | from ..compat import compat_str |
d397c0b3 S |
7 | from ..utils import ( |
8 | int_or_none, | |
9 | parse_age_limit, | |
10 | ) | |
825e0984 PH |
11 | |
12 | ||
class BreakIE(InfoExtractor):
    """Extractor for video pages on break.com and screenjunkies.com.

    The numeric video id is taken from the URL when present; otherwise it is
    scraped from the video page. Metadata and format information are read
    from the ``embedVars`` JSON object found on the site's ``/embed/<id>``
    page.
    """

    _VALID_URL = r'https?://(?:www\.)?(?P<site>break|screenjunkies)\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
    _TESTS = [{
        'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
        'info_dict': {
            'id': '2468056',
            'ext': 'mp4',
            'title': 'When Girls Act Like D-Bags',
            'age_limit': 13,
        }
    }, {
        'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
        'md5': '5c2b686bec3d43de42bde9ec047536b0',
        'info_dict': {
            'id': '2841915',
            'display_id': 'best-quentin-tarantino-movie',
            'ext': 'mp4',
            'title': 'Best Quentin Tarantino Movie',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3671,
            'age_limit': 13,
            'tags': list,
        },
    }, {
        'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
        'info_dict': {
            'id': '2348808',
            'display_id': 'honest-trailers-the-dark-knight',
            'ext': 'mp4',
            'title': 'Honest Trailers - The Dark Knight',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'age_limit': 10,
            'tags': list,
        },
    }, {
        # requires subscription but worked around
        'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
        'info_dict': {
            'id': '3003285',
            'display_id': 'knocking-dead-ep-1-the-show-so-far',
            'ext': 'mp4',
            'title': 'State of The Dead Recap: Knocking Dead Pilot',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3307,
            'age_limit': 13,
            'tags': list,
        },
    }, {
        'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
        'only_matching': True,
    }]

    # Bitrates assumed for the HDS/HLS manifests when the embed data lists
    # no playable media entries with a bitrate.
    _DEFAULT_BITRATES = (48, 150, 320, 496, 864, 2240, 3264)

    def _real_extract(self, url):
        """Return the info dict (or a YouTube url_result) for ``url``.

        ``url`` is expected to match ``_VALID_URL``. A missing
        ``contentName`` in the embed data raises ``KeyError``.
        """
        site, display_id, video_id = re.match(self._VALID_URL, url).groups()

        if not video_id:
            # The URL carries no numeric id: look it up in the video page.
            webpage = self._download_webpage(url, display_id)
            video_id = self._search_regex(
                (r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'),
                webpage, 'video id')

        webpage = self._download_webpage(
            'http://www.%s.com/embed/%s' % (site, video_id),
            display_id, 'Downloading video embed page')
        embed_vars = self._parse_json(
            self._search_regex(
                r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'),
            display_id)

        # Some entries merely wrap a YouTube video; delegate those.
        youtube_id = embed_vars.get('youtubeId')
        if youtube_id:
            return self.url_result(youtube_id, 'Youtube')

        title = embed_vars['contentName']

        formats = []
        bitrates = []
        # ``or []`` also covers an explicit JSON null, which the plain
        # ``.get('media', [])`` default would let through and crash on.
        for f in embed_vars.get('media') or []:
            if not f.get('uri') or f.get('mediaPurpose') != 'play':
                continue
            bitrate = int_or_none(f.get('bitRate'))
            if bitrate:
                bitrates.append(bitrate)
            formats.append({
                'url': f['uri'],
                'format_id': 'http-%d' % bitrate if bitrate else 'http',
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': bitrate,
                'format': 'mp4',
            })

        if not bitrates:
            # When subscriptionLevel > 0, i.e. plus subscription is required
            # media list will be empty. However, hds and hls uris are still
            # available. We can grab them assuming bitrates to be default.
            bitrates = self._DEFAULT_BITRATES

        auth_token = embed_vars.get('AuthToken')

        def construct_manifest_url(base_url, ext):
            # Comma-joins the base URL, every bitrate and a
            # '_kbps.mp4.<ext>?<auth_token>' suffix into one manifest URL.
            pieces = [base_url]
            pieces.extend([compat_str(b) for b in bitrates])
            pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token))
            return ','.join(pieces)

        if bitrates and auth_token:
            hds_url = embed_vars.get('hdsUri')
            if hds_url:
                formats.extend(self._extract_f4m_formats(
                    construct_manifest_url(hds_url, 'f4m'),
                    display_id, f4m_id='hds', fatal=False))
            hls_url = embed_vars.get('hlsUri')
            if hls_url:
                formats.extend(self._extract_m3u8_formats(
                    construct_manifest_url(hls_url, 'm3u8'),
                    display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        # A missing, null or empty 'tags' value yields an empty list instead
        # of raising AttributeError (null) or producing [''] (missing/empty).
        raw_tags = embed_vars.get('tags')
        tags = raw_tags.split(',') if raw_tags else []

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': embed_vars.get('thumbUri'),
            # ``or None`` turns a zero duration into None
            'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None,
            'age_limit': parse_age_limit(embed_vars.get('audienceRating')),
            'tags': tags,
            'formats': formats,
        }