]>
Commit | Line | Data |
---|---|---|
85e787f5 S |
1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
fd3a1f3d | 4 | from .common import InfoExtractor |
dabe1570 | 5 | from .cbs import CBSIE |
8b809a07 | 6 | from ..utils import ( |
7 | parse_duration, | |
8b809a07 | 8 | ) |
85e787f5 S |
9 | |
10 | ||
class CBSNewsIE(CBSIE):
    # Extractor for cbsnews.com article (/news/<slug>) and video
    # (/videos/<slug>) pages. The page only supplies a media reference id;
    # the actual extraction is delegated to self._extract_video_info(),
    # which is not defined in this class (presumably provided by CBSIE).
    IE_DESC = 'CBS News'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'

    _TESTS = [
        {
            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
            'info_dict': {
                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
                'ext': 'flv',
                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
                'duration': 791,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Subscribers only',
        },
        {
            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
            'info_dict': {
                'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
                'ext': 'mp4',
                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
                'upload_date': '20140404',
                'timestamp': 1396650660,
                'uploader': 'CBSI-NEW',
                # Raw string: '\.' is an invalid escape sequence in a
                # regular string literal and triggers a warning on
                # recent Python versions.
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 205,
                'subtitles': {
                    'en': [{
                        'ext': 'ttml',
                    }],
                },
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve a CBS News page URL to its video info.

        Downloads the page, pulls the JSON blob embedded in either the
        related-items list (``data-video-info``) or the player container
        (``data-video-player-options``), and hands the ``mpxRefId`` found
        there to ``self._extract_video_info``.

        Raises KeyError if the JSON carries no ``mpxRefId``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_info = self._parse_json(self._html_search_regex(
            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
            webpage, 'video JSON info'), video_id)

        # The payload is either the item itself or a wrapper holding it
        # under the 'item' key.
        item = video_info.get('item', video_info)
        guid = item['mpxRefId']
        return self._extract_video_info(guid)
fd3a1f3d | 68 | |
69 | ||
class CBSNewsLiveVideoIE(InfoExtractor):
    # Extractor for cbsnews.com/live/video/<slug> pages, which embed the
    # story metadata as JSON in a data-story-obj attribute.
    IE_DESC = 'CBS News Live Videos'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'

    # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
    _TEST = {
        'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
        'info_dict': {
            'id': 'clinton-sanders-prepare-to-face-off-in-nh',
            'ext': 'flv',
            'title': 'Clinton, Sanders Prepare To Face Off In NH',
            'duration': 334,
        },
        'skip': 'Video gone',
    }

    def _real_extract(self, url):
        """Build the info dict for a CBS News live video page.

        Reads the ``story`` object from the page's ``data-story-obj``
        JSON, extracts f4m formats from its ``url`` and returns id,
        title, thumbnail, duration and formats.
        """
        slug = self._match_id(url)

        page = self._download_webpage(url, slug)

        story_json = self._html_search_regex(
            r'data-story-obj=\'({.+?})\'', page, 'video JSON info')
        story = self._parse_json(story_json, slug)['story']

        hdcore_sign = 'hdcore=3.3.1'
        manifest_url = story['url'] + '&' + hdcore_sign
        formats = self._extract_f4m_formats(manifest_url, slug)
        if formats:
            for fmt in formats:
                # URLs without the extra param induce a 404 error
                fmt['extra_param_to_segment_url'] = hdcore_sign
        self._sort_formats(formats)

        return {
            'id': slug,
            'title': story['headline'],
            # Prefer the HD thumbnail, fall back to the SD one.
            'thumbnail': story.get('thumbnail_url_hd') or story.get('thumbnail_url_sd'),
            'duration': parse_duration(story.get('segmentDur')),
            'formats': formats,
        }