]>
Commit | Line | Data |
---|---|---|
055f0d3d YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import calendar | |
5 | import re | |
6 | import time | |
7 | ||
8 | from .amp import AMPIE | |
9 | from .common import InfoExtractor | |
5113b691 | 10 | from .youtube import YoutubeIE |
055f0d3d YCH |
11 | from ..compat import compat_urlparse |
12 | ||
13 | ||
class AbcNewsVideoIE(AMPIE):
    # Handles abcnews.go.com video pages (".../video/<slug>-<id>") and
    # embed URLs ("/video/embed?...id=<id>").
    IE_NAME = 'abcnews:video'
    _VALID_URL = r'''(?x)
                    https?://
                        abcnews\.go\.com/
                        (?:
                            [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
                            video/embed\?.*?\bid=
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
        'info_dict': {
            'id': '20411932',
            'ext': 'mp4',
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abcnews.go.com/video/embed?id=46979033',
        'only_matching': True,
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single ABC News video URL.

        The numeric id (and the slug, when the URL carries one) is taken
        from ``_VALID_URL``; the metadata comes from the item feed, fetched
        through the inherited ``_extract_feed_info`` helper.
        """
        match = re.match(self._VALID_URL, url)
        # ``display_id`` is None for embed URLs, which have no slug group.
        slug = match.group('display_id')
        video_id = match.group('id')

        feed_url = 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id
        info = self._extract_feed_info(feed_url)

        # The URL-derived identifiers take precedence over the feed's values.
        info['id'] = video_id
        info['display_id'] = slug
        return info
60 | ||
61 | ||
class AbcNewsIE(InfoExtractor):
    # Handles abcnews.go.com story pages (".../<slug>/story?id=<id>") and
    # hands the embedded video over to AbcNewsVideoIE.
    IE_NAME = 'abcnews'
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
        'info_dict': {
            'id': '10505354',
            'ext': 'flv',
            'display_id': 'dramatic-video-rare-death-job-america',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '38897857',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
            'upload_date': '20160515',
            'timestamp': 1463329500,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            # The embedded YouTube video is blocked due to copyright issues
            'playlist_items': '1',
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a story page to its embedded video(s).

        Returns a ``url_transparent`` entry that points at the embedded
        ABC News video. When the page also embeds a YouTube video, a
        playlist holding both entries is returned instead.
        """
        match = re.match(self._VALID_URL, url)
        slug = match.group('display_id')
        story_id = match.group('id')

        page = self._download_webpage(url, story_id)

        # The player URL may be relative, so resolve it against the page URL.
        player_url = self._search_regex(
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', page, 'video URL')
        absolute_player_url = compat_urlparse.urljoin(url, player_url)

        youtube_url = YoutubeIE._extract_url(page)

        # Best-effort publication time; it stays None when the span is
        # missing or matches none of the known formats.
        timestamp = None
        raw_date = self._html_search_regex(
            r'<span[^>]+class="timestamp">([^<]+)</span>',
            page, 'timestamp', fatal=False)
        if raw_date:
            utc_offset_hours = 0
            if raw_date.endswith(' ET'):
                # Eastern Time is treated as a fixed UTC-5 offset.
                utc_offset_hours = -5
                raw_date = raw_date[:-3]
            for fmt in ('%b. %d, %Y', '%b %d, %Y, %I:%M %p'):
                try:
                    timestamp = calendar.timegm(
                        time.strptime(raw_date.strip(), fmt))
                except ValueError:
                    # Not this format; try the next one.
                    continue
            if timestamp is not None:
                # Shift the parsed wall-clock time to UTC.
                timestamp -= utc_offset_hours * 3600

        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': absolute_player_url,
            'id': story_id,
            'display_id': slug,
            'timestamp': timestamp,
        }

        if not youtube_url:
            return entry

        youtube_entry = self.url_result(youtube_url, ie=YoutubeIE.ie_key())
        return self.playlist_result([entry, youtube_entry])