]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/foxnews.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / foxnews.py
import re

from .amp import AMPIE
from .common import InfoExtractor
from ..utils import ExtractorError
5
6
class FoxNewsIE(AMPIE):
    # Extractor for the standalone player pages on video.foxnews.com,
    # video.insider.foxnews.com and video.foxbusiness.com. Both the plain
    # ``/v/<id>`` form and the ``/v/video-embed.html?video_id=<id>`` form
    # are accepted; the numeric ID is the only part of the URL that is used.
    IE_NAME = 'foxnews'
    IE_DESC = 'Fox News and Fox Business Video'
    _VALID_URL = r'https?://video\.(?:insider\.)?fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://video.foxnews.com/v/6320653836112',
            'info_dict': {
                'id': '6320653836112',
                'ext': 'mp4',
                'title': 'Tucker Carlson joins \'Gutfeld!\' to discuss his new documentary',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 404,
                'upload_date': '20230217',
                'description': 'md5:858a8a36f59e9ca897d758855bcdfa02',
                'timestamp': 1676611344.0,
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
            'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
            'info_dict': {
                'id': '5099377331001',
                'ext': 'mp4',
                'title': '82416_censoring',
                'description': '82416_censoring',
                'upload_date': '20160826',
                'timestamp': 1472169708.0,
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 521,
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
            'info_dict': {
                'id': '3937480',
                'ext': 'flv',
                'title': 'Frozen in Time',
                'description': '16-year-old girl is size of toddler',
                'duration': 265,
                'timestamp': 1304411491,
                'upload_date': '20110503',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
            'skip': '404 page',
        },
        {
            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
            'md5': '5846c64a1ea05ec78175421b8323e2df',
            'info_dict': {
                'id': '3922535568001',
                'ext': 'mp4',
                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
                'description': "Congressman discusses president's plan",
                'duration': 292,
                'timestamp': 1417662047,
                'upload_date': '20141204',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
            'skip': 'm3u8 HTTP error 400 in web browser',
        },
        {
            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
            'only_matching': True,
        },
        {
            'url': 'http://video.foxbusiness.com/v/4442309889001',
            'only_matching': True,
        },
    ]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield a canonical embed-player URL for each Fox News player in *webpage*.

        Looks for ``<script>``, ``<iframe>`` and ``<amp-iframe>`` tags whose
        ``src`` points at ``video.foxnews.com/v/video-embed.html`` or
        ``.../embed.js`` and carries an ``id=`` / ``video_id=`` query parameter.
        *url* is not used by this implementation.
        """
        embed_src_re = r'''(?x)
                <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
                (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
                (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
                '''
        for embed_match in re.finditer(embed_src_re, webpage):
            embedded_id = embed_match.group('video_id')
            # Normalise every hit to the video-embed.html form, which _VALID_URL accepts.
            yield f'https://video.foxnews.com/v/video-embed.html?video_id={embedded_id}'

    def _real_extract(self, url):
        """Build the info dict for the video whose numeric ID appears in *url*."""
        video_id = self._match_id(url)
        feed_url = f'https://api.foxnews.com/v3/video-player/{video_id}?callback=uid_{video_id}'

        # _extract_feed_info is not defined in this file; presumably inherited from AMPIE.
        info = self._extract_feed_info(feed_url)
        # Always report the ID taken from the URL, overriding any ID in the feed result.
        info['id'] = video_id
        return info
98
99
class FoxNewsVideoIE(InfoExtractor):
    # Handles ``foxnews.com/video/<id>`` pages. No page is downloaded: the
    # numeric ID from the URL is handed straight to FoxNewsIE.
    _VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.foxnews.com/video/6328632286112',
        'info_dict': {
            'id': '6328632286112',
            'ext': 'mp4',
            'title': 'Review: 2023 Toyota Prius Prime',
            'duration': 155,
            'thumbnail': r're:^https://.+\.jpg$',
            'timestamp': 1685720177.0,
            'upload_date': '20230602',
            'description': 'md5:b69aafb125b41c1402e9744f53d6edc4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.foxnews.com/video/6313058664112',
        'info_dict': {
            'id': '6313058664112',
            'ext': 'mp4',
            'thumbnail': r're:https://.+/1280x720/match/image\.jpg',
            'upload_date': '20220930',
            'description': 'New York City, Kids Therapy, Biden',
            'duration': 2415,
            'title': 'Gutfeld! - Thursday, September 29',
            'timestamp': 1664527538,
        },
        'skip': '404 page',
    }]

    def _real_extract(self, url):
        """Delegate to FoxNewsIE using the player URL for the ID found in *url*."""
        video_id = self._match_id(url)
        player_url = f'https://video.foxnews.com/v/{video_id}'
        return self.url_result(player_url, FoxNewsIE, video_id)
133
134
class FoxNewsArticleIE(InfoExtractor):
    # Handles foxnews.com / insider.foxnews.com article pages that embed a
    # Fox News video. The article itself is only used to locate the video ID;
    # the actual extraction is delegated to FoxNewsIE.
    _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
    IE_NAME = 'foxnews:article'

    _TESTS = [{
        # data-video-id
        'url': 'https://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
        'md5': 'd2dd6ce809cedeefa96460e964821437',
        'info_dict': {
            'id': '5116295019001',
            'ext': 'mp4',
            'title': 'Trump and Clinton asked to defend positions on Iraq War',
            'description': 'Veterans and Fox News host Dana Perino react on \'The Kelly File\' to NBC\'s presidential forum',
            'timestamp': 1473301045,
            'upload_date': '20160908',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 426,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # iframe embed
        'url': 'https://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
        'info_dict': {
            'id': '5748266721001',
            'ext': 'flv',
            'title': 'Kyle Kashuv has a positive message for the Trump White House',
            'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 229,
            'timestamp': 1520594670,
            'upload_date': '20180309',
        },
        'skip': '404 page',
    }, {
        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the video embedded in an article page and delegate to FoxNewsIE.

        Two page layouts are supported, tried in this order:
        1. a ``data-video-id="<id>"`` attribute anywhere in the page;
        2. a ``<script>``/``<iframe>``/``<amp-iframe>`` player embed, as
           recognised by ``FoxNewsIE._extract_embed_urls``.

        Raises ExtractorError (marked as expected) when neither is present.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_id = self._html_search_regex(
            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
            webpage, 'video ID', group='id', default=None)
        if video_id:
            # https for consistency with the other redirects in this file;
            # FoxNewsIE only uses the numeric ID, so the scheme is irrelevant to it.
            return self.url_result(
                'https://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())

        # Use a default so that a page without any embed does not leak a bare
        # StopIteration out of the extractor.
        embed_url = next(FoxNewsIE._extract_embed_urls(url, webpage), None)
        if embed_url is None:
            raise ExtractorError(
                'Unable to find an embedded Fox News video in this article', expected=True)

        return self.url_result(embed_url, FoxNewsIE.ie_key())