]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | ||
5 | ||
class BloombergIE(InfoExtractor):
    """Extractor for videos found on bloomberg.com pages.

    The last path component of the URL is used as the display name while
    the page is downloaded; the actual video ID is read from the page
    markup and then used to query the site's embed API for stream URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'

    _TESTS = [
        {
            'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video',
            'info_dict': {
                'id': 'V8cFcYMxTHaMcEiiYVr39A',
                'ext': 'flv',
                'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much',
            },
            'params': {
                'format': 'best[format_id^=hds]',
            },
        },
        {
            # video ID in BPlayer(...)
            'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
            'info_dict': {
                'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
                'ext': 'flv',
                'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
                'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
            },
            'params': {
                'format': 'best[format_id^=hds]',
            },
        },
        {
            # data-bmmrid=
            'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
            'only_matching': True,
        },
        {
            'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
            'only_matching': True,
        },
        {
            'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict (id, title, formats, description, thumbnail) for *url*.

        The video ID is taken from the first page pattern that matches
        (``bmmrId``, ``videoId`` or ``data-bmmrid``); when none matches, it
        is read from the JSON argument of a ``BPlayer(null, {...});`` call.
        """
        display_name = self._match_id(url)
        page = self._download_webpage(url, display_name)

        # Tried in order; the named group 'id' holds the quoted value.
        id_patterns = (
            r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
            r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
            r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1',
        )
        video_id = self._search_regex(
            id_patterns, page, 'id', group='id', default=None)
        if not video_id:
            # Fallback: the ID lives inside the BPlayer(...) JSON payload.
            player_json = self._search_regex(
                r'BPlayer\(null,\s*({[^;]+})\);', page, 'id')
            video_id = self._parse_json(player_json, display_name)['id']

        # Strip the trailing ": Video" marker from the OpenGraph title.
        og_title = self._og_search_title(page)
        title = re.sub(': Video$', '', og_title)

        embed_url = 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id
        embed_info = self._download_json(embed_url, video_id)

        formats = []
        for stream in embed_info['streams']:
            stream_url = stream.get('url')
            if stream_url:
                if stream['muxing_format'] == 'TS':
                    # 'TS' streams are fetched as m3u8 playlists.
                    formats.extend(self._extract_m3u8_formats(
                        stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                else:
                    # Every other muxing format is fetched as an f4m manifest.
                    formats.extend(self._extract_f4m_formats(
                        stream_url, video_id, f4m_id='hds', fatal=False))

        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
        }