]>
Commit | Line | Data |
---|---|---|
4b6462fc JMF |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | ||
5 | ||
class BloombergIE(InfoExtractor):
    """Information extractor for videos embedded in bloomberg.com pages.

    The page is downloaded, a video ID is located in its markup, and the
    stream list is then fetched from Bloomberg's ``/api/embed`` endpoint.
    """

    # The last non-empty path component is captured as ``id``; it is only
    # used as a display name while the real video ID is being looked up.
    _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
            'description': 'md5:a8ba0302912d03d246979735c17d2761',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        # video ID in BPlayer(...)
        'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
        'info_dict': {
            'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
            'ext': 'flv',
            'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
            'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        # data-bmmrid=
        'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the Bloomberg page at *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``thumbnail``.

        Raises ``KeyError`` when the ``BPlayer(...)`` payload has no ``id``
        or the embed API response has no ``streams``. The ``self._*``
        helpers are inherited and may raise their own errors.
        """
        name = self._match_id(url)
        webpage = self._download_webpage(url, name)

        # Try the three inline markers first: a quoted "bmmrId" key, a bare
        # videoId key, and a data-bmmrid HTML attribute. default=None makes
        # a miss non-fatal so the BPlayer fallback below can run.
        video_id = self._search_regex(
            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
            webpage, 'id', group='id', default=None)
        if not video_id:
            # Fallback: the ID lives in the JSON object passed as the second
            # argument of a BPlayer(null, {...}); call.
            bplayer_data = self._parse_json(self._search_regex(
                r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
            video_id = bplayer_data['id']

        # Drop the ": Video" suffix from the end of the page title.
        title = re.sub(r': Video$', '', self._og_search_title(webpage))

        embed_info = self._download_json(
            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
        formats = []
        for stream in embed_info['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            # 'muxing_format' is read with .get() so that one stream entry
            # lacking the field cannot abort the whole extraction; such a
            # stream is tried as HDS, and fatal=False keeps that harmless.
            if stream.get('muxing_format') == 'TS':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.extend(self._extract_f4m_formats(
                    stream_url, video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }