]>
Commit | Line | Data |
---|---|---|
116e7e0d | 1 | # coding: utf-8 |
7e70ac36 JMF |
2 | from __future__ import unicode_literals |
3 | ||
4b6462fc JMF |
4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
7 | ||
8 | ||
class BloombergIE(InfoExtractor):
    """Information extractor for video pages hosted on bloomberg.com.

    The last path component of the page URL is used as a display name while
    the page is downloaded; the real video id is then read from the page
    markup and resolved to stream URLs through Bloomberg's embed API.
    """

    _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
            'description': 'md5:a8ba0302912d03d246979735c17d2761',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        # video ID in BPlayer(...)
        'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
        'info_dict': {
            'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
            'ext': 'flv',
            'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
            'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        The video id is taken from a ``bmmrId`` key in the page markup, or,
        when that key is absent, from the JSON object passed to a
        ``BPlayer(null, {...});`` call. The embed API is then queried for
        that id and every stream carrying a URL is expanded into formats.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``thumbnail``.
        """
        name = self._match_id(url)
        webpage = self._download_webpage(url, name)

        # Preferred source of the id; default=None lets us fall through to
        # the BPlayer(...) form instead of failing here.
        video_id = self._search_regex(
            r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
            webpage, 'id', group='url', default=None)
        if not video_id:
            bplayer_data = self._parse_json(self._search_regex(
                r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
            video_id = bplayer_data['id']

        # Strip the ": Video" suffix from the Open Graph title, if present.
        title = re.sub(r': Video$', '', self._og_search_title(webpage))

        embed_info = self._download_json(
            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)

        formats = []
        for stream in embed_info['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            # Read the muxing format tolerantly, like the URL above: a stream
            # entry lacking the key must not abort the whole extraction with
            # a KeyError. Such a stream takes the non-TS (HDS) branch, which
            # is itself non-fatal.
            if stream.get('muxing_format') == 'TS':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.extend(self._extract_f4m_formats(
                    stream_url, video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }