[yt-dlp.git] / youtube_dl / extractor / bloomberg.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class BloombergIE(InfoExtractor):
    """Extractor for video pages under www.bloomberg.com/news/videos/."""

    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
            'description': 'md5:a8ba0302912d03d246979735c17d2761',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded and the ``bmmrId`` value embedded in it is
        used as the video id.  That id is then passed to the
        ``/api/embed`` endpoint, whose ``streams`` list supplies the
        manifest URLs the formats are extracted from.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``thumbnail``.
        """
        # The id matched from the URL is only used to label the page
        # download; the returned id is the bmmrId found in the page.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
        # Drop the trailing ": Video" suffix from the Open Graph title.
        title = re.sub(r': Video$', '', self._og_search_title(webpage))

        embed_info = self._download_json(
            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
        formats = []
        for stream in embed_info['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                # A stream entry without a URL cannot yield any format;
                # skip it instead of aborting the whole extraction.
                continue
            # "TS" streams are handed to the m3u8 extractor, everything
            # else to the f4m extractor.
            if stream['muxing_format'] == 'TS':
                formats.extend(
                    self._extract_m3u8_formats(stream_url, video_id))
            else:
                formats.extend(
                    self._extract_f4m_formats(stream_url, video_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
Commit	Line	Data
7e70ac36 JMF	1	from __future__ import unicode_literals
7e70ac36 JMF	2
4b6462fc JMF	3	import re
	4
	5	from .common import InfoExtractor
	6
	7
	8	class BloombergIE(InfoExtractor):
ff2be6e1	9	_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
4b6462fc JMF	10
4b6462fc JMF	11	_TEST = {
ff2be6e1	12	'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
31bb8d3f	13	# The md5 checksum changes
7e70ac36 JMF	14	'info_dict': {
	15	'id': 'qurhIVlJSB6hzkVi229d8g',
	16	'ext': 'flv',
	17	'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
ff2be6e1	18	'description': 'md5:a8ba0302912d03d246979735c17d2761',
4b6462fc JMF	19	},
	20	}
	21
	22	def _real_extract(self, url):
ec5913b5	23	name = self._match_id(url)
4b6462fc	24	webpage = self._download_webpage(url, name)
ff2be6e1	25	video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
7e70ac36 JMF	26	title = re.sub(': Video$', '', self._og_search_title(webpage))
7e70ac36 JMF	27
ff2be6e1 JMF	28	embed_info = self._download_json(
	29	'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
	30	formats = []
	31	for stream in embed_info['streams']:
	32	if stream["muxing_format"] == "TS":
	33	formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
	34	else:
	35	formats.extend(self._extract_f4m_formats(stream['url'], video_id))
	36	self._sort_formats(formats)
	37
7e70ac36	38	return {
ff2be6e1	39	'id': video_id,
7e70ac36	40	'title': title,
ff2be6e1	41	'formats': formats,
7e70ac36 JMF	42	'description': self._og_search_description(webpage),
	43	'thumbnail': self._og_search_thumbnail(webpage),
	44	}