[yt-dlp.git] / youtube_dl / extractor / livestream.py

from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urlparse,
    xpath_with_ns,
    compat_str,
    orderedSet,
)


class LivestreamIE(InfoExtractor):
    """Extractor for new.livestream.com event pages and single videos.

    A URL without a ``/videos/<id>`` suffix is treated as an event page and
    yields a playlist built from the ``window.config`` JSON embedded in the
    page. A URL with that suffix yields a single info dict built from the
    JSON document referenced by the player's ``play_url`` parameter.
    """
    IE_NAME = 'livestream'
    # Accept both schemes, like the other Livestream extractors in this file.
    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
    _TEST = {
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
        'info_dict': {
            'id': '4719370',
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'upload_date': '20121012',
        }
    }

    def _extract_video_info(self, video_data):
        """Convert one video JSON object into an info dict.

        ``id`` and ``caption`` are required and raise KeyError when absent.
        The thumbnail and the upload date are optional metadata: when the
        corresponding keys are missing (or empty) they are reported as None
        instead of aborting the whole extraction.
        """
        # Prefer the HD progressive download when the API offers one.
        video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
        updated_at = video_data.get('updated_at')
        return {
            'id': compat_str(video_data['id']),
            'url': video_url,
            'ext': 'mp4',
            'title': video_data['caption'],
            'thumbnail': video_data.get('thumbnail_url'),
            # Drop the dashes and keep the first 8 characters (YYYYMMDD);
            # presumably 'updated_at' is an ISO-like timestamp.
            'upload_date': updated_at.replace('-', '')[:8] if updated_at else None,
        }

    def _real_extract(self, url):
        """Return a playlist for an event URL or an info dict for a video URL."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

        if video_id is None:
            # This is an event page: its feed is embedded as a JSON blob.
            config_json = self._search_regex(
                r'window.config = ({.*?});', webpage, 'window config')
            info = json.loads(config_json)['event']
            # Keep only the feed items whose type is 'video'.
            videos = [
                self._extract_video_info(video_data['data'])
                for video_data in info['feed']['data']
                if video_data['type'] == 'video'
            ]
            return self.playlist_result(videos, info['id'], info['full_name'])
        else:
            # The player URL carries a 'play_url' query parameter; with the
            # '.smil' part removed it is downloaded and parsed as the JSON
            # description of the video.
            og_video = self._og_search_video_url(webpage, 'player url')
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
            api_url = query['play_url'][0].replace('.smil', '')
            info = json.loads(self._download_webpage(
                api_url, video_id, 'Downloading video info'))
            return self._extract_video_info(info)


# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
    """Extractor for the original www.livestream.com site.

    "video" URLs are resolved through the channel's clipdetails XML API and
    returned as a single RTMP entry; "folder" URLs are returned as a playlist
    of the livestre.am links found in the folder page.
    """
    IE_NAME = 'livestream:original'
    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
        (?P<user>[^/]+)/(?P<type>video|folder)
        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
        '''
    _TEST = {
        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
            'ext': 'flv',
            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }

    def _extract_video(self, user, video_id):
        """Build the info dict for clip *video_id* of channel *user*."""
        api_template = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'
        clip_doc = self._download_xml(
            api_template.format(user, video_id), video_id)
        clip = clip_doc.find('channel').find('item')

        thumbnail_tag = xpath_with_ns(
            'media:thumbnail', {'media': 'http://search.yahoo.com/mrss'})
        thumbnail_url = clip.find(thumbnail_tag).attrib['url']
        # The RTMP play path is derived from the thumbnail location: keep the
        # 'user-files/...' part and drop the trailing number and extension
        # (like _1.jpg).
        media_path = self._search_regex(
            r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')

        return {
            'id': video_id,
            'title': clip.find('title').text,
            'url': 'rtmp://extondemand.livestream.com/ondemand',
            'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(media_path),
            'ext': 'flv',
            'thumbnail': thumbnail_url,
        }

    def _extract_folder(self, url, folder_id):
        """Return a playlist of the livestre.am links found in a folder page."""
        page = self._download_webpage(url, folder_id)
        # orderedSet removes duplicate links while keeping page order.
        short_links = orderedSet(
            re.findall(r'<a href="(https?://livestre\.am/.*?)"', page))
        entries = [{'_type': 'url', 'url': link} for link in short_links]

        return {
            '_type': 'playlist',
            'id': folder_id,
            'entries': entries,
        }

    def _real_extract(self, url):
        """Dispatch to folder or single-clip extraction based on the URL type."""
        match = re.match(self._VALID_URL, url)
        item_id = match.group('id')
        if match.group('type') == 'folder':
            return self._extract_folder(url, item_id)
        return self._extract_video(match.group('user'), item_id)


# The server doesn't support HEAD request, the generic extractor can't detect
# the redirection
class LivestreamShortenerIE(InfoExtractor):
    """Resolve a livestre.am short link by downloading the page it serves.

    The result is a 'url' entry pointing at whatever `_og_search_url` finds
    in that page, so another extractor can handle the real address.
    """
    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Return a 'url' result that redirects to the page's canonical URL."""
        short_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, short_id)
        target_url = self._og_search_url(page)

        return {
            '_type': 'url',
            'url': target_url,
        }
Commit	Line	Data
c5469e04 S	1	from __future__ import unicode_literals
c5469e04 S	2
b4444d5c JMF	3	import re
	4	import json
	5
	6	from .common import InfoExtractor
b00ca882 JMF	7	from ..utils import (
	8	compat_urllib_parse_urlparse,
	9	compat_urlparse,
c66d2baa	10	xpath_with_ns,
c5469e04	11	compat_str,
78338f71	12	orderedSet,
b00ca882	13	)
b4444d5c JMF	14
	15
	16	class LivestreamIE(InfoExtractor):
c5469e04	17	IE_NAME = 'livestream'
c0ade33e	18	_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
b4444d5c	19	_TEST = {
c5469e04 S	20	'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
	21	'md5': '53274c76ba7754fb0e8d072716f2292b',
	22	'info_dict': {
	23	'id': '4719370',
	24	'ext': 'mp4',
	25	'title': 'Live from Webster Hall NYC',
	26	'upload_date': '20121012',
b4444d5c JMF	27	}
	28	}
	29
	30	def _extract_video_info(self, video_data):
	31	video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
c5469e04 S	32	return {
	33	'id': compat_str(video_data['id']),
	34	'url': video_url,
	35	'ext': 'mp4',
	36	'title': video_data['caption'],
	37	'thumbnail': video_data['thumbnail_url'],
	38	'upload_date': video_data['updated_at'].replace('-', '')[:8],
	39	}
b4444d5c JMF	40
	41	def _real_extract(self, url):
	42	mobj = re.match(self._VALID_URL, url)
	43	video_id = mobj.group('id')
	44	event_name = mobj.group('event_name')
	45	webpage = self._download_webpage(url, video_id or event_name)
	46
	47	if video_id is None:
	48	# This is an event page:
c5469e04 S	49	config_json = self._search_regex(
c5469e04 S	50	r'window.config = ({.*?});', webpage, 'window config')
5f1ea943	51	info = json.loads(config_json)['event']
b4444d5c	52	videos = [self._extract_video_info(video_data['data'])
c5469e04	53	for video_data in info['feed']['data'] if video_data['type'] == 'video']
b4444d5c JMF	54	return self.playlist_result(videos, info['id'], info['full_name'])
b4444d5c JMF	55	else:
c5469e04	56	og_video = self._og_search_video_url(webpage, 'player url')
b4444d5c JMF	57	query_str = compat_urllib_parse_urlparse(og_video).query
	58	query = compat_urlparse.parse_qs(query_str)
	59	api_url = query['play_url'][0].replace('.smil', '')
c5469e04 S	60	info = json.loads(self._download_webpage(
c5469e04 S	61	api_url, video_id, 'Downloading video info'))
b4444d5c	62	return self._extract_video_info(info)
c66d2baa JMF	63
	64
	65	# The original version of Livestream uses a different system
	66	class LivestreamOriginalIE(InfoExtractor):
c5469e04	67	IE_NAME = 'livestream:original'
78338f71 JMF	68	_VALID_URL = r'''(?x)https?://www\.livestream\.com/
	69	(?P<user>[^/]+)/(?P<type>video|folder)
	70	(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
	71	'''
c66d2baa	72	_TEST = {
c5469e04 S	73	'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
	74	'info_dict': {
	75	'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
	76	'ext': 'flv',
	77	'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
c66d2baa	78	},
c5469e04	79	'params': {
c66d2baa	80	# rtmp
c5469e04	81	'skip_download': True,
c66d2baa JMF	82	},
	83	}
	84
78338f71	85	def _extract_video(self, user, video_id):
c66d2baa JMF	86	api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
c66d2baa JMF	87
e26f8712	88	info = self._download_xml(api_url, video_id)
c66d2baa JMF	89	item = info.find('channel').find('item')
	90	ns = {'media': 'http://search.yahoo.com/mrss'}
	91	thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
	92	# Remove the extension and number from the path (like 1.jpg)
c5469e04	93	path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
c66d2baa JMF	94
	95	return {
	96	'id': video_id,
	97	'title': item.find('title').text,
	98	'url': 'rtmp://extondemand.livestream.com/ondemand',
	99	'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
	100	'ext': 'flv',
	101	'thumbnail': thumbnail_url,
	102	}
78338f71 JMF	103
	104	def _extract_folder(self, url, folder_id):
	105	webpage = self._download_webpage(url, folder_id)
	106	urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
	107
	108	return {
	109	'_type': 'playlist',
	110	'id': folder_id,
	111	'entries': [{
	112	'_type': 'url',
	113	'url': video_url,
	114	} for video_url in urls],
	115	}
	116
	117	def _real_extract(self, url):
	118	mobj = re.match(self._VALID_URL, url)
	119	id = mobj.group('id')
	120	user = mobj.group('user')
	121	url_type = mobj.group('type')
	122	if url_type == 'folder':
	123	return self._extract_folder(url, id)
	124	else:
	125	return self._extract_video(user, id)
	126
	127
	128	# The server doesn't support HEAD request, the generic extractor can't detect
	129	# the redirection
	130	class LivestreamShortenerIE(InfoExtractor):
	131	IE_NAME = 'livestream:shortener'
	132	IE_DESC = False # Do not list
	133	_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
	134
	135	def _real_extract(self, url):
	136	mobj = re.match(self._VALID_URL, url)
	137	id = mobj.group('id')
	138	webpage = self._download_webpage(url, id)
	139
	140	return {
	141	'_type': 'url',
	142	'url': self._og_search_url(webpage),
	143	}