[yt-dlp.git] / youtube_dl / extractor / nhl.py

from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    compat_urllib_parse,
    determine_ext,
    unified_strdate,
)


class NHLBaseInfoExtractor(InfoExtractor):
    """Helpers shared by the nhl.com videocenter extractors."""

    @staticmethod
    def _fix_json(json_string):
        """Replace backslash-escaped single quotes with plain single quotes.

        A backslash followed by a single quote is not a valid JSON escape
        sequence, so the text is cleaned up before it reaches a JSON parser.
        """
        return json_string.replace('\\\'', '\'')

    def _extract_video(self, info):
        """Build the result dict for one video entry.

        `info` is a dict describing a single video. The keys 'id', 'name'
        and 'publishPoint' are required (a KeyError is raised without them).
        'description', 'duration', 'bigImage' and 'releaseDate' are optional:
        when one of them is missing or unusable the corresponding result
        field is None instead of the whole extraction failing.
        """
        video_id = info['id']
        self.report_extraction(video_id)

        # The published path is not the final media URL: the "_sd" variant
        # of it is sent to the encryptvideopath servlet, which answers with
        # an XML document whose <path> element holds the actual URL.
        initial_video_url = info['publishPoint']
        data = compat_urllib_parse.urlencode({
            'type': 'fvod',
            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
        })
        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
        path_doc = self._download_xml(
            path_url, video_id, 'Downloading final video url')
        video_url = path_doc.find('path').text

        # Optional metadata: never let a missing or malformed value abort
        # the extraction of an otherwise playable video.
        try:
            duration = int(info.get('duration'))
        except (TypeError, ValueError):
            duration = None

        thumbnail = None
        big_image = info.get('bigImage')
        if big_image:
            join = compat_urlparse.urljoin
            # Thumbnails are resolved against the /u/ directory of the host
            # that serves the video.
            thumbnail = join(join(video_url, '/u/'), big_image)

        upload_date = None
        release_date = info.get('releaseDate')
        if release_date:
            # Drop everything from the first '.' on before parsing the date.
            upload_date = unified_strdate(release_date.split('.')[0])

        return {
            'id': video_id,
            'title': info['name'],
            'url': video_url,
            'ext': determine_ext(video_url),
            'description': info.get('description'),
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }


class NHLIE(NHLBaseInfoExtractor):
    """Extractor for a single nhl.com videocenter video."""

    IE_NAME = 'nhl.com'
    # Matches console URLs that carry an "id" query parameter; the team
    # subdomain is optional.
    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
        'info_dict': {
            'id': '453614',
            'ext': 'mp4',
            'title': 'Quick clip: Weise 4-3 goal vs Flames',
            'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
            'duration': 18,
            'upload_date': '20131006',
        },
    }

    def _real_extract(self, url):
        """Download the playlist JSON for the id in `url` and extract its first entry."""
        video_id = re.match(self._VALID_URL, url).group('id')
        playlist = self._download_json(
            'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id,
            video_id,
            transform_source=self._fix_json)
        first_entry = playlist[0]
        return self._extract_video(first_entry)


class NHLVideocenterIE(NHLBaseInfoExtractor):
    """Extractor that turns an nhl.com videocenter category into a playlist."""

    IE_NAME = 'nhl.com:videocenter'
    IE_DESC = 'NHL videocenter category'
    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
    _TEST = {
        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
        'info_dict': {
            'id': '999',
            'title': 'Highlights',
        },
        'playlist_count': 12,
    }

    def _real_extract(self, url):
        """Return a playlist dict with the videos of the category shown at `url`.

        The category id and title are scraped from the videocenter page, then
        the browse servlet on the same host is queried for the video list and
        every returned entry is passed through _extract_video.
        """
        mobj = re.match(self._VALID_URL, url)
        team = mobj.group('team')
        webpage = self._download_webpage(url, team)
        # The category id is read from the page itself, not from the URL.
        cat_id = self._search_regex(
            [r'var defaultCatId = "(.+?)";',
             r'{statusIndex:0,index:0,.*?id:(.*?),'],
            webpage, 'category id')
        playlist_title = self._html_search_regex(
            r'tab0"[^>]*?>(.*?)</td>',
            webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()

        data = compat_urllib_parse.urlencode({
            'cid': cat_id,
            # This is the default value
            'count': 12,
            'ptrs': 3,
            'format': 'json',
        })
        path = '/videocenter/servlets/browse?' + data
        # Resolve against the page URL so the team host is queried.
        request_url = compat_urlparse.urljoin(url, path)
        response = self._fix_json(
            self._download_webpage(request_url, playlist_title))
        if not response.strip():
            # An empty body is retried once with the extra "newvideos" flag.
            self._downloader.report_warning(
                'Got an empty response, trying '
                'adding the "newvideos" parameter')
            response = self._fix_json(self._download_webpage(
                request_url + '&newvideos=true', playlist_title))
        videos = json.loads(response)

        return {
            '_type': 'playlist',
            'title': playlist_title,
            'id': cat_id,
            'entries': [self._extract_video(v) for v in videos],
        }
Commit	Line	Data
25945452 PH	1	from __future__ import unicode_literals
25945452 PH	2
2e1fa03b JMF	3	import re
2e1fa03b JMF	4	import json
2e1fa03b JMF	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	compat_urlparse,
	9	compat_urllib_parse,
	10	determine_ext,
	11	unified_strdate,
	12	)
	13
	14
91dbaef4 JMF	15	class NHLBaseInfoExtractor(InfoExtractor):
	16	@staticmethod
	17	def _fix_json(json_string):
	18	return json_string.replace('\\\'', '\'')
	19
	20	def _extract_video(self, info):
	21	video_id = info['id']
	22	self.report_extraction(video_id)
	23
	24	initial_video_url = info['publishPoint']
	25	data = compat_urllib_parse.urlencode({
	26	'type': 'fvod',
	27	'path': initial_video_url.replace('.mp4', '_sd.mp4'),
	28	})
	29	path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
25945452 PH	30	path_doc = self._download_xml(
25945452 PH	31	path_url, video_id, 'Downloading final video url')
91dbaef4 JMF	32	video_url = path_doc.find('path').text
	33
	34	join = compat_urlparse.urljoin
	35	return {
	36	'id': video_id,
	37	'title': info['name'],
	38	'url': video_url,
	39	'ext': determine_ext(video_url),
	40	'description': info['description'],
	41	'duration': int(info['duration']),
	42	'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
	43	'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
	44	}
	45
	46
	47	class NHLIE(NHLBaseInfoExtractor):
25945452	48	IE_NAME = 'nhl.com'
22a6f150	49	_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
2e1fa03b JMF	50
2e1fa03b JMF	51	_TEST = {
25945452 PH	52	'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
	53	'info_dict': {
	54	'id': '453614',
	55	'ext': 'mp4',
	56	'title': 'Quick clip: Weise 4-3 goal vs Flames',
	57	'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
	58	'duration': 18,
	59	'upload_date': '20131006',
2e1fa03b JMF	60	},
	61	}
	62
	63	def _real_extract(self, url):
	64	mobj = re.match(self._VALID_URL, url)
	65	video_id = mobj.group('id')
	66	json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
25945452 PH	67	data = self._download_json(
	68	json_url, video_id, transform_source=self._fix_json)
	69	return self._extract_video(data[0])
91dbaef4 JMF	70
	71
	72	class NHLVideocenterIE(NHLBaseInfoExtractor):
25945452 PH	73	IE_NAME = 'nhl.com:videocenter'
25945452 PH	74	IE_DESC = 'NHL videocenter category'
22a6f150	75	_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
25945452 PH	76	_TEST = {
	77	'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
	78	'info_dict': {
	79	'id': '999',
	80	'title': 'Highlights',
	81	},
	82	'playlist_count': 12,
	83	}
91dbaef4 JMF	84
	85	def _real_extract(self, url):
	86	mobj = re.match(self._VALID_URL, url)
	87	team = mobj.group('team')
	88	webpage = self._download_webpage(url, team)
	89	cat_id = self._search_regex(
	90	[r'var defaultCatId = "(.+?)";',
	91	r'{statusIndex:0,index:0,.*?id:(.*?),'],
25945452	92	webpage, 'category id')
91dbaef4	93	playlist_title = self._html_search_regex(
ce68b590	94	r'tab0"[^>]*?>(.*?)</td>',
25945452	95	webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
2e1fa03b	96
2e1fa03b	97	data = compat_urllib_parse.urlencode({
91dbaef4 JMF	98	'cid': cat_id,
	99	# This is the default value
	100	'count': 12,
	101	'ptrs': 3,
	102	'format': 'json',
2e1fa03b	103	})
91dbaef4 JMF	104	path = '/videocenter/servlets/browse?' + data
	105	request_url = compat_urlparse.urljoin(url, path)
	106	response = self._download_webpage(request_url, playlist_title)
	107	response = self._fix_json(response)
	108	if not response.strip():
	109	self._downloader.report_warning(u'Got an empty reponse, trying '
25945452	110	'adding the "newvideos" parameter')
91dbaef4 JMF	111	response = self._download_webpage(request_url + '&newvideos=true',
	112	playlist_title)
	113	response = self._fix_json(response)
	114	videos = json.loads(response)
2e1fa03b	115
2e1fa03b	116	return {
91dbaef4 JMF	117	'_type': 'playlist',
	118	'title': playlist_title,
	119	'id': cat_id,
25945452	120	'entries': [self._extract_video(v) for v in videos],
2e1fa03b	121	}