[yt-dlp.git] / youtube_dl / extractor / nfl.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    remove_end,
)


class NFLIE(InfoExtractor):
    """Extractor for videos published on nfl.com.

    Matches both ``/videos/<category>/<id>/...`` pages and URLs carrying
    the video id in a ``#video=<id>`` fragment (see ``_VALID_URL``).
    """
    IE_NAME = 'nfl.com'
    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
    # JSON document providing the content URL template and the CDN table.
    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
    _TEST = {
        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
        'md5': '394ef771ddcd1354f665b471d78ec4c6',
        'info_dict': {
            'id': '0ap3000000398478',
            'ext': 'mp4',
            'title': 'Week 3: Redskins vs. Eagles highlights',
            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
            'upload_date': '20140921',
            'timestamp': 1411337580,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads the player config and the per-video JSON, then emits one
        format per (CDN, stream) combination, skipping CDNs named
        ``LIMELIGHT`` and CDNs without a protocol or host.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description``, ``duration``, ``thumbnail`` and ``timestamp``.

        Raises ExtractorError when the player config has no ``cdns`` entry.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        config = self._download_json(self._PLAYER_CONFIG_URL, video_id,
                                     note='Downloading player config')
        # Two-step formatting: the config supplies a template which is then
        # formatted again with the video id.
        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
        video_data = self._download_json(url_template.format(id=video_id), video_id)

        cdns = config.get('cdns')
        if not cdns:
            raise ExtractorError('Failed to get CDN data', expected=True)

        formats = []
        # ``or`` fallbacks also cover explicit JSON nulls, which
        # ``dict.get(key, default)`` would pass through as None.
        streams = (video_data.get('cdnData') or {}).get('bitrateInfo') or []
        for name, cdn in cdns.items():
            # LimeLight streams don't seem to work
            if cdn.get('name') == 'LIMELIGHT':
                continue

            protocol = cdn.get('protocol')
            host = remove_end(cdn.get('host') or '', '/')
            if not (protocol and host):
                continue

            path_prefix = cdn.get('pathprefix') or ''
            if path_prefix and not path_prefix.endswith('/'):
                path_prefix = '%s/' % path_prefix

            # rtmp is ranked below everything else; CDN entries whose key
            # contains "prog" are ranked above the rest.
            if protocol == 'rtmp':
                preference = -1
            elif 'prog' in name.lower():
                preference = 1
            else:
                preference = 0

            for stream in streams:
                path = stream.get('path')
                if not path:
                    continue

                formats.append({
                    'url': '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
                        protocol=protocol,
                        host=host,
                        prefix=path_prefix,
                        path=path,
                    ),
                    'vbr': int_or_none(stream.get('rate', 0), 1000),
                    'preference': preference,
                    'format_note': name,
                })

        self._sort_formats(formats)

        # Take the first available thumbnail, trying the size keys from
        # 'xl' down to 'xs'.
        image_paths = video_data.get('imagePaths') or {}
        thumbnail = next(
            (image_paths[q] for q in ('xl', 'l', 'm', 's', 'xs')
             if image_paths.get(q)),
            None)

        return {
            'id': video_id,
            'title': video_data.get('headline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
Commit	Line	Data
632e5684 NJ	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	ExtractorError,
	9	int_or_none,
	10	remove_end,
	11	)
	12
	13
	14	class NFLIE(InfoExtractor):
	15	IE_NAME = 'nfl.com'
	16	_VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/\|.*?\#video=)(?P<id>\d..[0-9]+)'
	17	_PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
	18	_TEST = {
	19	'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
dfee8323	20	'md5': '394ef771ddcd1354f665b471d78ec4c6',
632e5684 NJ	21	'info_dict': {
	22	'id': '0ap3000000398478',
	23	'ext': 'mp4',
dfee8323	24	'title': 'Week 3: Redskins vs. Eagles highlights',
632e5684 NJ	25	'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
	26	'upload_date': '20140921',
	27	'timestamp': 1411337580,
	28	'thumbnail': 're:^https?://.*\.jpg$',
	29	}
	30	}
	31
	32	def _real_extract(self, url):
	33	mobj = re.match(self._VALID_URL, url)
	34	video_id = mobj.group('id')
	35
	36	config = self._download_json(self._PLAYER_CONFIG_URL, video_id,
	37	note='Downloading player config')
	38	url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
	39	video_data = self._download_json(url_template.format(id=video_id), video_id)
	40
	41	cdns = config.get('cdns')
	42	if not cdns:
	43	raise ExtractorError('Failed to get CDN data', expected=True)
	44
	45	formats = []
	46	streams = video_data.get('cdnData', {}).get('bitrateInfo', [])
	47	for name, cdn in cdns.items():
	48	# LimeLight streams don't seem to work
	49	if cdn.get('name') == 'LIMELIGHT':
	50	continue
	51
	52	protocol = cdn.get('protocol')
	53	host = remove_end(cdn.get('host', ''), '/')
	54	if not (protocol and host):
	55	continue
	56
	57	path_prefix = cdn.get('pathprefix', '')
	58	if path_prefix and not path_prefix.endswith('/'):
	59	path_prefix = '%s/' % path_prefix
	60
	61	get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
	62	protocol=protocol,
	63	host=host,
	64	prefix=path_prefix,
	65	path=p,
	66	)
	67
	68	if protocol == 'rtmp':
632e5684	69	preference = -1
dfee8323 NJ	70	elif 'prog' in name.lower():
dfee8323 NJ	71	preference = 1
632e5684 NJ	72	else:
	73	preference = 0
	74
	75	for stream in streams:
	76	path = stream.get('path')
	77	if not path:
	78	continue
	79
	80	formats.append({
	81	'url': get_url(path),
	82	'vbr': int_or_none(stream.get('rate', 0), 1000),
	83	'preference': preference,
	84	'format_note': name,
	85	})
	86
	87	self._sort_formats(formats)
	88
	89	thumbnail = None
	90	for q in ('xl', 'l', 'm', 's', 'xs'):
	91	thumbnail = video_data.get('imagePaths', {}).get(q)
	92	if thumbnail:
	93	break
	94
	95	return {
	96	'id': video_id,
dfee8323	97	'title': video_data.get('headline'),
632e5684 NJ	98	'formats': formats,
	99	'description': video_data.get('caption'),
	100	'duration': video_data.get('duration'),
	101	'thumbnail': thumbnail,
	102	'timestamp': int_or_none(video_data.get('posted'), 1000),
	103	}