[yt-dlp.git] / youtube_dl / extractor / rds.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    parse_iso8601,
)


class RDSIE(InfoExtractor):
    """Information extractor for video pages on rds.ca."""

    IE_DESC = 'RDS.ca'
    # `display_id` captures the URL slug; `id` captures the trailing
    # "<digits>.<digits>" token that follows the last hyphen.
    _VALID_URL = r'https?://(?:www\.)?rds\.ca/videos/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
        'info_dict': {
            'id': '3.1132799',
            'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
            'ext': 'mp4',
            'title': 'Fowler Jr. prend la direction de Jacksonville',
            'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
            'timestamp': 1430397346,
            'upload_date': '20150430',
            'duration': 154.354,
            'age_limit': 0,
        }
    }

    def _real_extract(self, url):
        """Scrape a single rds.ca video page into an info dict.

        The page is downloaded once and every field is read from its
        markup: the media URL from the ``contentURL`` itemprop, the title,
        description and thumbnail from Open Graph tags with ``<meta>`` /
        itemprop fallbacks, and the upload date and duration from their
        itemprop ``<span>`` elements.

        The thumbnail, upload date and duration lookups are explicitly
        non-fatal; the title fallback is explicitly fatal.

        Returns a dict with the keys ``id``, ``display_id``, ``url``,
        ``title``, ``description``, ``thumbnail``, ``timestamp``,
        ``duration`` and ``age_limit``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # TODO: extract f4m from 9c9media.com
        video_url = self._search_regex(
            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
            webpage, 'video url')

        # default=None turns a missing og:title into None instead of an
        # immediate extraction error, so the <meta name="title"> fallback
        # below is actually reachable. That fallback stays fatal, so a page
        # with no title at all still fails loudly.
        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'title', webpage, 'title', fatal=True)
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')
        # Two markup variants are tried for the itemprop thumbnail:
        # a <link href=...> and a <span content=...>.
        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
            [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
             r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
            webpage, 'thumbnail', fatal=False)
        timestamp = parse_iso8601(self._search_regex(
            r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
            webpage, 'upload date', fatal=False))
        duration = parse_duration(self._search_regex(
            r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
            webpage, 'duration', fatal=False))
        age_limit = self._family_friendly_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'age_limit': age_limit,
        }
Commit	Line	Data
b6ea9ef2 S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	parse_duration,
	9	parse_iso8601,
	10	)
	11
	12
	13	class RDSIE(InfoExtractor):
	14	IE_DESC = 'RDS.ca'
	15	_VALID_URL = r'https?://(?:www\.)?rds\.ca/videos/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
	16
	17	_TEST = {
	18	'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
	19	'info_dict': {
	20	'id': '3.1132799',
	21	'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
	22	'ext': 'mp4',
	23	'title': 'Fowler Jr. prend la direction de Jacksonville',
	24	'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
	25	'timestamp': 1430397346,
	26	'upload_date': '20150430',
	27	'duration': 154.354,
	28	'age_limit': 0,
	29	}
	30	}
	31
	32	def _real_extract(self, url):
	33	mobj = re.match(self._VALID_URL, url)
	34	video_id = mobj.group('id')
	35	display_id = mobj.group('display_id')
	36
	37	webpage = self._download_webpage(url, display_id)
	38
	39	# TODO: extract f4m from 9c9media.com
	40	video_url = self._search_regex(
	41	r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
	42	webpage, 'video url')
	43
	44	title = self._og_search_title(webpage) or self._html_search_meta(
	45	'title', webpage, 'title', fatal=True)
	46	description = self._og_search_description(webpage) or self._html_search_meta(
	47	'description', webpage, 'description')
	48	thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
	49	[r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
	50	r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
	51	webpage, 'thumbnail', fatal=False)
	52	timestamp = parse_iso8601(self._search_regex(
	53	r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
	54	webpage, 'upload date', fatal=False))
	55	duration = parse_duration(self._search_regex(
	56	r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
	57	webpage, 'duration', fatal=False))
	58	age_limit = self._family_friendly_search(webpage)
	59
	60	return {
	61	'id': video_id,
	62	'display_id': display_id,
	63	'url': video_url,
	64	'title': title,
65	'description': description,
66	'thumbnail': thumbnail,
67	'timestamp': timestamp,
68	'duration': duration,
69	'age_limit': age_limit,
70	}