[yt-dlp.git] / youtube_dlc / extractor / rai.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_urlparse,
    compat_str,
)
from ..utils import (
    ExtractorError,
    determine_ext,
    find_xpath_attr,
    fix_xml_ampersands,
    GeoRestrictedError,
    int_or_none,
    parse_duration,
    strip_or_none,
    try_get,
    unescapeHTML,
    unified_strdate,
    unified_timestamp,
    update_url_query,
    urljoin,
    xpath_text,
)


class RaiBaseIE(InfoExtractor):
    """Base class holding the helpers shared by the Rai extractors."""

    # Lowercase hexadecimal UUID pattern; the extractors embed it in their URL
    # and page regexes to capture content ids.
    _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
    # Countries reported when a geo restriction is raised below.
    _GEO_COUNTRIES = ['IT']
    # NOTE(review): presumably read by InfoExtractor to switch off its
    # geo-bypass machinery for these extractors -- confirm against the base.
    _GEO_BYPASS = False

    def _extract_relinker_info(self, relinker_url, video_id):
        """Resolve a relinker URL into formats and basic stream metadata.

        A URL that does not start with http(s):// is returned untouched as a
        single format.  Otherwise the relinker XML is downloaded once per
        platform ('mon', 'flash', 'native') and the media URL found in each
        answer is turned into HLS, HDS or plain HTTP formats.

        Returns a dict with 'formats' and, when not None, 'is_live' and
        'duration'.  Calls raise_geo_restricted() when no format was found
        and the relinker flagged the content as geo protected.
        """
        if not re.match(r'https?://', relinker_url):
            return {'formats': [{'url': relinker_url}]}

        formats = []
        geoprotection = None
        is_live = None
        duration = None

        for platform in ('mon', 'flash', 'native'):
            relinker = self._download_xml(
                relinker_url, video_id,
                note='Downloading XML metadata for platform %s' % platform,
                transform_source=fix_xml_ampersands,
                query={'output': 45, 'pl': platform},
                headers=self.geo_verification_headers())

            # Each flag is only re-read while it is still falsy, so a
            # positive answer from an earlier platform is never overwritten.
            if not geoprotection:
                geoprotection = xpath_text(
                    relinker, './geoprotection', default=None) == 'Y'

            if not is_live:
                is_live = xpath_text(
                    relinker, './is_live', default=None) == 'Y'
            if not duration:
                duration = parse_duration(xpath_text(
                    relinker, './duration', default=None))

            url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
            if url_elem is None:
                continue

            media_url = url_elem.text
            # An empty <url> element has no text; there is nothing to
            # extract and the substring checks below would fail on None.
            if not media_url:
                continue

            # This does not imply geo restriction (e.g.
            # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
            if media_url == 'http://download.rai.it/video_no_available.mp4':
                continue

            ext = determine_ext(media_url)
            # Manifests are only taken from their own platform so the same
            # HLS/HDS stream is not added more than once.
            if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
                continue

            if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m' or platform == 'flash':
                manifest_url = update_url_query(
                    media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
                    {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
                formats.extend(self._extract_f4m_formats(
                    manifest_url, video_id, f4m_id='hds', fatal=False))
            else:
                bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
                # bitrate may be None (e.g. no <bitrate> element); comparing
                # None with 0 raises TypeError on Python 3, so test it first.
                has_bitrate = bitrate is not None and bitrate > 0
                formats.append({
                    'url': media_url,
                    'tbr': bitrate if has_bitrate else None,
                    'format_id': 'http-%d' % bitrate if has_bitrate else 'http',
                })

        if not formats and geoprotection is True:
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        # Drop the keys whose value is None so that callers doing
        # info.update(...) do not overwrite metadata they already have.
        return dict((k, v) for k, v in {
            'is_live': is_live,
            'duration': duration,
            'formats': formats,
        }.items() if v is not None)

    @staticmethod
    def _extract_subtitles(url, subtitle_url):
        """Build the subtitles dict for an optional subtitle URL.

        subtitle_url is resolved against url.  The result is empty unless
        subtitle_url is a non-empty string; otherwise it holds an 'it' list
        with the URL itself (declared as 'stl') and, when the URL ends in
        '.stl', a second entry pointing at the same path with '.srt'.
        """
        subtitles = {}
        if subtitle_url and isinstance(subtitle_url, compat_str):
            subtitle_url = urljoin(url, subtitle_url)
            STL_EXT = '.stl'
            SRT_EXT = '.srt'
            subtitles['it'] = [{
                'ext': 'stl',
                'url': subtitle_url,
            }]
            if subtitle_url.endswith(STL_EXT):
                # NOTE(review): assumes an .srt rendition sits next to the
                # .stl file on the server -- confirm.
                srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
                subtitles['it'].append({
                    'ext': 'srt',
                    'url': srt_url,
                })
        return subtitles


class RaiPlayIE(RaiBaseIE):
    """Extractor for single RaiPlay pages whose URL ends in '-<uuid>.html'."""

    # The 'url' group keeps the page URL without query string or fragment so
    # that '?json' can be appended to it.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
    _TESTS = [{
        'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
        'md5': '340aa3b7afb54bfd14a8c11786450d76',
        'info_dict': {
            'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
            'ext': 'mp4',
            'title': 'La Casa Bianca',
            'alt_title': 'S2016 - Puntata del 23/10/2016',
            'description': 'md5:a09d45890850458077d1f68bb036e0a5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 3',
            'creator': 'Rai 3',
            'duration': 3278,
            'timestamp': 1477764300,
            'upload_date': '20161029',
            'series': 'La Casa Bianca',
            'season': '2016',
        },
    }, {
        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
        'md5': '8970abf8caf8aef4696e7b1f2adfc696',
        'info_dict': {
            'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
            'ext': 'mp4',
            'title': 'Report del 07/04/2014',
            'alt_title': 'S2013/14 - Puntata del 07/04/2014',
            'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 5',
            'creator': 'Rai 5',
            'duration': 6160,
            'series': 'Report',
            'season_number': 5,
            'season': '2013/14',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract one RaiPlay item from the JSON served at '<page url>?json'.

        The 'name' and 'video' keys (and 'contentUrl' inside 'video') are
        required; every other field is optional.  Values returned by the
        relinker ('formats', 'is_live', 'duration') override the ones built
        from the JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        url, video_id = mobj.group('url', 'id')

        media = self._download_json(
            '%s?json' % url, video_id, 'Downloading video JSON')

        title = media['name']

        video = media['video']

        relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
        self._sort_formats(relinker_info['formats'])

        thumbnails = []
        images = media.get('images')
        # 'images' can be present but null (or not a mapping); only the
        # values are needed, the keys are ignored.
        if isinstance(images, dict):
            for image_url in images.values():
                if image_url and isinstance(image_url, compat_str):
                    thumbnails.append({
                        # The URL carries a literal '[RESOLUTION]' placeholder
                        'url': image_url.replace('[RESOLUTION]', '600x400')
                    })

        timestamp = unified_timestamp(try_get(
            media, lambda x: x['availabilities'][0]['start'], compat_str))

        subtitles = self._extract_subtitles(url, video.get('subtitles'))

        info = {
            'id': video_id,
            'title': self._live_title(title) if relinker_info.get(
                'is_live') else title,
            'alt_title': media.get('subtitle'),
            'description': media.get('description'),
            'uploader': strip_or_none(media.get('channel')),
            'creator': strip_or_none(media.get('editor')),
            'duration': parse_duration(video.get('duration')),
            'timestamp': timestamp,
            'thumbnails': thumbnails,
            'series': try_get(
                media, lambda x: x['isPartOf']['name'], compat_str),
            'season_number': int_or_none(try_get(
                media, lambda x: x['isPartOf']['numeroStagioni'])),
            'season': media.get('stagione') or None,
            'subtitles': subtitles,
        }

        info.update(relinker_info)
        return info


class RaiPlayLiveIE(RaiBaseIE):
    """Extractor for RaiPlay live channel pages (raiplay.it/dirette/...)."""

    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://www.raiplay.it/dirette/rainews24',
        'info_dict': {
            'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
            'display_id': 'rainews24',
            'ext': 'mp4',
            'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:6eca31500550f9376819f174e5644754',
            'uploader': 'Rai News 24',
            'creator': 'Rai News 24',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Find the channel's content UUID in the page and delegate to RaiPlayIE.

        Returns a 'url_transparent' result pointing at the ContentItem page
        of the live stream, keeping the channel slug as display id.
        """
        channel_slug = self._match_id(url)
        page = self._download_webpage(url, channel_slug)

        # The UUID is published in a data-uniquename="ContentItem-<uuid>" attribute
        uniquename_re = r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE
        content_uuid = self._search_regex(uniquename_re, page, 'content id')

        result = {'_type': 'url_transparent'}
        result['ie_key'] = RaiPlayIE.ie_key()
        result['url'] = 'http://www.raiplay.it/dirette/ContentItem-%s.html' % content_uuid
        result['id'] = content_uuid
        result['display_id'] = channel_slug
        return result


class RaiPlayPlaylistIE(InfoExtractor):
    """Extractor for RaiPlay programme pages (raiplay.it/programmi/...)."""

    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Collect every /raiplay/video/ link of the page into a playlist.

        Title and description come from the page's meta tags; each linked
        video becomes a url_result handled by RaiPlayIE.
        """
        programme_id = self._match_id(url)
        page = self._download_webpage(url, programme_id)

        programme_title = self._html_search_meta(
            ('programma', 'nomeProgramma'), page, 'title')
        raw_description = self._html_search_meta(
            ('description', 'og:description'), page, 'description')
        programme_description = unescapeHTML(raw_description)

        # Anchors whose href (either quote style) starts with /raiplay/video/
        link_re = r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1'
        absolute_urls = (
            urljoin(url, link.group('path'))
            for link in re.finditer(link_re, page))

        playlist_entries = [
            self.url_result(
                episode_url, ie=RaiPlayIE.ie_key(),
                video_id=RaiPlayIE._match_id(episode_url))
            for episode_url in absolute_urls]

        return self.playlist_result(
            playlist_entries, programme_id, programme_title,
            programme_description)


class RaiIE(RaiBaseIE):
    """Extractor for pages on *.rai.it, *.rai.tv and *.rainews.it.

    The page URL must contain a UUID followed by '.html' (optionally with a
    suffix between the two).
    """

    _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
    _TESTS = [{
        # var uniquename = "ContentItem-..."
        # data-id="ContentItem-..."
        'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
        'info_dict': {
            'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
            'ext': 'mp4',
            'title': 'TG PRIMO TEMPO',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1758,
            'upload_date': '20140612',
        }
    }, {
        # with ContentItem in many metas
        'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
        'info_dict': {
            'id': '1632c009-c843-4836-bb65-80c33084a64b',
            'ext': 'mp4',
            'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
            'description': 'I film in uscita questa settimana.',
            'thumbnail': r're:^https?://.*\.png$',
            'duration': 833,
            'upload_date': '20161103',
        }
    }, {
        # with ContentItem in og:url
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
        'md5': '11959b4e44fa74de47011b5799490adf',
        'info_dict': {
            'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
            'ext': 'mp4',
            'title': 'TG1 ore 20:00 del 03/11/2016',
            'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2214,
            'upload_date': '20161103',
        }
    }, {
        # drawMediaRaiTV(...)
        'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
        'md5': '2dd727e61114e1ee9c47f0da6914e178',
        'info_dict': {
            'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
            'ext': 'mp4',
            'title': 'Il pacco',
            'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20141221',
        },
    }, {
        # initEdizione('ContentItem-...'
        'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
        'info_dict': {
            'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
            'ext': 'mp4',
            'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
            'duration': 2274,
            'upload_date': '20170401',
        },
        'skip': 'Changes daily',
    }, {
        # HDS live stream with only relinker URL
        'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
        'info_dict': {
            'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
            'ext': 'flv',
            'title': 'EuroNews',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # HLS live stream with ContentItem in og:url
        'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
        'info_dict': {
            'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
            'ext': 'mp4',
            'title': 'La diretta di Rainews24',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Direct MMS URL
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
        'only_matching': True,
    }, {
        'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
        'only_matching': True,
    }]

    def _extract_from_content_id(self, content_id, url):
        """Build the info dict for a ContentItem from its JSON description.

        content_id is the UUID of the ContentItem; url is the page URL and is
        only used to resolve relative thumbnail and subtitle URLs.

        Raises ExtractorError('not a media file') when the item's 'type'
        mentions neither 'Audio' nor 'Video'.
        """
        media = self._download_json(
            'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
            content_id, 'Downloading video JSON')

        title = media['name'].strip()

        media_type = media['type']
        if 'Audio' in media_type:
            # Audio items carry a direct URL; no relinker round trip needed
            relinker_info = {
                'formats': [{
                    'format_id': media.get('formatoAudio'),
                    'url': media['audioUrl'],
                    'ext': media.get('formatoAudio'),
                }]
            }
        elif 'Video' in media_type:
            relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
        else:
            raise ExtractorError('not a media file')

        self._sort_formats(relinker_info['formats'])

        thumbnails = []
        for image_type in ('image', 'image_medium', 'image_300'):
            thumbnail_url = media.get(image_type)
            if thumbnail_url:
                thumbnails.append({
                    'url': compat_urlparse.urljoin(url, thumbnail_url),
                })

        subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))

        info = {
            'id': content_id,
            'title': title,
            'description': strip_or_none(media.get('desc')),
            'thumbnails': thumbnails,
            'uploader': media.get('author'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(media.get('length')),
            'subtitles': subtitles,
        }

        # Relinker values ('formats', 'is_live', 'duration') take precedence
        info.update(relinker_info)

        return info

    def _real_extract(self, url):
        """Extract a Rai page, preferring the ContentItem JSON when available.

        The ContentItem id is searched first in the page's meta tags, then in
        known script/markup patterns.  That id and then the id from the URL
        are tried through _extract_from_content_id().  When neither yields
        media, the relinker URL embedded in the page is used instead and only
        'id', 'title' and the relinker data are returned.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        content_item_id = None

        content_item_url = self._html_search_meta(
            ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
             'twitter:player', 'jsonlink'), webpage, default=None)
        if content_item_url:
            content_item_id = self._search_regex(
                r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
                'content item id', default=None)

        if not content_item_id:
            content_item_id = self._search_regex(
                r'''(?x)
                    (?:
                        (?:initEdizione|drawMediaRaiTV)\(|
                        <(?:[^>]+\bdata-id|var\s+uniquename)=
                    )
                    (["\'])
                    (?:(?!\1).)*\bContentItem-(?P<id>%s)
                ''' % self._UUID_RE,
                webpage, 'content item id', default=None, group='id')

        # Ordered and de-duplicated: the id found in the page is tried before
        # the id taken from the URL.  A set would iterate in arbitrary order
        # and make the attempt order differ between runs.
        candidate_ids = []
        if content_item_id:
            candidate_ids.append(content_item_id)
        if video_id not in candidate_ids:
            candidate_ids.append(video_id)

        for candidate_id in candidate_ids:
            try:
                return self._extract_from_content_id(candidate_id, url)
            except GeoRestrictedError:
                # Geo blocking must reach the user, not trigger a fallback
                raise
            except ExtractorError:
                # Best effort: try the next candidate or the relinker fallback
                pass

        relinker_url = self._search_regex(
            r'''(?x)
                (?:
                    var\s+videoURL|
                    mediaInfo\.mediaUri
                )\s*=\s*
                ([\'"])
                (?P<url>
                    (?:https?:)?
                    //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
                    (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
            ''',
            webpage, 'relinker URL', group='url')

        # urljoin() supplies the scheme for protocol-relative relinker URLs
        relinker_info = self._extract_relinker_info(
            urljoin(url, relinker_url), video_id)
        self._sort_formats(relinker_info['formats'])

        title = self._search_regex(
            r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
            webpage, 'title', group='title',
            default=None) or self._og_search_title(webpage)

        info = {
            'id': video_id,
            'title': title,
        }

        info.update(relinker_info)

        return info
Commit	Line	Data
b0adbe98 S	1	from __future__ import unicode_literals
b0adbe98 S	2
b8d8cced S	3	import re
b8d8cced S	4
afbdd3ac	5	from .common import InfoExtractor
b8d8cced S	6	from ..compat import (
	7	compat_urlparse,
	8	compat_str,
	9	)
b0adbe98	10	from ..utils import (
f1388739	11	ExtractorError,
51342717	12	determine_ext,
f1388739 YCH	13	find_xpath_attr,
f1388739 YCH	14	fix_xml_ampersands,
b8d8cced	15	GeoRestrictedError,
f1388739	16	int_or_none,
b0adbe98	17	parse_duration,
b8d8cced S	18	strip_or_none,
b8d8cced S	19	try_get,
1115271a	20	unescapeHTML,
b0adbe98	21	unified_strdate,
b8d8cced	22	unified_timestamp,
f1388739	23	update_url_query,
b8d8cced	24	urljoin,
06d5556d	25	xpath_text,
b0adbe98 S	26	)
	27
	28
034a8849	29	class RaiBaseIE(InfoExtractor):
b8d8cced S	30	_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
	31	_GEO_COUNTRIES = ['IT']
	32	_GEO_BYPASS = False
	33
	34	def _extract_relinker_info(self, relinker_url, video_id):
0c7b4f49 RA	35	if not re.match(r'https?://', relinker_url):
	36	return {'formats': [{'url': relinker_url}]}
	37
034a8849	38	formats = []
b8d8cced S	39	geoprotection = None
	40	is_live = None
	41	duration = None
034a8849 YCH	42
034a8849 YCH	43	for platform in ('mon', 'flash', 'native'):
034a8849 YCH	44	relinker = self._download_xml(
	45	relinker_url, video_id,
	46	note='Downloading XML metadata for platform %s' % platform,
	47	transform_source=fix_xml_ampersands,
38cce791 YCH	48	query={'output': 45, 'pl': platform},
38cce791 YCH	49	headers=self.geo_verification_headers())
034a8849	50
b8d8cced S	51	if not geoprotection:
	52	geoprotection = xpath_text(
	53	relinker, './geoprotection', default=None) == 'Y'
	54
	55	if not is_live:
	56	is_live = xpath_text(
	57	relinker, './is_live', default=None) == 'Y'
	58	if not duration:
	59	duration = parse_duration(xpath_text(
	60	relinker, './duration', default=None))
	61
	62	url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
	63	if url_elem is None:
	64	continue
	65
	66	media_url = url_elem.text
	67
	68	# This does not imply geo restriction (e.g.
	69	# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
034a8849	70	if media_url == 'http://download.rai.it/video_no_available.mp4':
b8d8cced	71	continue
034a8849 YCH	72
	73	ext = determine_ext(media_url)
	74	if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
	75	continue
	76
c17eb5b4	77	if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
034a8849 YCH	78	formats.extend(self._extract_m3u8_formats(
	79	media_url, video_id, 'mp4', 'm3u8_native',
	80	m3u8_id='hls', fatal=False))
c17eb5b4	81	elif ext == 'f4m' or platform == 'flash':
034a8849 YCH	82	manifest_url = update_url_query(
	83	media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
	84	{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
	85	formats.extend(self._extract_f4m_formats(
	86	manifest_url, video_id, f4m_id='hds', fatal=False))
	87	else:
	88	bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
	89	formats.append({
	90	'url': media_url,
	91	'tbr': bitrate if bitrate > 0 else None,
	92	'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
	93	})
	94
b8d8cced S	95	if not formats and geoprotection is True:
	96	self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
	97
	98	return dict((k, v) for k, v in {
	99	'is_live': is_live,
	100	'duration': duration,
	101	'formats': formats,
	102	}.items() if v is not None)
034a8849	103
1b3feca0 S	104	@staticmethod
	105	def _extract_subtitles(url, subtitle_url):
	106	subtitles = {}
	107	if subtitle_url and isinstance(subtitle_url, compat_str):
	108	subtitle_url = urljoin(url, subtitle_url)
	109	STL_EXT = '.stl'
	110	SRT_EXT = '.srt'
	111	subtitles['it'] = [{
	112	'ext': 'stl',
	113	'url': subtitle_url,
	114	}]
	115	if subtitle_url.endswith(STL_EXT):
	116	srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
	117	subtitles['it'].append({
	118	'ext': 'srt',
	119	'url': srt_url,
	120	})
	121	return subtitles
	122
2b28b892	123
51342717	124	class RaiPlayIE(RaiBaseIE):
b8d8cced	125	_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
51342717 T	126	_TESTS = [{
	127	'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
	128	'md5': '340aa3b7afb54bfd14a8c11786450d76',
	129	'info_dict': {
	130	'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
	131	'ext': 'mp4',
	132	'title': 'La Casa Bianca',
b8d8cced S	133	'alt_title': 'S2016 - Puntata del 23/10/2016',
b8d8cced S	134	'description': 'md5:a09d45890850458077d1f68bb036e0a5',
51342717	135	'thumbnail': r're:^https?://.*\.jpg$',
b8d8cced S	136	'uploader': 'Rai 3',
	137	'creator': 'Rai 3',
	138	'duration': 3278,
	139	'timestamp': 1477764300,
	140	'upload_date': '20161029',
	141	'series': 'La Casa Bianca',
	142	'season': '2016',
	143	},
51342717 T	144	}, {
	145	'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
	146	'md5': '8970abf8caf8aef4696e7b1f2adfc696',
	147	'info_dict': {
	148	'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
	149	'ext': 'mp4',
b8d8cced S	150	'title': 'Report del 07/04/2014',
	151	'alt_title': 'S2013/14 - Puntata del 07/04/2014',
	152	'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
51342717	153	'thumbnail': r're:^https?://.*\.jpg$',
b8d8cced S	154	'uploader': 'Rai 5',
	155	'creator': 'Rai 5',
	156	'duration': 6160,
	157	'series': 'Report',
	158	'season_number': 5,
	159	'season': '2013/14',
	160	},
	161	'params': {
	162	'skip_download': True,
	163	},
	164	}, {
	165	'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
	166	'only_matching': True,
51342717	167	}]
2b28b892	168
51342717	169	def _real_extract(self, url):
b8d8cced S	170	mobj = re.match(self._VALID_URL, url)
b8d8cced S	171	url, video_id = mobj.group('url', 'id')
2b28b892	172
b8d8cced S	173	media = self._download_json(
b8d8cced S	174	'%s?json' % url, video_id, 'Downloading video JSON')
2b28b892	175
b8d8cced S	176	title = media['name']
	177
	178	video = media['video']
	179
	180	relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
	181	self._sort_formats(relinker_info['formats'])
2b28b892	182
51342717 T	183	thumbnails = []
	184	if 'images' in media:
	185	for _, value in media.get('images').items():
	186	if value:
	187	thumbnails.append({
b8d8cced	188	'url': value.replace('[RESOLUTION]', '600x400')
51342717	189	})
034a8849	190
b8d8cced S	191	timestamp = unified_timestamp(try_get(
b8d8cced S	192	media, lambda x: x['availabilities'][0]['start'], compat_str))
b0adbe98	193
1b3feca0 S	194	subtitles = self._extract_subtitles(url, video.get('subtitles'))
1b3feca0 S	195
b8d8cced	196	info = {
51342717	197	'id': video_id,
9c48b5a1 S	198	'title': self._live_title(title) if relinker_info.get(
9c48b5a1 S	199	'is_live') else title,
b8d8cced S	200	'alt_title': media.get('subtitle'),
b8d8cced S	201	'description': media.get('description'),
9c48b5a1 S	202	'uploader': strip_or_none(media.get('channel')),
9c48b5a1 S	203	'creator': strip_or_none(media.get('editor')),
b8d8cced S	204	'duration': parse_duration(video.get('duration')),
b8d8cced S	205	'timestamp': timestamp,
51342717	206	'thumbnails': thumbnails,
b8d8cced S	207	'series': try_get(
	208	media, lambda x: x['isPartOf']['name'], compat_str),
	209	'season_number': int_or_none(try_get(
	210	media, lambda x: x['isPartOf']['numeroStagioni'])),
	211	'season': media.get('stagione') or None,
1b3feca0	212	'subtitles': subtitles,
51342717	213	}
b0adbe98	214
b8d8cced	215	info.update(relinker_info)
b8d8cced S	216	return info
b8d8cced S	217
06d5556d	218
449c6657	219	class RaiPlayLiveIE(RaiBaseIE):
9c48b5a1	220	_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
449c6657	221	_TEST = {
9c48b5a1 S	222	'url': 'http://www.raiplay.it/dirette/rainews24',
	223	'info_dict': {
	224	'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
	225	'display_id': 'rainews24',
	226	'ext': 'mp4',
	227	'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	228	'description': 'md5:6eca31500550f9376819f174e5644754',
	229	'uploader': 'Rai News 24',
	230	'creator': 'Rai News 24',
	231	'is_live': True,
	232	},
	233	'params': {
	234	'skip_download': True,
	235	},
449c6657	236	}
	237
	238	def _real_extract(self, url):
9c48b5a1 S	239	display_id = self._match_id(url)
	240
	241	webpage = self._download_webpage(url, display_id)
449c6657	242
9c48b5a1 S	243	video_id = self._search_regex(
	244	r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
	245	webpage, 'content id')
449c6657	246
9c48b5a1 S	247	return {
	248	'_type': 'url_transparent',
	249	'ie_key': RaiPlayIE.ie_key(),
	250	'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
	251	'id': video_id,
	252	'display_id': display_id,
	253	}
449c6657	254
449c6657	255
1115271a S	256	class RaiPlayPlaylistIE(InfoExtractor):
	257	_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
	258	_TESTS = [{
	259	'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
	260	'info_dict': {
	261	'id': 'nondirloalmiocapo',
	262	'title': 'Non dirlo al mio capo',
	263	'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
	264	},
	265	'playlist_mincount': 12,
	266	}]
	267
	268	def _real_extract(self, url):
	269	playlist_id = self._match_id(url)
	270
	271	webpage = self._download_webpage(url, playlist_id)
	272
	273	title = self._html_search_meta(
	274	('programma', 'nomeProgramma'), webpage, 'title')
	275	description = unescapeHTML(self._html_search_meta(
	276	('description', 'og:description'), webpage, 'description'))
1115271a S	277
	278	entries = []
	279	for mobj in re.finditer(
	280	r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
	281	webpage):
	282	video_url = urljoin(url, mobj.group('path'))
	283	entries.append(self.url_result(
	284	video_url, ie=RaiPlayIE.ie_key(),
	285	video_id=RaiPlayIE._match_id(video_url)))
	286
	287	return self.playlist_result(entries, playlist_id, title, description)
	288
	289
034a8849	290	class RaiIE(RaiBaseIE):
2b2da3ba	291	_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it\|tv)\|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
51342717	292	_TESTS = [{
b8d8cced S	293	# var uniquename = "ContentItem-..."
b8d8cced S	294	# data-id="ContentItem-..."
51342717 T	295	'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
	296	'info_dict': {
	297	'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
	298	'ext': 'mp4',
	299	'title': 'TG PRIMO TEMPO',
b8d8cced	300	'thumbnail': r're:^https?://.*\.jpg$',
51342717	301	'duration': 1758,
b8d8cced	302	'upload_date': '20140612',
51342717 T	303	}
51342717 T	304	}, {
b8d8cced	305	# with ContentItem in many metas
51342717 T	306	'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
	307	'info_dict': {
	308	'id': '1632c009-c843-4836-bb65-80c33084a64b',
	309	'ext': 'mp4',
b8d8cced S	310	'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
b8d8cced S	311	'description': 'I film in uscita questa settimana.',
51342717	312	'thumbnail': r're:^https?://.*\.png$',
b8d8cced S	313	'duration': 833,
b8d8cced S	314	'upload_date': '20161103',
51342717 T	315	}
51342717 T	316	}, {
b8d8cced	317	# with ContentItem in og:url
51342717 T	318	'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
	319	'md5': '11959b4e44fa74de47011b5799490adf',
	320	'info_dict': {
	321	'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
	322	'ext': 'mp4',
	323	'title': 'TG1 ore 20:00 del 03/11/2016',
b8d8cced	324	'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
51342717	325	'thumbnail': r're:^https?://.*\.jpg$',
b8d8cced	326	'duration': 2214,
51342717	327	'upload_date': '20161103',
51342717 T	328	}
51342717 T	329	}, {
b8d8cced	330	# drawMediaRaiTV(...)
51342717 T	331	'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
	332	'md5': '2dd727e61114e1ee9c47f0da6914e178',
	333	'info_dict': {
	334	'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
	335	'ext': 'mp4',
	336	'title': 'Il pacco',
	337	'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
b8d8cced	338	'thumbnail': r're:^https?://.*\.jpg$',
51342717	339	'upload_date': '20141221',
034a8849	340	},
51342717	341	}, {
b8d8cced S	342	# initEdizione('ContentItem-...'
	343	'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
	344	'info_dict': {
	345	'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
	346	'ext': 'mp4',
	347	'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
	348	'duration': 2274,
	349	'upload_date': '20170401',
	350	},
	351	'skip': 'Changes daily',
	352	}, {
	353	# HDS live stream with only relinker URL
51342717	354	'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
51342717 T	355	'info_dict': {
	356	'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
	357	'ext': 'flv',
	358	'title': 'EuroNews',
2b28b892	359	},
b8d8cced S	360	'params': {
b8d8cced S	361	'skip_download': True,
2b28b892	362	},
51342717	363	}, {
b8d8cced	364	# HLS live stream with ContentItem in og:url
51342717	365	'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
51342717 T	366	'info_dict': {
	367	'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
	368	'ext': 'mp4',
	369	'title': 'La diretta di Rainews24',
15e4b6b7	370	},
b8d8cced S	371	'params': {
	372	'skip_download': True,
	373	},
0c7b4f49 RA	374	}, {
	375	# Direct MMS URL
	376	'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
	377	'only_matching': True,
2b2da3ba S	378	}, {
	379	'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
	380	'only_matching': True,
51342717	381	}]
06d5556d	382
def _extract_from_content_id(self, content_id, url):
    """Build an info dict for a single Rai ContentItem.

    Downloads the JSON description of ``content_id`` and turns it into
    formats plus metadata.  Audio items use the direct ``audioUrl``;
    video items are resolved via ``_extract_relinker_info`` from their
    ``mediaUri``.  Relative thumbnail URLs are resolved against ``url``.

    Raises ExtractorError when the item's type is neither audio nor video.
    """
    json_url = (
        'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json'
        % content_id)
    media = self._download_json(
        json_url, content_id, 'Downloading video JSON')

    title = media['name'].strip()

    kind = media['type']
    if 'Audio' in kind:
        # Audio items carry a direct URL; the declared audio format is
        # used both as format id and as file extension.
        audio_format = media.get('formatoAudio')
        relinker_info = {
            'formats': [{
                'format_id': audio_format,
                'url': media['audioUrl'],
                'ext': audio_format,
            }]
        }
    elif 'Video' in kind:
        relinker_info = self._extract_relinker_info(
            media['mediaUri'], content_id)
    else:
        raise ExtractorError('not a media file')

    self._sort_formats(relinker_info['formats'])

    # Keep only the image variants that are actually present.
    thumbnails = [
        {'url': compat_urlparse.urljoin(url, media[image_key])}
        for image_key in ('image', 'image_medium', 'image_300')
        if media.get(image_key)
    ]

    subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))

    info = {
        'id': content_id,
        'title': title,
        'description': strip_or_none(media.get('desc')),
        'thumbnails': thumbnails,
        'uploader': media.get('author'),
        'upload_date': unified_strdate(media.get('date')),
        'duration': parse_duration(media.get('length')),
        'subtitles': subtitles,
    }
    # Formats (and anything else the relinker step produced) override
    # the metadata gathered above.
    info.update(relinker_info)
    return info
	430
	431	def _real_extract(self, url):
	432	video_id = self._match_id(url)
	433
	434	webpage = self._download_webpage(url, video_id)
	435
	436	content_item_id = None
	437
	438	content_item_url = self._html_search_meta(
	439	('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
	440	'twitter:player', 'jsonlink'), webpage, default=None)
	441	if content_item_url:
	442	content_item_id = self._search_regex(
	443	r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
	444	'content item id', default=None)
	445
	446	if not content_item_id:
	447	content_item_id = self._search_regex(
	448	r'''(?x)
	449	(?:
	450	(?:initEdizione\|drawMediaRaiTV)\(\|
	451	<(?:[^>]+\bdata-id\|var\s+uniquename)=
	452	)
	453	(["\'])
	454	(?:(?!\1).)*\bContentItem-(?P<id>%s)
	455	''' % self._UUID_RE,
	456	webpage, 'content item id', default=None, group='id')
	457
	458	content_item_ids = set()
361f293a S	459	if content_item_id:
361f293a S	460	content_item_ids.add(content_item_id)
b8d8cced S	461	if video_id not in content_item_ids:
	462	content_item_ids.add(video_id)
	463
	464	for content_item_id in content_item_ids:
	465	try:
	466	return self._extract_from_content_id(content_item_id, url)
	467	except GeoRestrictedError:
	468	raise
	469	except ExtractorError:
	470	pass
	471
	472	relinker_url = self._search_regex(
	473	r'''(?x)
	474	(?:
	475	var\s+videoURL\|
	476	mediaInfo\.mediaUri
	477	)\s=\s
	478	([\'"])
	479	(?P<url>
	480	(?:https?:)?
	481	//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
	482	(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
	483	''',
	484	webpage, 'relinker URL', group='url')
	485
	486	relinker_info = self._extract_relinker_info(
	487	urljoin(url, relinker_url), video_id)
	488	self._sort_formats(relinker_info['formats'])
	489
	490	title = self._search_regex(
	491	r'var\s+videoTitolo\s=\s([\'"])(?P<title>[^\'"]+)\1',
	492	webpage, 'title', group='title',
	493	default=None) or self._og_search_title(webpage)
	494
	495	info = {
	496	'id': video_id,
	497	'title': title,
	498	}
	499
	500	info.update(relinker_info)
	501
	502	return info