[yt-dlp.git] / yt_dlp / extractor / mediaset.py

import functools
import re

from .theplatform import ThePlatformBaseIE
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    int_or_none,
    OnDemandPagedList,
    try_get,
    urljoin,
    update_url_query,
)


class MediasetIE(ThePlatformBaseIE):
    """Extractor for a single Mediaset video identified by its programme GUID.

    Handles ``mediaset:<GUID>`` pseudo-URLs as well as video/on-demand/movie
    pages and player iframe URLs hosted on ``*.mediaset.it``.
    """

    _TP_TLD = 'eu'
    # Programme GUID: the letter "F" followed by 15 digits/uppercase letters
    _GUID_RE = r'F[0-9A-Z]{15}'
    _VALID_URL = rf'''(?x)
                    (?:
                        mediaset:|
                        https?://
                            (?:\w+\.)+mediaset\.it/
                            (?:
                                (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
                                player/(?:v\d+/)?index\.html\?\S*?\bprogramGuid=
                            )
                    )(?P<id>{_GUID_RE})
                    '''

    _EMBED_REGEX = [
        rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]'
    ]
    _TESTS = [{
        # full episode
        'url': 'https://mediasetinfinity.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
        'md5': 'a7e75c6384871f322adb781d3bd72c26',
        'info_dict': {
            'id': 'F310575103000102',
            'ext': 'mp4',
            'title': 'Episodio 1',
            'description': 'md5:e8017b7d7194e9bfb75299c2b8d81e02',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2682.0,
            'upload_date': '20210530',
            'series': 'Mr Wrong - Lezioni d\'amore',
            'timestamp': 1622413946,
            'uploader': 'Canale 5',
            'uploader_id': 'C5',
            'season': 'Season 1',
            'episode': 'Episode 1',
            'season_number': 1,
            'episode_number': 1,
            'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}],
        },
    }, {
        'url': 'https://mediasetinfinity.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
        'md5': '1276f966ac423d16ba255ce867de073e',
        'info_dict': {
            'id': 'F309013801000501',
            'ext': 'mp4',
            'title': 'Puntata del 25 maggio',
            'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 6565.008,
            'upload_date': '20200903',
            'series': 'Matrix',
            'timestamp': 1599172492,
            'uploader': 'Canale 5',
            'uploader_id': 'C5',
            'season': 'Season 5',
            'episode': 'Episode 5',
            'season_number': 5,
            'episode_number': 5,
            'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
        },
    }, {
        # DRM
        'url': 'https://mediasetinfinity.mediaset.it/movie/selvaggi/selvaggi_F006474501000101',
        'info_dict': {
            'id': 'F006474501000101',
            'ext': 'mp4',
            'title': 'Selvaggi',
            'description': 'md5:cfdedbbfdd12d4d0e5dcf1fa1b75284f',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 5233.01,
            'upload_date': '20210729',
            'timestamp': 1627594716,
            'uploader': 'Cine34',
            'uploader_id': 'B6',
            'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}],
        },
        'params': {
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences',
            'Content behind paywall and DRM',
        ],
        'skip': True,
    }, {
        # old domain
        'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
        'only_matching': True,
    }, {
        # iframe
        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
        'only_matching': True,
    }, {
        'url': 'mediaset:FAFU000000665924',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        # Mediaset embed
        'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
        'info_dict': {
            'id': 'FD00000000004929',
            'ext': 'mp4',
            'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
            'duration': 67.013,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Mediaset Play',
            'uploader_id': 'QY',
            'upload_date': '20201005',
            'timestamp': 1601866168,
            'chapters': [],
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Dead link',
    }, {
        # WittyTV embed
        'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
        'info_dict': {
            'id': 'F312172801000801',
            'ext': 'mp4',
            'title': 'Ultima puntata - Venerdì 25 novembre',
            'description': 'Una serata all\'insegna della musica e del buonumore ma non priva di spunti di riflessione',
            'duration': 6203.01,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Canale 5',
            'uploader_id': 'C5',
            'upload_date': '20221126',
            'timestamp': 1669428689,
            'chapters': list,
            'series': 'Maurizio Costanzo Show',
            'season': 'Season 12',
            'season_number': 12,
            'episode': 'Episode 8',
            'episode_number': 8,
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _parse_smil_formats_and_subtitles(
            self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Rewrite selected manifest URLs in the SMIL, then defer to the parent parser.

        For every ``<video>`` node whose ``src`` is a ``vod05t`` Akamai ``.mpd``
        URL carrying a query string, the ``t`` following ``vod05`` and the whole
        query string are removed. All arguments are forwarded unchanged to the
        parent implementation, whose return value is returned as-is.
        """
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                # No source URL to rewrite; leave the node for the parent parser
                continue
            video.set('src', re.sub(
                r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?\.mpd)\?.+', r'\1\2', src))
        return super()._parse_smil_formats_and_subtitles(
            smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)

    def _check_drm_formats(self, tp_formats, video_id):
        """Flag SAMPLE-AES formats as DRM and try to add a DRM-free HLS variant.

        ``tp_formats`` is modified in place:

        * every format whose ``manifest_url`` contains ``_sampleaes/`` gets
          ``has_drm = True``;
        * if no format with a manifest URL is DRM-free, the first DRM manifest
          URL is rewritten (``_sampleaes/<x>_fp_`` -> ``/<x>_no_``) and, when
          the rewrite changed something, the formats of that manifest are
          appended (non-fatally).

        Returns ``None``.
        """
        has_nondrm, drm_manifest = False, ''
        for f in tp_formats:
            if '_sampleaes/' in (f.get('manifest_url') or ''):
                # Remember only the first DRM manifest encountered
                drm_manifest = drm_manifest or f['manifest_url']
                f['has_drm'] = True
            if not f.get('has_drm') and f.get('manifest_url'):
                has_nondrm = True

        nodrm_manifest = re.sub(r'_sampleaes/(\w+)_fp_', r'/\1_no_', drm_manifest)
        # Nothing to do if a playable manifest already exists or the URL
        # could not be rewritten (including the "no DRM manifest" case)
        if has_nondrm or nodrm_manifest == drm_manifest:
            return

        tp_formats.extend(self._extract_m3u8_formats(
            nodrm_manifest, video_id, m3u8_id='hls', fatal=False) or [])

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for the GUID found in ``url``.

        The SMIL is requested once per format family; failures are remembered
        and only raised (geo-restriction errors first) when no format at all
        could be extracted. Metadata from the programme feed is then layered on
        top of the ThePlatform metadata.
        """
        guid = self._match_id(url)
        tp_path = f'PR1GhC/media/guid/2702976343/{guid}'
        info = self._extract_theplatform_metadata(tp_path, guid)

        formats = []
        subtitles = {}
        first_e = geo_e = None
        # Sent verbatim as the `assetTypes` query parameter of every SMIL request
        asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD'
        # TODO: fixup ISM+none manifest URLs
        for fmt in ('MPEG4', 'MPEG-DASH', 'M3U'):
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    update_url_query(f'http://link.theplatform.{self._TP_TLD}/s/{tp_path}', {
                        'mbr': 'true',
                        'formats': fmt,
                        'assetTypes': asset_type,
                    }), guid, f'Downloading {fmt.split("+")[0]} SMIL data')
            except ExtractorError as e:
                # Replace the generic "no matching release" message with a DRM notice
                if e.orig_msg == 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences':
                    e.orig_msg = 'This video is DRM protected'
                if not geo_e and isinstance(e, GeoRestrictedError):
                    geo_e = e
                if not first_e:
                    first_e = e
                continue
            self._check_drm_formats(tp_formats, guid)
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)

        # check for errors and report them
        if (first_e or geo_e) and not formats:
            raise geo_e or first_e

        feed_data = self._download_json(
            f'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/{guid}',
            guid, fatal=False)
        if feed_data:
            publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
            thumbnails = feed_data.get('thumbnails') or {}
            # URL of the first thumbnail whose key marks it as a keyframe poster
            thumbnail = next((
                value.get('url') for key, value in thumbnails.items()
                if key.startswith('image_keyframe_poster-')), None)

            feed_info = {
                'description': info.get('description') or feed_data.get('description') or feed_data.get('longDescription'),
                'uploader': publish_info.get('description'),
                'uploader_id': publish_info.get('channel'),
                'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
                'thumbnail': thumbnail,
            }

            if feed_data.get('programType') == 'episode':
                feed_info.update({
                    'episode_number': int_or_none(
                        feed_data.get('tvSeasonEpisodeNumber')),
                    'season_number': int_or_none(
                        feed_data.get('tvSeasonNumber')),
                    'series': feed_data.get('mediasetprogram$brandTitle'),
                })

            # Apply only what the feed actually provided, so that a field
            # missing from the feed does not wipe an already extracted value
            info.update({key: value for key, value in feed_info.items() if value is not None})

        info.update({
            'id': guid,
            'formats': formats,
            'subtitles': subtitles,
        })
        return info


class MediasetShowIE(MediasetIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for Mediaset show, season and sub-brand pages.

    URLs without an ``sb`` (sub-brand) component are scraped for links to
    sub-brand pages; URLs with one are paged through the programme feed.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        https?://
                            (?:\w+\.)+mediaset\.it/
                            (?:
                                (?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
                                    (?:[a-z-]+)_SE(?P<id>\d{12})
                                    (?:,ST(?P<st>\d{12}))?
                                    (?:,sb(?P<sb>\d{9}))?$
                            )
                    )
                    '''
    _TESTS = [{
        # TV Show webpage (general webpage)
        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
        'info_dict': {
            'id': '000000000061',
            'title': 'Le Iene 2022/2023',
        },
        'playlist_mincount': 6,
    }, {
        # TV Show webpage (specific season)
        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
        'info_dict': {
            'id': '000000002763',
            'title': 'Le Iene 2021/2022',
        },
        'playlist_mincount': 7,
    }, {
        # TV Show specific playlist (with multiple pages)
        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
        'info_dict': {
            'id': '100013375',
            'title': 'I servizi',
        },
        'playlist_mincount': 50,
    }]

    # Feed query template: sub-brand id, then the 1-based inclusive entry range
    _BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
    _PAGE_SIZE = 25

    def _fetch_page(self, sb, page):
        """Yield ``mediaset:<guid>`` URL results for one feed page of a sub-brand.

        ``page`` is zero-based; it is mapped onto the 1-based inclusive range
        expected by the feed. Each result carries the entry's sub-brand
        description as ``playlist_title`` (``None`` if the entry has none).
        """
        lower_limit = page * self._PAGE_SIZE + 1
        upper_limit = lower_limit + self._PAGE_SIZE - 1
        content = self._download_json(
            self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
        for entry in content.get('entries') or []:
            yield self.url_result(
                'mediaset:' + entry['guid'],
                # Cosmetic field: a missing description must not abort the page
                playlist_title=entry.get('mediasetprogram$subBrandDescription'))

    def _real_extract(self, url):
        """Return a playlist for a show/season page or for a single sub-brand."""
        playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
        if not sb:
            page = self._download_webpage(url, st or playlist_id)
            # Every link to a sub-brand page becomes one playlist entry
            entries = [
                self.url_result(urljoin('https://mediasetinfinity.mediaset.it', href))
                for href in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
            # Guard against a page without a usable <title>; keep only the
            # part before the first "|"
            title = (self._html_extract_title(page) or '').split('|')[0].strip() or None
            return self.playlist_result(entries, st or playlist_id, title)

        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, sb),
            self._PAGE_SIZE)
        # Playlist title is taken from the first entry, if there is one
        title = try_get(entries, lambda x: x[0]['playlist_title'])

        return self.playlist_result(entries, sb, title)
Commit	Line	Data
e6ff66ef	1	import functools
ca04de46 S	2	import re
ca04de46 S	3
38f1eb0a	4	from .theplatform import ThePlatformBaseIE
0de13634	5	from ..utils import (
38f1eb0a	6	ExtractorError,
22219f2d	7	GeoRestrictedError,
38f1eb0a	8	int_or_none,
e6ff66ef	9	OnDemandPagedList,
e6ff66ef	10	try_get,
e6ff66ef	11	urljoin,
38f1eb0a	12	update_url_query,
0de13634 T	13	)
	14
	15
38f1eb0a RA	16	class MediasetIE(ThePlatformBaseIE):
38f1eb0a RA	17	_TP_TLD = 'eu'
10dc8592	18	_GUID_RE = r'F[0-9A-Z]{15}'
10dc8592	19	_VALID_URL = rf'''(?x)
ca04de46 S	20	(?:
	21	mediaset:\|
	22	https?://
8102a599	23	(?:\w+\.)+mediaset\.it/
ca04de46	24	(?:
29f7c58a	25	(?:video\|on-demand\|movie)/(?:[^/]+/)+[^/]+_\|
10dc8592	26	player/(?:v\d+/)?index\.html\?\S*?\bprogramGuid=
ca04de46	27	)
10dc8592	28	)(?P<id>{_GUID_RE})
56f9c77f	29	'''
10dc8592	30
	31	_EMBED_REGEX = [
	32	rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]'
	33	]
0de13634 T	34	_TESTS = [{
0de13634 T	35	# full episode
10dc8592	36	'url': 'https://mediasetinfinity.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
02226207	37	'md5': 'a7e75c6384871f322adb781d3bd72c26',
0de13634	38	'info_dict': {
02226207	39	'id': 'F310575103000102',
0de13634	40	'ext': 'mp4',
02226207	41	'title': 'Episodio 1',
22219f2d	42	'description': 'md5:e8017b7d7194e9bfb75299c2b8d81e02',
56f9c77f	43	'thumbnail': r're:^https?://.*\.jpg$',
02226207	44	'duration': 2682.0,
	45	'upload_date': '20210530',
	46	'series': 'Mr Wrong - Lezioni d\'amore',
	47	'timestamp': 1622413946,
	48	'uploader': 'Canale 5',
	49	'uploader_id': 'C5',
22219f2d	50	'season': 'Season 1',
	51	'episode': 'Episode 1',
	52	'season_number': 1,
	53	'episode_number': 1,
	54	'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}],
56f9c77f	55	},
9cf648c9	56	}, {
10dc8592	57	'url': 'https://mediasetinfinity.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
49ca8db0	58	'md5': '1276f966ac423d16ba255ce867de073e',
9cf648c9	59	'info_dict': {
38f1eb0a	60	'id': 'F309013801000501',
9cf648c9 T	61	'ext': 'mp4',
9cf648c9 T	62	'title': 'Puntata del 25 maggio',
22219f2d	63	'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
9cf648c9	64	'thumbnail': r're:^https?://.*\.jpg$',
02226207	65	'duration': 6565.008,
02226207	66	'upload_date': '20200903',
9cf648c9	67	'series': 'Matrix',
02226207	68	'timestamp': 1599172492,
38f1eb0a RA	69	'uploader': 'Canale 5',
38f1eb0a RA	70	'uploader_id': 'C5',
22219f2d	71	'season': 'Season 5',
	72	'episode': 'Episode 5',
	73	'season_number': 5,
	74	'episode_number': 5,
	75	'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
9cf648c9	76	},
ee57a19d	77	}, {
10dc8592	78	# DRM
10dc8592	79	'url': 'https://mediasetinfinity.mediaset.it/movie/selvaggi/selvaggi_F006474501000101',
22219f2d	80	'info_dict': {
	81	'id': 'F006474501000101',
	82	'ext': 'mp4',
	83	'title': 'Selvaggi',
	84	'description': 'md5:cfdedbbfdd12d4d0e5dcf1fa1b75284f',
	85	'thumbnail': r're:^https?://.*\.jpg$',
	86	'duration': 5233.01,
	87	'upload_date': '20210729',
	88	'timestamp': 1627594716,
	89	'uploader': 'Cine34',
	90	'uploader_id': 'B6',
	91	'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}],
49ca8db0	92	},
10dc8592	93	'params': {
	94	'ignore_no_formats_error': True,
	95	},
	96	'expected_warnings': [
	97	'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences',
	98	'Content behind paywall and DRM',
	99	],
	100	'skip': True,
0de13634	101	}, {
10dc8592	102	# old domain
10dc8592	103	'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
56f9c77f	104	'only_matching': True,
0de13634	105	}, {
10dc8592	106	# iframe
38f1eb0a	107	'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
56f9c77f	108	'only_matching': True,
ca04de46	109	}, {
38f1eb0a	110	'url': 'mediaset:FAFU000000665924',
ca04de46	111	'only_matching': True,
10dc8592	112	}]
	113	_WEBPAGE_TESTS = [{
	114	# Mediaset embed
	115	'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
	116	'info_dict': {
	117	'id': 'FD00000000004929',
	118	'ext': 'mp4',
	119	'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
	120	'duration': 67.013,
	121	'thumbnail': r're:^https?://.*\.jpg$',
	122	'uploader': 'Mediaset Play',
	123	'uploader_id': 'QY',
	124	'upload_date': '20201005',
	125	'timestamp': 1601866168,
	126	'chapters': [],
	127	},
	128	'params': {
	129	'skip_download': True,
19c90e40	130	},
19c90e40	131	'skip': 'Dead link',
bf45295c	132	}, {
10dc8592	133	# WittyTV embed
	134	'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
	135	'info_dict': {
	136	'id': 'F312172801000801',
	137	'ext': 'mp4',
	138	'title': 'Ultima puntata - Venerdì 25 novembre',
	139	'description': 'Una serata all\'insegna della musica e del buonumore ma non priva di spunti di riflessione',
	140	'duration': 6203.01,
	141	'thumbnail': r're:^https?://.*\.jpg$',
	142	'uploader': 'Canale 5',
	143	'uploader_id': 'C5',
	144	'upload_date': '20221126',
	145	'timestamp': 1669428689,
	146	'chapters': list,
	147	'series': 'Maurizio Costanzo Show',
	148	'season': 'Season 12',
	149	'season_number': 12,
	150	'episode': 'Episode 8',
	151	'episode_number': 8,
	152	},
	153	'params': {
	154	'skip_download': True,
	155	}
0de13634 T	156	}]
0de13634 T	157
550e6541	158	def _parse_smil_formats_and_subtitles(
550e6541	159	self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
ef382405 RA	160	for video in smil.findall(self._xpath_ns('.//video', namespace)):
ef382405 RA	161	video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
550e6541	162	return super(MediasetIE, self)._parse_smil_formats_and_subtitles(
550e6541	163	smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
ef382405	164
22219f2d	165	def _check_drm_formats(self, tp_formats, video_id):
	166	has_nondrm, drm_manifest = False, ''
	167	for f in tp_formats:
	168	if '_sampleaes/' in (f.get('manifest_url') or ''):
	169	drm_manifest = drm_manifest or f['manifest_url']
	170	f['has_drm'] = True
	171	if not f.get('has_drm') and f.get('manifest_url'):
	172	has_nondrm = True
	173
	174	nodrm_manifest = re.sub(r'_sampleaes/(\w+)_fp_', r'/\1_no_', drm_manifest)
	175	if has_nondrm or nodrm_manifest == drm_manifest:
	176	return
	177
	178	tp_formats.extend(self._extract_m3u8_formats(
	179	nodrm_manifest, video_id, m3u8_id='hls', fatal=False) or [])
	180
0de13634	181	def _real_extract(self, url):
38f1eb0a	182	guid = self._match_id(url)
10dc8592	183	tp_path = f'PR1GhC/media/guid/2702976343/{guid}'
38f1eb0a	184	info = self._extract_theplatform_metadata(tp_path, guid)
0de13634	185
56f9c77f	186	formats = []
38f1eb0a	187	subtitles = {}
22219f2d	188	first_e = geo_e = None
49ca8db0	189	asset_type = 'geoNo:HD,browser,geoIT\|geoNo:HD,geoIT\|geoNo:SD,browser,geoIT\|geoNo:SD,geoIT\|geoNo\|HD\|SD'
02226207	190	# TODO: fixup ISM+none manifest URLs
10dc8592	191	for f in ('MPEG4', 'MPEG-DASH', 'M3U'):
02226207	192	try:
02226207	193	tp_formats, tp_subtitles = self._extract_theplatform_smil(
10dc8592	194	update_url_query(f'http://link.theplatform.{self._TP_TLD}/s/{tp_path}', {
02226207	195	'mbr': 'true',
	196	'formats': f,
	197	'assetTypes': asset_type,
10dc8592	198	}), guid, f'Downloading {f.split("+")[0]} SMIL data')
02226207	199	except ExtractorError as e:
10dc8592	200	if e.orig_msg == 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences':
10dc8592	201	e.orig_msg = 'This video is DRM protected'
22219f2d	202	if not geo_e and isinstance(e, GeoRestrictedError):
22219f2d	203	geo_e = e
02226207	204	if not first_e:
02226207	205	first_e = e
22219f2d	206	continue
22219f2d	207	self._check_drm_formats(tp_formats, guid)
02226207	208	formats.extend(tp_formats)
02226207	209	subtitles = self._merge_subtitles(subtitles, tp_subtitles)
22219f2d	210
	211	# check for errors and report them
	212	if (first_e or geo_e) and not formats:
	213	raise geo_e or first_e
	214
38f1eb0a	215	feed_data = self._download_json(
10dc8592	216	f'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/{guid}',
02226207	217	guid, fatal=False)
38f1eb0a RA	218	if feed_data:
38f1eb0a RA	219	publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
02226207	220	thumbnails = feed_data.get('thumbnails') or {}
	221	thumbnail = None
	222	for key, value in thumbnails.items():
	223	if key.startswith('image_keyframe_poster-'):
	224	thumbnail = value.get('url')
	225	break
	226
38f1eb0a	227	info.update({
22219f2d	228	'description': info.get('description') or feed_data.get('description') or feed_data.get('longDescription'),
38f1eb0a RA	229	'uploader': publish_info.get('description'),
	230	'uploader_id': publish_info.get('channel'),
	231	'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
02226207	232	'thumbnail': thumbnail,
38f1eb0a	233	})
0de13634	234
22219f2d	235	if feed_data.get('programType') == 'episode':
	236	info.update({
	237	'episode_number': int_or_none(
	238	feed_data.get('tvSeasonEpisodeNumber')),
	239	'season_number': int_or_none(
	240	feed_data.get('tvSeasonNumber')),
	241	'series': feed_data.get('mediasetprogram$brandTitle'),
	242	})
	243
38f1eb0a RA	244	info.update({
38f1eb0a RA	245	'id': guid,
56f9c77f	246	'formats': formats,
38f1eb0a RA	247	'subtitles': subtitles,
	248	})
	249	return info
e6ff66ef	250
e6ff66ef	251
6368e2e6	252	class MediasetShowIE(MediasetIE): # XXX: Do not subclass from concrete IE
e6ff66ef	253	_VALID_URL = r'''(?x)
	254	(?:
	255	https?://
8102a599	256	(\w+\.)+mediaset\.it/
e6ff66ef	257	(?:
22219f2d	258	(?:fiction\|programmi-tv\|serie-tv\|kids)/(?:.+?/)?
22219f2d	259	(?:[a-z-]+)_SE(?P<id>\d{12})
e6ff66ef	260	(?:,ST(?P<st>\d{12}))?
	261	(?:,sb(?P<sb>\d{9}))?$
	262	)
	263	)
	264	'''
	265	_TESTS = [{
22219f2d	266	# TV Show webpage (general webpage)
10dc8592	267	'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
e6ff66ef	268	'info_dict': {
22219f2d	269	'id': '000000000061',
10dc8592	270	'title': 'Le Iene 2022/2023',
e6ff66ef	271	},
10dc8592	272	'playlist_mincount': 6,
e6ff66ef	273	}, {
22219f2d	274	# TV Show webpage (specific season)
10dc8592	275	'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
e6ff66ef	276	'info_dict': {
e6ff66ef	277	'id': '000000002763',
10dc8592	278	'title': 'Le Iene 2021/2022',
e6ff66ef	279	},
22219f2d	280	'playlist_mincount': 7,
e6ff66ef	281	}, {
e6ff66ef	282	# TV Show specific playlist (with multiple pages)
10dc8592	283	'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
e6ff66ef	284	'info_dict': {
	285	'id': '100013375',
	286	'title': 'I servizi',
	287	},
22219f2d	288	'playlist_mincount': 50,
e6ff66ef	289	}]
	290
	291	_BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished\|desc,tvSeasonEpisodeNumber\|desc&range=%d-%d'
	292	_PAGE_SIZE = 25
	293
	294	def _fetch_page(self, sb, page):
	295	lower_limit = page * self._PAGE_SIZE + 1
	296	upper_limit = lower_limit + self._PAGE_SIZE - 1
	297	content = self._download_json(
	298	self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
	299	for entry in content.get('entries') or []:
	300	yield self.url_result(
	301	'mediaset:' + entry['guid'],
	302	playlist_title=entry['mediasetprogram$subBrandDescription'])
	303
	304	def _real_extract(self, url):
	305	playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
	306	if not sb:
22219f2d	307	page = self._download_webpage(url, st or playlist_id)
10dc8592	308	entries = [self.url_result(urljoin('https://mediasetinfinity.mediaset.it', url))
e6ff66ef	309	for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
10dc8592	310	title = self._html_extract_title(page).split('\|')[0].strip()
e6ff66ef	311	return self.playlist_result(entries, st or playlist_id, title)
	312
	313	entries = OnDemandPagedList(
	314	functools.partial(self._fetch_page, sb),
	315	self._PAGE_SIZE)
	316	title = try_get(entries, lambda x: x[0]['playlist_title'])
	317
	318	return self.playlist_result(entries, sb, title)