[yt-dlp.git] / yt_dlp / extractor / mediaset.py

import functools
import re

from .theplatform import ThePlatformBaseIE
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    OnDemandPagedList,
    int_or_none,
    try_get,
    update_url_query,
    urljoin,
)


class MediasetIE(ThePlatformBaseIE):
    # Top-level domain interpolated into the link.theplatform.<tld> SMIL URL built in _real_extract
    _TP_TLD = 'eu'
    # A program GUID: the letter "F" followed by 15 digits or uppercase ASCII letters
    _GUID_RE = r'F[0-9A-Z]{15}'
    # Accepted inputs, all ending in a GUID that is captured as "id":
    #   * the internal "mediaset:<guid>" form
    #   * a page on any *.mediaset.it host under /video/, /on-demand/ or /movie/ whose last
    #     path component ends in "_<guid>"
    #   * a player page (/player/[vN/]index.html) whose query string carries programGuid=<guid>
    _VALID_URL = rf'''(?x)
                    (?:
                        mediaset:|
                        https?://
                            (?:\w+\.)+mediaset\.it/
                            (?:
                                (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
                                player/(?:v\d+/)?index\.html\?\S*?\bprogramGuid=
                            )
                    )(?P<id>{_GUID_RE})
                    '''

    # Matches an <iframe> whose src is a *.mediaset.it player page with a programGuid parameter;
    # the "url" group stops right after the GUID (the next character must be a quote or "&")
    _EMBED_REGEX = [
        rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]'
    ]
    _TESTS = [{
        # full episode
        'url': 'https://mediasetinfinity.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
        'md5': 'a7e75c6384871f322adb781d3bd72c26',
        'info_dict': {
            'id': 'F310575103000102',
            'ext': 'mp4',
            'title': 'Episodio 1',
            'description': 'md5:e8017b7d7194e9bfb75299c2b8d81e02',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2682.0,
            'upload_date': '20210530',
            'series': 'Mr Wrong - Lezioni d\'amore',
            'timestamp': 1622413946,
            'uploader': 'Canale 5',
            'uploader_id': 'C5',
            'season': 'Season 1',
            'episode': 'Episode 1',
            'season_number': 1,
            'episode_number': 1,
            'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}],
        },
    }, {
        'url': 'https://mediasetinfinity.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
        'md5': '1276f966ac423d16ba255ce867de073e',
        'info_dict': {
            'id': 'F309013801000501',
            'ext': 'mp4',
            'title': 'Puntata del 25 maggio',
            'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 6565.008,
            'upload_date': '20200903',
            'series': 'Matrix',
            'timestamp': 1599172492,
            'uploader': 'Canale 5',
            'uploader_id': 'C5',
            'season': 'Season 5',
            'episode': 'Episode 5',
            'season_number': 5,
            'episode_number': 5,
            'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
            'categories': ['Informazione'],
        },
    }, {
        # DRM
        'url': 'https://mediasetinfinity.mediaset.it/movie/selvaggi/selvaggi_F006474501000101',
        'info_dict': {
            'id': 'F006474501000101',
            'ext': 'mp4',
            'title': 'Selvaggi',
            'description': 'md5:cfdedbbfdd12d4d0e5dcf1fa1b75284f',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 5233.01,
            'upload_date': '20210729',
            'timestamp': 1627594716,
            'uploader': 'Cine34',
            'uploader_id': 'B6',
            'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}],
        },
        'params': {
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences',
            'Content behind paywall and DRM',
        ],
        'skip': True,
    }, {
        # old domain
        'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
        'only_matching': True,
    }, {
        # iframe
        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
        'only_matching': True,
    }, {
        'url': 'mediaset:FAFU000000665924',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        # Mediaset embed
        'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
        'info_dict': {
            'id': 'FD00000000004929',
            'ext': 'mp4',
            'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
            'duration': 67.013,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Mediaset Play',
            'uploader_id': 'QY',
            'upload_date': '20201005',
            'timestamp': 1601866168,
            'chapters': [],
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Dead link',
    }, {
        # WittyTV embed
        'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
        'info_dict': {
            'id': 'F312172801000801',
            'ext': 'mp4',
            'title': 'Ultima puntata - Venerdì 25 novembre',
            'description': 'Una serata all\'insegna della musica e del buonumore ma non priva di spunti di riflessione',
            'duration': 6203.01,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Canale 5',
            'uploader_id': 'C5',
            'upload_date': '20221126',
            'timestamp': 1669428689,
            'chapters': list,
            'series': 'Maurizio Costanzo Show',
            'season': 'Season 12',
            'season_number': 12,
            'episode': 'Episode 8',
            'episode_number': 8,
            'categories': ['Intrattenimento'],
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _parse_smil_formats_and_subtitles(
            self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Normalise Akamai DASH URLs in the SMIL document, then run the inherited parser.

        For every ``<video>`` element whose ``src`` looks like
        ``http(s)://vod05t-mediaset-it.akamaized.net/<path>.mpd?<query>`` the
        ``t`` after ``vod05`` and the whole query string are dropped. Any other
        ``src`` is left unchanged. The elements are modified in place.

        All parameters are forwarded untouched to the parent implementation,
        whose return value is returned as is.
        """
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                # Nothing to rewrite; leave the element for the parent parser to handle
                continue
            video.set('src', re.sub(
                r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?\.mpd)\?.+', r'\1\2', src))
        return super()._parse_smil_formats_and_subtitles(
            smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)

    def _check_drm_formats(self, tp_formats, video_id):
        has_nondrm, drm_manifest = False, ''
        for f in tp_formats:
            if '_sampleaes/' in (f.get('manifest_url') or ''):
                drm_manifest = drm_manifest or f['manifest_url']
                f['has_drm'] = True
            if not f.get('has_drm') and f.get('manifest_url'):
                has_nondrm = True

        nodrm_manifest = re.sub(r'_sampleaes/(\w+)_fp_', r'/\1_no_', drm_manifest)
        if has_nondrm or nodrm_manifest == drm_manifest:
            return

        tp_formats.extend(self._extract_m3u8_formats(
            nodrm_manifest, video_id, m3u8_id='hls', fatal=False) or [])

    def _real_extract(self, url):
        """Build the info dict for the program GUID found in ``url``.

        Base metadata comes from ThePlatform. Formats and subtitles are gathered
        from one SMIL request per delivery format (MPEG4, MPEG-DASH, M3U); a
        request that fails with an ExtractorError is remembered and skipped.
        The result is then enriched from the programs feed when that feed can
        be downloaded.

        Raises the first geo-restriction error, or otherwise the first
        extraction error, when at least one SMIL request failed and no formats
        were collected at all.
        """
        guid = self._match_id(url)
        tp_path = f'PR1GhC/media/guid/2702976343/{guid}'
        info = self._extract_theplatform_metadata(tp_path, guid)

        smil_base_url = f'http://link.theplatform.{self._TP_TLD}/s/{tp_path}'
        asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD'
        no_release_msg = 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences'

        formats, subtitles = [], {}
        first_error = geo_error = None
        # TODO: fixup ISM+none manifest URLs
        for smil_format in ('MPEG4', 'MPEG-DASH', 'M3U'):
            smil_url = update_url_query(smil_base_url, {
                'mbr': 'true',
                'formats': smil_format,
                'assetTypes': asset_type,
            })
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    smil_url, guid, f'Downloading {smil_format.split("+")[0]} SMIL data')
            except ExtractorError as err:
                # Replace the generic "no matching release" text with a clearer message
                if err.orig_msg == no_release_msg:
                    err.orig_msg = 'This video is DRM protected'
                if isinstance(err, GeoRestrictedError):
                    geo_error = geo_error or err
                first_error = first_error or err
            else:
                self._check_drm_formats(tp_formats, guid)
                formats.extend(tp_formats)
                subtitles = self._merge_subtitles(subtitles, tp_subtitles)

        # Errors only matter if nothing could be extracted; a geo error takes precedence
        if not formats and (first_error or geo_error):
            raise geo_error or first_error

        feed_data = self._download_json(
            f'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/{guid}',
            guid, fatal=False)
        if feed_data:
            publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
            # URL of the first thumbnail whose key starts with the keyframe poster prefix
            poster_url = next((
                thumb.get('url')
                for name, thumb in (feed_data.get('thumbnails') or {}).items()
                if name.startswith('image_keyframe_poster-')), None)

            info.update({
                'description': info.get('description') or feed_data.get('description') or feed_data.get('longDescription'),
                'uploader': publish_info.get('description'),
                'uploader_id': publish_info.get('channel'),
                'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
                'thumbnail': poster_url,
            })

            if feed_data.get('programType') == 'episode':
                info['episode_number'] = int_or_none(feed_data.get('tvSeasonEpisodeNumber'))
                info['season_number'] = int_or_none(feed_data.get('tvSeasonNumber'))
                info['series'] = feed_data.get('mediasetprogram$brandTitle')

        info.update(id=guid, formats=formats, subtitles=subtitles)
        return info


class MediasetShowIE(MediasetIE):  # XXX: Do not subclass from concrete IE
    # Show pages on any *.mediaset.it host under /fiction/, /programmi-tv/, /serie-tv/ or /kids/.
    # The last path component is "<slug>_SE<12 digits>", optionally followed by
    # ",ST<12 digits>" and/or ",sb<9 digits>"; these are captured as "id", "st" and "sb".
    # NOTE(review): the host group "(\w+\.)+" is capturing here, unlike the "(?:\w+\.)+"
    # used in MediasetIE._VALID_URL; the code in this class only reads the named groups.
    _VALID_URL = r'''(?x)
                    (?:
                        https?://
                            (\w+\.)+mediaset\.it/
                            (?:
                                (?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
                                    (?:[a-z-]+)_SE(?P<id>\d{12})
                                    (?:,ST(?P<st>\d{12}))?
                                    (?:,sb(?P<sb>\d{9}))?$
                            )
                    )
                    '''
    _TESTS = [{
        # TV Show webpage (general webpage)
        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
        'info_dict': {
            'id': '000000000061',
            'title': 'Le Iene 2022/2023',
        },
        'playlist_mincount': 6,
    }, {
        # TV Show webpage (specific season)
        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
        'info_dict': {
            'id': '000000002763',
            'title': 'Le Iene 2021/2022',
        },
        'playlist_mincount': 7,
    }, {
        # TV Show specific playlist (with multiple pages)
        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
        'info_dict': {
            'id': '100013375',
            'title': 'I servizi',
        },
        'playlist_mincount': 50,
    }]

    # %-format template for the feed query used by _fetch_page; the placeholders are, in order,
    # the sub-brand id (%s) and the first and last item of the requested range (%d-%d)
    _BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
    # Number of items requested per feed page
    _PAGE_SIZE = 25

    def _fetch_page(self, sb, page):
        lower_limit = page * self._PAGE_SIZE + 1
        upper_limit = lower_limit + self._PAGE_SIZE - 1
        content = self._download_json(
            self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
        for entry in content.get('entries') or []:
            yield self.url_result(
                'mediaset:' + entry['guid'],
                playlist_title=entry['mediasetprogram$subBrandDescription'])

    def _real_extract(self, url):
        """Return a playlist for a show page, a season page or a single sub-brand listing.

        Without an ``sb`` component in the URL, the page itself is downloaded
        and every link to a sub-brand listing found in its HTML becomes one
        playlist entry; the playlist id is the season id when present, else
        the show id, and the title is the page title up to the first ``|``.

        With an ``sb`` component, entries are fetched lazily page by page
        through ``_fetch_page``; the playlist id is the sub-brand id and the
        title is taken from the first entry when one exists.
        """
        playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
        if not sb:
            page_id = st or playlist_id
            webpage = self._download_webpage(url, page_id)
            entries = [
                self.url_result(urljoin('https://mediasetinfinity.mediaset.it', sub_brand_url))
                for sub_brand_url in re.findall(
                    r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', webpage)]
            # Guard against no title having been extracted instead of crashing on .split
            title = self._html_extract_title(webpage)
            if title:
                title = title.split('|')[0].strip()
            return self.playlist_result(entries, page_id, title)

        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, sb),
            self._PAGE_SIZE)
        # Borrow the playlist title from the first entry; try_get yields None if that lookup fails
        title = try_get(entries, lambda x: x[0]['playlist_title'])

        return self.playlist_result(entries, sb, title)
Commit	Line	Data
e6ff66ef	1	import functools
ca04de46 S	2	import re
ca04de46 S	3
38f1eb0a	4	from .theplatform import ThePlatformBaseIE
0de13634	5	from ..utils import (
38f1eb0a	6	ExtractorError,
22219f2d	7	GeoRestrictedError,
e6ff66ef	8	OnDemandPagedList,
e897bd82	9	int_or_none,
e6ff66ef	10	try_get,
38f1eb0a	11	update_url_query,
e897bd82	12	urljoin,
0de13634 T	13	)
	14
	15
38f1eb0a RA	16	class MediasetIE(ThePlatformBaseIE):
38f1eb0a RA	17	_TP_TLD = 'eu'
10dc8592	18	_GUID_RE = r'F[0-9A-Z]{15}'
10dc8592	19	_VALID_URL = rf'''(?x)
ca04de46 S	20	(?:
	21	mediaset:\|
	22	https?://
8102a599	23	(?:\w+\.)+mediaset\.it/
ca04de46	24	(?:
29f7c58a	25	(?:video\|on-demand\|movie)/(?:[^/]+/)+[^/]+_\|
10dc8592	26	player/(?:v\d+/)?index\.html\?\S*?\bprogramGuid=
ca04de46	27	)
10dc8592	28	)(?P<id>{_GUID_RE})
56f9c77f	29	'''
10dc8592	30
	31	_EMBED_REGEX = [
	32	rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]'
	33	]
0de13634 T	34	_TESTS = [{
0de13634 T	35	# full episode
10dc8592	36	'url': 'https://mediasetinfinity.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
02226207	37	'md5': 'a7e75c6384871f322adb781d3bd72c26',
0de13634	38	'info_dict': {
02226207	39	'id': 'F310575103000102',
0de13634	40	'ext': 'mp4',
02226207	41	'title': 'Episodio 1',
22219f2d	42	'description': 'md5:e8017b7d7194e9bfb75299c2b8d81e02',
56f9c77f	43	'thumbnail': r're:^https?://.*\.jpg$',
02226207	44	'duration': 2682.0,
	45	'upload_date': '20210530',
	46	'series': 'Mr Wrong - Lezioni d\'amore',
	47	'timestamp': 1622413946,
	48	'uploader': 'Canale 5',
	49	'uploader_id': 'C5',
22219f2d	50	'season': 'Season 1',
	51	'episode': 'Episode 1',
	52	'season_number': 1,
	53	'episode_number': 1,
	54	'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}],
56f9c77f	55	},
9cf648c9	56	}, {
10dc8592	57	'url': 'https://mediasetinfinity.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
49ca8db0	58	'md5': '1276f966ac423d16ba255ce867de073e',
9cf648c9	59	'info_dict': {
38f1eb0a	60	'id': 'F309013801000501',
9cf648c9 T	61	'ext': 'mp4',
9cf648c9 T	62	'title': 'Puntata del 25 maggio',
22219f2d	63	'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
9cf648c9	64	'thumbnail': r're:^https?://.*\.jpg$',
02226207	65	'duration': 6565.008,
02226207	66	'upload_date': '20200903',
9cf648c9	67	'series': 'Matrix',
02226207	68	'timestamp': 1599172492,
38f1eb0a RA	69	'uploader': 'Canale 5',
38f1eb0a RA	70	'uploader_id': 'C5',
22219f2d	71	'season': 'Season 5',
	72	'episode': 'Episode 5',
	73	'season_number': 5,
	74	'episode_number': 5,
	75	'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
7e09c147	76	'categories': ['Informazione'],
9cf648c9	77	},
ee57a19d	78	}, {
10dc8592	79	# DRM
10dc8592	80	'url': 'https://mediasetinfinity.mediaset.it/movie/selvaggi/selvaggi_F006474501000101',
22219f2d	81	'info_dict': {
	82	'id': 'F006474501000101',
	83	'ext': 'mp4',
	84	'title': 'Selvaggi',
	85	'description': 'md5:cfdedbbfdd12d4d0e5dcf1fa1b75284f',
	86	'thumbnail': r're:^https?://.*\.jpg$',
	87	'duration': 5233.01,
	88	'upload_date': '20210729',
	89	'timestamp': 1627594716,
	90	'uploader': 'Cine34',
	91	'uploader_id': 'B6',
	92	'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}],
49ca8db0	93	},
10dc8592	94	'params': {
	95	'ignore_no_formats_error': True,
	96	},
	97	'expected_warnings': [
	98	'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences',
	99	'Content behind paywall and DRM',
	100	],
	101	'skip': True,
0de13634	102	}, {
10dc8592	103	# old domain
10dc8592	104	'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
56f9c77f	105	'only_matching': True,
0de13634	106	}, {
10dc8592	107	# iframe
38f1eb0a	108	'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
56f9c77f	109	'only_matching': True,
ca04de46	110	}, {
38f1eb0a	111	'url': 'mediaset:FAFU000000665924',
ca04de46	112	'only_matching': True,
10dc8592	113	}]
	114	_WEBPAGE_TESTS = [{
	115	# Mediaset embed
	116	'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
	117	'info_dict': {
	118	'id': 'FD00000000004929',
	119	'ext': 'mp4',
	120	'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
	121	'duration': 67.013,
	122	'thumbnail': r're:^https?://.*\.jpg$',
	123	'uploader': 'Mediaset Play',
	124	'uploader_id': 'QY',
	125	'upload_date': '20201005',
	126	'timestamp': 1601866168,
	127	'chapters': [],
	128	},
	129	'params': {
	130	'skip_download': True,
19c90e40	131	},
19c90e40	132	'skip': 'Dead link',
bf45295c	133	}, {
10dc8592	134	# WittyTV embed
	135	'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
	136	'info_dict': {
	137	'id': 'F312172801000801',
	138	'ext': 'mp4',
	139	'title': 'Ultima puntata - Venerdì 25 novembre',
	140	'description': 'Una serata all\'insegna della musica e del buonumore ma non priva di spunti di riflessione',
	141	'duration': 6203.01,
	142	'thumbnail': r're:^https?://.*\.jpg$',
	143	'uploader': 'Canale 5',
	144	'uploader_id': 'C5',
	145	'upload_date': '20221126',
	146	'timestamp': 1669428689,
	147	'chapters': list,
	148	'series': 'Maurizio Costanzo Show',
	149	'season': 'Season 12',
	150	'season_number': 12,
	151	'episode': 'Episode 8',
	152	'episode_number': 8,
7e09c147	153	'categories': ['Intrattenimento'],
10dc8592	154	},
	155	'params': {
	156	'skip_download': True,
	157	}
0de13634 T	158	}]
0de13634 T	159
550e6541	160	def _parse_smil_formats_and_subtitles(
550e6541	161	self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
ef382405 RA	162	for video in smil.findall(self._xpath_ns('.//video', namespace)):
ef382405 RA	163	video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
550e6541	164	return super(MediasetIE, self)._parse_smil_formats_and_subtitles(
550e6541	165	smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
ef382405	166
22219f2d	167	def _check_drm_formats(self, tp_formats, video_id):
	168	has_nondrm, drm_manifest = False, ''
	169	for f in tp_formats:
	170	if '_sampleaes/' in (f.get('manifest_url') or ''):
	171	drm_manifest = drm_manifest or f['manifest_url']
	172	f['has_drm'] = True
	173	if not f.get('has_drm') and f.get('manifest_url'):
	174	has_nondrm = True
	175
	176	nodrm_manifest = re.sub(r'_sampleaes/(\w+)_fp_', r'/\1_no_', drm_manifest)
	177	if has_nondrm or nodrm_manifest == drm_manifest:
	178	return
	179
	180	tp_formats.extend(self._extract_m3u8_formats(
	181	nodrm_manifest, video_id, m3u8_id='hls', fatal=False) or [])
	182
0de13634	183	def _real_extract(self, url):
38f1eb0a	184	guid = self._match_id(url)
10dc8592	185	tp_path = f'PR1GhC/media/guid/2702976343/{guid}'
38f1eb0a	186	info = self._extract_theplatform_metadata(tp_path, guid)
0de13634	187
56f9c77f	188	formats = []
38f1eb0a	189	subtitles = {}
22219f2d	190	first_e = geo_e = None
49ca8db0	191	asset_type = 'geoNo:HD,browser,geoIT\|geoNo:HD,geoIT\|geoNo:SD,browser,geoIT\|geoNo:SD,geoIT\|geoNo\|HD\|SD'
02226207	192	# TODO: fixup ISM+none manifest URLs
10dc8592	193	for f in ('MPEG4', 'MPEG-DASH', 'M3U'):
02226207	194	try:
02226207	195	tp_formats, tp_subtitles = self._extract_theplatform_smil(
10dc8592	196	update_url_query(f'http://link.theplatform.{self._TP_TLD}/s/{tp_path}', {
02226207	197	'mbr': 'true',
	198	'formats': f,
	199	'assetTypes': asset_type,
10dc8592	200	}), guid, f'Downloading {f.split("+")[0]} SMIL data')
02226207	201	except ExtractorError as e:
10dc8592	202	if e.orig_msg == 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences':
10dc8592	203	e.orig_msg = 'This video is DRM protected'
22219f2d	204	if not geo_e and isinstance(e, GeoRestrictedError):
22219f2d	205	geo_e = e
02226207	206	if not first_e:
02226207	207	first_e = e
22219f2d	208	continue
22219f2d	209	self._check_drm_formats(tp_formats, guid)
02226207	210	formats.extend(tp_formats)
02226207	211	subtitles = self._merge_subtitles(subtitles, tp_subtitles)
22219f2d	212
	213	# check for errors and report them
	214	if (first_e or geo_e) and not formats:
	215	raise geo_e or first_e
	216
38f1eb0a	217	feed_data = self._download_json(
10dc8592	218	f'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/{guid}',
02226207	219	guid, fatal=False)
38f1eb0a RA	220	if feed_data:
38f1eb0a RA	221	publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
02226207	222	thumbnails = feed_data.get('thumbnails') or {}
	223	thumbnail = None
	224	for key, value in thumbnails.items():
	225	if key.startswith('image_keyframe_poster-'):
	226	thumbnail = value.get('url')
	227	break
	228
38f1eb0a	229	info.update({
22219f2d	230	'description': info.get('description') or feed_data.get('description') or feed_data.get('longDescription'),
38f1eb0a RA	231	'uploader': publish_info.get('description'),
	232	'uploader_id': publish_info.get('channel'),
	233	'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
02226207	234	'thumbnail': thumbnail,
38f1eb0a	235	})
0de13634	236
22219f2d	237	if feed_data.get('programType') == 'episode':
	238	info.update({
	239	'episode_number': int_or_none(
	240	feed_data.get('tvSeasonEpisodeNumber')),
	241	'season_number': int_or_none(
	242	feed_data.get('tvSeasonNumber')),
	243	'series': feed_data.get('mediasetprogram$brandTitle'),
	244	})
	245
38f1eb0a RA	246	info.update({
38f1eb0a RA	247	'id': guid,
56f9c77f	248	'formats': formats,
38f1eb0a RA	249	'subtitles': subtitles,
	250	})
	251	return info
e6ff66ef	252
e6ff66ef	253
6368e2e6	254	class MediasetShowIE(MediasetIE): # XXX: Do not subclass from concrete IE
e6ff66ef	255	_VALID_URL = r'''(?x)
	256	(?:
	257	https?://
8102a599	258	(\w+\.)+mediaset\.it/
e6ff66ef	259	(?:
22219f2d	260	(?:fiction\|programmi-tv\|serie-tv\|kids)/(?:.+?/)?
22219f2d	261	(?:[a-z-]+)_SE(?P<id>\d{12})
e6ff66ef	262	(?:,ST(?P<st>\d{12}))?
	263	(?:,sb(?P<sb>\d{9}))?$
	264	)
	265	)
	266	'''
	267	_TESTS = [{
22219f2d	268	# TV Show webpage (general webpage)
10dc8592	269	'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
e6ff66ef	270	'info_dict': {
22219f2d	271	'id': '000000000061',
10dc8592	272	'title': 'Le Iene 2022/2023',
e6ff66ef	273	},
10dc8592	274	'playlist_mincount': 6,
e6ff66ef	275	}, {
22219f2d	276	# TV Show webpage (specific season)
10dc8592	277	'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
e6ff66ef	278	'info_dict': {
e6ff66ef	279	'id': '000000002763',
10dc8592	280	'title': 'Le Iene 2021/2022',
e6ff66ef	281	},
22219f2d	282	'playlist_mincount': 7,
e6ff66ef	283	}, {
e6ff66ef	284	# TV Show specific playlist (with multiple pages)
10dc8592	285	'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
e6ff66ef	286	'info_dict': {
	287	'id': '100013375',
	288	'title': 'I servizi',
	289	},
22219f2d	290	'playlist_mincount': 50,
e6ff66ef	291	}]
	292
	293	_BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished\|desc,tvSeasonEpisodeNumber\|desc&range=%d-%d'
	294	_PAGE_SIZE = 25
	295
	296	def _fetch_page(self, sb, page):
	297	lower_limit = page * self._PAGE_SIZE + 1
	298	upper_limit = lower_limit + self._PAGE_SIZE - 1
	299	content = self._download_json(
	300	self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
	301	for entry in content.get('entries') or []:
	302	yield self.url_result(
	303	'mediaset:' + entry['guid'],
	304	playlist_title=entry['mediasetprogram$subBrandDescription'])
	305
	306	def _real_extract(self, url):
	307	playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
	308	if not sb:
22219f2d	309	page = self._download_webpage(url, st or playlist_id)
10dc8592	310	entries = [self.url_result(urljoin('https://mediasetinfinity.mediaset.it', url))
e6ff66ef	311	for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
10dc8592	312	title = self._html_extract_title(page).split('\|')[0].strip()
e6ff66ef	313	return self.playlist_result(entries, st or playlist_id, title)
	314
	315	entries = OnDemandPagedList(
	316	functools.partial(self._fetch_page, sb),
	317	self._PAGE_SIZE)
	318	title = try_get(entries, lambda x: x[0]['playlist_title'])
	319
	320	return self.playlist_result(entries, sb, title)