jfr.im git - yt-dlp.git/commitdiff
[extractor/europarl] Add EuroParlWebstream Extractor (#5547)
authorHobbyistDev <redacted>
Sat, 10 Dec 2022 08:44:43 +0000 (17:44 +0900)
committerGitHub <redacted>
Sat, 10 Dec 2022 08:44:43 +0000 (14:14 +0530)
Authored by: HobbyistDev
Closes #4933

yt_dlp/extractor/_extractors.py
yt_dlp/extractor/europa.py

index b1bbc5b725c7afe824ddc40b2ee278a91067f1f0..e76a80ee1943289ee31c53a3e28307c71f61c142 100644 (file)
     ESPNCricInfoIE,
 )
 from .esri import EsriVideoIE
-from .europa import EuropaIE
+from .europa import EuropaIE, EuroParlWebstreamIE
 from .europeantour import EuropeanTourIE
 from .eurosport import EurosportIE
 from .euscreen import EUScreenIE
index c2b4937658671b10e5db8c3f2482de2de349c030..29daabe4a385bb1d813625318feeffd7a38beca1 100644 (file)
@@ -3,6 +3,7 @@
     int_or_none,
     orderedSet,
     parse_duration,
+    parse_iso8601,
     parse_qs,
     qualities,
     unified_strdate,
@@ -87,3 +88,102 @@ def get_item(type_, preference):
             'view_count': view_count,
             'formats': formats
         }
+
+
+class EuroParlWebstreamIE(InfoExtractor):
+    """Extractor for European Parliament webstreams.
+
+    Matches event pages and embed-player URLs on the multimedia and
+    webstreaming hosts of europarl.europa.eu. The identifier captured from the
+    URL is sent to the vuplay event API as ``external_id``; the response
+    provides the manifest URL and the metadata fields used below.
+    """
+
+    _VALID_URL = r'''(?x)
+        https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
+        (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+    '''
+    _TESTS = [{
+        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
+        'info_dict': {
+            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
+            'ext': 'mp4',
+            'release_timestamp': 1663137900,
+            'title': 'Plenary session',
+            'release_date': '20220914',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
+        'info_dict': {
+            'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
+            'ext': 'mp4',
+            'release_timestamp': 1668434400,
+            'release_date': '20221114',
+            'title': 'md5:d3550280c33cc70e0678652e3d52c028',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        # embed webpage
+        'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
+        'info_dict': {
+            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
+            'ext': 'mp4',
+            'title': 'Plenary session',
+            'release_date': '20220914',
+            'release_timestamp': 1663137900,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        # live webstream
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715',
+            'release_timestamp': 1668502800,
+            'title': 'Euroscola 2022-11-15 19:21',
+            'release_date': '20221115',
+            'live_status': 'is_live',
+        },
+        'skip': 'not live anymore'
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the event referenced by *url*.
+
+        The response must contain ``streaming_url`` and ``id`` (indexed
+        directly); every other field is read with ``.get`` and may be absent.
+        """
+        display_id = self._match_id(url)
+
+        json_info = self._download_json(
+            'https://vis-api.vuplay.co.uk/event/external', display_id,
+            query={
+                'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
+                'external_id': display_id,
+            })
+
+        formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id)
+        # The HLS URL is only a guess derived from the DASH URL, so a failure
+        # here must not discard the DASH formats already extracted
+        fmts, subs = self._extract_m3u8_formats_and_subtitles(
+            json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id, fatal=False)
+
+        formats.extend(fmts)
+        self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': json_info['id'],
+            'title': json_info.get('title'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'release_timestamp': parse_iso8601(json_info.get('published_start')),
+            # `state` may be present but null; `'LIVE' in None` would raise TypeError
+            'is_live': 'LIVE' in (json_info.get('state') or ''),
+        }