]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/medialaan.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / medialaan.py
CommitLineData
2a721cdf
S
1import re
2
2181983a 3from .common import InfoExtractor
2a721cdf 4from ..utils import (
2181983a 5 extract_attributes,
2a721cdf 6 int_or_none,
2181983a 7 mimetype2ext,
8 parse_iso8601,
2a721cdf
S
9)
10
11
class MedialaanIE(InfoExtractor):
    """Extractor for mychannels.video productions.

    Handles direct ``mychannels.video`` / ``embed.mychannels.video`` player
    URLs as well as ``/video/...~p<id>`` pages on the ``.be`` and ``.nl``
    hosts enumerated in ``_VALID_URL``. In every case the numeric production
    ID is captured as the ``id`` group.
    """

    # NOTE: every literal dot in a host name must be escaped; an unescaped
    # `.` would match any character and accept look-alike hosts.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:embed\.)?mychannels\.video/embed/|
                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
                            (?:www\.)?(?:
                                (?:
                                    7sur7|
                                    demorgen|
                                    hln|
                                    joe|
                                    qmusic
                                )\.be|
                                (?:
                                    [abe]d|
                                    bndestem|
                                    destentor|
                                    gelderlander|
                                    pzc|
                                    tubantia|
                                    volkskrant
                                )\.nl
                            )/video/(?:[^/]+/)*[^/?&#]+~p
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
        'info_dict': {
            'id': '193993',
            'ext': 'mp4',
            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
            'timestamp': 1611663540,
            'upload_date': '20210126',
            'duration': 238,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/script/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://mychannels.video/embed/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/embed/193993',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return embed URLs for every mychannels video <div> in *webpage*.

        Scans for ``<div ... data-mychannels-type="video" ...>`` opening tags
        and builds ``https://mychannels.video/embed/<id>`` from each tag's
        ``data-mychannels-id`` attribute. Tags without that attribute (or with
        an empty value) are skipped. *url* is not used here.
        """
        mychannels_ids = (
            extract_attributes(element).get('data-mychannels-id')
            for element in re.findall(
                r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage))
        return [
            'https://mychannels.video/embed/' + mychannels_id
            for mychannels_id in mychannels_ids if mychannels_id]

    def _real_extract(self, url):
        """Fetch production metadata for *url* and build the info dict.

        Downloads the SDK JSON for the production ID matched from *url*, takes
        the first entry of its ``productions`` list and turns each of its
        ``sources`` into one or more formats. A missing ``productions`` key,
        an empty list or a missing ``title`` propagates as the corresponding
        ``KeyError``/``IndexError``.
        """
        production_id = self._match_id(url)
        # NOTE(review): the meaning of the 'UUUU_default' options value is not
        # visible here; it is passed through verbatim.
        production = self._download_json(
            'https://embed.mychannels.video/sdk/production/' + production_id,
            production_id, query={'options': 'UUUU_default'})['productions'][0]
        title = production['title']

        formats = []
        for source in (production.get('sources') or []):
            src = source.get('src')
            if not src:
                # A source without a URL cannot be turned into a format.
                continue
            ext = mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                # HLS manifests expand to several formats; non-fatal so one
                # broken manifest does not abort the whole extraction.
                formats.extend(self._extract_m3u8_formats(
                    src, production_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'ext': ext,
                    'url': src,
                })

        return {
            'id': production_id,
            'title': title,
            'formats': formats,
            'thumbnail': production.get('posterUrl'),
            # ' ' is passed as the second argument to parse_iso8601
            # (presumably the date/time delimiter; verify against utils).
            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
            # `or None` turns a falsy result such as 0 into None.
            'duration': int_or_none(production.get('duration')) or None,
        }