]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/medialaan.py
Update to ytdl v2021-04-01
[yt-dlp.git] / yt_dlp / extractor / medialaan.py
CommitLineData
2a721cdf
S
1from __future__ import unicode_literals
2
3import re
4
2181983a 5from .common import InfoExtractor
2a721cdf 6from ..utils import (
2181983a 7 extract_attributes,
2a721cdf 8 int_or_none,
2181983a 9 mimetype2ext,
10 parse_iso8601,
2a721cdf
S
11)
12
13
class MedialaanIE(InfoExtractor):
    """Extractor for videos served through the mychannels.video player.

    Handles the player's own embed/SDK URLs as well as ``/video/...~p<id>``
    pages on the Belgian (.be) and Dutch (.nl) sites enumerated in
    ``_VALID_URL``.
    """

    # Verbose regex: whitespace inside the pattern is ignored.  Every dot
    # that belongs to a host name is escaped so it only matches a literal
    # '.' (the first alternative previously used a bare '.', which matched
    # any character).
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:embed\.)?mychannels\.video/embed/|
                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
                            (?:www\.)?(?:
                                (?:
                                    7sur7|
                                    demorgen|
                                    hln|
                                    joe|
                                    qmusic
                                )\.be|
                                (?:
                                    [abe]d|
                                    bndestem|
                                    destentor|
                                    gelderlander|
                                    pzc|
                                    tubantia|
                                    volkskrant
                                )\.nl
                            )/video/(?:[^/]+/)*[^/?&#]+~p
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
        'info_dict': {
            'id': '193993',
            'ext': 'mp4',
            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
            'timestamp': 1611663540,
            'upload_date': '20210126',
            'duration': 238,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/script/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://mychannels.video/embed/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/embed/193993',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return embed URLs for every mychannels video ``<div>`` in *webpage*.

        Scans for ``<div>`` opening tags carrying
        ``data-mychannels-type="video"`` and builds one
        ``https://mychannels.video/embed/<id>`` URL per tag that also has a
        non-empty ``data-mychannels-id`` attribute.  Returns a list (empty
        when nothing matches).
        """
        entries = []
        for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
            mychannels_id = extract_attributes(element).get('data-mychannels-id')
            # Tags without an id cannot be turned into an embed URL; skip them.
            if mychannels_id:
                entries.append('https://mychannels.video/embed/' + mychannels_id)
        return entries

    def _real_extract(self, url):
        """Fetch the production metadata for *url* and build the info dict.

        The numeric id captured by ``_VALID_URL`` is used to query the
        embed SDK endpoint; the first entry of the returned ``productions``
        list supplies the title, sources and remaining metadata.
        """
        production_id = self._match_id(url)
        production = self._download_json(
            'https://embed.mychannels.video/sdk/production/' + production_id,
            production_id, query={'options': 'UUUU_default'})['productions'][0]
        # The title is mandatory: a missing key raises here.
        title = production['title']

        formats = []
        for source in (production.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            ext = mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                # HLS manifests expand into several formats; a failure to
                # fetch one is non-fatal so other sources can still be used.
                formats.extend(self._extract_m3u8_formats(
                    src, production_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'ext': ext,
                    'url': src,
                })
        self._sort_formats(formats)

        return {
            'id': production_id,
            'title': title,
            'formats': formats,
            'thumbnail': production.get('posterUrl'),
            # NOTE(review): ' ' is presumably the date/time delimiter the API
            # uses in publicationDate - confirm against parse_iso8601.
            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
            # A falsy duration (e.g. 0) is reported as None.
            'duration': int_or_none(production.get('duration')) or None,
        }