]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/vrt.py
2 from __future__
import unicode_literals
5 from .common
import InfoExtractor
15 class VRTIE(InfoExtractor
):
16 IE_DESC
= 'VRT NWS, Flanders News, Flandern Info and Sporza'
17 _VALID_URL
= r
'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
19 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
20 'md5': 'e1663accf5cf13f375f3cd0d10476669',
22 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
24 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
25 'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
26 'timestamp': 1557924660,
27 'upload_date': '20190515',
31 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
32 'md5': '910bba927566e9ab992278f647eb4b75',
34 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
36 'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
37 'timestamp': 1557923760,
38 'upload_date': '20190515',
42 'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
43 'only_matching': True,
45 'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
46 'only_matching': True,
49 'vrt.be/vrtnws': 'vrtnieuws',
50 'sporza.be': 'sporza',
53 def _real_extract(self
, url
):
54 site
, display_id
= self
._match
_valid
_url
(url
).groups()
55 webpage
= self
._download
_webpage
(url
, display_id
)
56 attrs
= extract_attributes(self
._search
_regex
(
57 r
'(<[^>]+class="vrtvideo( [^"]*)?"[^>]*>)', webpage
, 'vrt video'))
59 asset_id
= attrs
['data-video-id']
60 publication_id
= attrs
.get('data-publication-id')
62 asset_id
= publication_id
+ '$' + asset_id
63 client
= attrs
.get('data-client-code') or self
._CLIENT
_MAP
[site
]
65 title
= strip_or_none(get_element_by_class(
66 'vrt-title', webpage
) or self
._html
_search
_meta
(
67 ['og:title', 'twitter:title', 'name'], webpage
))
68 description
= self
._html
_search
_meta
(
69 ['og:description', 'twitter:description', 'description'], webpage
)
70 if description
== '…':
72 timestamp
= unified_timestamp(self
._html
_search
_meta
(
73 'article:published_time', webpage
))
76 '_type': 'url_transparent',
78 'display_id': display_id
,
80 'description': description
,
81 'thumbnail': attrs
.get('data-posterimage'),
82 'timestamp': timestamp
,
83 'duration': float_or_none(attrs
.get('data-duration'), 1000),
84 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client
, asset_id
),