]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/tagesschau.py
3 from .common
import InfoExtractor
12 class TagesschauIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
16 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
17 'md5': '7a7287612fa881a1ae1d087df45c2fd6',
19 'id': 'video-102143-1',
21 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
24 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
25 'md5': '3c54c1f6243d279b706bde660ceec633',
29 'title': 'Ganze Sendung',
33 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
34 'md5': '4cf22023c285f35e99c24d290ba58cc9',
36 'id': 'audio-29417-1',
38 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
41 'url': 'http://www.tagesschau.de/inland/bnd-303.html',
42 'md5': '12cfb212d9325b5ba0d52b625f1aa61c',
46 'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa',
49 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
51 'id': 'afd-parteitag-135',
56 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
58 'id': 'audio-29417-1',
60 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
63 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
64 'only_matching': True,
66 'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
67 'only_matching': True,
69 'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
70 'only_matching': True,
72 'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
73 'only_matching': True,
75 'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
76 'only_matching': True,
78 'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
79 'only_matching': True,
81 'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
82 'only_matching': True,
84 'url': 'http://www.tagesschau.de/100sekunden/index.html',
85 'only_matching': True,
87 # playlist article with collapsing sections
88 'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
89 'only_matching': True,
92 def _real_extract(self
, url
):
93 mobj
= self
._match
_valid
_url
(url
)
94 video_id
= mobj
.group('id') or mobj
.group('path')
95 display_id
= video_id
.lstrip('-')
97 webpage
= self
._download
_webpage
(url
, display_id
)
99 title
= self
._html
_search
_regex
(
100 r
'<span[^>]*class="headline"[^>]*>(.+?)</span>',
101 webpage
, 'title', default
=None) or self
._og
_search
_title
(webpage
, fatal
=False)
104 videos
= re
.findall(r
'<div[^>]+>', webpage
)
107 video
= extract_attributes(video
).get('data-config')
110 video
= self
._parse
_json
(video
, video_id
, transform_source
=js_to_json
, fatal
=False)
111 video_formats
= try_get(video
, lambda x
: x
['mc']['_mediaArray'][0]['_mediaStreamArray'])
112 if not video_formats
:
115 for video_format
in video_formats
:
116 media_url
= video_format
.get('_stream') or ''
118 if media_url
.endswith('master.m3u8'):
119 formats
= self
._extract
_m
3u8_formats
(media_url
, video_id
, 'mp4', m3u8_id
='hls')
120 elif media_url
.endswith('.hi.mp3') and media_url
.startswith('https://download'):
128 'id': '%s-%d' % (display_id
, num
),
129 'title': try_get(video
, lambda x
: x
['mc']['_title']),
130 'duration': int_or_none(try_get(video
, lambda x
: x
['mc']['_duration'])),
134 return self
.playlist_result(entries
, display_id
, title
)
135 formats
= entries
[0]['formats']
136 video_info
= self
._search
_json
_ld
(webpage
, video_id
)
137 description
= video_info
.get('description')
138 thumbnail
= self
._og
_search
_thumbnail
(webpage
) or video_info
.get('thumbnail')
139 timestamp
= video_info
.get('timestamp')
140 title
= title
or video_info
.get('description')
145 'thumbnail': thumbnail
,
147 'timestamp': timestamp
,
148 'description': description
,