jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/phoenix.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / phoenix.py
import re

from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    merge_dicts,
    try_get,
    unified_timestamp,
    urljoin,
)
13
14
class PhoenixIE(ZDFBaseIE):
    """Extractor for phoenix.de article pages (URLs ending in ``-a-<digits>.html``).

    The article JSON is fetched first; only its first paragraph
    (``absaetze[0]``) is inspected for media.  YouTube embeds are handed
    over to the YouTube extractor, everything else is resolved through the
    phoenix media-player details endpoint and ``self._extract_ptmd``
    (not defined in this class; presumably inherited from ``ZDFBaseIE``).
    """

    IE_NAME = 'phoenix.de'
    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
    _TESTS = [{
        # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
        'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
        'md5': '34ec321e7eb34231fd88616c65c92db0',
        'info_dict': {
            'id': '210222_phx_nachgehakt_corona_protest',
            'ext': 'mp4',
            'title': 'Wohin führt der Protest in der Pandemie?',
            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
            'duration': 1691,
            'timestamp': 1613902500,
            'upload_date': '20210221',
            'uploader': 'Phoenix',
            'series': 'corona nachgehakt',
            'episode': 'Wohin führt der Protest in der Pandemie?',
        },
    }, {
        # Youtube embed
        'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
        'info_dict': {
            'id': 'hMQtqFYjomk',
            'ext': 'mp4',
            'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
            'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
            'duration': 3509,
            'upload_date': '20201219',
            'uploader': 'phoenix',
            'uploader_id': 'phoenix',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
        'only_matching': True,
    }, {
        # no media
        'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
        'only_matching': True,
    }, {
        # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
        'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a phoenix.de article URL to its media information.

        Returns either a ``url_result`` pointing at the YouTube extractor
        (for ``video-youtube`` paragraphs) or the dict produced by
        ``self._extract_ptmd`` merged with the metadata read from the
        details JSON.

        Raises ExtractorError (expected) when the article's first paragraph
        is missing or carries no media identifier.
        """
        article_id = self._match_id(url)

        article = self._download_json(
            'https://www.phoenix.de/response/id/%s' % article_id, article_id,
            'Downloading article JSON')

        # Only the first paragraph is considered.  try_get turns a missing
        # or empty 'absaetze' list into None instead of a bare
        # KeyError/IndexError.
        video = try_get(article, lambda x: x['absaetze'][0], dict)
        if not video:
            raise ExtractorError('No media found in this article', expected=True)

        title = video.get('titel') or article.get('subtitel')

        if video.get('typ') == 'video-youtube':
            video_id = video['id']
            return self.url_result(
                video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
                video_title=title)

        media_ref = video.get('basename') or video.get('content')
        if not media_ref:
            # Without this guard compat_str(None) would produce the literal
            # string 'None', which would then be sent to the details
            # endpoint as a bogus id.
            raise ExtractorError('No media found in this article', expected=True)
        video_id = compat_str(media_ref)

        details = self._download_json(
            'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
            video_id, 'Downloading details JSON', query={
                'ak': 'web',
                'ptmd': 'true',
                'id': video_id,
                'profile': 'player2',
            })

        title = title or details['title']
        # The asset id from the tracking data is used both to build the
        # PTMD URL and as the final 'id' of the result.
        content_id = details['tracking']['nielsen']['content']['assetid']

        info = self._extract_ptmd(
            'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
            content_id, None, url)

        duration = int_or_none(try_get(
            details, lambda x: x['tracking']['nielsen']['content']['length']))
        timestamp = unified_timestamp(details.get('editorialDate'))
        series = try_get(
            details, lambda x: x['tracking']['nielsen']['content']['program'],
            compat_str)
        episode = title if details.get('contentType') == 'episode' else None

        thumbnails = []
        teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
        for thumbnail_key, thumbnail_url in teaser_images.items():
            thumbnail_url = urljoin(url, thumbnail_url)
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # Layout keys shaped like '<width>x<height>' also supply the
            # thumbnail dimensions; other keys contribute only the URL.
            m = re.match(r'^([0-9]+)x([0-9]+)$', thumbnail_key)
            if m:
                thumbnail['width'] = int(m.group(1))
                thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)

        return merge_dicts(info, {
            'id': content_id,
            'title': title,
            'description': details.get('leadParagraph'),
            'duration': duration,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': details.get('tvService'),
            'series': series,
            'episode': episode,
        })