]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/phoenix.py
[cbs] Add support for ParamountPlus (#138)
[yt-dlp.git] / yt_dlp / extractor / phoenix.py
1 # coding: utf-8
from __future__ import unicode_literals

import re

from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    merge_dicts,
    unified_timestamp,
    xpath_text,
)
15
16
class PhoenixIE(ZDFBaseIE):
    """Extractor for article pages on phoenix.de.

    The numeric article id is taken from the ``-a-<id>.html`` suffix of the
    page URL.  The first entry of the article's ``absaetze`` list is treated
    as the media item: YouTube embeds are delegated to ``YoutubeIE``, anything
    else is resolved through the phoenix details XML and the PTMD endpoint.
    """

    IE_NAME = 'phoenix.de'
    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
    _TESTS = [{
        # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
        'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
        'md5': '34ec321e7eb34231fd88616c65c92db0',
        'info_dict': {
            'id': '210222_phx_nachgehakt_corona_protest',
            'ext': 'mp4',
            'title': 'Wohin führt der Protest in der Pandemie?',
            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
            'duration': 1691,
            'timestamp': 1613906100,
            'upload_date': '20210221',
            'uploader': 'Phoenix',
            'channel': 'corona nachgehakt',
        },
    }, {
        # Youtube embed
        'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
        'info_dict': {
            'id': 'hMQtqFYjomk',
            'ext': 'mp4',
            'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
            'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
            'duration': 3509,
            'upload_date': '20201219',
            'uploader': 'phoenix',
            'uploader_id': 'phoenix',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
        'only_matching': True,
    }, {
        # no media
        'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
        'only_matching': True,
    }, {
        # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
        'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_thumbnails(details):
        """Build the thumbnails list from the ``teaserimage`` nodes of *details*.

        Nodes without text (no URL) are skipped.  When a node's ``key``
        attribute has the form ``<width>x<height>``, both dimensions are
        attached to the thumbnail entry as integers.
        """
        thumbnails = []
        for node in details.findall('.//teaserimages/teaserimage'):
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # A missing key is treated like a key that does not match
            size = re.match(r'^([0-9]+)x([0-9]+)$', node.get('key') or '')
            if size:
                thumbnail['width'] = int(size.group(1))
                thumbnail['height'] = int(size.group(2))
            thumbnails.append(thumbnail)
        return thumbnails

    def _real_extract(self, url):
        """Return the info dict (or a YouTube url_result) for a phoenix.de article.

        Raises ExtractorError (expected) when the article JSON has no
        ``absaetze`` entries, or when its first entry carries neither a
        ``basename`` nor a ``content`` reference.
        """
        article_id = self._match_id(url)

        article = self._download_json(
            'https://www.phoenix.de/response/id/%s' % article_id, article_id,
            'Downloading article JSON')

        paragraphs = article.get('absaetze')
        if not paragraphs:
            # Previously surfaced as a bare IndexError/KeyError
            raise ExtractorError('No media found in article', expected=True)

        video = paragraphs[0]
        title = video.get('titel') or article.get('subtitel')

        if video.get('typ') == 'video-youtube':
            video_id = video['id']
            return self.url_result(
                video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
                video_title=title)

        media_ref = video.get('basename') or video.get('content')
        if not media_ref:
            # Without this guard compat_str(None) would send the literal
            # string 'None' as the id to the details endpoint
            raise ExtractorError('No media found in article', expected=True)
        video_id = compat_str(media_ref)

        details = self._download_xml(
            'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
            video_id, 'Downloading details XML', query={
                'ak': 'web',
                'ptmd': 'true',
                'id': video_id,
                'profile': 'player2',
            })

        # The article JSON title wins; the XML title is only a fallback
        title = title or xpath_text(
            details, './/information/title', 'title', fatal=True)
        content_id = xpath_text(
            details, './/video/details/basename', 'content id', fatal=True)

        info = self._extract_ptmd(
            'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
            content_id, None, url)

        timestamp = unified_timestamp(xpath_text(details, './/details/airtime'))

        return merge_dicts(info, {
            'id': content_id,
            'title': title,
            'description': xpath_text(details, './/information/detail'),
            'duration': int_or_none(xpath_text(details, './/details/lengthSec')),
            'thumbnails': self._extract_thumbnails(details),
            'timestamp': timestamp,
            'uploader': xpath_text(details, './/details/channel'),
            'uploader_id': xpath_text(details, './/details/originChannelId'),
            'channel': xpath_text(details, './/details/originChannelTitle'),
        })