jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/europa.py
[extractor/youtube] Extract uploader metadata for feed/playlist items
[yt-dlp.git] / yt_dlp / extractor / europa.py
1 from .common import InfoExtractor
2 from ..utils import (
3 int_or_none,
4 orderedSet,
5 parse_duration,
6 parse_iso8601,
7 parse_qs,
8 qualities,
9 unified_strdate,
10 xpath_text
11 )
12
13
class EuropaIE(InfoExtractor):
    """Extractor for ``ec.europa.eu/avservices`` video player and audio detail pages.

    The media reference is read from the ``ref`` query parameter of the URL.
    """

    _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
        'md5': '574f080699ddd1e19a675b0ddf010371',
        'info_dict': {
            'id': 'I107758',
            'ext': 'mp4',
            'title': 'TRADE - Wikileaks on TTIP',
            'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150811',
            'duration': 34,
            'view_count': int,
            'formats': 'mincount:3',
        }
    }, {
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
        'only_matching': True,
    }, {
        'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for *url* from the site's playlist XML.

        Localized fields (title, description) are looked up by language in
        this order: the ``sitelang`` query parameter (``en`` when absent),
        then ``en``, then ``int``. The title falls back to the video id when
        no localized label is available.
        """
        video_id = self._match_id(url)

        playlist = self._download_xml(
            'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)

        requested_lang = parse_qs(url).get('sitelang', ('en', ))[0]
        preferred_langs = orderedSet((requested_lang, 'en', 'int'))

        def localized(kind):
            # Map language code -> stripped label for every complete <item>;
            # a later item for the same language replaces an earlier one.
            labels_by_lang = {}
            for node in playlist.findall('./info/%s/item' % kind):
                lang = xpath_text(node, 'lg', default=None)
                label = xpath_text(node, 'label', default=None)
                if not (lang and label):
                    continue
                labels_by_lang[lang] = label.strip()
            # First non-empty label in preference order, or None.
            return next(filter(None, map(labels_by_lang.get, preferred_langs)), None)

        # The preference list is reversed before being handed to qualities();
        # presumably that gives the most preferred language the highest rank
        # (depends on qualities() ranking by list position - confirm in utils).
        rank_language = qualities(preferred_langs[::-1])

        def iter_formats():
            # One format per <file> element that actually carries a URL.
            for file_node in playlist.findall('./files/file'):
                media_url = xpath_text(file_node, './url')
                if media_url:
                    file_lang = xpath_text(file_node, './lg')
                    yield {
                        'url': media_url,
                        'format_id': file_lang,
                        'format_note': xpath_text(file_node, './lglabel'),
                        'language_preference': rank_language(file_lang)
                    }

        return {
            'id': video_id,
            'title': localized('title') or video_id,
            'description': localized('description'),
            'thumbnail': xpath_text(playlist, './info/thumburl', 'thumbnail'),
            'upload_date': unified_strdate(xpath_text(playlist, './info/date', 'upload date')),
            'duration': parse_duration(xpath_text(playlist, './info/duration', 'duration')),
            'view_count': int_or_none(xpath_text(playlist, './info/views', 'views')),
            'formats': list(iter_formats())
        }
91
92
class EuroParlWebstreamIE(InfoExtractor):
    """Extractor for European Parliament webstreaming pages and embed players.

    The event identifier is taken from the URL: either the ``event`` query
    parameter of the embed player, or the part of the page slug that follows
    an underscore.
    """

    _VALID_URL = r'''(?x)
        https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
        (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
    '''
    _TESTS = [{
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
        'info_dict': {
            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
            'ext': 'mp4',
            'release_timestamp': 1663137900,
            'title': 'Plenary session',
            'release_date': '20220914',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
        'info_dict': {
            'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
            'ext': 'mp4',
            'release_timestamp': 1668434400,
            'release_date': '20221114',
            'title': 'md5:d3550280c33cc70e0678652e3d52c028',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # embed webpage
        'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
        'info_dict': {
            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
            'ext': 'mp4',
            'title': 'Plenary session',
            'release_date': '20220914',
            'release_timestamp': 1663137900,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # live webstream
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
        'info_dict': {
            'ext': 'mp4',
            'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715',
            'release_timestamp': 1668502800,
            'title': 'Euroscola 2022-11-15 19:21',
            'release_date': '20221115',
            'live_status': 'is_live',
        },
        'skip': 'not live anymore'
    }]

    def _real_extract(self, url):
        """Look up the event via the vuplay API and extract its DASH and HLS formats.

        The id matched from *url* is sent as ``external_id``; the returned
        JSON must contain ``id`` and ``streaming_url`` (a missing key raises
        ``KeyError``). ``title``, ``published_start`` and ``state`` are
        optional.
        """
        display_id = self._match_id(url)

        json_info = self._download_json(
            'https://vis-api.vuplay.co.uk/event/external', display_id,
            query={
                'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
                'external_id': display_id,
            })

        formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id)
        # The HLS manifest URL is derived from the DASH one by swapping the
        # file extension.
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id)

        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

        return {
            'id': json_info['id'],
            'title': json_info.get('title'),
            'formats': formats,
            'subtitles': subtitles,
            'release_timestamp': parse_iso8601(json_info.get('published_start')),
            # `.get('state', '')` would still return None for an explicit JSON
            # null, and `'LIVE' in None` raises TypeError; coerce to '' first.
            'is_live': 'LIVE' in (json_info.get('state') or '')
        }