]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/folketinget.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / folketinget.py
CommitLineData
6127693e
PH
1from .common import InfoExtractor
2from ..compat import compat_parse_qs
3from ..utils import (
4 int_or_none,
5 parse_duration,
6 parse_iso8601,
7 xpath_text,
8)
9
10
class FolketingetIE(InfoExtractor):
    """Extractor for ft.dk web TV video pages (URLs matching ``_VALID_URL``)."""

    IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
    _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
    _TEST = {
        'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
        'md5': '6269e8626fa1a891bf5369b386ae996a',
        'info_dict': {
            'id': '1165642',
            'ext': 'mp4',
            'title': 'Åbent samråd i Erhvervsudvalget',
            'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
            'view_count': int,
            'width': 768,
            'height': 432,
            'tbr': 928000,
            'timestamp': 1416493800,
            'upload_date': '20141120',
            'duration': 3960,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single ft.dk video page.

        The page is downloaded to obtain the title, the agenda text (used
        as the description) and the query string of the embedded flash
        player. That query string carries an ``xml`` parameter pointing at
        a metadata document, which supplies the remaining fields and the
        list of streams.

        Raises ``KeyError`` if the player query string has no ``xml``
        parameter.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        # The description is optional: a page without the agenda block
        # must not abort extraction.
        description = self._html_search_regex(
            r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
            webpage, 'description', fatal=False)

        # Everything after the '?' in the player's embed URL is parsed as
        # a query string; its 'xml' entry is the metadata document URL.
        player_query = self._search_regex(
            r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
            webpage, 'player params')
        player_params = compat_parse_qs(player_query)
        xml_url = player_params['xml'][0]
        doc = self._download_xml(xml_url, video_id)

        timestamp = parse_iso8601(xpath_text(doc, './/date'))
        duration = parse_duration(xpath_text(doc, './/duration'))
        width = int_or_none(xpath_text(doc, './/width'))
        height = int_or_none(xpath_text(doc, './/height'))
        view_count = int_or_none(xpath_text(doc, './/views'))

        formats = []
        for stream in doc.findall('.//streams/stream'):
            # The bitrate only feeds optional fields, so a <stream> without
            # the attribute yields None here instead of raising KeyError.
            bitrate = stream.get('bitrate')
            # NOTE(review): the test above expects tbr == 928000, which
            # suggests the attribute is in bit/s while tbr is conventionally
            # kbit/s -- confirm against a live response before rescaling.
            formats.append({
                'format_id': bitrate,
                # A stream without a URL is unusable, hence fatal.
                'url': xpath_text(stream, './url', fatal=True),
                'tbr': int_or_none(bitrate),
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'timestamp': timestamp,
            'width': width,
            'height': height,
            'duration': duration,
            'view_count': view_count,
        }