]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/xstream.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / xstream.py
CommitLineData
2c0c9dc4
S
1import re
2
3from .common import InfoExtractor
4from ..utils import (
e897bd82 5 find_xpath_attr,
2c0c9dc4
S
6 int_or_none,
7 parse_iso8601,
2c0c9dc4 8 xpath_text,
e897bd82 9 xpath_with_ns,
2c0c9dc4
S
10)
11
12
class XstreamIE(InfoExtractor):
    """Extractor for videos published through the Xstream frontend feed.

    Matches pseudo-URLs such as ``xstream:<partner_id>:<id>`` as well as
    feed URLs such as
    ``http://frontend.xstream.dk/<partner_id>/feed/video/?platform=web&id=<id>``
    (``.dk`` or ``.net`` host).
    """

    _VALID_URL = r'''(?x)
                    (?:
                        xstream:|
                        https?://frontend\.xstream\.(?:dk|net)/
                    )
                    (?P<partner_id>[^/]+)
                    (?:
                        :|
                        /feed/video/\?.*?\bid=
                    )
                    (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _extract_video_info(self, partner_id, video_id):
        """Download the partner's Atom feed for *video_id* and build its info dict.

        The feed is always requested from ``frontend.xstream.dk``, regardless
        of which host the matched URL used.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``timestamp``, ``formats`` and ``thumbnails``.
        """
        data = self._download_xml(
            f'http://frontend.xstream.dk/{partner_id}/feed/video/?platform=web&id={video_id}',
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        # NOTE(review): Element.find() returns None when the feed carries no
        # <atom:entry>; the lookups below presumably fail in that case.
        # Confirm whether the feed guarantees an entry, or raise an explicit
        # extractor error here.
        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        # A feed entry without <media:group> must not abort extraction: the
        # "original" link below can still provide a usable format.
        # ``is not None`` is required because an Element's truthiness
        # reflects its child count, not its presence.
        media_contents = (
            media_group.findall(xpath_with_ns('./media:content', NS_MAP))
            if media_group is not None else [])
        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': f'mp4:{mobj.group("playpath")}',
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # '%d' % None raises TypeError, so only append the
                    # bitrate to the id when one was actually parsed.
                    'format_id': f'rtmp-{tbr}' if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })

        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        original_url = link.get('href') if link is not None else None
        # Skip the link when it has no href: a format without a URL is useless.
        if original_url:
            formats.append({
                'url': original_url,
                'format_id': link.get('rel'),
                'quality': 1,
            })

        thumbnails = []
        if media_group is not None:
            for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP)):
                splash_url = splash.get('url')
                if not splash_url:
                    continue
                thumbnails.append({
                    'url': splash_url,
                    'width': int_or_none(splash.get('width')),
                    'height': int_or_none(splash.get('height')),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }

    def _real_extract(self, url):
        """Split *url* into partner id and video id, then extract the video info."""
        mobj = self._match_valid_url(url)
        partner_id = mobj.group('partner_id')
        video_id = mobj.group('id')

        return self._extract_video_info(partner_id, video_id)