]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/xstream.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / xstream.py
CommitLineData
2c0c9dc4
S
1import re
2
3from .common import InfoExtractor
4from ..utils import (
e897bd82 5 find_xpath_attr,
2c0c9dc4
S
6 int_or_none,
7 parse_iso8601,
2c0c9dc4 8 xpath_text,
e897bd82 9 xpath_with_ns,
2c0c9dc4
S
10)
11
12
class XstreamIE(InfoExtractor):
    # Accepts either the ``xstream:<partner_id>:<id>`` shorthand or a frontend
    # feed URL of the form
    # ``http(s)://frontend.xstream.(dk|net)/<partner_id>/feed/video/?...id=<id>``.
    _VALID_URL = r'''(?x)
                    (?:
                        xstream:|
                        https?://frontend\.xstream\.(?:dk|net)/
                    )
                    (?P<partner_id>[^/]+)
                    (?:
                        :|
                        /feed/video/\?.*?\bid=
                    )
                    (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _extract_video_info(self, partner_id, video_id):
        """Download the partner's video feed and build the info dict.

        The feed is always requested from ``frontend.xstream.dk``, using
        ``partner_id`` as the path segment and ``video_id`` as the ``id``
        query parameter.

        Returns a dict with ``id``, ``title``, ``description``, ``timestamp``,
        ``formats`` and ``thumbnails`` keys.
        """
        data = self._download_xml(
            'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
            % (partner_id, video_id),
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        # ``find`` returns None when the entry carries no media:group; treat
        # that as "no media contents" instead of failing on ``None.findall``.
        media_contents = (
            media_group.findall(xpath_with_ns('./media:content', NS_MAP))
            if media_group is not None else [])
        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            # RTMP URLs are split into the connection URL (scheme, host and
            # app) and the play path that follows it.
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': 'mp4:%s' % mobj.group('playpath'),
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # ``tbr`` is None when the bitrate attribute is missing or
                    # not numeric; '%d' % None would raise TypeError.
                    'format_id': 'rtmp-%d' % tbr if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })

        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        # Only add the "original" rendition when it actually has a URL.
        if link is not None and link.get('href'):
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
                'quality': 1,
            })

        # Splash images live inside media:group, so there are none without it.
        splashes = (
            media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))
            if media_group is not None else [])
        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in splashes]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }

    def _real_extract(self, url):
        """Split ``url`` into partner and video id, then extract the feed."""
        mobj = self._match_valid_url(url)
        partner_id = mobj.group('partner_id')
        video_id = mobj.group('id')

        return self._extract_video_info(partner_id, video_id)