]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/atvat.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / atvat.py
1 import datetime as dt
2
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 float_or_none,
7 jwt_encode_hs256,
8 try_get,
9 )
10
11
class ATVAtIE(InfoExtractor):
    """Extractor for ``atv.at/tv/...`` pages.

    The page embeds a JSON state blob listing the content parts; the actual
    stream URLs are fetched from the ``getsources`` endpoint using a
    short-lived HS256-signed JWT built from that state.
    """

    _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'

    _TESTS = [{
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
        'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
        'info_dict': {
            'id': 'v-ce9cgn1e70n5-1',
            'ext': 'mp4',
            'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
        },
    }, {
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
        'only_matching': True,
    }]

    # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
    _ACCESS_ID = 'x_atv'
    _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'

    def _extract_video_info(self, url, content, video):
        """Build the info dict for a single clip.

        ``url`` is accepted for interface compatibility but is not used.
        ``content`` is the clip's entry from the page state (read for
        ``splitId``/``id``, ``title``, ``duration`` and ``tvShowTitle``).
        ``video`` is the matching entry from the sources response; its
        ``urls`` mapping is keyed by protocol name.

        Returns a dict with ``id``, ``title``, ``duration``, ``series`` and
        ``formats``.
        """
        clip_id = content.get('splitId', content['id'])
        formats = []
        for protocol, variant in video['urls'].items():
            # Only the "clear" variant of each protocol is read; protocols
            # without one are skipped.
            source_url = try_get(variant, lambda x: x['clear']['url'])
            if not source_url:
                continue
            if protocol == 'dash':
                formats.extend(self._extract_mpd_formats(
                    source_url, clip_id, mpd_id=protocol, fatal=False))
            elif protocol == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    source_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id=protocol, fatal=False))
            else:
                # Any other protocol is exposed as a single direct format.
                formats.append({
                    'url': source_url,
                    'format_id': protocol,
                })

        return {
            'id': clip_id,
            'title': content.get('title'),
            'duration': float_or_none(content.get('duration')),
            'series': content.get('tvShowTitle'),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract all parts of an episode page as a ``multi_video`` result.

        Raises ``ExtractorError`` when the sources response carries an error
        title or contains no data; a ``'Geo check failed'`` error is reported
        through ``raise_geo_restricted``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        json_data = self._parse_json(
            self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),
            video_id=video_id)

        page = json_data['views']['default']['page']
        video_title = page['title']
        content_resource = page['contentResource']
        content_id = content_resource[0]['id']
        # Each part is identified by its position in the list; the same
        # position is used further down to map the response back to the part.
        content_ids = [
            {'id': index, 'subclip_start': content['start'], 'subclip_end': content['end']}
            for index, content in enumerate(content_resource)]

        # Use an aware UTC datetime: timedelta arithmetic on a naive local
        # datetime ignores DST, so timestamp() could yield nbf/exp values
        # skewed by an hour around a DST transition.
        time_of_request = dt.datetime.now(dt.timezone.utc)
        not_before = time_of_request - dt.timedelta(minutes=5)
        expire = time_of_request + dt.timedelta(minutes=5)
        payload = {
            'content_ids': {
                content_id: content_ids,
            },
            'secure_delivery': True,
            'iat': int(time_of_request.timestamp()),
            'nbf': int(not_before.timestamp()),
            'exp': int(expire.timestamp()),
        }
        jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
        videos = self._download_json(
            'https://vas-v4.p7s1video.net/4.0/getsources',
            content_id, 'Downloading videos JSON', query={
                'token': jwt_token.decode('utf-8'),
            })

        videos_by_id = videos['data']
        if not videos_by_id:
            # Fail with a readable extractor error instead of a bare
            # IndexError/StopIteration on an empty response.
            raise ExtractorError('No video data found in sources response')
        # Only the first entry of the response is used; its key becomes the
        # id of the resulting multi_video.
        video_id, videos_data = next(iter(videos_by_id.items()))
        error_msg = try_get(videos_data, lambda x: x['error']['title'])
        if error_msg == 'Geo check failed':
            self.raise_geo_restricted(error_msg)
        elif error_msg:
            raise ExtractorError(error_msg)
        entries = [
            self._extract_video_info(url, content_resource[video['id']], video)
            for video in videos_data]

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }