]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/atvat.py
[youtube:comments] Add more options for limiting number of comments extracted (#1626)
[yt-dlp.git] / yt_dlp / extractor / atvat.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import datetime
5
6 from .common import InfoExtractor
7 from ..utils import (
8 float_or_none,
9 jwt_encode_hs256,
10 try_get,
11 )
12
13
class ATVAtIE(InfoExtractor):
    """Extractor for episode pages under ``atv.at/tv/``.

    The page embeds a JSON state blob listing the content resources of the
    episode.  A JWT built from those resources is sent to the ``getsources``
    endpoint, and every video it returns becomes one entry of a
    ``multi_video`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'

    _TESTS = [{
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
        'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
        'info_dict': {
            'id': 'v-ce9cgn1e70n5-1',
            'ext': 'mp4',
            'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
        }
    }, {
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
        'only_matching': True,
    }]

    # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
    _ACCESS_ID = 'x_atv'
    _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'

    def _extract_video_info(self, url, content, video):
        """Build the info dict for a single clip.

        ``url`` is accepted for interface compatibility but is not used.
        ``content`` is the page's content-resource entry for the clip
        (provides ``id``/``splitId``, ``title``, ``duration``,
        ``tvShowTitle``); ``video`` is the matching ``getsources`` entry,
        whose ``urls`` mapping is keyed by protocol name.

        Returns a dict with ``id``, ``title``, ``duration``, ``series`` and
        ``formats``.
        """
        clip_id = content.get('splitId', content['id'])
        formats = []
        for protocol, variant in video['urls'].items():
            # Only the unencrypted ("clear") variant of each protocol is used;
            # protocols without one are skipped.
            source_url = try_get(variant, lambda x: x['clear']['url'])
            if not source_url:
                continue
            if protocol == 'dash':
                formats.extend(self._extract_mpd_formats(
                    source_url, clip_id, mpd_id=protocol, fatal=False))
            elif protocol == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    source_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id=protocol, fatal=False))
            else:
                # Any other protocol is exposed as a single direct-URL format.
                formats.append({
                    'url': source_url,
                    'format_id': protocol,
                })
        self._sort_formats(formats)

        return {
            'id': clip_id,
            'title': content.get('title'),
            'duration': float_or_none(content.get('duration')),
            'series': content.get('tvShowTitle'),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract all clips of the episode page at ``url``.

        Returns a ``multi_video`` result whose entries are produced by
        ``_extract_video_info`` for each video in the ``getsources`` response.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Non-greedy capture: stop at the first closing tag so that further
        # <script> elements on the same line are not swallowed into the JSON.
        json_data = self._parse_json(
            self._search_regex(
                r'<script id="state" type="text/plain">(.*?)</script>',
                webpage, 'json_data'),
            video_id=video_id)

        page = json_data['views']['default']['page']
        video_title = page['title']
        content_resource = page['contentResource']
        content_id = content_resource[0]['id']
        # Each clip is addressed by its position in the contentResource list;
        # the same index is used below to look the resource up again via
        # video['id'].
        content_ids = [
            {
                'id': index,
                'subclip_start': resource['start'],
                'subclip_end': resource['end'],
            }
            for index, resource in enumerate(content_resource)]

        # The token is valid from five minutes before to five minutes after
        # the time of the request.
        time_of_request = datetime.datetime.now()
        not_before = time_of_request - datetime.timedelta(minutes=5)
        expire = time_of_request + datetime.timedelta(minutes=5)
        payload = {
            'content_ids': {
                content_id: content_ids,
            },
            'secure_delivery': True,
            'iat': int(time_of_request.timestamp()),
            'nbf': int(not_before.timestamp()),
            'exp': int(expire.timestamp()),
        }
        jwt_token = jwt_encode_hs256(
            payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
        videos = self._download_json(
            'https://vas-v4.p7s1video.net/4.0/getsources',
            content_id, 'Downloading videos JSON', query={
                'token': jwt_token.decode('utf-8')
            })

        # Only the first item of the response's 'data' mapping is used.
        video_id, videos_data = list(videos['data'].items())[0]
        entries = [
            self._extract_video_info(url, content_resource[video['id']], video)
            for video in videos_data]

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }