yt_dlp/extractor/atvat.py

   1 import datetime
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     float_or_none,
   6     jwt_encode_hs256,
   7     try_get,
   8     ExtractorError,
   9 )
  10
  11
  12 class ATVAtIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'
  14
  15     _TESTS = [{
  16         'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
  17         'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
  18         'info_dict': {
  19             'id': 'v-ce9cgn1e70n5-1',
  20             'ext': 'mp4',
  21             'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
  22         }
  23     }, {
  24         'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
  25         'only_matching': True,
  26     }]
  27
  28     # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
  29     _ACCESS_ID = 'x_atv'
  30     _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'
  31
  32     def _extract_video_info(self, url, content, video):
  33         clip_id = content.get('splitId', content['id'])
  34         formats = []
  35         clip_urls = video['urls']
  36         for protocol, variant in clip_urls.items():
  37             source_url = try_get(variant, lambda x: x['clear']['url'])
  38             if not source_url:
  39                 continue
  40             if protocol == 'dash':
  41                 formats.extend(self._extract_mpd_formats(
  42                     source_url, clip_id, mpd_id=protocol, fatal=False))
  43             elif protocol == 'hls':
  44                 formats.extend(self._extract_m3u8_formats(
  45                     source_url, clip_id, 'mp4', 'm3u8_native',
  46                     m3u8_id=protocol, fatal=False))
  47             else:
  48                 formats.append({
  49                     'url': source_url,
  50                     'format_id': protocol,
  51                 })
  52         self._sort_formats(formats)
  53
  54         return {
  55             'id': clip_id,
  56             'title': content.get('title'),
  57             'duration': float_or_none(content.get('duration')),
  58             'series': content.get('tvShowTitle'),
  59             'formats': formats,
  60         }
  61
  62     def _real_extract(self, url):
  63         video_id = self._match_id(url)
  64         webpage = self._download_webpage(url, video_id)
  65         json_data = self._parse_json(
  66             self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),
  67             video_id=video_id)
  68
  69         video_title = json_data['views']['default']['page']['title']
  70         contentResource = json_data['views']['default']['page']['contentResource']
  71         content_id = contentResource[0]['id']
  72         content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
  73                        for id, content in enumerate(contentResource)]
  74
  75         time_of_request = datetime.datetime.now()
  76         not_before = time_of_request - datetime.timedelta(minutes=5)
  77         expire = time_of_request + datetime.timedelta(minutes=5)
  78         payload = {
  79             'content_ids': {
  80                 content_id: content_ids,
  81             },
  82             'secure_delivery': True,
  83             'iat': int(time_of_request.timestamp()),
  84             'nbf': int(not_before.timestamp()),
  85             'exp': int(expire.timestamp()),
  86         }
  87         jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
  88         videos = self._download_json(
  89             'https://vas-v4.p7s1video.net/4.0/getsources',
  90             content_id, 'Downloading videos JSON', query={
  91                 'token': jwt_token.decode('utf-8')
  92             })
  93
  94         video_id, videos_data = list(videos['data'].items())[0]
  95         error_msg = try_get(videos_data, lambda x: x['error']['title'])
  96         if error_msg == 'Geo check failed':
  97             self.raise_geo_restricted(error_msg)
  98         elif error_msg:
  99             raise ExtractorError(error_msg)
 100         entries = [
 101             self._extract_video_info(url, contentResource[video['id']], video)
 102             for video in videos_data]
 103
 104         return {
 105             '_type': 'multi_video',
 106             'id': video_id,
 107             'title': video_title,
 108             'entries': entries,
 109         }