[atv.at] Use jwt for API (#1012)

author NeroBurner <redacted>

Thu, 23 Sep 2021 17:40:51 +0000 (19:40 +0200)

committer GitHub <redacted>

Thu, 23 Sep 2021 17:40:51 +0000 (23:10 +0530)
author NeroBurner <redacted>
Thu, 23 Sep 2021 17:40:51 +0000 (19:40 +0200)
committer GitHub <redacted>
Thu, 23 Sep 2021 17:40:51 +0000 (23:10 +0530)
diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py

index bfcf88f1afa02b72686917c8f1a53ae7ee4fa000..7c30cfcbb9cee580a75a1e8286e89f6e5a9a21be 100644 (file)
--- a/yt_dlp/extractor/atvat.py
+++ b/yt_dlp/extractor/atvat.py
@@ -1,74 +1,106 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import datetime
+
  from .common import InfoExtractor
  from ..utils import (
-    determine_ext,
-    dict_get,
-    int_or_none,
-    unescapeHTML,
+    float_or_none,
+    jwt_encode_hs256,
+    try_get,
  )
  
  
  class ATVAtIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
+    _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'
+
      _TESTS = [{
-        'url': 'https://www.atv.at/bauer-sucht-frau-die-zweite-chance/folge-1/d3390693/',
-        'md5': 'c471605591009dfb6e6c54f7e62e2807',
+        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
+        'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
          'info_dict': {
-            'id': '3390684',
+            'id': 'v-ce9cgn1e70n5-1',
              'ext': 'mp4',
-            'title': 'Bauer sucht Frau - Die zweite Chance Folge 1',
+            'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
          }
      }, {
-        'url': 'https://www.atv.at/bauer-sucht-frau-staffel-17/fuenfte-eventfolge/d3339537/',
+        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
          'only_matching': True,
      }]
  
-    def _process_source_entry(self, source, part_id):
-        source_url = source.get('url')
-        if not source_url:
-            return
-        if determine_ext(source_url) == 'm3u8':
-            return self._extract_m3u8_formats(
-                source_url, part_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False)
-        else:
-            return [{
-                'url': source_url,
-            }]
+    # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
+    _ACCESS_ID = 'x_atv'
+    _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'
  
-    def _process_entry(self, entry):
-        part_id = entry.get('id')
-        if not part_id:
-            return
+    def _extract_video_info(self, url, content, video):
+        clip_id = content.get('splitId', content['id'])
          formats = []
-        for source in entry.get('sources', []):
-            formats.extend(self._process_source_entry(source, part_id) or [])
-
+        clip_urls = video['urls']
+        for protocol, variant in clip_urls.items():
+            source_url = try_get(variant, lambda x: x['clear']['url'])
+            if not source_url:
+                continue
+            if protocol == 'dash':
+                formats.extend(self._extract_mpd_formats(
+                    source_url, clip_id, mpd_id=protocol, fatal=False))
+            elif protocol == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, clip_id, 'mp4', 'm3u8_native',
+                    m3u8_id=protocol, fatal=False))
+            else:
+                formats.append({
+                    'url': source_url,
+                    'format_id': protocol,
+                })
          self._sort_formats(formats)
+
          return {
-            'id': part_id,
-            'title': entry.get('title'),
-            'duration': int_or_none(entry.get('duration')),
-            'formats': formats
+            'id': clip_id,
+            'title': content.get('title'),
+            'duration': float_or_none(content.get('duration')),
+            'series': content.get('tvShowTitle'),
+            'formats': formats,
          }
  
      def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        video_data = self._parse_json(unescapeHTML(self._search_regex(
-            r'var\splaylist\s*=\s*(?P<json>\[.*\]);',
-            webpage, 'player data', group='json')),
-            display_id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        json_data = self._parse_json(
+            self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),
+            video_id=video_id)
+
+        video_title = json_data['views']['default']['page']['title']
+        contentResource = json_data['views']['default']['page']['contentResource']
+        content_id = contentResource[0]['id']
+        content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
+                       for id, content in enumerate(contentResource)]
+
+        time_of_request = datetime.datetime.now()
+        not_before = time_of_request - datetime.timedelta(minutes=5)
+        expire = time_of_request + datetime.timedelta(minutes=5)
+        payload = {
+            'content_ids': {
+                content_id: content_ids,
+            },
+            'secure_delivery': True,
+            'iat': int(time_of_request.timestamp()),
+            'nbf': int(not_before.timestamp()),
+            'exp': int(expire.timestamp()),
+        }
+        jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
+        videos = self._download_json(
+            'https://vas-v4.p7s1video.net/4.0/getsources',
+            content_id, 'Downloading videos JSON', query={
+                'token': jwt_token.decode('utf-8')
+            })
  
-        first_video = video_data[0]
-        video_id = first_video['id']
-        video_title = dict_get(first_video, ('tvShowTitle', 'title'))
+        video_id, videos_data = list(videos['data'].items())[0]
+        entries = [
+            self._extract_video_info(url, contentResource[video['id']], video)
+            for video in videos_data]
  
          return {
              '_type': 'multi_video',
              'id': video_id,
              'title': video_title,
-            'entries': (self._process_entry(entry) for entry in video_data),
+            'entries': entries,
          }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 9eb47fccb17ec6283241d4979c89849cdb1021d3..141d2c9ccd7dde8463062e8e102468073b8357de 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -16,6 +16,8 @@
  import errno
  import functools
  import gzip
+import hashlib
+import hmac
  import imp
  import io
  import itertools
@@ -3290,6 +3292,14 @@ def platform_name():
      return res
  
  
+def get_windows_version():
+    """Return platform.win32_ver()[1] parsed by version_tuple, or None when compat_os_name is not 'nt'."""
+    if compat_os_name != 'nt':
+        return None
+    return version_tuple(platform.win32_ver()[1])
+
+
  def _windows_write_string(s, out):
      """ Returns True if the string was written using special methods,
      False if it has yet to be written out."""
@@ -6375,9 +6385,20 @@ def variadic(x, allowed_types=(str, bytes)):
      return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
  
  
-def get_windows_version():
-    ''' Get Windows version. None if it's not running on Windows '''
-    if compat_os_name == 'nt':
-        return version_tuple(platform.win32_ver()[1])
-    else:
-        return None
+def jwt_encode_hs256(payload_data, key, headers=None):
+    """Create a JSON Web Token signed with HMAC-SHA256 (HS256).
+
+    The result is laid out as JWS Compact Serialization:
+    b'<header>.<payload>.<signature>'.
+    JWT: https://www.rfc-editor.org/rfc/rfc7519.html
+    JWS: https://www.rfc-editor.org/rfc/rfc7515.html
+
+    NOTE(review): the three segments are encoded with standard, padded
+    base64 (base64.b64encode), whereas RFC 7515 specifies unpadded
+    base64url. The output format is deliberately left unchanged here;
+    confirm with the consuming APIs before switching.
+
+    @param payload_data  JSON-serializable claims
+    @param key           signing secret, str (encoded as UTF-8) or bytes
+    @param headers       optional dict of extra header fields; these are
+                         merged over the default 'alg' and 'typ' entries
+    @returns             the token as bytes
+    """
+    header_data = {
+        'alg': 'HS256',
+        'typ': 'JWT',
+    }
+    if headers:
+        header_data.update(headers)
+    key_bytes = key if isinstance(key, bytes) else key.encode('utf-8')
+    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
+    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
+    # The signature covers exactly the bytes that precede it in the token
+    signing_input = header_b64 + b'.' + payload_b64
+    h = hmac.new(key_bytes, signing_input, hashlib.sha256)
+    signature_b64 = base64.b64encode(h.digest())
+    return signing_input + b'.' + signature_b64
author	NeroBurner <redacted>
	Thu, 23 Sep 2021 17:40:51 +0000 (19:40 +0200)
committer	GitHub <redacted>
	Thu, 23 Sep 2021 17:40:51 +0000 (23:10 +0530)
yt_dlp/extractor/atvat.py		patch \| blob \| blame \| history
yt_dlp/utils.py		patch \| blob \| blame \| history