yt_dlp/extractor/kanal2.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     ExtractorError,
   4     join_nonempty,
   5     traverse_obj,
   6     unified_timestamp,
   7     update_url_query,
   8 )
   9
  10
  11 class Kanal2IE(InfoExtractor):
  12     _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
  13     _TESTS = [{
  14         'note': 'Test standard url (#5575)',
  15         'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
  16         'md5': '7ea7b16266ec1798743777df241883dd',
  17         'info_dict': {
  18             'id': '40792',
  19             'ext': 'mp4',
  20             'title': 'Aedniku aabits / Osa 53  (05.08.2016 20:00)',
  21             'thumbnail': r're:https?://.*\.jpg$',
  22             'description': 'md5:53cabf3c5d73150d594747f727431248',
  23             'upload_date': '20160805',
  24             'timestamp': 1470420000,
  25         },
  26     }]
  27
  28     def _real_extract(self, url):
  29         video_id = self._match_id(url)
  30         playlist = self._download_json(
  31             f'https://kanal2.postimees.ee/player/playlist/{video_id}',
  32             video_id, query={'type': 'episodes'},
  33             headers={'X-Requested-With': 'XMLHttpRequest'})
  34
  35         return {
  36             'id': video_id,
  37             'title': join_nonempty(*traverse_obj(playlist, ('info', ('title', 'subtitle'))), delim=' / '),
  38             'description': traverse_obj(playlist, ('info', 'description')),
  39             'thumbnail': traverse_obj(playlist, ('data', 'image')),
  40             'formats': self.get_formats(playlist, video_id),
  41             'timestamp': unified_timestamp(self._search_regex(
  42                 r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
  43                 traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='') + ' +0200'),
  44         }
  45
  46     def get_formats(self, playlist, video_id):
  47         path = traverse_obj(playlist, ('data', 'path'))
  48         if not path:
  49             raise ExtractorError('Path value not found in playlist JSON response')
  50         session = self._download_json(
  51             'https://sts.postimees.ee/session/register',
  52             video_id, note='Creating session', errnote='Error creating session',
  53             headers={
  54                 'X-Original-URI': path,
  55                 'Accept': 'application/json',
  56             })
  57         if session.get('reason') != 'OK' or not session.get('session'):
  58             reason = session.get('reason', 'unknown error')
  59             raise ExtractorError(f'Unable to obtain session: {reason}')
  60
  61         formats = []
  62         for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file')):
  63             formats.extend(self._extract_m3u8_formats(
  64                 update_url_query(stream, {'s': session['session']}), video_id, 'mp4'))
  65
  66         return formats