yt_dlp/extractor/telegraaf.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     determine_ext,
   4     int_or_none,
   5     parse_iso8601,
   6     try_get,
   7 )
   8
   9
  10 class TelegraafIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
  12     _TEST = {
  13         'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
  14         'info_dict': {
  15             'id': 'gaMItuoSeUg2',
  16             'ext': 'mp4',
  17             'title': 'Historisch scheepswrak slaat na 100 jaar los',
  18             'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
  19             'thumbnail': r're:^https?://.*\.jpg',
  20             'duration': 55,
  21             'timestamp': 1572805527,
  22             'upload_date': '20191103',
  23         },
  24         'params': {
  25             # m3u8 download
  26             'skip_download': True,
  27         },
  28     }
  29
  30     def _real_extract(self, url):
  31         article_id = self._match_id(url)
  32
  33         video_id = self._download_json(
  34             'https://app.telegraaf.nl/graphql', article_id,
  35             headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
  36             query={
  37                 'query': '''{
  38   article(uid: %s) {
  39     videos {
  40       videoId
  41     }
  42   }
  43 }''' % article_id,  # noqa: UP031
  44             })['data']['article']['videos'][0]['videoId']
  45
  46         item = self._download_json(
  47             f'https://content.tmgvideo.nl/playlist/item={video_id}/playlist.json',
  48             video_id)['items'][0]
  49         title = item['title']
  50
  51         formats = []
  52         locations = item.get('locations') or {}
  53         for location in locations.get('adaptive', []):
  54             manifest_url = location.get('src')
  55             if not manifest_url:
  56                 continue
  57             ext = determine_ext(manifest_url)
  58             if ext == 'm3u8':
  59                 formats.extend(self._extract_m3u8_formats(
  60                     manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
  61             elif ext == 'mpd':
  62                 formats.extend(self._extract_mpd_formats(
  63                     manifest_url, video_id, mpd_id='dash', fatal=False))
  64             else:
  65                 self.report_warning(f'Unknown adaptive format {ext}')
  66         for location in locations.get('progressive', []):
  67             src = try_get(location, lambda x: x['sources'][0]['src'])
  68             if not src:
  69                 continue
  70             label = location.get('label')
  71             formats.append({
  72                 'url': src,
  73                 'width': int_or_none(location.get('width')),
  74                 'height': int_or_none(location.get('height')),
  75                 'format_id': 'http' + (f'-{label}' if label else ''),
  76             })
  77
  78         return {
  79             'id': video_id,
  80             'title': title,
  81             'description': item.get('description'),
  82             'formats': formats,
  83             'duration': int_or_none(item.get('duration')),
  84             'thumbnail': item.get('poster'),
  85             'timestamp': parse_iso8601(item.get('datecreated'), ' '),
  86         }