yt_dlp/extractor/appleconnect.py

   1 from .common import InfoExtractor
   2 from ..utils import ExtractorError, str_to_int
   3
   4
   5 class AppleConnectIE(InfoExtractor):
   6     _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
   7     _TESTS = [{
   8         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
   9         'md5': 'c1d41f72c8bcaf222e089434619316e4',
  10         'info_dict': {
  11             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
  12             'ext': 'm4v',
  13             'title': 'Energy',
  14             'uploader': 'Drake',
  15             'thumbnail': r're:^https?://.*\.jpg$',
  16             'upload_date': '20150710',
  17             'timestamp': 1436545535,
  18         },
  19     }, {
  20         'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
  21         'only_matching': True,
  22     }]
  23
  24     def _real_extract(self, url):
  25         video_id = self._match_id(url)
  26         webpage = self._download_webpage(url, video_id)
  27
  28         try:
  29             video_json = self._html_search_regex(
  30                 r'class="auc-video-data">(\{.*?\})', webpage, 'json')
  31         except ExtractorError:
  32             raise ExtractorError('This post doesn\'t contain a video', expected=True)
  33
  34         video_data = self._parse_json(video_json, video_id)
  35         timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
  36         like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
  37
  38         return {
  39             'id': video_id,
  40             'url': video_data['sslSrc'],
  41             'title': video_data['title'],
  42             'description': video_data['description'],
  43             'uploader': video_data['artistName'],
  44             'thumbnail': video_data['artworkUrl'],
  45             'timestamp': timestamp,
  46             'like_count': like_count,
  47         }