yt_dlp/extractor/aenetworks.py

   1 from .theplatform import ThePlatformIE
   2 from ..utils import (
   3     ExtractorError,
   4     GeoRestrictedError,
   5     int_or_none,
   6     remove_start,
   7     traverse_obj,
   8     update_url_query,
   9     urlencode_postdata,
  10 )
  11
  12
  13 class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
  14     _BASE_URL_REGEX = r'''(?x)https?://
  15         (?:(?:www|play|watch)\.)?
  16         (?P<domain>
  17             (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
  18             fyi\.tv
  19         )/'''
  20     _THEPLATFORM_KEY = '43jXaGRQud'
  21     _THEPLATFORM_SECRET = 'S10BPXHMlb'
  22     _DOMAIN_MAP = {
  23         'history.com': ('HISTORY', 'history'),
  24         'aetv.com': ('AETV', 'aetv'),
  25         'mylifetime.com': ('LIFETIME', 'lifetime'),
  26         'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
  27         'fyi.tv': ('FYI', 'fyi'),
  28         'historyvault.com': (None, 'historyvault'),
  29         'biography.com': (None, 'biography'),
  30     }
  31
  32     def _extract_aen_smil(self, smil_url, video_id, auth=None):
  33         query = {
  34             'mbr': 'true',
  35             'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
  36         }
  37         if auth:
  38             query['auth'] = auth
  39         TP_SMIL_QUERY = [{
  40             'assetTypes': 'high_video_ak',
  41             'switch': 'hls_high_ak',
  42         }, {
  43             'assetTypes': 'high_video_s3',
  44         }, {
  45             'assetTypes': 'high_video_s3',
  46             'switch': 'hls_high_fastly',
  47         }]
  48         formats = []
  49         subtitles = {}
  50         last_e = None
  51         for q in TP_SMIL_QUERY:
  52             q.update(query)
  53             m_url = update_url_query(smil_url, q)
  54             m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
  55             try:
  56                 tp_formats, tp_subtitles = self._extract_theplatform_smil(
  57                     m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
  58             except ExtractorError as e:
  59                 if isinstance(e, GeoRestrictedError):
  60                     raise
  61                 last_e = e
  62                 continue
  63             formats.extend(tp_formats)
  64             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
  65         if last_e and not formats:
  66             raise last_e
  67         return {
  68             'id': video_id,
  69             'formats': formats,
  70             'subtitles': subtitles,
  71         }
  72
  73     def _extract_aetn_info(self, domain, filter_key, filter_value, url):
  74         requestor_id, brand = self._DOMAIN_MAP[domain]
  75         result = self._download_json(
  76             f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
  77             filter_value, query={f'filter[{filter_key}]': filter_value})
  78         result = traverse_obj(
  79             result, ('results',
  80                      lambda k, v: k == 0 and v[filter_key] == filter_value),
  81             get_all=False)
  82         if not result:
  83             raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
  84                                  video_id=remove_start(filter_value, '/'))
  85         title = result['title']
  86         video_id = result['id']
  87         media_url = result['publicUrl']
  88         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
  89             r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
  90         info = self._parse_theplatform_metadata(theplatform_metadata)
  91         auth = None
  92         if theplatform_metadata.get('AETN$isBehindWall'):
  93             resource = self._get_mvpd_resource(
  94                 requestor_id, theplatform_metadata['title'],
  95                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
  96                 traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
  97             auth = self._extract_mvpd_auth(
  98                 url, video_id, requestor_id, resource)
  99         info.update(self._extract_aen_smil(media_url, video_id, auth))
 100         info.update({
 101             'title': title,
 102             'series': result.get('seriesName'),
 103             'season_number': int_or_none(result.get('tvSeasonNumber')),
 104             'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
 105         })
 106         return info
 107
 108
 109 class AENetworksIE(AENetworksBaseIE):
 110     IE_NAME = 'aenetworks'
 111     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
 112     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
 113         shows/[^/]+/season-\d+/episode-\d+|
 114         (?:
 115             (?:movie|special)s/[^/]+|
 116             (?:shows/[^/]+/)?videos
 117         )/[^/?#&]+
 118     )'''
 119     _TESTS = [{
 120         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
 121         'info_dict': {
 122             'id': '22253814',
 123             'ext': 'mp4',
 124             'title': 'Winter Is Coming',
 125             'description': 'md5:a40e370925074260b1c8a633c632c63a',
 126             'timestamp': 1338306241,
 127             'upload_date': '20120529',
 128             'uploader': 'AENE-NEW',
 129             'duration': 2592.0,
 130             'thumbnail': r're:^https?://.*\.jpe?g$',
 131             'chapters': 'count:5',
 132             'tags': 'count:14',
 133             'categories': ['Mountain Men'],
 134             'episode_number': 1,
 135             'episode': 'Episode 1',
 136             'season': 'Season 1',
 137             'season_number': 1,
 138             'series': 'Mountain Men',
 139         },
 140         'params': {
 141             # m3u8 download
 142             'skip_download': True,
 143         },
 144         'add_ie': ['ThePlatform'],
 145         'skip': 'Geo-restricted - This content is not available in your location.',
 146     }, {
 147         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
 148         'info_dict': {
 149             'id': '600587331957',
 150             'ext': 'mp4',
 151             'title': 'Inlawful Entry',
 152             'description': 'md5:57c12115a2b384d883fe64ca50529e08',
 153             'timestamp': 1452634428,
 154             'upload_date': '20160112',
 155             'uploader': 'AENE-NEW',
 156             'duration': 1277.695,
 157             'thumbnail': r're:^https?://.*\.jpe?g$',
 158             'chapters': 'count:4',
 159             'tags': 'count:23',
 160             'episode': 'Episode 1',
 161             'episode_number': 1,
 162             'season': 'Season 9',
 163             'season_number': 9,
 164             'series': 'Duck Dynasty',
 165         },
 166         'params': {
 167             # m3u8 download
 168             'skip_download': True,
 169         },
 170         'add_ie': ['ThePlatform'],
 171         'skip': 'This video is only available for users of participating TV providers.',
 172     }, {
 173         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
 174         'only_matching': True,
 175     }, {
 176         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
 177         'only_matching': True,
 178     }, {
 179         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
 180         'only_matching': True,
 181     }, {
 182         'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
 183         'only_matching': True,
 184     }, {
 185         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
 186         'only_matching': True,
 187     }, {
 188         'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
 189         'only_matching': True,
 190     }, {
 191         'url': 'http://www.history.com/videos/history-of-valentines-day',
 192         'only_matching': True,
 193     }, {
 194         'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
 195         'only_matching': True,
 196     }]
 197
 198     def _real_extract(self, url):
 199         domain, canonical = self._match_valid_url(url).groups()
 200         return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
 201
 202
 203 class AENetworksListBaseIE(AENetworksBaseIE):
 204     def _call_api(self, resource, slug, brand, fields):
 205         return self._download_json(
 206             'https://yoga.appsvcs.aetnd.com/graphql',
 207             slug, query={'brand': brand}, data=urlencode_postdata({
 208                 'query': '''{
 209   %s(slug: "%s") {
 210     %s
 211   }
 212 }''' % (resource, slug, fields),  # noqa: UP031
 213             }))['data'][resource]
 214
 215     def _real_extract(self, url):
 216         domain, slug = self._match_valid_url(url).groups()
 217         _, brand = self._DOMAIN_MAP[domain]
 218         playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
 219         base_url = f'http://watch.{domain}'
 220
 221         entries = []
 222         for item in (playlist.get(self._ITEMS_KEY) or []):
 223             doc = self._get_doc(item)
 224             canonical = doc.get('canonical')
 225             if not canonical:
 226                 continue
 227             entries.append(self.url_result(
 228                 base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
 229
 230         description = None
 231         if self._PLAYLIST_DESCRIPTION_KEY:
 232             description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
 233
 234         return self.playlist_result(
 235             entries, playlist.get('id'),
 236             playlist.get(self._PLAYLIST_TITLE_KEY), description)
 237
 238
 239 class AENetworksCollectionIE(AENetworksListBaseIE):
 240     IE_NAME = 'aenetworks:collection'
 241     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
 242     _TESTS = [{
 243         'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
 244         'info_dict': {
 245             'id': '282',
 246             'title': 'America The Story of Us',
 247         },
 248         'playlist_mincount': 12,
 249     }, {
 250         'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
 251         'only_matching': True,
 252     }, {
 253         'url': 'https://www.historyvault.com/collections/mysteryquest',
 254         'only_matching': True,
 255     }]
 256     _RESOURCE = 'list'
 257     _ITEMS_KEY = 'items'
 258     _PLAYLIST_TITLE_KEY = 'display_title'
 259     _PLAYLIST_DESCRIPTION_KEY = None
 260     _FIELDS = '''id
 261     display_title
 262     items {
 263       ... on ListVideoItem {
 264         doc {
 265           canonical
 266           id
 267         }
 268       }
 269     }'''
 270
 271     def _get_doc(self, item):
 272         return item.get('doc') or {}
 273
 274
 275 class AENetworksShowIE(AENetworksListBaseIE):
 276     IE_NAME = 'aenetworks:show'
 277     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
 278     _TESTS = [{
 279         'url': 'http://www.history.com/shows/ancient-aliens',
 280         'info_dict': {
 281             'id': 'SERIES1574',
 282             'title': 'Ancient Aliens',
 283             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
 284         },
 285         'playlist_mincount': 150,
 286     }]
 287     _RESOURCE = 'series'
 288     _ITEMS_KEY = 'episodes'
 289     _PLAYLIST_TITLE_KEY = 'title'
 290     _PLAYLIST_DESCRIPTION_KEY = 'description'
 291     _FIELDS = '''description
 292     id
 293     title
 294     episodes {
 295       canonical
 296       id
 297     }'''
 298
 299     def _get_doc(self, item):
 300         return item
 301
 302
 303 class HistoryTopicIE(AENetworksBaseIE):
 304     IE_NAME = 'history:topic'
 305     IE_DESC = 'History.com Topic'
 306     _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
 307     _TESTS = [{
 308         'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
 309         'info_dict': {
 310             'id': '40700995724',
 311             'ext': 'mp4',
 312             'title': 'History of Valentine’s Day',
 313             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
 314             'timestamp': 1375819729,
 315             'upload_date': '20130806',
 316             'uploader': 'AENE-NEW',
 317         },
 318         'params': {
 319             # m3u8 download
 320             'skip_download': True,
 321         },
 322         'add_ie': ['ThePlatform'],
 323     }]
 324
 325     def _real_extract(self, url):
 326         display_id = self._match_id(url)
 327         return self.url_result(
 328             'http://www.history.com/videos/' + display_id,
 329             AENetworksIE.ie_key())
 330
 331
 332 class HistoryPlayerIE(AENetworksBaseIE):
 333     IE_NAME = 'history:player'
 334     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
 335     _TESTS = []
 336
 337     def _real_extract(self, url):
 338         domain, video_id = self._match_valid_url(url).groups()
 339         return self._extract_aetn_info(domain, 'id', video_id, url)
 340
 341
 342 class BiographyIE(AENetworksBaseIE):
 343     _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
 344     _TESTS = [{
 345         'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
 346         'info_dict': {
 347             'id': '30322987',
 348             'ext': 'mp4',
 349             'title': 'Vincent Van Gogh - Full Episode',
 350             'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
 351             'timestamp': 1311970571,
 352             'upload_date': '20110729',
 353             'uploader': 'AENE-NEW',
 354         },
 355         'params': {
 356             # m3u8 download
 357             'skip_download': True,
 358         },
 359         'add_ie': ['ThePlatform'],
 360         'skip': '404 Not Found',
 361     }]
 362
 363     def _real_extract(self, url):
 364         display_id = self._match_id(url)
 365         webpage = self._download_webpage(url, display_id)
 366         player_url = self._search_regex(
 367             rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
 368             webpage, 'player URL')
 369         return self.url_result(player_url, HistoryPlayerIE.ie_key())