yt_dlp/extractor/aenetworks.py

   1 from .theplatform import ThePlatformIE
   2 from ..utils import (
   3     ExtractorError,
   4     GeoRestrictedError,
   5     int_or_none,
   6     remove_start,
   7     traverse_obj,
   8     update_url_query,
   9     urlencode_postdata,
  10 )
  11
  12
  13 class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
  14     _BASE_URL_REGEX = r'''(?x)https?://
  15         (?:(?:www|play|watch)\.)?
  16         (?P<domain>
  17             (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
  18             fyi\.tv
  19         )/'''
  20     _THEPLATFORM_KEY = '43jXaGRQud'
  21     _THEPLATFORM_SECRET = 'S10BPXHMlb'
  22     _DOMAIN_MAP = {
  23         'history.com': ('HISTORY', 'history'),
  24         'aetv.com': ('AETV', 'aetv'),
  25         'mylifetime.com': ('LIFETIME', 'lifetime'),
  26         'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
  27         'fyi.tv': ('FYI', 'fyi'),
  28         'historyvault.com': (None, 'historyvault'),
  29         'biography.com': (None, 'biography'),
  30     }
  31
  32     def _extract_aen_smil(self, smil_url, video_id, auth=None):
  33         query = {
  34             'mbr': 'true',
  35             'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
  36         }
  37         if auth:
  38             query['auth'] = auth
  39         TP_SMIL_QUERY = [{
  40             'assetTypes': 'high_video_ak',
  41             'switch': 'hls_high_ak',
  42         }, {
  43             'assetTypes': 'high_video_s3',
  44         }, {
  45             'assetTypes': 'high_video_s3',
  46             'switch': 'hls_high_fastly',
  47         }]
  48         formats = []
  49         subtitles = {}
  50         last_e = None
  51         for q in TP_SMIL_QUERY:
  52             q.update(query)
  53             m_url = update_url_query(smil_url, q)
  54             m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
  55             try:
  56                 tp_formats, tp_subtitles = self._extract_theplatform_smil(
  57                     m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
  58             except ExtractorError as e:
  59                 if isinstance(e, GeoRestrictedError):
  60                     raise
  61                 last_e = e
  62                 continue
  63             formats.extend(tp_formats)
  64             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
  65         if last_e and not formats:
  66             raise last_e
  67         return {
  68             'id': video_id,
  69             'formats': formats,
  70             'subtitles': subtitles,
  71         }
  72
  73     def _extract_aetn_info(self, domain, filter_key, filter_value, url):
  74         requestor_id, brand = self._DOMAIN_MAP[domain]
  75         result = self._download_json(
  76             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
  77             filter_value, query={'filter[%s]' % filter_key: filter_value})
  78         result = traverse_obj(
  79             result, ('results',
  80                      lambda k, v: k == 0 and v[filter_key] == filter_value),
  81             get_all=False)
  82         if not result:
  83             raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
  84                                  video_id=remove_start(filter_value, '/'))
  85         title = result['title']
  86         video_id = result['id']
  87         media_url = result['publicUrl']
  88         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
  89             r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
  90         info = self._parse_theplatform_metadata(theplatform_metadata)
  91         auth = None
  92         if theplatform_metadata.get('AETN$isBehindWall'):
  93             resource = self._get_mvpd_resource(
  94                 requestor_id, theplatform_metadata['title'],
  95                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
  96                 theplatform_metadata['ratings'][0]['rating'])
  97             auth = self._extract_mvpd_auth(
  98                 url, video_id, requestor_id, resource)
  99         info.update(self._extract_aen_smil(media_url, video_id, auth))
 100         info.update({
 101             'title': title,
 102             'series': result.get('seriesName'),
 103             'season_number': int_or_none(result.get('tvSeasonNumber')),
 104             'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
 105         })
 106         return info
 107
 108
 109 class AENetworksIE(AENetworksBaseIE):
 110     IE_NAME = 'aenetworks'
 111     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
 112     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
 113         shows/[^/]+/season-\d+/episode-\d+|
 114         (?:
 115             (?:movie|special)s/[^/]+|
 116             (?:shows/[^/]+/)?videos
 117         )/[^/?#&]+
 118     )'''
 119     _TESTS = [{
 120         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
 121         'info_dict': {
 122             'id': '22253814',
 123             'ext': 'mp4',
 124             'title': 'Winter is Coming',
 125             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
 126             'timestamp': 1338306241,
 127             'upload_date': '20120529',
 128             'uploader': 'AENE-NEW',
 129         },
 130         'params': {
 131             # m3u8 download
 132             'skip_download': True,
 133         },
 134         'add_ie': ['ThePlatform'],
 135         'skip': 'Geo-restricted - This content is not available in your location.'
 136     }, {
 137         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
 138         'info_dict': {
 139             'id': '600587331957',
 140             'ext': 'mp4',
 141             'title': 'Inlawful Entry',
 142             'description': 'md5:57c12115a2b384d883fe64ca50529e08',
 143             'timestamp': 1452634428,
 144             'upload_date': '20160112',
 145             'uploader': 'AENE-NEW',
 146         },
 147         'params': {
 148             # m3u8 download
 149             'skip_download': True,
 150         },
 151         'add_ie': ['ThePlatform'],
 152         'skip': 'This video is only available for users of participating TV providers.',
 153     }, {
 154         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
 155         'only_matching': True
 156     }, {
 157         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
 158         'only_matching': True
 159     }, {
 160         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
 161         'only_matching': True
 162     }, {
 163         'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
 164         'only_matching': True
 165     }, {
 166         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
 167         'only_matching': True
 168     }, {
 169         'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
 170         'only_matching': True
 171     }, {
 172         'url': 'http://www.history.com/videos/history-of-valentines-day',
 173         'only_matching': True
 174     }, {
 175         'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
 176         'only_matching': True
 177     }]
 178
 179     def _real_extract(self, url):
 180         domain, canonical = self._match_valid_url(url).groups()
 181         return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
 182
 183
 184 class AENetworksListBaseIE(AENetworksBaseIE):
 185     def _call_api(self, resource, slug, brand, fields):
 186         return self._download_json(
 187             'https://yoga.appsvcs.aetnd.com/graphql',
 188             slug, query={'brand': brand}, data=urlencode_postdata({
 189                 'query': '''{
 190   %s(slug: "%s") {
 191     %s
 192   }
 193 }''' % (resource, slug, fields),
 194             }))['data'][resource]
 195
 196     def _real_extract(self, url):
 197         domain, slug = self._match_valid_url(url).groups()
 198         _, brand = self._DOMAIN_MAP[domain]
 199         playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
 200         base_url = 'http://watch.%s' % domain
 201
 202         entries = []
 203         for item in (playlist.get(self._ITEMS_KEY) or []):
 204             doc = self._get_doc(item)
 205             canonical = doc.get('canonical')
 206             if not canonical:
 207                 continue
 208             entries.append(self.url_result(
 209                 base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
 210
 211         description = None
 212         if self._PLAYLIST_DESCRIPTION_KEY:
 213             description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
 214
 215         return self.playlist_result(
 216             entries, playlist.get('id'),
 217             playlist.get(self._PLAYLIST_TITLE_KEY), description)
 218
 219
 220 class AENetworksCollectionIE(AENetworksListBaseIE):
 221     IE_NAME = 'aenetworks:collection'
 222     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
 223     _TESTS = [{
 224         'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
 225         'info_dict': {
 226             'id': '282',
 227             'title': 'America The Story of Us',
 228         },
 229         'playlist_mincount': 12,
 230     }, {
 231         'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
 232         'only_matching': True
 233     }, {
 234         'url': 'https://www.historyvault.com/collections/mysteryquest',
 235         'only_matching': True
 236     }]
 237     _RESOURCE = 'list'
 238     _ITEMS_KEY = 'items'
 239     _PLAYLIST_TITLE_KEY = 'display_title'
 240     _PLAYLIST_DESCRIPTION_KEY = None
 241     _FIELDS = '''id
 242     display_title
 243     items {
 244       ... on ListVideoItem {
 245         doc {
 246           canonical
 247           id
 248         }
 249       }
 250     }'''
 251
 252     def _get_doc(self, item):
 253         return item.get('doc') or {}
 254
 255
 256 class AENetworksShowIE(AENetworksListBaseIE):
 257     IE_NAME = 'aenetworks:show'
 258     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
 259     _TESTS = [{
 260         'url': 'http://www.history.com/shows/ancient-aliens',
 261         'info_dict': {
 262             'id': 'SERIES1574',
 263             'title': 'Ancient Aliens',
 264             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
 265         },
 266         'playlist_mincount': 150,
 267     }]
 268     _RESOURCE = 'series'
 269     _ITEMS_KEY = 'episodes'
 270     _PLAYLIST_TITLE_KEY = 'title'
 271     _PLAYLIST_DESCRIPTION_KEY = 'description'
 272     _FIELDS = '''description
 273     id
 274     title
 275     episodes {
 276       canonical
 277       id
 278     }'''
 279
 280     def _get_doc(self, item):
 281         return item
 282
 283
 284 class HistoryTopicIE(AENetworksBaseIE):
 285     IE_NAME = 'history:topic'
 286     IE_DESC = 'History.com Topic'
 287     _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
 288     _TESTS = [{
 289         'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
 290         'info_dict': {
 291             'id': '40700995724',
 292             'ext': 'mp4',
 293             'title': "History of Valentine’s Day",
 294             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
 295             'timestamp': 1375819729,
 296             'upload_date': '20130806',
 297             'uploader': 'AENE-NEW',
 298         },
 299         'params': {
 300             # m3u8 download
 301             'skip_download': True,
 302         },
 303         'add_ie': ['ThePlatform'],
 304     }]
 305
 306     def _real_extract(self, url):
 307         display_id = self._match_id(url)
 308         return self.url_result(
 309             'http://www.history.com/videos/' + display_id,
 310             AENetworksIE.ie_key())
 311
 312
 313 class HistoryPlayerIE(AENetworksBaseIE):
 314     IE_NAME = 'history:player'
 315     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
 316     _TESTS = []
 317
 318     def _real_extract(self, url):
 319         domain, video_id = self._match_valid_url(url).groups()
 320         return self._extract_aetn_info(domain, 'id', video_id, url)
 321
 322
 323 class BiographyIE(AENetworksBaseIE):
 324     _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
 325     _TESTS = [{
 326         'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
 327         'info_dict': {
 328             'id': '30322987',
 329             'ext': 'mp4',
 330             'title': 'Vincent Van Gogh - Full Episode',
 331             'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
 332             'timestamp': 1311970571,
 333             'upload_date': '20110729',
 334             'uploader': 'AENE-NEW',
 335         },
 336         'params': {
 337             # m3u8 download
 338             'skip_download': True,
 339         },
 340         'add_ie': ['ThePlatform'],
 341         'skip': '404 Not Found',
 342     }]
 343
 344     def _real_extract(self, url):
 345         display_id = self._match_id(url)
 346         webpage = self._download_webpage(url, display_id)
 347         player_url = self._search_regex(
 348             r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
 349             webpage, 'player URL')
 350         return self.url_result(player_url, HistoryPlayerIE.ie_key())