yt_dlp/extractor/corus.py

   1 from .theplatform import ThePlatformFeedIE
   2 from ..utils import (
   3     dict_get,
   4     ExtractorError,
   5     float_or_none,
   6     int_or_none,
   7 )
   8
   9
  10 class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
  11     _VALID_URL = r'''(?x)
  12                     https?://
  13                         (?:www\.)?
  14                         (?P<domain>
  15                             (?:
  16                                 globaltv|
  17                                 etcanada|
  18                                 seriesplus|
  19                                 wnetwork|
  20                                 ytv
  21                             )\.com|
  22                             (?:
  23                                 hgtv|
  24                                 foodnetwork|
  25                                 slice|
  26                                 history|
  27                                 showcase|
  28                                 bigbrothercanada|
  29                                 abcspark|
  30                                 disney(?:channel|lachaine)
  31                             )\.ca
  32                         )
  33                         /(?:[^/]+/)*
  34                         (?:
  35                             video\.html\?.*?\bv=|
  36                             videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
  37                         )
  38                         (?P<id>
  39                             [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
  40                             (?:[A-Z]{4})?\d{12,20}
  41                         )
  42                     '''
  43     _TESTS = [{
  44         'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
  45         'info_dict': {
  46             'id': '870923331648',
  47             'ext': 'mp4',
  48             'title': 'Movie Night Popcorn with Bryan',
  49             'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
  50             'upload_date': '20170206',
  51             'timestamp': 1486392197,
  52         },
  53         'params': {
  54             'skip_download': True,
  55         },
  56         'expected_warnings': ['Failed to parse JSON'],
  57     }, {
  58         'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
  59         'only_matching': True,
  60     }, {
  61         'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
  62         'only_matching': True,
  63     }, {
  64         'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
  65         'only_matching': True,
  66     }, {
  67         'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
  68         'only_matching': True,
  69     }, {
  70         'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
  71         'only_matching': True
  72     }, {
  73         'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
  74         'only_matching': True
  75     }, {
  76         'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
  77         'only_matching': True
  78     }, {
  79         'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
  80         'only_matching': True
  81     }]
  82     _GEO_BYPASS = False
  83     _SITE_MAP = {
  84         'globaltv': 'series',
  85         'etcanada': 'series',
  86         'foodnetwork': 'food',
  87         'bigbrothercanada': 'series',
  88         'disneychannel': 'disneyen',
  89         'disneylachaine': 'disneyfr',
  90     }
  91
  92     def _real_extract(self, url):
  93         domain, video_id = self._match_valid_url(url).groups()
  94         site = domain.split('.')[0]
  95         path = self._SITE_MAP.get(site, site)
  96         if path != 'series':
  97             path = 'migration/' + path
  98         video = self._download_json(
  99             'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
 100             video_id, query={'byId': video_id},
 101             headers={'Accept': 'application/json'})[0]
 102         title = video['title']
 103
 104         formats = []
 105         for source in video.get('sources', []):
 106             smil_url = source.get('file')
 107             if not smil_url:
 108                 continue
 109             source_type = source.get('type')
 110             note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
 111             resp = self._download_webpage(
 112                 smil_url, video_id, note, fatal=False,
 113                 headers=self.geo_verification_headers())
 114             if not resp:
 115                 continue
 116             error = self._parse_json(resp, video_id, fatal=False)
 117             if error:
 118                 if error.get('exception') == 'GeoLocationBlocked':
 119                     self.raise_geo_restricted(countries=['CA'])
 120                 raise ExtractorError(error['description'])
 121             smil = self._parse_xml(resp, video_id, fatal=False)
 122             if smil is None:
 123                 continue
 124             namespace = self._parse_smil_namespace(smil)
 125             formats.extend(self._parse_smil_formats(
 126                 smil, smil_url, video_id, namespace))
 127         if not formats and video.get('drm'):
 128             self.report_drm(video_id)
 129
 130         subtitles = {}
 131         for track in video.get('tracks', []):
 132             track_url = track.get('file')
 133             if not track_url:
 134                 continue
 135             lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
 136             subtitles.setdefault(lang, []).append({'url': track_url})
 137
 138         metadata = video.get('metadata') or {}
 139         get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
 140
 141         return {
 142             'id': video_id,
 143             'title': title,
 144             'formats': formats,
 145             'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
 146             'description': video.get('description'),
 147             'timestamp': int_or_none(video.get('availableDate'), 1000),
 148             'subtitles': subtitles,
 149             'duration': float_or_none(metadata.get('duration')),
 150             'series': dict_get(video, ('show', 'pl1$show')),
 151             'season_number': get_number('season'),
 152             'episode_number': get_number('episode'),
 153         }