yt_dlp/extractor/corus.py

   1 from .theplatform import ThePlatformFeedIE
   2 from ..utils import (
   3     ExtractorError,
   4     dict_get,
   5     float_or_none,
   6     int_or_none,
   7 )
   8
   9
  10 class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
  11     _VALID_URL = r'''(?x)
  12                     https?://
  13                         (?:www\.)?
  14                         (?P<domain>
  15                             (?:
  16                                 globaltv|
  17                                 etcanada|
  18                                 seriesplus|
  19                                 wnetwork|
  20                                 ytv
  21                             )\.com|
  22                             (?:
  23                                 hgtv|
  24                                 foodnetwork|
  25                                 slice|
  26                                 history|
  27                                 showcase|
  28                                 bigbrothercanada|
  29                                 abcspark|
  30                                 disney(?:channel|lachaine)
  31                             )\.ca
  32                         )
  33                         /(?:[^/]+/)*
  34                         (?:
  35                             video\.html\?.*?\bv=|
  36                             videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
  37                         )
  38                         (?P<id>
  39                             [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
  40                             (?:[A-Z]{4})?\d{12,20}
  41                         )
  42                     '''
  43     _TESTS = [{
  44         'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
  45         'info_dict': {
  46             'id': '870923331648',
  47             'ext': 'mp4',
  48             'title': 'Movie Night Popcorn with Bryan',
  49             'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
  50             'upload_date': '20170206',
  51             'timestamp': 1486392197,
  52         },
  53         'params': {
  54             'skip_download': True,
  55         },
  56         'expected_warnings': ['Failed to parse JSON'],
  57         # FIXME: yt-dlp wrongly raises for geo restriction
  58     }, {
  59         'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
  60         'only_matching': True,
  61     }, {
  62         'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
  63         'only_matching': True,
  64     }, {
  65         'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
  66         'only_matching': True,
  67     }, {
  68         'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
  69         'only_matching': True,
  70     }, {
  71         'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
  72         'only_matching': True
  73     }, {
  74         'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
  75         'only_matching': True
  76     }, {
  77         'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
  78         'only_matching': True
  79     }, {
  80         'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
  81         'only_matching': True
  82     }]
  83     _GEO_BYPASS = False
  84     _SITE_MAP = {
  85         'globaltv': 'series',
  86         'etcanada': 'series',
  87         'foodnetwork': 'food',
  88         'bigbrothercanada': 'series',
  89         'disneychannel': 'disneyen',
  90         'disneylachaine': 'disneyfr',
  91     }
  92
  93     def _real_extract(self, url):
  94         domain, video_id = self._match_valid_url(url).groups()
  95         site = domain.split('.')[0]
  96         path = self._SITE_MAP.get(site, site)
  97         if path != 'series':
  98             path = 'migration/' + path
  99         video = self._download_json(
 100             'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
 101             video_id, query={'byId': video_id},
 102             headers={'Accept': 'application/json'})[0]
 103         title = video['title']
 104
 105         formats = []
 106         for source in video.get('sources', []):
 107             smil_url = source.get('file')
 108             if not smil_url:
 109                 continue
 110             source_type = source.get('type')
 111             note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
 112             resp = self._download_webpage(
 113                 smil_url, video_id, note, fatal=False,
 114                 headers=self.geo_verification_headers())
 115             if not resp:
 116                 continue
 117             error = self._parse_json(resp, video_id, fatal=False)
 118             if error:
 119                 if error.get('exception') == 'GeoLocationBlocked':
 120                     self.raise_geo_restricted(countries=['CA'])
 121                 raise ExtractorError(error['description'])
 122             smil = self._parse_xml(resp, video_id, fatal=False)
 123             if smil is None:
 124                 continue
 125             namespace = self._parse_smil_namespace(smil)
 126             formats.extend(self._parse_smil_formats(
 127                 smil, smil_url, video_id, namespace))
 128         if not formats and video.get('drm'):
 129             self.report_drm(video_id)
 130
 131         subtitles = {}
 132         for track in video.get('tracks', []):
 133             track_url = track.get('file')
 134             if not track_url:
 135                 continue
 136             lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
 137             subtitles.setdefault(lang, []).append({'url': track_url})
 138
 139         metadata = video.get('metadata') or {}
 140         get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
 141
 142         return {
 143             'id': video_id,
 144             'title': title,
 145             'formats': formats,
 146             'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
 147             'description': video.get('description'),
 148             'timestamp': int_or_none(video.get('availableDate'), 1000),
 149             'subtitles': subtitles,
 150             'duration': float_or_none(metadata.get('duration')),
 151             'series': dict_get(video, ('show', 'pl1$show')),
 152             'season_number': get_number('season'),
 153             'episode_number': get_number('episode'),
 154         }