yt_dlp/extractor/scrippsnetworks.py

   1 import hashlib
   2 import json
   3
   4 from .anvato import AnvatoIE
   5 from .aws import AWSIE
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     smuggle_url,
   9     urlencode_postdata,
  10     xpath_text,
  11 )
  12
  13
  14 class ScrippsNetworksWatchIE(AWSIE):
  15     IE_NAME = 'scrippsnetworks:watch'
  16     _VALID_URL = r'''(?x)
  17                     https?://
  18                         watch\.
  19                         (?P<site>geniuskitchen)\.com/
  20                         (?:
  21                             player\.[A-Z0-9]+\.html\#|
  22                             show/(?:[^/]+/){2}|
  23                             player/
  24                         )
  25                         (?P<id>\d+)
  26                     '''
  27     _TESTS = [{
  28         'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
  29         'info_dict': {
  30             'id': '4194875',
  31             'ext': 'mp4',
  32             'title': 'Ample Hills Ice Cream Bike',
  33             'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
  34             'uploader': 'ANV',
  35             'upload_date': '20171011',
  36             'timestamp': 1507698000,
  37         },
  38         'params': {
  39             'skip_download': True,
  40         },
  41         'add_ie': [AnvatoIE.ie_key()],
  42         'skip': '404 Not Found',
  43     }]
  44
  45     _SNI_TABLE = {
  46         'geniuskitchen': 'genius',
  47     }
  48
  49     _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
  50     _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
  51
  52     _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
  53
  54     def _real_extract(self, url):
  55         mobj = self._match_valid_url(url)
  56         site_id, video_id = mobj.group('site', 'id')
  57
  58         aws_identity_id_json = json.dumps({
  59             'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
  60         }).encode('utf-8')
  61         token = self._download_json(
  62             'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
  63             data=aws_identity_id_json,
  64             headers={
  65                 'Accept': '*/*',
  66                 'Content-Type': 'application/x-amz-json-1.1',
  67                 'Referer': url,
  68                 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
  69                 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
  70                 'X-Amz-User-Agent': self._AWS_USER_AGENT,
  71             })['Token']
  72
  73         sts = self._download_xml(
  74             'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
  75                 'Action': 'AssumeRoleWithWebIdentity',
  76                 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
  77                 'RoleSessionName': 'web-identity',
  78                 'Version': '2011-06-15',
  79                 'WebIdentityToken': token,
  80             }), headers={
  81                 'Referer': url,
  82                 'X-Amz-User-Agent': self._AWS_USER_AGENT,
  83                 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
  84             })
  85
  86         def get(key):
  87             return xpath_text(
  88                 sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
  89                 fatal=True)
  90
  91         mcp_id = self._aws_execute_api({
  92             'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
  93             'access_key': get('AccessKeyId'),
  94             'secret_key': get('SecretAccessKey'),
  95             'session_token': get('SessionToken'),
  96         }, video_id)['results'][0]['mcpId']
  97
  98         return self.url_result(
  99             smuggle_url(
 100                 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
 101                 {'geo_countries': ['US']}),
 102             AnvatoIE.ie_key(), video_id=mcp_id)
 103
 104
 105 class ScrippsNetworksIE(InfoExtractor):
 106     _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
 107     _TESTS = [{
 108         'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
 109         'info_dict': {
 110             'id': '0260338',
 111             'ext': 'mp4',
 112             'title': 'The Best of the Best',
 113             'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
 114             'timestamp': 1475678834,
 115             'upload_date': '20161005',
 116             'uploader': 'SCNI-SCND',
 117             'tags': 'count:10',
 118             'creator': 'Cooking Channel',
 119             'duration': 29.995,
 120             'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
 121             'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
 122         },
 123         'add_ie': ['ThePlatform'],
 124         'expected_warnings': ['No HLS formats found'],
 125     }, {
 126         'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
 127         'only_matching': True,
 128     }, {
 129         'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
 130         'only_matching': True,
 131     }, {
 132         'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
 133         'only_matching': True,
 134     }, {
 135         'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
 136         'only_matching': True,
 137     }, {
 138         'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
 139         'only_matching': True,
 140     }]
 141     _ACCOUNT_MAP = {
 142         'cookingchanneltv': 2433005105,
 143         'discovery': 2706091867,
 144         'diynetwork': 2433004575,
 145         'foodnetwork': 2433005105,
 146         'hgtv': 2433004575,
 147         'travelchannel': 2433005739,
 148     }
 149     _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
 150
 151     def _real_extract(self, url):
 152         site, guid = self._match_valid_url(url).groups()
 153         return self.url_result(smuggle_url(
 154             self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
 155             {'force_smil_url': True}), 'ThePlatform', guid)