jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/scrippsnetworks.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / scrippsnetworks.py
1 import hashlib
2 import json
3
4 from .anvato import AnvatoIE
5 from .aws import AWSIE
6 from .common import InfoExtractor
7 from ..utils import (
8 smuggle_url,
9 urlencode_postdata,
10 xpath_text,
11 )
12
13
class ScrippsNetworksWatchIE(AWSIE):
    """Extractor for watch.geniuskitchen.com player/show URLs.

    Resolves the numeric id in the page URL to an ``mcpId`` through the
    AWS-signed SNI web API and hands the result over to the Anvato extractor.
    """

    IE_NAME = 'scrippsnetworks:watch'
    # Verbose-mode pattern: whitespace inside the literal is insignificant.
    _VALID_URL = r'''(?x)
        https?://
            watch\.
            (?P<site>geniuskitchen)\.com/
            (?:
                player\.[A-Z0-9]+\.html\#|
                show/(?:[^/]+/){2}|
                player/
            )
            (?P<id>\d+)
        '''
    _TESTS = [{
        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
        'info_dict': {
            'id': '4194875',
            'ext': 'mp4',
            'title': 'Ample Hills Ice Cream Bike',
            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
            'uploader': 'ANV',
            'upload_date': '20171011',
            'timestamp': 1507698000,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
        'skip': '404 Not Found',
    }]

    # Maps the ``site`` URL group to the brand segment used in the API path.
    _SNI_TABLE = {
        'geniuskitchen': 'genius',
    }

    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'

    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'

    def _real_extract(self, url):
        """Return a ``url_result`` pointing at the Anvato video for ``url``."""
        site_id, video_id = self._match_valid_url(url).group('site', 'id')

        # Step 1: request an OpenID token from Cognito for the fixed identity id.
        identity_payload = json.dumps({
            'IdentityId': f'{self._AWS_REGION}:7655847c-0ae7-4d9b-80d6-56c062927eb3',
        }).encode()
        cognito_headers = {
            'Accept': '*/*',
            'Content-Type': 'application/x-amz-json-1.1',
            'Referer': url,
            'X-Amz-Content-Sha256': hashlib.sha256(identity_payload).hexdigest(),
            'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
            'X-Amz-User-Agent': self._AWS_USER_AGENT,
        }
        cognito_response = self._download_json(
            f'https://cognito-identity.{self._AWS_REGION}.amazonaws.com/', video_id,
            data=identity_payload, headers=cognito_headers)
        web_identity_token = cognito_response['Token']

        # Step 2: exchange the token at STS (AssumeRoleWithWebIdentity).
        sts_form = urlencode_postdata({
            'Action': 'AssumeRoleWithWebIdentity',
            'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
            'RoleSessionName': 'web-identity',
            'Version': '2011-06-15',
            'WebIdentityToken': web_identity_token,
        })
        sts_headers = {
            'Referer': url,
            'X-Amz-User-Agent': self._AWS_USER_AGENT,
            'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
        }
        sts_response = self._download_xml(
            'https://sts.amazonaws.com/', video_id, data=sts_form, headers=sts_headers)

        sts_namespace = '{https://sts.amazonaws.com/doc/2011-06-15/}'

        def credential(tag):
            # fatal=True: a missing element is reported as an error by xpath_text.
            return xpath_text(sts_response, f'.//{sts_namespace}{tag}', fatal=True)

        # Step 3: query the episode record with the credentials read from the
        # STS response and take the mcpId of the first result.
        api_request = {
            'uri': f'/1/web/brands/{self._SNI_TABLE[site_id]}/episodes/scrid/{video_id}',
            'access_key': credential('AccessKeyId'),
            'secret_key': credential('SecretAccessKey'),
            'session_token': credential('SessionToken'),
        }
        api_response = self._aws_execute_api(api_request, video_id)
        mcp_id = api_response['results'][0]['mcpId']

        # Step 4: delegate to the Anvato extractor, smuggling the geo hint along.
        anvato_url = smuggle_url(
            f'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:{mcp_id}',
            {'geo_countries': ['US']})
        return self.url_result(anvato_url, AnvatoIE.ie_key(), video_id=mcp_id)
103
104
class ScrippsNetworksIE(InfoExtractor):
    """Extractor for ``/videos/<slug>-<id>`` pages on the listed network sites.

    Builds a ThePlatform media URL from the per-site account number and the
    numeric id in the page URL, then delegates to the ThePlatform extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
        'info_dict': {
            'id': '0260338',
            'ext': 'mp4',
            'title': 'The Best of the Best',
            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
            'timestamp': 1475678834,
            'upload_date': '20161005',
            'uploader': 'SCNI-SCND',
            'tags': 'count:10',
            'creator': 'Cooking Channel',
            'duration': 29.995,
            'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
            'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
        },
        'add_ie': ['ThePlatform'],
        'expected_warnings': ['No HLS formats found'],
    }, {
        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
        'only_matching': True,
    }, {
        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
        'only_matching': True,
    }, {
        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
        'only_matching': True,
    }, {
        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
        'only_matching': True,
    }, {
        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
        'only_matching': True,
    }]
    # Site name (the ``site`` URL group) -> account number used in the media URL.
    _ACCOUNT_MAP = {
        'cookingchanneltv': 2433005105,
        'discovery': 2706091867,
        'diynetwork': 2433004575,
        'foodnetwork': 2433005105,
        'hgtv': 2433004575,
        'travelchannel': 2433005739,
    }
    # Filled with (account number, guid).
    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'

    def _real_extract(self, url):
        """Return a ``url_result`` for the ThePlatform media matching ``url``."""
        site, guid = self._match_valid_url(url).group('site', 'id')
        account_id = self._ACCOUNT_MAP[site]
        theplatform_url = self._TP_TEMPL % (account_id, guid)
        return self.url_result(
            smuggle_url(theplatform_url, {'force_smil_url': True}),
            'ThePlatform', guid)
153 return self.url_result(smuggle_url(
154 self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
155 {'force_smil_url': True}), 'ThePlatform', guid)