]>
Commit | Line | Data |
---|---|---|
8dcf65c9 | 1 | # coding: utf-8 |
0de668af | 2 | |
cba5d1b6 | 3 | import itertools |
315ab3d5 | 4 | import hashlib |
27b1c73f | 5 | import json |
59fc531f | 6 | import re |
ab2ffab2 | 7 | import time |
59fc531f JMF |
8 | |
9 | from .common import InfoExtractor | |
238d42cf | 10 | from ..compat import ( |
238d42cf S |
11 | compat_HTTPError, |
12 | ) | |
e1ec9330 | 13 | from ..utils import ( |
238d42cf | 14 | ExtractorError, |
cce889b9 | 15 | float_or_none, |
c4096e8a | 16 | get_element_by_attribute, |
e1ec9330 | 17 | int_or_none, |
87696e78 | 18 | lowercase_escape, |
238d42cf | 19 | std_headers, |
eb56d132 | 20 | traverse_obj, |
3052a30d | 21 | url_or_none, |
ab2ffab2 | 22 | urlencode_postdata, |
e1ec9330 | 23 | ) |
59fc531f | 24 | |
0de668af | 25 | |
class InstagramBaseIE(InfoExtractor):
    """Shared login and metadata helpers for the Instagram extractors."""

    _NETRC_MACHINE = 'instagram'
    # Set on the class (not the instance) once a login has succeeded, so that
    # every extractor derived from this base skips the login request afterwards.
    _IS_LOGGED_IN = False

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns immediately when no username is configured or when a previous
        login already succeeded.

        Raises:
            ExtractorError: if the login page does not expose the tokens the
                login request needs, or if Instagram does not report the
                session as authenticated.
        """
        username, password = self._get_login_info()
        if username is None or self._IS_LOGGED_IN:
            return

        login_webpage = self._download_webpage(
            'https://www.instagram.com/accounts/login/', None,
            note='Downloading login webpage', errnote='Failed to download login webpage')

        shared_data = self._parse_json(
            self._search_regex(
                r'window\._sharedData\s*=\s*({.+?});',
                login_webpage, 'shared data', default='{}'),
            None)

        # The regex above falls back to an empty object, so the tokens may be
        # absent; fail with a readable error instead of a bare KeyError.
        csrf_token = traverse_obj(shared_data, ('config', 'csrf_token'))
        rollout_hash = traverse_obj(shared_data, 'rollout_hash')
        if csrf_token is None or rollout_hash is None:
            raise ExtractorError('Unable to login: could not find the login tokens in the login page')

        login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
            'Accept': '*/*',
            'X-IG-App-ID': '936619743392459',
            'X-ASBD-ID': '198387',
            'X-IG-WWW-Claim': '0',
            'X-Requested-With': 'XMLHttpRequest',
            'X-CSRFToken': csrf_token,
            'X-Instagram-AJAX': rollout_hash,
            'Referer': 'https://www.instagram.com/',
        }, data=urlencode_postdata({
            'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
            'username': username,
            'queryParams': '{}',
            'optIntoOneTap': 'false',
            'stopDeletionNonce': '',
            'trustedDeviceRecords': '{}',
        }))

        if not login.get('authenticated'):
            if login.get('message'):
                raise ExtractorError(f'Unable to login: {login["message"]}')
            raise ExtractorError('Unable to login')
        InstagramBaseIE._IS_LOGGED_IN = True

    def _real_initialize(self):
        """Attempt the login before any extraction is performed."""
        self._login()

    def _get_count(self, media, kind, *keys):
        """Return a counter from ``media``.

        Looks at ``media[kind]['count']`` first and then at
        ``media['edge_media_<key>']['count']`` for every key in ``keys``,
        in the order given.
        """
        return traverse_obj(
            media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
            expected_type=int_or_none)

    def _get_dimension(self, name, media, webpage=None):
        """Return the dimension called ``name`` (e.g. 'width' or 'height').

        Reads ``media['dimensions'][name]`` and, when that yields nothing,
        falls back to the ``og:video:<name>`` / ``video:<name>`` meta tags of
        ``webpage`` (an empty page is searched when no webpage is given).
        """
        return (
            traverse_obj(media, ('dimensions', name), expected_type=int_or_none)
            or int_or_none(self._html_search_meta(
                (f'og:video:{name}', f'video:{name}'), webpage or '', default=None)))

    def _extract_nodes(self, nodes, is_direct=False):
        """Yield one info dict per video node in ``nodes``.

        Nodes that are neither typed 'GraphVideo' nor flagged ``is_video`` are
        skipped. With ``is_direct`` the node's own ``video_url`` is used as
        the entry URL; otherwise a 'url' result that points back at the
        'Instagram' extractor is produced, and nodes without a shortcode are
        skipped.
        """
        for idx, node in enumerate(nodes, start=1):
            if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
                continue

            video_id = node.get('shortcode')

            if is_direct:
                info = {
                    'id': video_id or node['id'],
                    'url': node.get('video_url'),
                    'width': self._get_dimension('width', node),
                    'height': self._get_dimension('height', node),
                    'http_headers': {
                        'Referer': 'https://www.instagram.com/',
                    }
                }
            elif not video_id:
                continue
            else:
                info = {
                    '_type': 'url',
                    'ie_key': 'Instagram',
                    'id': video_id,
                    'url': f'https://instagram.com/p/{video_id}',
                }

            yield {
                **info,
                # idx counts every node, including the skipped non-video ones
                'title': node.get('title') or (f'Video {idx}' if is_direct else None),
                'description': traverse_obj(
                    node, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str),
                'thumbnail': traverse_obj(
                    node, 'display_url', 'thumbnail_src', 'display_src', expected_type=url_or_none),
                'duration': float_or_none(node.get('video_duration')),
                'timestamp': int_or_none(node.get('taken_at_timestamp')),
                'view_count': int_or_none(node.get('video_view_count')),
                'comment_count': self._get_count(node, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
                'like_count': self._get_count(node, 'likes', 'preview_like'),
            }
123 | ||
8dcf65c9 | 124 | |
class InstagramIOSIE(InfoExtractor):
    """Resolve ``instagram://media?id=...`` URLs to regular web URLs."""

    IE_DESC = 'IOS instagram:// URL'
    _VALID_URL = r'instagram://media\?id=(?P<id>[\d_]+)'
    _TESTS = [{
        'url': 'instagram://media?id=482584233761418119',
        'md5': '0d2da106a9d2631273e192b372806516',
        'info_dict': {
            'id': 'aye83DjauH',
            'ext': 'mp4',
            'title': 'Video by naomipq',
            'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 0,
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
            'uploader': 'B E A U T Y F O R A S H E S',
            'like_count': int,
            'comment_count': int,
            'comments': list,
        },
        'add_ie': ['Instagram']
    }]

    def _get_id(self, id):
        """Convert a numeric media id into the shortcode used in web URLs.

        Only the part of ``id`` before the first underscore is used. That
        number is written in base 64 with the alphabet below, most
        significant digit first; an id of zero gives an empty string.

        Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id
        """
        alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
        remaining = int(id.split('_')[0])
        digits = []
        while remaining > 0:
            remaining, index = divmod(remaining, 64)
            digits.append(alphabet[index])
        # digits were collected least significant first
        return ''.join(reversed(digits))

    def _real_extract(self, url):
        """Hand the equivalent web URL over to the 'Instagram' extractor."""
        shortcode = self._get_id(self._match_id(url))
        return {
            '_type': 'url_transparent',
            'url': f'http://instagram.com/tv/{shortcode}/',
            'ie_key': 'Instagram',
        }
166 | ||
167 | ||
class InstagramIE(InstagramBaseIE):
    """Extractor for single Instagram posts (``/p/``, ``/tv/`` and ``/reel/`` URLs)."""

    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
        'md5': '0d2da106a9d2631273e192b372806516',
        'info_dict': {
            'id': 'aye83DjauH',
            'ext': 'mp4',
            'title': 'Video by naomipq',
            'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 0,
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
            'uploader': 'B E A U T Y F O R A S H E S',
            'like_count': int,
            'comment_count': int,
            'comments': list,
        },
    }, {
        # missing description
        'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
        'info_dict': {
            'id': 'BA-pQFBG8HZ',
            'ext': 'mp4',
            'title': 'Video by britneyspears',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 0,
            'timestamp': 1453760977,
            'upload_date': '20160125',
            'uploader_id': 'britneyspears',
            'uploader': 'Britney Spears',
            'like_count': int,
            'comment_count': int,
            'comments': list,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # multi video post
        'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
        'playlist': [{
            'info_dict': {
                'id': 'BQ0dSaohpPW',
                'ext': 'mp4',
                'title': 'Video 1',
            },
        }, {
            'info_dict': {
                'id': 'BQ0dTpOhuHT',
                'ext': 'mp4',
                'title': 'Video 2',
            },
        }, {
            'info_dict': {
                'id': 'BQ0dT7RBFeF',
                'ext': 'mp4',
                'title': 'Video 3',
            },
        }],
        'info_dict': {
            'id': 'BQ0eAlwhDrw',
            'title': 'Post by instagram',
            'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
        },
    }, {
        # IGTV
        'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
        'info_dict': {
            'id': 'BkfuX9UB-eK',
            'ext': 'mp4',
            'title': 'Fingerboarding Tricks with @cass.fb',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 53.83,
            'timestamp': 1530032919,
            'upload_date': '20180626',
            'uploader_id': 'instagram',
            'uploader': 'Instagram',
            'like_count': int,
            'comment_count': int,
            'comments': list,
            'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
        }
    }, {
        'url': 'https://instagram.com/p/-Cmh1cukG2/',
        'only_matching': True,
    }, {
        'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
        'only_matching': True,
    }, {
        'url': 'https://www.instagram.com/tv/aye83DjauH/',
        'only_matching': True,
    }, {
        'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_embed_url(webpage):
        """Return the URL of an Instagram post embedded in ``webpage``.

        An ``<iframe>`` pointing at an ``instagram.com/p/.../embed`` URL is
        tried first, then the first link inside an element carrying the
        ``instagram-media`` class. Returns None when neither is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
            webpage)
        if mobj:
            return mobj.group('url')

        blockquote_el = get_element_by_attribute(
            'class', 'instagram-media', webpage)
        if blockquote_el is None:
            return

        mobj = re.search(
            r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
        if mobj:
            return mobj.group('link')

    def _real_extract(self, url):
        """Extract a post.

        Returns a playlist when the post has no video URL of its own but
        contains sidecar children, otherwise a single video info dict.
        """
        video_id, url = self._match_valid_url(url).group('id', 'url')
        webpage, urlh = self._download_webpage_handle(url, video_id)
        if 'www.instagram.com/accounts/login' in urlh.geturl():
            self.raise_login_required('You need to log in to access this content')

        shared_data = self._parse_json(
            self._search_regex(
                r'window\._sharedData\s*=\s*({.+?});',
                webpage, 'shared data', default='{}'),
            video_id, fatal=False)
        media = traverse_obj(
            shared_data,
            ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
            ('entry_data', 'PostPage', 0, 'media'),
            expected_type=dict)

        # _sharedData.entry_data.PostPage is empty when authenticated (see
        # https://github.com/ytdl-org/youtube-dl/pull/22880)
        if not media:
            additional_data = self._parse_json(
                self._search_regex(
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
                    webpage, 'additional data', default='{}'),
                video_id, fatal=False)
            media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), expected_type=dict) or {}

        uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex(
            r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False)

        description = (
            traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str)
            or media.get('caption'))
        if not description:
            # Last resort: the raw caption as it appears in the page source
            description = self._search_regex(
                r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None)
            if description is not None:
                description = lowercase_escape(description)

        video_url = media.get('video_url')
        if not video_url:
            nodes = traverse_obj(media, ('edge_sidecar_to_children', 'edges', ..., 'node'), expected_type=dict) or []
            if nodes:
                return self.playlist_result(
                    self._extract_nodes(nodes, True), video_id,
                    'Post by %s' % uploader_id if uploader_id else None, description)

            video_url = self._og_search_video_url(webpage, secure=False)

        formats = [{
            'url': video_url,
            'width': self._get_dimension('width', media, webpage),
            'height': self._get_dimension('height', media, webpage),
        }]
        dash = traverse_obj(media, ('dash_info', 'video_dash_manifest'))
        if dash:
            formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
        self._sort_formats(formats)

        # The comment edges are absent when the media data could not be found
        # (media is then an empty dict); iterate an empty list in that case
        # instead of failing on None.
        comment_edges = traverse_obj(media, ('edge_media_to_parent_comment', 'edges')) or []
        comments = [{
            'author': traverse_obj(comment_dict, ('node', 'owner', 'username')),
            'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')),
            'id': traverse_obj(comment_dict, ('node', 'id')),
            'text': traverse_obj(comment_dict, ('node', 'text')),
            'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none),
        } for comment_dict in comment_edges]

        # Only keep display_src/display_url entries that actually hold a
        # value; an unfiltered list is always truthy and would make the
        # og:image fallback below unreachable.
        display_resources = (
            media.get('display_resources')
            or [{'src': media[key]} for key in ('display_src', 'display_url') if media.get(key)]
            or [{'src': self._og_search_thumbnail(webpage)}])
        thumbnails = [{
            'url': thumbnail['src'],
            'width': thumbnail.get('config_width'),
            'height': thumbnail.get('config_height'),
        } for thumbnail in display_resources if thumbnail.get('src')]

        return {
            'id': video_id,
            'formats': formats,
            'title': media.get('title') or 'Video by %s' % uploader_id,
            'description': description,
            'duration': float_or_none(media.get('video_duration')),
            'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
            'uploader_id': uploader_id,
            'uploader': traverse_obj(media, ('owner', 'full_name')),
            'like_count': self._get_count(media, 'likes', 'preview_like'),
            'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
            'comments': comments,
            'thumbnails': thumbnails,
            'http_headers': {
                'Referer': 'https://www.instagram.com/',
            }
        }
ea38e55f PH |
379 | |
380 | ||
class InstagramPlaylistBaseIE(InstagramBaseIE):
    """Base for extractors that page through a GraphQL media timeline.

    The methods below rely on ``_QUERY_HASH``, ``_parse_timeline_from`` and
    ``_query_vars_for``, none of which are defined in this class.
    """

    _gis_tmpl = None  # used to cache GIS request type

    def _parse_graphql(self, webpage, item_id):
        """Return the ``sharedData`` JSON object embedded in ``webpage``."""
        raw_json = self._search_regex(
            r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data')
        return self._parse_json(raw_json, item_id)

    def _extract_graphql(self, data, url):
        """Yield the video entries of a timeline, one page at a time.

        ``data`` is the page's shared data; its CSRF token and (optional)
        ``rhx_gis`` value are used to sign each request.
        """
        uploader_id = self._match_id(url)
        csrf_token = data['config']['csrf_token']
        rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'

        cursor = ''
        for page_num in itertools.count(1):
            variables = json.dumps({
                'first': 12,
                'after': cursor,
                **self._query_vars_for(data),
            })

            # Once a signature template has worked, stick to it; until then
            # go through every known template in turn.
            if self._gis_tmpl:
                gis_tmpls = [self._gis_tmpl]
            else:
                gis_tmpls = [
                    '%s' % rhx_gis,
                    '',
                    '%s:%s' % (rhx_gis, csrf_token),
                    '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
                ]

            for gis_tmpl in gis_tmpls:
                signature = hashlib.md5(
                    ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest()
                try:
                    json_data = self._download_json(
                        'https://www.instagram.com/graphql/query/', uploader_id,
                        'Downloading JSON page %d' % page_num, headers={
                            'X-Requested-With': 'XMLHttpRequest',
                            'X-Instagram-GIS': signature,
                        }, query={
                            'query_hash': self._QUERY_HASH,
                            'variables': variables,
                        })
                    media = self._parse_timeline_from(json_data)
                    self._gis_tmpl = gis_tmpl
                    break
                except ExtractorError as err:
                    # An HTTP 403 is taken to mean the signature was rejected:
                    # move on to the next template while one is left. Any
                    # other failure, or a 403 on the last template, is
                    # re-raised.
                    was_forbidden = isinstance(err.cause, compat_HTTPError) and err.cause.code == 403
                    if was_forbidden and gis_tmpl != gis_tmpls[-1]:
                        continue
                    raise

            nodes = traverse_obj(media, ('edges', ..., 'node'), expected_type=dict) or []
            if not nodes:
                break
            yield from self._extract_nodes(nodes)

            cursor = traverse_obj(media, ('page_info', 'end_cursor'), expected_type=str)
            if not (traverse_obj(media, ('page_info', 'has_next_page')) and cursor):
                break

    def _real_extract(self, url):
        """Return a playlist whose id and title are both the matched URL id."""
        playlist_id = self._match_id(url)
        data = self._parse_graphql(
            self._download_webpage(url, playlist_id), playlist_id)

        self._set_cookie('instagram.com', 'ig_pr', '1')

        entries = self._extract_graphql(data, url)
        return self.playlist_result(entries, playlist_id, playlist_id)
460 | ||
461 | ||
class InstagramUserIE(InstagramPlaylistBaseIE):
    """Playlist extractor for the media timeline of a user profile."""

    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'
    _TESTS = [{
        'url': 'https://instagram.com/porsche',
        'info_dict': {
            'id': 'porsche',
            'title': 'porsche',
        },
        'playlist_count': 5,
        'params': {
            'extract_flat': True,
            'skip_download': True,
            'playlistend': 5,
        }
    }]

    # Must be a plain string: a trailing comma after the literal would turn
    # this into a one-element tuple.
    _QUERY_HASH = '42323d64886122307be10013ad2dcc44'

    @staticmethod
    def _parse_timeline_from(data):
        """Return the user's media timeline from a GraphQL result."""
        return data['data']['user']['edge_owner_to_timeline_media']

    @staticmethod
    def _query_vars_for(data):
        """Return the extra query variables for the timeline request.

        The user id is taken from the GraphQL data of the original profile
        page.
        """
        return {
            'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
        }
494 | ||
495 | ||
class InstagramTagIE(InstagramPlaylistBaseIE):
    """Playlist extractor for the media listed under a hashtag."""

    _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
    IE_DESC = 'Instagram hashtag search'
    IE_NAME = 'instagram:tag'
    _TESTS = [{
        'url': 'https://instagram.com/explore/tags/lolcats',
        'info_dict': {
            'id': 'lolcats',
            'title': 'lolcats',
        },
        'playlist_count': 50,
        'params': {
            'extract_flat': True,
            'skip_download': True,
            'playlistend': 50,
        }
    }]

    # Must be a plain string: a trailing comma after the literal would turn
    # this into a one-element tuple.
    _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314'

    @staticmethod
    def _parse_timeline_from(data):
        """Return the hashtag's media timeline from a GraphQL result."""
        return data['data']['hashtag']['edge_hashtag_to_media']

    @staticmethod
    def _query_vars_for(data):
        """Return the extra query variables for the timeline request.

        The tag name is taken from the GraphQL data of the original tag page.
        """
        return {
            'tag_name':
                data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
        }