]>
Commit | Line | Data |
---|---|---|
0de668af JMF |
1 | from __future__ import unicode_literals |
2 | ||
cba5d1b6 | 3 | import itertools |
315ab3d5 | 4 | import hashlib |
27b1c73f | 5 | import json |
59fc531f | 6 | import re |
ab2ffab2 | 7 | import time |
59fc531f JMF |
8 | |
9 | from .common import InfoExtractor | |
238d42cf S |
10 | from ..compat import ( |
11 | compat_str, | |
12 | compat_HTTPError, | |
13 | ) | |
e1ec9330 | 14 | from ..utils import ( |
238d42cf | 15 | ExtractorError, |
cce889b9 | 16 | float_or_none, |
c4096e8a | 17 | get_element_by_attribute, |
e1ec9330 | 18 | int_or_none, |
87696e78 | 19 | lowercase_escape, |
238d42cf | 20 | std_headers, |
98960c91 | 21 | try_get, |
3052a30d | 22 | url_or_none, |
6606817a | 23 | variadic, |
ab2ffab2 | 24 | urlencode_postdata, |
e1ec9330 | 25 | ) |
59fc531f | 26 | |
0de668af | 27 | |
class InstagramIE(InfoExtractor):
    # Extractor for a single Instagram post, IGTV video or reel
    # (/p/, /tv/ and /reel/ URLs). Posts holding several videos are
    # returned as a playlist.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
    _NETRC_MACHINE = 'instagram'
    _TESTS = [{
        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
        'md5': '0d2da106a9d2631273e192b372806516',
        'info_dict': {
            'id': 'aye83DjauH',
            'ext': 'mp4',
            'title': 'Video by naomipq',
            'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 0,
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
            'uploader': 'B E A U T Y F O R A S H E S',
            'like_count': int,
            'comment_count': int,
            'comments': list,
        },
    }, {
        # missing description
        'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
        'info_dict': {
            'id': 'BA-pQFBG8HZ',
            'ext': 'mp4',
            'title': 'Video by britneyspears',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 0,
            'timestamp': 1453760977,
            'upload_date': '20160125',
            'uploader_id': 'britneyspears',
            'uploader': 'Britney Spears',
            'like_count': int,
            'comment_count': int,
            'comments': list,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # multi video post
        'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
        'playlist': [{
            'info_dict': {
                'id': 'BQ0dSaohpPW',
                'ext': 'mp4',
                'title': 'Video 1',
            },
        }, {
            'info_dict': {
                'id': 'BQ0dTpOhuHT',
                'ext': 'mp4',
                'title': 'Video 2',
            },
        }, {
            'info_dict': {
                'id': 'BQ0dT7RBFeF',
                'ext': 'mp4',
                'title': 'Video 3',
            },
        }],
        'info_dict': {
            'id': 'BQ0eAlwhDrw',
            'title': 'Post by instagram',
            'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
        },
    }, {
        # IGTV
        'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
        'info_dict': {
            'id': 'BkfuX9UB-eK',
            'ext': 'mp4',
            'title': 'Fingerboarding Tricks with @cass.fb',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 53.83,
            'timestamp': 1530032919,
            'upload_date': '20180626',
            'uploader_id': 'instagram',
            'uploader': 'Instagram',
            'like_count': int,
            'comment_count': int,
            'comments': list,
            'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
        }
    }, {
        'url': 'https://instagram.com/p/-Cmh1cukG2/',
        'only_matching': True,
    }, {
        'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
        'only_matching': True,
    }, {
        'url': 'https://www.instagram.com/tv/aye83DjauH/',
        'only_matching': True,
    }, {
        'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_embed_url(webpage):
        # Returns the URL of an Instagram embed found in webpage, or None.
        # An <iframe> embed is preferred; otherwise the first link inside
        # an element with class "instagram-media" is used.
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
            webpage)
        if mobj:
            return mobj.group('url')

        blockquote_el = get_element_by_attribute(
            'class', 'instagram-media', webpage)
        if blockquote_el is None:
            return

        mobj = re.search(
            r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
        if mobj:
            return mobj.group('link')

    def _login(self):
        # Logs in with the configured credentials. Does nothing when no
        # username is available. Raises ExtractorError when the login page
        # lacks the required tokens or the login attempt is rejected.
        username, password = self._get_login_info()
        if username is None:
            return

        login_webpage = self._download_webpage(
            'https://www.instagram.com/accounts/login/', None,
            note='Downloading login webpage', errnote='Failed to download login webpage')

        shared_data = self._parse_json(
            self._search_regex(
                r'window\._sharedData\s*=\s*({.+?});',
                login_webpage, 'shared data', default='{}'),
            None)

        # The regex above falls back to '{}', so both tokens may be absent;
        # fail with a readable error instead of a bare KeyError.
        csrf_token = try_get(shared_data, lambda x: x['config']['csrf_token'])
        rollout_hash = try_get(shared_data, lambda x: x['rollout_hash'])
        if csrf_token is None or rollout_hash is None:
            raise ExtractorError('Unable to login: could not find login tokens in the login page')

        login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
            'Accept': '*/*',
            'X-IG-App-ID': '936619743392459',
            'X-ASBD-ID': '198387',
            'X-IG-WWW-Claim': '0',
            'X-Requested-With': 'XMLHttpRequest',
            'X-CSRFToken': csrf_token,
            'X-Instagram-AJAX': rollout_hash,
            'Referer': 'https://www.instagram.com/',
        }, data=urlencode_postdata({
            'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
            'username': username,
            'queryParams': '{}',
            'optIntoOneTap': 'false',
            'stopDeletionNonce': '',
            'trustedDeviceRecords': '{}',
        }))

        if not login.get('authenticated'):
            if login.get('message'):
                raise ExtractorError(f'Unable to login: {login["message"]}')
            raise ExtractorError('Unable to login')

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # Extracts one post. Metadata is read from the JSON embedded in the
        # page when present, with regex / OpenGraph fallbacks on the raw
        # HTML. Returns a playlist when the post has no top-level video but
        # contains sidecar children, otherwise a single info dict.
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        url = mobj.group('url')

        webpage, urlh = self._download_webpage_handle(url, video_id)
        if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
            self.raise_login_required('You need to log in to access this content')

        # Every field read after the "if media:" block must exist even when
        # no media JSON is found, so title and duration are initialised here
        # together with the rest.
        (media, video_url, description, title, thumbnail, duration, timestamp,
         uploader, uploader_id, like_count, comment_count, comments, height,
         width) = [None] * 14

        shared_data = self._parse_json(
            self._search_regex(
                r'window\._sharedData\s*=\s*({.+?});',
                webpage, 'shared data', default='{}'),
            video_id, fatal=False)
        if shared_data:
            media = try_get(
                shared_data,
                (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
                 lambda x: x['entry_data']['PostPage'][0]['media']),
                dict)
        # _sharedData.entry_data.PostPage is empty when authenticated (see
        # https://github.com/ytdl-org/youtube-dl/pull/22880)
        if not media:
            additional_data = self._parse_json(
                self._search_regex(
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
                    webpage, 'additional data', default='{}'),
                video_id, fatal=False)
            if additional_data:
                media = try_get(
                    additional_data, lambda x: x['graphql']['shortcode_media'],
                    dict)
        if media:
            video_url = media.get('video_url')
            height = try_get(media, lambda x: x['dimensions']['height'])
            width = try_get(media, lambda x: x['dimensions']['width'])
            description = try_get(
                media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                compat_str) or media.get('caption')
            title = media.get('title')
            thumbnail = media.get('display_src') or media.get('display_url')
            duration = float_or_none(media.get('video_duration'))
            timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
            uploader = try_get(media, lambda x: x['owner']['full_name'])
            uploader_id = try_get(media, lambda x: x['owner']['username'])

            def get_count(keys, kind):
                # Returns the first usable count found under any of the
                # edge_media_<key> entries or the "<kind>s" entry, or None.
                for key in variadic(keys):
                    count = int_or_none(try_get(
                        media, (lambda x: x['edge_media_%s' % key]['count'],
                                lambda x: x['%ss' % kind]['count'])))
                    if count is not None:
                        return count

            like_count = get_count('preview_like', 'like')
            comment_count = get_count(
                ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')

            # The comment edges may be missing altogether; fall back to an
            # empty list so the loop below does not iterate over None.
            comment_edges = try_get(
                media, lambda x: x['edge_media_to_parent_comment']['edges'],
                list) or []
            comments = []
            for comment in comment_edges:
                comment_dict = comment.get('node', {})
                comment_text = comment_dict.get('text')
                if comment_text:
                    comments.append({
                        'author': try_get(comment_dict, lambda x: x['owner']['username']),
                        'author_id': try_get(comment_dict, lambda x: x['owner']['id']),
                        'id': comment_dict.get('id'),
                        'text': comment_text,
                        'timestamp': int_or_none(comment_dict.get('created_at')),
                    })
            if not video_url:
                # No top-level video: look for sidecar children and return
                # them as a playlist.
                edges = try_get(
                    media, lambda x: x['edge_sidecar_to_children']['edges'],
                    list) or []
                if edges:
                    entries = []
                    for edge_num, edge in enumerate(edges, start=1):
                        node = try_get(edge, lambda x: x['node'], dict)
                        if not node:
                            continue
                        node_video_url = url_or_none(node.get('video_url'))
                        if not node_video_url:
                            continue
                        entries.append({
                            'id': node.get('shortcode') or node['id'],
                            'title': node.get('title') or 'Video %d' % edge_num,
                            'url': node_video_url,
                            'thumbnail': node.get('display_url'),
                            'duration': float_or_none(node.get('video_duration')),
                            'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
                            'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
                            'view_count': int_or_none(node.get('video_view_count')),
                        })
                    return self.playlist_result(
                        entries, video_id,
                        'Post by %s' % uploader_id if uploader_id else None,
                        description)

        if not video_url:
            video_url = self._og_search_video_url(webpage, secure=False)

        formats = [{
            'url': video_url,
            'width': width,
            'height': height,
        }]
        dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest'])
        if dash:
            formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
        self._sort_formats(formats)

        if not uploader_id:
            uploader_id = self._search_regex(
                r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
                webpage, 'uploader id', fatal=False)

        if not description:
            description = self._search_regex(
                r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None)
            if description is not None:
                description = lowercase_escape(description)

        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'formats': formats,
            'ext': 'mp4',
            'title': title or 'Video by %s' % uploader_id,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'uploader': uploader,
            'like_count': like_count,
            'comment_count': comment_count,
            'comments': comments,
            'http_headers': {
                'Referer': 'https://www.instagram.com/',
            }
        }
ea38e55f PH |
334 | |
335 | ||
class InstagramPlaylistIE(InfoExtractor):
    # Shared base for extractors that build a playlist by paging through a
    # GraphQL query. Subclasses provide _QUERY_HASH, _parse_timeline_from()
    # and _query_vars_for().

    _gis_tmpl = None  # GIS template that worked on an earlier request, if any

    def _parse_graphql(self, webpage, item_id):
        # Locates the sharedData JSON blob inside a page and decodes it.
        shared_json = self._search_regex(
            r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data')
        return self._parse_json(shared_json, item_id)

    def _extract_graphql(self, data, url):
        # Generator over the video entries of a GraphQL timeline. Requests
        # one page per iteration and stops when a page has no edges, no
        # usable page_info, no next page, or no string cursor.
        def edge_count(node, suffix):
            return int_or_none(try_get(
                node, lambda x: x['edge_media_' + suffix]['count']))

        uploader_id = self._match_id(url)
        csrf_token = data['config']['csrf_token']
        rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'

        cursor = ''
        for page_num in itertools.count(1):
            query_vars = {
                'first': 12,
                'after': cursor,
            }
            query_vars.update(self._query_vars_for(data))
            variables = json.dumps(query_vars)

            if self._gis_tmpl:
                candidates = [self._gis_tmpl]
            else:
                candidates = [
                    '%s' % rhx_gis,
                    '',
                    '%s:%s' % (rhx_gis, csrf_token),
                    '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
                ]
            last_candidate = candidates[-1]

            # Walk through the candidate GIS templates; the first one the
            # server accepts is remembered for subsequent requests.
            for gis_tmpl in candidates:
                gis_digest = hashlib.md5(
                    ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest()
                try:
                    json_data = self._download_json(
                        'https://www.instagram.com/graphql/query/', uploader_id,
                        'Downloading JSON page %d' % page_num, headers={
                            'X-Requested-With': 'XMLHttpRequest',
                            'X-Instagram-GIS': gis_digest,
                        }, query={
                            'query_hash': self._QUERY_HASH,
                            'variables': variables,
                        })
                    media = self._parse_timeline_from(json_data)
                except ExtractorError as e:
                    # A 403 is tolerated only while other templates remain
                    # to be tried; anything else is re-raised.
                    forbidden = isinstance(e.cause, compat_HTTPError) and e.cause.code == 403
                    if forbidden and gis_tmpl != last_candidate:
                        continue
                    raise
                self._gis_tmpl = gis_tmpl
                break

            edges = media.get('edges')
            if not (edges and isinstance(edges, list)):
                break

            for edge in edges:
                node = edge.get('node')
                if not (node and isinstance(node, dict)):
                    continue
                # Only video nodes are of interest.
                if not (node.get('__typename') == 'GraphVideo' or node.get('is_video') is True):
                    continue
                video_id = node.get('shortcode')
                if not video_id:
                    continue

                info = self.url_result(
                    'https://instagram.com/p/%s/' % video_id,
                    ie=InstagramIE.ie_key(), video_id=video_id)
                info.update({
                    'description': try_get(
                        node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                        compat_str),
                    'thumbnail': node.get('thumbnail_src') or node.get('display_src'),
                    'timestamp': int_or_none(node.get('taken_at_timestamp')),
                    'comment_count': edge_count(node, 'to_comment'),
                    'like_count': edge_count(node, 'preview_like'),
                    'view_count': int_or_none(node.get('video_view_count')),
                })

                yield info

            page_info = media.get('page_info')
            if not (page_info and isinstance(page_info, dict)):
                break

            if not page_info.get('has_next_page'):
                break

            cursor = page_info.get('end_cursor')
            if not (cursor and isinstance(cursor, compat_str)):
                break

    def _real_extract(self, url):
        # Downloads the listing page, decodes its embedded data and wraps
        # the generated entries in a playlist named after the matched id.
        user_or_tag = self._match_id(url)
        webpage = self._download_webpage(url, user_or_tag)
        data = self._parse_graphql(webpage, user_or_tag)

        self._set_cookie('instagram.com', 'ig_pr', '1')

        entries = self._extract_graphql(data, url)
        return self.playlist_result(entries, user_or_tag, user_or_tag)
463 | ||
464 | ||
class InstagramUserIE(InstagramPlaylistIE):
    # Playlist of the videos on a user's profile timeline.
    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'
    _TEST = {
        'url': 'https://instagram.com/porsche',
        'info_dict': {
            'id': 'porsche',
            'title': 'porsche',
        },
        'playlist_count': 5,
        'params': {
            'extract_flat': True,
            'skip_download': True,
            'playlistend': 5,
        }
    }

    # Plain string: a trailing comma here would turn the value into a
    # 1-tuple, which is not what the query_hash request parameter expects.
    _QUERY_HASH = '42323d64886122307be10013ad2dcc44'

    @staticmethod
    def _parse_timeline_from(data):
        # extracts the media timeline data from a GraphQL result
        return data['data']['user']['edge_owner_to_timeline_media']

    @staticmethod
    def _query_vars_for(data):
        # returns a dictionary of variables to add to the timeline query based
        # on the GraphQL of the original page
        return {
            'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
        }
497 | ||
498 | ||
class InstagramTagIE(InstagramPlaylistIE):
    # Playlist of the videos listed on a hashtag page.
    _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
    IE_DESC = 'Instagram hashtag search'
    IE_NAME = 'instagram:tag'
    _TEST = {
        'url': 'https://instagram.com/explore/tags/lolcats',
        'info_dict': {
            'id': 'lolcats',
            'title': 'lolcats',
        },
        'playlist_count': 50,
        'params': {
            'extract_flat': True,
            'skip_download': True,
            'playlistend': 50,
        }
    }

    # Plain string: a trailing comma here would turn the value into a
    # 1-tuple, which is not what the query_hash request parameter expects.
    _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314'

    @staticmethod
    def _parse_timeline_from(data):
        # extracts the media timeline data from a GraphQL result
        return data['data']['hashtag']['edge_hashtag_to_media']

    @staticmethod
    def _query_vars_for(data):
        # returns a dictionary of variables to add to the timeline query based
        # on the GraphQL of the original page
        return {
            'tag_name':
                data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
        }