]>
Commit | Line | Data |
---|---|---|
0de668af JMF |
1 | from __future__ import unicode_literals |
2 | ||
59fc531f JMF |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
e1ec9330 YCH |
6 | from ..utils import ( |
7 | int_or_none, | |
8 | limit_length, | |
9 | ) | |
59fc531f | 10 | |
0de668af | 11 | |
class InstagramIE(InfoExtractor):
    """Extractor for a single Instagram post page (``instagram.com/p/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
        'md5': '0d2da106a9d2631273e192b372806516',
        'info_dict': {
            'id': 'aye83DjauH',
            'ext': 'mp4',
            'uploader_id': 'naomipq',
            'title': 'Video by naomipq',
            'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
        }
    }, {
        # missing description
        'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
        'info_dict': {
            'id': 'BA-pQFBG8HZ',
            'ext': 'mp4',
            'uploader_id': 'britneyspears',
            'title': 'Video by britneyspears',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://instagram.com/p/-Cmh1cukG2/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the post page and return the info dict for its video.

        The media URL and thumbnail are obtained through the
        ``_og_search_*`` helpers; the uploader name and the caption are
        pulled out of the page text with regular expressions.  Both of
        those are optional: a missing uploader or caption does not abort
        the extraction.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        uploader_id = self._search_regex(
            r'"owner":{"username":"(.+?)"',
            webpage, 'uploader id', fatal=False)
        # Consume backslash escapes as a unit so that an escaped quote (\")
        # inside the caption does not end the match early, and require at
        # least one character so an empty caption ("caption":"") yields no
        # description instead of capturing the text that follows it.
        # The captured value is left escaped exactly as it appears in the
        # page, as before.
        desc = self._search_regex(
            r'"caption":"((?:[^"\\]|\\.)+)"',
            webpage, 'description', default=None)

        # The uploader lookup is non-fatal, so it may come back empty; avoid
        # producing the literal title "Video by None" in that case.
        if uploader_id:
            title = 'Video by %s' % uploader_id
        else:
            title = 'Instagram video %s' % video_id

        return {
            'id': video_id,
            'url': self._og_search_video_url(webpage, secure=False),
            'ext': 'mp4',
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader_id': uploader_id,
            'description': desc,
        }
ea38e55f PH |
59 | |
60 | ||
class InstagramUserIE(InfoExtractor):
    """Extractor that collects the videos of an Instagram user profile."""

    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'
    _TEST = {
        'url': 'https://instagram.com/porsche',
        'info_dict': {
            'id': 'porsche',
            'title': 'porsche',
        },
        'playlist_mincount': 2,
        'playlist': [{
            'info_dict': {
                'id': '614605558512799803_462752227',
                'ext': 'mp4',
                'title': '#Porsche Intelligent Performance.',
                # Raw string: "\." is not a valid string escape sequence.
                'thumbnail': r're:^https?://.*\.jpg',
                'uploader': 'Porsche',
                'uploader_id': 'porsche',
                'timestamp': 1387486713,
                'upload_date': '20131219',
            },
        }],
        'params': {
            'extract_flat': True,
            'skip_download': True,
        }
    }

    def _media_item_to_entry(self, item):
        """Build the playlist entry (info dict) for one video feed item.

        ``item`` is one element of the ``items`` list of a media page and
        must carry ``id`` and ``videos``; every other field is optional.
        """
        formats = [{
            'format_id': format_id,
            'height': video.get('height'),
            'width': video.get('width'),
            'url': video['url'],
        } for format_id, video in item['videos'].items()]
        self._sort_formats(formats)

        # In some cases a sub-object (e.g. caption) is null, which
        # corresponds to None in python. As a result item.get(key, {})
        # would give None, hence the "or {}" on every nested lookup.
        likes = item.get('likes') or {}
        user = item.get('user') or {}
        images = item.get('images') or {}
        caption = item.get('caption') or {}
        thumbnail = (images.get('thumbnail') or {}).get('url')

        # Fall back to the item id when there is no caption text.
        title = caption.get('text', item['id'])

        return {
            'id': item['id'],
            'title': limit_length(title, 80),
            'formats': formats,
            'thumbnail': thumbnail,
            'webpage_url': item.get('link'),
            'uploader': user.get('full_name'),
            'uploader_id': user.get('username'),
            'like_count': int_or_none(likes.get('count')),
            'timestamp': int_or_none(item.get('created_time')),
        }

    def _real_extract(self, url):
        """Page through the user's media feed and return a playlist.

        Pages are requested one after another, each continuing from the id
        of the last item of the previous page, until a page with no items
        is returned.  Only items whose ``type`` is ``video`` become
        playlist entries.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('username')

        entries = []
        page_count = 0
        media_url = 'http://instagram.com/%s/media' % uploader_id
        while True:
            page_count += 1
            page = self._download_json(
                media_url, uploader_id,
                note='Downloading page %d ' % page_count,
            )

            items = page['items']
            if not items:
                # An empty page marks the end of the feed.
                break

            entries.extend(
                self._media_item_to_entry(item)
                for item in items
                if item.get('type') == 'video')

            # The next page starts after the last item seen, whatever
            # its type.
            media_url = (
                'http://instagram.com/%s/media?max_id=%s' % (
                    uploader_id, items[-1]['id']))

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': uploader_id,
            'title': uploader_id,
        }