]>
Commit | Line | Data |
---|---|---|
9bc70948 JMF |
1 | from __future__ import unicode_literals |
2 | ||
38cbc40a | 3 | from .common import InfoExtractor |
c938c35f | 4 | from ..utils import int_or_none |
38cbc40a | 5 | |
9bc70948 | 6 | |
class IconosquareIE(InfoExtractor):
    """Extractor for post pages on iconosquare.com and statigr.am.

    The post page embeds a JSON object assigned to ``window.media``; the
    video formats and all metadata except the title are read from it.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://statigr.am/p/522207370455279102_24101272',
        'md5': '6eb93b882a3ded7c378ee1d6884b1814',
        'info_dict': {
            'id': '522207370455279102_24101272',
            'ext': 'mp4',
            'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
            'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
            'timestamp': 1376471991,
            'upload_date': '20130814',
            'uploader': 'aguynamedpatrick',
            'uploader_id': '24101272',
            'comment_count': int,
            'like_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the post at *url*.

        The video id is taken from the URL, the page is downloaded, and the
        ``window.media`` JSON blob is parsed. ``media['videos']`` and each
        format's ``url`` are required (a missing one raises ``KeyError``);
        every other field is optional and degrades to ``None`` / an empty
        list when absent.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        media = self._parse_json(
            self._search_regex(
                r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
            video_id)

        def optional_dict(key):
            # dict.get's default only applies when the key is missing. A JSON
            # `null` decodes to None, which would make a chained .get() raise
            # AttributeError, so anything that is not a dict becomes {}.
            value = media.get(key)
            return value if isinstance(value, dict) else {}

        formats = [{
            'url': f['url'],
            'format_id': format_id,
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height')),
        } for format_id, f in media['videos'].items()]
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
            webpage, 'title')

        caption = optional_dict('caption')
        user = optional_dict('user')

        # Prefer the media-level creation time; fall back to the caption's.
        timestamp = int_or_none(
            media.get('created_time') or caption.get('created_time'))
        description = caption.get('text')

        uploader = user.get('username')
        uploader_id = user.get('id')

        comment_count = int_or_none(optional_dict('comments').get('count'))
        like_count = int_or_none(optional_dict('likes').get('count'))

        # Thumbnails are optional metadata: skip entries without a URL
        # instead of aborting the whole extraction with a KeyError.
        thumbnails = [{
            'url': t['url'],
            'id': thumbnail_id,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        } for thumbnail_id, t in optional_dict('images').items()
            if isinstance(t, dict) and t.get('url')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'comment_count': comment_count,
            'like_count': like_count,
            'formats': formats,
        }