]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/iconosquare.py
[aljazeera] Extend _VALID_URL
[yt-dlp.git] / youtube_dl / extractor / iconosquare.py
CommitLineData
9bc70948
JMF
1from __future__ import unicode_literals
2
38cbc40a 3from .common import InfoExtractor
85742102 4from ..utils import (
5 int_or_none,
6 get_element_by_id,
d4364f30 7 remove_end,
85742102 8)
38cbc40a 9
9bc70948 10
class IconosquareIE(InfoExtractor):
    # Matches media pages on both iconosquare.com and the statigr.am domain;
    # the path segment after /p/ is captured as the media id.
    _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://statigr.am/p/522207370455279102_24101272',
        'md5': '6eb93b882a3ded7c378ee1d6884b1814',
        'info_dict': {
            'id': '522207370455279102_24101272',
            'ext': 'mp4',
            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
            'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
            'timestamp': 1376471991,
            'upload_date': '20130814',
            'uploader': 'aguynamedpatrick',
            'uploader_id': '24101272',
            'comment_count': int,
            'like_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single media page.

        Downloads the page at ``url``, decodes the JSON held by the element
        with id ``mediaJson`` and maps it to formats, thumbnails, comments
        and the remaining metadata fields of the returned dict.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        media = self._parse_json(
            get_element_by_id('mediaJson', webpage),
            video_id)

        formats = [{
            'url': f['url'],
            'format_id': format_id,
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height')),
        } for format_id, f in media['videos'].items()]
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')

        # dict.get(key, {}) only falls back to {} when the key is absent; a
        # key that is present with a JSON null value yields None and would
        # make the chained .get() raise AttributeError. Coalesce each optional
        # sub-object once so null values are handled like missing ones.
        caption = media.get('caption') or {}
        user = media.get('user') or {}
        comments_info = media.get('comments') or {}
        likes_info = media.get('likes') or {}
        images = media.get('images') or {}

        # Prefer the media-level creation time, falling back to the caption's.
        timestamp = int_or_none(
            media.get('created_time') or caption.get('created_time'))
        description = caption.get('text')

        uploader = user.get('username')
        uploader_id = user.get('id')

        comment_count = int_or_none(comments_info.get('count'))
        like_count = int_or_none(likes_info.get('count'))

        thumbnails = [{
            'url': t['url'],
            'id': thumbnail_id,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        } for thumbnail_id, t in images.items()]

        # Comment entries without a 'text' key are skipped.
        comments = []
        for comment in comments_info.get('data') or []:
            if 'text' not in comment:
                continue
            author = comment.get('from') or {}
            comments.append({
                'id': comment.get('id'),
                'text': comment['text'],
                'timestamp': int_or_none(comment.get('created_time')),
                'author': author.get('full_name'),
                'author_id': author.get('username'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'comment_count': comment_count,
            'like_count': like_count,
            'formats': formats,
            'comments': comments,
        }