youtube_dl/extractor/vidme.py

   1 from __future__ import unicode_literals
   2
   3 import itertools
   4
   5 from .common import InfoExtractor
   6 from ..compat import compat_HTTPError
   7 from ..utils import (
   8     ExtractorError,
   9     int_or_none,
  10     float_or_none,
  11     parse_iso8601,
  12 )
  13
  14
  15 class VidmeIE(InfoExtractor):
  16     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
  17     _TESTS = [{
  18         'url': 'https://vid.me/QNB',
  19         'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
  20         'info_dict': {
  21             'id': 'QNB',
  22             'ext': 'mp4',
  23             'title': 'Fishing for piranha - the easy way',
  24             'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
  25             'thumbnail': 're:^https?://.*\.jpg',
  26             'timestamp': 1406313244,
  27             'upload_date': '20140725',
  28             'age_limit': 0,
  29             'duration': 119.92,
  30             'view_count': int,
  31             'like_count': int,
  32             'comment_count': int,
  33         },
  34     }, {
  35         'url': 'https://vid.me/Gc6M',
  36         'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
  37         'info_dict': {
  38             'id': 'Gc6M',
  39             'ext': 'mp4',
  40             'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
  41             'thumbnail': 're:^https?://.*\.jpg',
  42             'timestamp': 1441211642,
  43             'upload_date': '20150902',
  44             'uploader': 'SunshineM',
  45             'uploader_id': '3552827',
  46             'age_limit': 0,
  47             'duration': 223.72,
  48             'view_count': int,
  49             'like_count': int,
  50             'comment_count': int,
  51         },
  52         'params': {
  53             'skip_download': True,
  54         },
  55     }, {
  56         # tests uploader field
  57         'url': 'https://vid.me/4Iib',
  58         'info_dict': {
  59             'id': '4Iib',
  60             'ext': 'mp4',
  61             'title': 'The Carver',
  62             'description': 'md5:e9c24870018ae8113be936645b93ba3c',
  63             'thumbnail': 're:^https?://.*\.jpg',
  64             'timestamp': 1433203629,
  65             'upload_date': '20150602',
  66             'uploader': 'Thomas',
  67             'uploader_id': '109747',
  68             'age_limit': 0,
  69             'duration': 97.859999999999999,
  70             'view_count': int,
  71             'like_count': int,
  72             'comment_count': int,
  73         },
  74         'params': {
  75             'skip_download': True,
  76         },
  77     }, {
  78         # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
  79         'url': 'https://vid.me/e/Wmur',
  80         'info_dict': {
  81             'id': 'Wmur',
  82             'ext': 'mp4',
  83             'title': 'naked smoking & stretching',
  84             'thumbnail': 're:^https?://.*\.jpg',
  85             'timestamp': 1430931613,
  86             'upload_date': '20150506',
  87             'uploader': 'naked-yogi',
  88             'uploader_id': '1638622',
  89             'age_limit': 18,
  90             'duration': 653.26999999999998,
  91             'view_count': int,
  92             'like_count': int,
  93             'comment_count': int,
  94         },
  95         'params': {
  96             'skip_download': True,
  97         },
  98     }, {
  99         # nsfw, user-disabled
 100         'url': 'https://vid.me/dzGJ',
 101         'only_matching': True,
 102     }, {
 103         # suspended
 104         'url': 'https://vid.me/Ox3G',
 105         'only_matching': True,
 106     }, {
 107         # deleted
 108         'url': 'https://vid.me/KTPm',
 109         'only_matching': True,
 110     }, {
 111         # no formats in the API response
 112         'url': 'https://vid.me/e5g',
 113         'info_dict': {
 114             'id': 'e5g',
 115             'ext': 'mp4',
 116             'title': 'Video upload (e5g)',
 117             'thumbnail': 're:^https?://.*\.jpg',
 118             'timestamp': 1401480195,
 119             'upload_date': '20140530',
 120             'uploader': None,
 121             'uploader_id': None,
 122             'age_limit': 0,
 123             'duration': 483,
 124             'view_count': int,
 125             'like_count': int,
 126             'comment_count': int,
 127         },
 128         'params': {
 129             'skip_download': True,
 130         },
 131     }]
 132
 133     def _real_extract(self, url):
 134         video_id = self._match_id(url)
 135
 136         try:
 137             response = self._download_json(
 138                 'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
 139         except ExtractorError as e:
 140             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
 141                 response = self._parse_json(e.cause.read(), video_id)
 142             else:
 143                 raise
 144
 145         error = response.get('error')
 146         if error:
 147             raise ExtractorError(
 148                 '%s returned error: %s' % (self.IE_NAME, error), expected=True)
 149
 150         video = response['video']
 151
 152         if video.get('state') == 'deleted':
 153             raise ExtractorError(
 154                 'Vidme said: Sorry, this video has been deleted.',
 155                 expected=True)
 156
 157         if video.get('state') in ('user-disabled', 'suspended'):
 158             raise ExtractorError(
 159                 'Vidme said: This video has been suspended either due to a copyright claim, '
 160                 'or for violating the terms of use.',
 161                 expected=True)
 162
 163         formats = [{
 164             'format_id': f.get('type'),
 165             'url': f['uri'],
 166             'width': int_or_none(f.get('width')),
 167             'height': int_or_none(f.get('height')),
 168             'preference': 0 if f.get('type', '').endswith('clip') else 1,
 169         } for f in video.get('formats', []) if f.get('uri')]
 170
 171         if not formats and video.get('complete_url'):
 172             formats.append({
 173                 'url': video.get('complete_url'),
 174                 'width': int_or_none(video.get('width')),
 175                 'height': int_or_none(video.get('height')),
 176             })
 177
 178         self._sort_formats(formats)
 179
 180         title = video['title']
 181         description = video.get('description')
 182         thumbnail = video.get('thumbnail_url')
 183         timestamp = parse_iso8601(video.get('date_created'), ' ')
 184         uploader = video.get('user', {}).get('username')
 185         uploader_id = video.get('user', {}).get('user_id')
 186         age_limit = 18 if video.get('nsfw') is True else 0
 187         duration = float_or_none(video.get('duration'))
 188         view_count = int_or_none(video.get('view_count'))
 189         like_count = int_or_none(video.get('likes_count'))
 190         comment_count = int_or_none(video.get('comment_count'))
 191
 192         return {
 193             'id': video_id,
 194             'title': title or 'Video upload (%s)' % video_id,
 195             'description': description,
 196             'thumbnail': thumbnail,
 197             'uploader': uploader,
 198             'uploader_id': uploader_id,
 199             'age_limit': age_limit,
 200             'timestamp': timestamp,
 201             'duration': duration,
 202             'view_count': view_count,
 203             'like_count': like_count,
 204             'comment_count': comment_count,
 205             'formats': formats,
 206         }
 207
 208
 209 class VidmeUserIE(InfoExtractor):
 210     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})'
 211     _TEST = {
 212         'url': 'https://vid.me/EFARCHIVE',
 213         'info_dict': {
 214             'id': '3834632',
 215             'title': 'EFARCHIVE',
 216         },
 217         'playlist_mincount': 238,
 218     }
 219
 220     # Max possible limit according to https://docs.vid.me/#api-Videos-List
 221     _LIMIT = 100
 222
 223     def _entries(self, user_id, user_name):
 224         for page_num in itertools.count(1):
 225             page = self._download_json(
 226                 'https://api.vid.me/videos/list?user=%s&limit=%d&offset=%d'
 227                 % (user_id, self._LIMIT, (page_num - 1) * self._LIMIT), user_name,
 228                 'Downloading user page %d' % page_num)
 229
 230             videos = page.get('videos', [])
 231             if not videos:
 232                 break
 233
 234             for video in videos:
 235                 video_url = video.get('full_url') or video.get('embed_url')
 236                 if video_url:
 237                     yield self.url_result(video_url, VidmeIE.ie_key())
 238
 239             total = int_or_none(page.get('page', {}).get('total'))
 240             if total and self._LIMIT * page_num >= total:
 241                 break
 242
 243     def _real_extract(self, url):
 244         user_name = self._match_id(url)
 245
 246         user_id = self._download_json(
 247             'https://api.vid.me/userByUsername?username=%s' % user_name,
 248             user_name)['user']['user_id']
 249
 250         return self.playlist_result(self._entries(user_id, user_name), user_id, user_name)