[yt-dlp.git] / youtube_dl / extractor / vidme.py

from __future__ import unicode_literals

import itertools

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    parse_iso8601,
)


class VidmeIE(InfoExtractor):
    """Extractor for a single vid.me video (``vid.me/<id>`` or ``vid.me/e/<id>``)."""

    IE_NAME = 'vidme'
    # The id is at most five alphanumeric characters; alphanumeric path
    # segments of six or more characters are matched by the user extractors.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
    _TESTS = [{
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'age_limit': 0,
            'duration': 119.92,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://vid.me/Gc6M',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'Gc6M',
            'ext': 'mp4',
            'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1441211642,
            'upload_date': '20150902',
            'uploader': 'SunshineM',
            'uploader_id': '3552827',
            'age_limit': 0,
            'duration': 223.72,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # tests uploader field
        'url': 'https://vid.me/4Iib',
        'info_dict': {
            'id': '4Iib',
            'ext': 'mp4',
            'title': 'The Carver',
            'description': 'md5:e9c24870018ae8113be936645b93ba3c',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1433203629,
            'upload_date': '20150602',
            'uploader': 'Thomas',
            'uploader_id': '109747',
            'age_limit': 0,
            'duration': 97.859999999999999,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
        'url': 'https://vid.me/e/Wmur',
        'info_dict': {
            'id': 'Wmur',
            'ext': 'mp4',
            'title': 'naked smoking & stretching',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1430931613,
            'upload_date': '20150506',
            'uploader': 'naked-yogi',
            'uploader_id': '1638622',
            'age_limit': 18,
            'duration': 653.26999999999998,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw, user-disabled
        'url': 'https://vid.me/dzGJ',
        'only_matching': True,
    }, {
        # suspended
        'url': 'https://vid.me/Ox3G',
        'only_matching': True,
    }, {
        # deleted
        'url': 'https://vid.me/KTPm',
        'only_matching': True,
    }, {
        # no formats in the API response
        'url': 'https://vid.me/e5g',
        'info_dict': {
            'id': 'e5g',
            'ext': 'mp4',
            'title': 'Video upload (e5g)',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1401480195,
            'upload_date': '20140530',
            'uploader': None,
            'uploader_id': None,
            'age_limit': 0,
            'duration': 483,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Query the ``videoByUrl`` API for the id in *url* and build the info dict.

        Raises ExtractorError (``expected=True``) when the response carries an
        ``error`` field or the video state is ``deleted``, ``user-disabled``
        or ``suspended``. Download errors other than HTTP 400 are re-raised
        unchanged.
        """
        video_id = self._match_id(url)

        try:
            response = self._download_json(
                'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
        except ExtractorError as e:
            # A 400 body is parsed as JSON so that its 'error' field (if any)
            # is reported below instead of a bare HTTP failure.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                # read() yields bytes; decode explicitly because json.loads
                # only accepts bytes from Python 3.6 on.
                response = self._parse_json(
                    e.cause.read().decode('utf-8'), video_id)
            else:
                raise

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        video = response['video']
        state = video.get('state')

        if state == 'deleted':
            raise ExtractorError(
                'Vidme said: Sorry, this video has been deleted.',
                expected=True)

        if state in ('user-disabled', 'suspended'):
            raise ExtractorError(
                'Vidme said: This video has been suspended either due to a copyright claim, '
                'or for violating the terms of use.',
                expected=True)

        formats = []
        # `or []` / `or ''` also cover keys that are present but null.
        for f in video.get('formats') or []:
            format_url = f.get('uri')
            if not format_url:
                continue
            format_type = f.get('type')
            formats.append({
                'format_id': format_type,
                'url': format_url,
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                # Types ending in "clip" get the lower preference.
                'preference': 0 if (format_type or '').endswith('clip') else 1,
            })

        # Fall back to the single complete_url when no usable format was listed.
        complete_url = video.get('complete_url')
        if not formats and complete_url:
            formats.append({
                'url': complete_url,
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
            })

        self._sort_formats(formats)

        # 'user' may be absent or null (see the e5g test, which expects no
        # uploader), so never call .get() on it directly.
        user = video.get('user') or {}

        return {
            'id': video_id,
            # A missing or empty title falls back to a generated one.
            'title': video.get('title') or 'Video upload (%s)' % video_id,
            'description': video.get('description'),
            'thumbnail': video.get('thumbnail_url'),
            'uploader': user.get('username'),
            'uploader_id': user.get('user_id'),
            'age_limit': 18 if video.get('nsfw') is True else 0,
            'timestamp': parse_iso8601(video.get('date_created'), ' '),
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('view_count')),
            'like_count': int_or_none(video.get('likes_count')),
            'comment_count': int_or_none(video.get('comment_count')),
            'formats': formats,
        }


class VidmeListBaseIE(InfoExtractor):
    """Shared paging logic for vid.me per-user playlists.

    Subclasses provide ``_API_ITEM`` (the path segment after ``/videos/`` in
    the API URL) and ``_TITLE`` (the suffix of the playlist title).
    """

    # Max possible limit according to https://docs.vid.me/#api-Videos-List
    _LIMIT = 100

    def _entries(self, user_id, user_name):
        """Yield a url_result per listed video, fetching one API page at a time."""
        for page_index in itertools.count():
            page_num = page_index + 1
            api_url = (
                'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
                % (self._API_ITEM, user_id, self._LIMIT, page_index * self._LIMIT))
            note = 'Downloading user %s page %d' % (self._API_ITEM, page_num)
            page = self._download_json(api_url, user_name, note)

            videos = page.get('videos', [])
            if not videos:
                # An empty page ends the listing.
                return

            for video in videos:
                video_url = video.get('full_url') or video.get('embed_url')
                if not video_url:
                    continue
                yield self.url_result(video_url, VidmeIE.ie_key())

            # Stop once the offsets requested so far cover the reported total.
            total = int_or_none(page.get('page', {}).get('total'))
            if total and total <= self._LIMIT * page_num:
                return

    def _real_extract(self, url):
        """Look up the numeric user id for the name in *url* and return the playlist."""
        user_name = self._match_id(url)

        user_info = self._download_json(
            'https://api.vid.me/userByUsername?username=%s' % user_name,
            user_name)
        user_id = user_info['user']['user_id']

        playlist_title = '%s - %s' % (user_name, self._TITLE)
        return self.playlist_result(
            self._entries(user_id, user_name), user_id, playlist_title)


class VidmeUserIE(VidmeListBaseIE):
    """Playlist extractor using the ``list`` API item, titled "<user> - Videos"."""

    # Values consumed by the paging logic in VidmeListBaseIE.
    _API_ITEM = 'list'
    _TITLE = 'Videos'

    IE_NAME = 'vidme:user'
    # Matches alphanumeric names of six or more characters; the negative
    # lookahead leaves ".../likes" URLs to VidmeUserLikesIE.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'

    _TEST = {
        'url': 'https://vid.me/EFARCHIVE',
        'info_dict': {
            'id': '3834632',
            'title': 'EFARCHIVE - %s' % _TITLE,
        },
        'playlist_mincount': 238,
    }


class VidmeUserLikesIE(VidmeListBaseIE):
    """Playlist extractor using the ``likes`` API item, titled "<user> - Likes"."""

    # Values consumed by the paging logic in VidmeListBaseIE.
    _API_ITEM = 'likes'
    _TITLE = 'Likes'

    IE_NAME = 'vidme:user:likes'
    # Matches alphanumeric names of six or more characters followed by "/likes".
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'

    _TEST = {
        'url': 'https://vid.me/ErinAlexis/likes',
        'info_dict': {
            'id': '6483530',
            'title': 'ErinAlexis - %s' % _TITLE,
        },
        'playlist_mincount': 415,
    }
Commit	Line	Data
0138968a S	1	from __future__ import unicode_literals
0138968a S	2
b7b36506 S	3	import itertools
b7b36506 S	4
482aa3fe S	5	from .common import InfoExtractor
482aa3fe S	6	from ..compat import compat_HTTPError
0138968a	7	from ..utils import (
482aa3fe	8	ExtractorError,
0138968a S	9	int_or_none,
0138968a S	10	float_or_none,
d9c19db3	11	parse_iso8601,
0138968a S	12	)
	13
	14
	15	class VidmeIE(InfoExtractor):
d97b0e32	16	IE_NAME = 'vidme'
b7b36506	17	_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]\|$)'
23dd1fc7	18	_TESTS = [{
0138968a	19	'url': 'https://vid.me/QNB',
d65889bb	20	'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
0138968a S	21	'info_dict': {
	22	'id': 'QNB',
	23	'ext': 'mp4',
	24	'title': 'Fishing for piranha - the easy way',
	25	'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
482aa3fe	26	'thumbnail': 're:^https?://.*\.jpg',
0138968a S	27	'timestamp': 1406313244,
0138968a S	28	'upload_date': '20140725',
482aa3fe S	29	'age_limit': 0,
482aa3fe S	30	'duration': 119.92,
3b58d94f S	31	'view_count': int,
3b58d94f S	32	'like_count': int,
d9c19db3 LL	33	'comment_count': int,
	34	},
	35	}, {
	36	'url': 'https://vid.me/Gc6M',
	37	'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
	38	'info_dict': {
	39	'id': 'Gc6M',
	40	'ext': 'mp4',
	41	'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
482aa3fe	42	'thumbnail': 're:^https?://.*\.jpg',
d9c19db3 LL	43	'timestamp': 1441211642,
d9c19db3 LL	44	'upload_date': '20150902',
482aa3fe S	45	'uploader': 'SunshineM',
	46	'uploader_id': '3552827',
	47	'age_limit': 0,
	48	'duration': 223.72,
d9c19db3 LL	49	'view_count': int,
	50	'like_count': int,
	51	'comment_count': int,
d9c19db3 LL	52	},
	53	'params': {
	54	'skip_download': True,
3b58d94f S	55	},
	56	}, {
	57	# tests uploader field
	58	'url': 'https://vid.me/4Iib',
	59	'info_dict': {
	60	'id': '4Iib',
	61	'ext': 'mp4',
	62	'title': 'The Carver',
	63	'description': 'md5:e9c24870018ae8113be936645b93ba3c',
482aa3fe	64	'thumbnail': 're:^https?://.*\.jpg',
3b58d94f S	65	'timestamp': 1433203629,
	66	'upload_date': '20150602',
	67	'uploader': 'Thomas',
482aa3fe S	68	'uploader_id': '109747',
	69	'age_limit': 0,
	70	'duration': 97.859999999999999,
3b58d94f S	71	'view_count': int,
3b58d94f S	72	'like_count': int,
d9c19db3	73	'comment_count': int,
3b58d94f S	74	},
	75	'params': {
	76	'skip_download': True,
0138968a	77	},
23dd1fc7	78	}, {
482aa3fe	79	# nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
23dd1fc7	80	'url': 'https://vid.me/e/Wmur',
482aa3fe S	81	'info_dict': {
	82	'id': 'Wmur',
	83	'ext': 'mp4',
	84	'title': 'naked smoking & stretching',
	85	'thumbnail': 're:^https?://.*\.jpg',
	86	'timestamp': 1430931613,
	87	'upload_date': '20150506',
	88	'uploader': 'naked-yogi',
	89	'uploader_id': '1638622',
	90	'age_limit': 18,
	91	'duration': 653.26999999999998,
	92	'view_count': int,
	93	'like_count': int,
	94	'comment_count': int,
	95	},
	96	'params': {
	97	'skip_download': True,
	98	},
9eb31b26 S	99	}, {
	100	# nsfw, user-disabled
	101	'url': 'https://vid.me/dzGJ',
	102	'only_matching': True,
0be30baf LL	103	}, {
	104	# suspended
	105	'url': 'https://vid.me/Ox3G',
	106	'only_matching': True,
5f9f87c0 LL	107	}, {
	108	# deleted
	109	'url': 'https://vid.me/KTPm',
	110	'only_matching': True,
0be30baf LL	111	}, {
	112	# no formats in the API response
	113	'url': 'https://vid.me/e5g',
	114	'info_dict': {
	115	'id': 'e5g',
	116	'ext': 'mp4',
4a896377	117	'title': 'Video upload (e5g)',
0be30baf LL	118	'thumbnail': 're:^https?://.*\.jpg',
	119	'timestamp': 1401480195,
	120	'upload_date': '20140530',
	121	'uploader': None,
	122	'uploader_id': None,
	123	'age_limit': 0,
	124	'duration': 483,
	125	'view_count': int,
	126	'like_count': int,
	127	'comment_count': int,
	128	},
	129	'params': {
	130	'skip_download': True,
	131	},
23dd1fc7	132	}]
0138968a S	133
0138968a S	134	def _real_extract(self, url):
9609f02e	135	video_id = self._match_id(url)
d9c19db3	136
482aa3fe S	137	try:
	138	response = self._download_json(
	139	'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
	140	except ExtractorError as e:
	141	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
	142	response = self._parse_json(e.cause.read(), video_id)
	143	else:
	144	raise
d9c19db3	145
482aa3fe S	146	error = response.get('error')
	147	if error:
	148	raise ExtractorError(
	149	'%s returned error: %s' % (self.IE_NAME, error), expected=True)
0138968a	150
482aa3fe	151	video = response['video']
0138968a	152
5f9f87c0 LL	153	if video.get('state') == 'deleted':
	154	raise ExtractorError(
	155	'Vidme said: Sorry, this video has been deleted.',
	156	expected=True)
	157
0be30baf	158	if video.get('state') in ('user-disabled', 'suspended'):
59fe4824 LL	159	raise ExtractorError(
	160	'Vidme said: This video has been suspended either due to a copyright claim, '
	161	'or for violating the terms of use.',
	162	expected=True)
	163
d9c19db3	164	formats = [{
482aa3fe S	165	'format_id': f.get('type'),
	166	'url': f['uri'],
	167	'width': int_or_none(f.get('width')),
	168	'height': int_or_none(f.get('height')),
05b476a2	169	'preference': 0 if f.get('type', '').endswith('clip') else 1,
482aa3fe	170	} for f in video.get('formats', []) if f.get('uri')]
0be30baf LL	171
	172	if not formats and video.get('complete_url'):
	173	formats.append({
	174	'url': video.get('complete_url'),
	175	'width': int_or_none(video.get('width')),
	176	'height': int_or_none(video.get('height')),
	177	})
	178
d9c19db3	179	self._sort_formats(formats)
0138968a	180
482aa3fe S	181	title = video['title']
	182	description = video.get('description')
	183	thumbnail = video.get('thumbnail_url')
	184	timestamp = parse_iso8601(video.get('date_created'), ' ')
	185	uploader = video.get('user', {}).get('username')
	186	uploader_id = video.get('user', {}).get('user_id')
	187	age_limit = 18 if video.get('nsfw') is True else 0
	188	duration = float_or_none(video.get('duration'))
	189	view_count = int_or_none(video.get('view_count'))
	190	like_count = int_or_none(video.get('likes_count'))
	191	comment_count = int_or_none(video.get('comment_count'))
	192
0138968a S	193	return {
0138968a S	194	'id': video_id,
4a896377	195	'title': title or 'Video upload (%s)' % video_id,
0138968a S	196	'description': description,
0138968a S	197	'thumbnail': thumbnail,
482aa3fe S	198	'uploader': uploader,
	199	'uploader_id': uploader_id,
	200	'age_limit': age_limit,
0138968a	201	'timestamp': timestamp,
0138968a S	202	'duration': duration,
	203	'view_count': view_count,
	204	'like_count': like_count,
d9c19db3	205	'comment_count': comment_count,
d9c19db3	206	'formats': formats,
0138968a	207	}
b7b36506 S	208
b7b36506 S	209
eb2533ec	210	class VidmeListBaseIE(InfoExtractor):
b7b36506 S	211	# Max possible limit according to https://docs.vid.me/#api-Videos-List
	212	_LIMIT = 100
	213
	214	def _entries(self, user_id, user_name):
	215	for page_num in itertools.count(1):
	216	page = self._download_json(
eb2533ec S	217	'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
	218	% (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
	219	user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
b7b36506 S	220
	221	videos = page.get('videos', [])
	222	if not videos:
	223	break
	224
	225	for video in videos:
	226	video_url = video.get('full_url') or video.get('embed_url')
	227	if video_url:
	228	yield self.url_result(video_url, VidmeIE.ie_key())
	229
	230	total = int_or_none(page.get('page', {}).get('total'))
	231	if total and self._LIMIT * page_num >= total:
	232	break
	233
	234	def _real_extract(self, url):
	235	user_name = self._match_id(url)
	236
	237	user_id = self._download_json(
	238	'https://api.vid.me/userByUsername?username=%s' % user_name,
	239	user_name)['user']['user_id']
	240
eb2533ec S	241	return self.playlist_result(
	242	self._entries(user_id, user_name), user_id,
	243	'%s - %s' % (user_name, self._TITLE))
	244
	245
	246	class VidmeUserIE(VidmeListBaseIE):
d97b0e32	247	IE_NAME = 'vidme:user'
eb2533ec S	248	_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]\|$)'
	249	_API_ITEM = 'list'
	250	_TITLE = 'Videos'
	251	_TEST = {
	252	'url': 'https://vid.me/EFARCHIVE',
	253	'info_dict': {
	254	'id': '3834632',
	255	'title': 'EFARCHIVE - %s' % _TITLE,
	256	},
	257	'playlist_mincount': 238,
	258	}
	259
	260
	261	class VidmeUserLikesIE(VidmeListBaseIE):
d97b0e32	262	IE_NAME = 'vidme:user:likes'
eb2533ec S	263	_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
	264	_API_ITEM = 'likes'
	265	_TITLE = 'Likes'
	266	_TEST = {
	267	'url': 'https://vid.me/ErinAlexis/likes',
	268	'info_dict': {
	269	'id': '6483530',
	270	'title': 'ErinAlexis - %s' % _TITLE,
	271	},
	272	'playlist_mincount': 415,
	273	}