[yt-dlp.git] / youtube_dlc / extractor / vidme.py

from __future__ import unicode_literals

import itertools

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    parse_iso8601,
    url_or_none,
)


class VidmeIE(InfoExtractor):
    """Extractor for a single vid.me video.

    Matches ``https://vid.me/<id>`` and the ``/e/<id>`` embed form, where the
    id consists of at most five alphanumeric characters. Metadata and formats
    are read from the ``api.vid.me/videoByUrl`` endpoint.
    """
    IE_NAME = 'vidme'
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
    _TESTS = [{
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'age_limit': 0,
            'duration': 119.92,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://vid.me/Gc6M',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'Gc6M',
            'ext': 'mp4',
            'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1441211642,
            'upload_date': '20150902',
            'uploader': 'SunshineM',
            'uploader_id': '3552827',
            'age_limit': 0,
            'duration': 223.72,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # tests uploader field
        'url': 'https://vid.me/4Iib',
        'info_dict': {
            'id': '4Iib',
            'ext': 'mp4',
            'title': 'The Carver',
            'description': 'md5:e9c24870018ae8113be936645b93ba3c',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1433203629,
            'upload_date': '20150602',
            'uploader': 'Thomas',
            'uploader_id': '109747',
            'age_limit': 0,
            'duration': 97.859999999999999,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
        'url': 'https://vid.me/e/Wmur',
        'info_dict': {
            'id': 'Wmur',
            'ext': 'mp4',
            'title': 'naked smoking & stretching',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1430931613,
            'upload_date': '20150506',
            'uploader': 'naked-yogi',
            'uploader_id': '1638622',
            'age_limit': 18,
            'duration': 653.26999999999998,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw, user-disabled
        'url': 'https://vid.me/dzGJ',
        'only_matching': True,
    }, {
        # suspended
        'url': 'https://vid.me/Ox3G',
        'only_matching': True,
    }, {
        # deleted
        'url': 'https://vid.me/KTPm',
        'only_matching': True,
    }, {
        # no formats in the API response
        'url': 'https://vid.me/e5g',
        'info_dict': {
            'id': 'e5g',
            'ext': 'mp4',
            'title': 'Video upload (e5g)',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1401480195,
            'upload_date': '20140530',
            'uploader': None,
            'uploader_id': None,
            'age_limit': 0,
            'duration': 483,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Query the vid.me API for ``url`` and return the video info dict.

        Raises ExtractorError (with ``expected=True``) when the API response
        carries an ``error`` field, or when the video ``state`` is
        ``deleted``, ``user-disabled`` or ``suspended``.
        """
        video_id = self._match_id(url)

        try:
            response = self._download_json(
                'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
        except ExtractorError as e:
            # On HTTP 400 the body is parsed as JSON so that its 'error'
            # field can be reported below instead of the bare HTTP failure.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                response = self._parse_json(e.cause.read(), video_id)
            else:
                raise

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        video = response['video']

        state = video.get('state')
        if state == 'deleted':
            raise ExtractorError(
                'Vidme said: Sorry, this video has been deleted.',
                expected=True)

        if state in ('user-disabled', 'suspended'):
            raise ExtractorError(
                'Vidme said: This video has been suspended either due to a copyright claim, '
                'or for violating the terms of use.',
                expected=True)

        formats = []
        # dict.get() only falls back to its default when the key is absent,
        # so an explicit JSON null must be normalised with `or` before use.
        for f in video.get('formats') or []:
            format_url = url_or_none(f.get('uri'))
            if not format_url:
                continue
            format_type = f.get('type')
            if format_type == 'dash':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id='dash', fatal=False))
            elif format_type == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': format_type,
                    'url': format_url,
                    'width': int_or_none(f.get('width')),
                    'height': int_or_none(f.get('height')),
                    # Types ending in 'clip' get the lower preference; a
                    # missing or null type is treated like a non-clip format.
                    'preference': 0 if (format_type or '').endswith(
                        'clip') else 1,
                })

        # Fall back to the single 'complete_url' when no format was collected
        complete_url = video.get('complete_url')
        if not formats and complete_url:
            formats.append({
                'url': complete_url,
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
            })

        self._sort_formats(formats)

        title = video['title']
        # 'user' may be null in the response; keep uploader fields as None
        # instead of failing on None.get().
        user = video.get('user') or {}

        return {
            'id': video_id,
            'title': title or 'Video upload (%s)' % video_id,
            'description': video.get('description'),
            'thumbnail': video.get('thumbnail_url'),
            'uploader': user.get('username'),
            'uploader_id': user.get('user_id'),
            'age_limit': 18 if video.get('nsfw') is True else 0,
            'timestamp': parse_iso8601(video.get('date_created'), ' '),
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('view_count')),
            'like_count': int_or_none(video.get('likes_count')),
            'comment_count': int_or_none(video.get('comment_count')),
            'formats': formats,
        }


class VidmeListBaseIE(InfoExtractor):
    """Shared implementation for vid.me per-user playlists.

    This class reads ``self._API_ITEM`` (the path segment of the
    ``/videos/<item>`` API call) and ``self._TITLE`` (the suffix of the
    playlist title) but does not define them; subclasses must provide both.
    """
    # Max possible limit according to https://docs.vid.me/#api-Videos-List
    _LIMIT = 100

    def _entries(self, user_id, user_name):
        """Yield a ``url_result`` for each video of ``user_id``, page by page.

        Paging stops on the first page without videos, or once the reported
        total has been covered by the pages requested so far.
        """
        for page_num in itertools.count(1):
            offset = (page_num - 1) * self._LIMIT
            page = self._download_json(
                'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
                % (self._API_ITEM, user_id, self._LIMIT, offset),
                user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))

            videos = page.get('videos') or []
            if not videos:
                break

            for video in videos:
                video_url = video.get('full_url') or video.get('embed_url')
                if video_url:
                    yield self.url_result(video_url, VidmeIE.ie_key())

            # dict.get() only uses its default for a missing key, so a null
            # 'page' object is normalised with `or {}` before reading 'total'.
            total = int_or_none((page.get('page') or {}).get('total'))
            if total and self._LIMIT * page_num >= total:
                break

    def _real_extract(self, url):
        """Resolve the user name in ``url`` to a user id and return the playlist."""
        user_name = self._match_id(url)

        user_id = self._download_json(
            'https://api.vid.me/userByUsername?username=%s' % user_name,
            user_name)['user']['user_id']

        return self.playlist_result(
            self._entries(user_id, user_name), user_id,
            '%s - %s' % (user_name, self._TITLE))


class VidmeUserIE(VidmeListBaseIE):
    """Playlist of a vid.me user's videos (``https://vid.me/<username>``)."""
    IE_NAME = 'vidme:user'

    # Values consumed by the VidmeListBaseIE methods
    _API_ITEM = 'list'
    _TITLE = 'Videos'

    # User names are six or more characters of [0-9a-zA-Z_-]; the negative
    # lookahead rejects a directly following '/likes'.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'

    _TESTS = [
        {
            'url': 'https://vid.me/MasakoX',
            'info_dict': {
                'id': '16112341',
                'title': 'MasakoX - %s' % _TITLE,
            },
            'playlist_mincount': 191,
        },
        {
            'url': 'https://vid.me/unsQuare_netWork',
            'only_matching': True,
        },
    ]


class VidmeUserLikesIE(VidmeListBaseIE):
    """Playlist of a vid.me user's likes (``https://vid.me/<username>/likes``)."""
    IE_NAME = 'vidme:user:likes'

    # Values consumed by the VidmeListBaseIE methods
    _API_ITEM = 'likes'
    _TITLE = 'Likes'

    # Same user-name character set as the user extractor, followed by '/likes'
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'

    _TESTS = [
        {
            'url': 'https://vid.me/ErinAlexis/likes',
            'info_dict': {
                'id': '6483530',
                'title': 'ErinAlexis - %s' % _TITLE,
            },
            'playlist_mincount': 415,
        },
        {
            'url': 'https://vid.me/Kaleidoscope-Ish/likes',
            'only_matching': True,
        },
    ]
Commit	Line	Data
0138968a S	1	from __future__ import unicode_literals
0138968a S	2
b7b36506 S	3	import itertools
b7b36506 S	4
482aa3fe	5	from .common import InfoExtractor
3052a30d	6	from ..compat import compat_HTTPError
0138968a	7	from ..utils import (
482aa3fe	8	ExtractorError,
0138968a S	9	int_or_none,
0138968a S	10	float_or_none,
d9c19db3	11	parse_iso8601,
3052a30d	12	url_or_none,
0138968a S	13	)
	14
	15
	16	class VidmeIE(InfoExtractor):
d97b0e32	17	IE_NAME = 'vidme'
b7b36506	18	_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]\|$)'
23dd1fc7	19	_TESTS = [{
0138968a	20	'url': 'https://vid.me/QNB',
d65889bb	21	'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
0138968a S	22	'info_dict': {
	23	'id': 'QNB',
	24	'ext': 'mp4',
	25	'title': 'Fishing for piranha - the easy way',
	26	'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
ec85ded8	27	'thumbnail': r're:^https?://.*\.jpg',
0138968a S	28	'timestamp': 1406313244,
0138968a S	29	'upload_date': '20140725',
482aa3fe S	30	'age_limit': 0,
482aa3fe S	31	'duration': 119.92,
3b58d94f S	32	'view_count': int,
3b58d94f S	33	'like_count': int,
d9c19db3 LL	34	'comment_count': int,
	35	},
	36	}, {
	37	'url': 'https://vid.me/Gc6M',
	38	'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
	39	'info_dict': {
	40	'id': 'Gc6M',
	41	'ext': 'mp4',
	42	'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
ec85ded8	43	'thumbnail': r're:^https?://.*\.jpg',
d9c19db3 LL	44	'timestamp': 1441211642,
d9c19db3 LL	45	'upload_date': '20150902',
482aa3fe S	46	'uploader': 'SunshineM',
	47	'uploader_id': '3552827',
	48	'age_limit': 0,
	49	'duration': 223.72,
d9c19db3 LL	50	'view_count': int,
	51	'like_count': int,
	52	'comment_count': int,
d9c19db3 LL	53	},
	54	'params': {
	55	'skip_download': True,
3b58d94f S	56	},
	57	}, {
	58	# tests uploader field
	59	'url': 'https://vid.me/4Iib',
	60	'info_dict': {
	61	'id': '4Iib',
	62	'ext': 'mp4',
	63	'title': 'The Carver',
	64	'description': 'md5:e9c24870018ae8113be936645b93ba3c',
ec85ded8	65	'thumbnail': r're:^https?://.*\.jpg',
3b58d94f S	66	'timestamp': 1433203629,
	67	'upload_date': '20150602',
	68	'uploader': 'Thomas',
482aa3fe S	69	'uploader_id': '109747',
	70	'age_limit': 0,
	71	'duration': 97.859999999999999,
3b58d94f S	72	'view_count': int,
3b58d94f S	73	'like_count': int,
d9c19db3	74	'comment_count': int,
3b58d94f S	75	},
	76	'params': {
	77	'skip_download': True,
0138968a	78	},
23dd1fc7	79	}, {
482aa3fe	80	# nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
23dd1fc7	81	'url': 'https://vid.me/e/Wmur',
482aa3fe S	82	'info_dict': {
	83	'id': 'Wmur',
	84	'ext': 'mp4',
	85	'title': 'naked smoking & stretching',
ec85ded8	86	'thumbnail': r're:^https?://.*\.jpg',
482aa3fe S	87	'timestamp': 1430931613,
	88	'upload_date': '20150506',
	89	'uploader': 'naked-yogi',
	90	'uploader_id': '1638622',
	91	'age_limit': 18,
	92	'duration': 653.26999999999998,
	93	'view_count': int,
	94	'like_count': int,
	95	'comment_count': int,
	96	},
	97	'params': {
	98	'skip_download': True,
	99	},
9eb31b26 S	100	}, {
	101	# nsfw, user-disabled
	102	'url': 'https://vid.me/dzGJ',
	103	'only_matching': True,
0be30baf LL	104	}, {
	105	# suspended
	106	'url': 'https://vid.me/Ox3G',
	107	'only_matching': True,
5f9f87c0 LL	108	}, {
	109	# deleted
	110	'url': 'https://vid.me/KTPm',
	111	'only_matching': True,
0be30baf LL	112	}, {
	113	# no formats in the API response
	114	'url': 'https://vid.me/e5g',
	115	'info_dict': {
	116	'id': 'e5g',
	117	'ext': 'mp4',
4a896377	118	'title': 'Video upload (e5g)',
ec85ded8	119	'thumbnail': r're:^https?://.*\.jpg',
0be30baf LL	120	'timestamp': 1401480195,
	121	'upload_date': '20140530',
	122	'uploader': None,
	123	'uploader_id': None,
	124	'age_limit': 0,
	125	'duration': 483,
	126	'view_count': int,
	127	'like_count': int,
	128	'comment_count': int,
	129	},
	130	'params': {
	131	'skip_download': True,
	132	},
23dd1fc7	133	}]
0138968a S	134
0138968a S	135	def _real_extract(self, url):
9609f02e	136	video_id = self._match_id(url)
d9c19db3	137
482aa3fe S	138	try:
	139	response = self._download_json(
	140	'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
	141	except ExtractorError as e:
	142	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
	143	response = self._parse_json(e.cause.read(), video_id)
	144	else:
	145	raise
d9c19db3	146
482aa3fe S	147	error = response.get('error')
	148	if error:
	149	raise ExtractorError(
	150	'%s returned error: %s' % (self.IE_NAME, error), expected=True)
0138968a	151
482aa3fe	152	video = response['video']
0138968a	153
5f9f87c0 LL	154	if video.get('state') == 'deleted':
	155	raise ExtractorError(
	156	'Vidme said: Sorry, this video has been deleted.',
	157	expected=True)
	158
0be30baf	159	if video.get('state') in ('user-disabled', 'suspended'):
59fe4824 LL	160	raise ExtractorError(
	161	'Vidme said: This video has been suspended either due to a copyright claim, '
	162	'or for violating the terms of use.',
	163	expected=True)
	164
f31fd069 S	165	formats = []
f31fd069 S	166	for f in video.get('formats', []):
3052a30d S	167	format_url = url_or_none(f.get('uri'))
3052a30d S	168	if not format_url:
f31fd069 S	169	continue
	170	format_type = f.get('type')
	171	if format_type == 'dash':
	172	formats.extend(self._extract_mpd_formats(
	173	format_url, video_id, mpd_id='dash', fatal=False))
	174	elif format_type == 'hls':
	175	formats.extend(self._extract_m3u8_formats(
	176	format_url, video_id, 'mp4', entry_protocol='m3u8_native',
	177	m3u8_id='hls', fatal=False))
	178	else:
	179	formats.append({
	180	'format_id': f.get('type'),
	181	'url': format_url,
	182	'width': int_or_none(f.get('width')),
	183	'height': int_or_none(f.get('height')),
	184	'preference': 0 if f.get('type', '').endswith(
	185	'clip') else 1,
	186	})
0be30baf LL	187
	188	if not formats and video.get('complete_url'):
	189	formats.append({
	190	'url': video.get('complete_url'),
	191	'width': int_or_none(video.get('width')),
	192	'height': int_or_none(video.get('height')),
	193	})
	194
d9c19db3	195	self._sort_formats(formats)
0138968a	196
482aa3fe S	197	title = video['title']
	198	description = video.get('description')
	199	thumbnail = video.get('thumbnail_url')
	200	timestamp = parse_iso8601(video.get('date_created'), ' ')
	201	uploader = video.get('user', {}).get('username')
	202	uploader_id = video.get('user', {}).get('user_id')
	203	age_limit = 18 if video.get('nsfw') is True else 0
	204	duration = float_or_none(video.get('duration'))
	205	view_count = int_or_none(video.get('view_count'))
	206	like_count = int_or_none(video.get('likes_count'))
	207	comment_count = int_or_none(video.get('comment_count'))
	208
0138968a S	209	return {
0138968a S	210	'id': video_id,
4a896377	211	'title': title or 'Video upload (%s)' % video_id,
0138968a S	212	'description': description,
0138968a S	213	'thumbnail': thumbnail,
482aa3fe S	214	'uploader': uploader,
	215	'uploader_id': uploader_id,
	216	'age_limit': age_limit,
0138968a	217	'timestamp': timestamp,
0138968a S	218	'duration': duration,
	219	'view_count': view_count,
	220	'like_count': like_count,
d9c19db3	221	'comment_count': comment_count,
d9c19db3	222	'formats': formats,
0138968a	223	}
b7b36506 S	224
b7b36506 S	225
eb2533ec	226	class VidmeListBaseIE(InfoExtractor):
b7b36506 S	227	# Max possible limit according to https://docs.vid.me/#api-Videos-List
	228	_LIMIT = 100
	229
	230	def _entries(self, user_id, user_name):
	231	for page_num in itertools.count(1):
	232	page = self._download_json(
eb2533ec S	233	'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
	234	% (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
	235	user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
b7b36506 S	236
	237	videos = page.get('videos', [])
	238	if not videos:
	239	break
	240
	241	for video in videos:
	242	video_url = video.get('full_url') or video.get('embed_url')
	243	if video_url:
	244	yield self.url_result(video_url, VidmeIE.ie_key())
	245
	246	total = int_or_none(page.get('page', {}).get('total'))
	247	if total and self._LIMIT * page_num >= total:
	248	break
	249
	250	def _real_extract(self, url):
	251	user_name = self._match_id(url)
	252
	253	user_id = self._download_json(
	254	'https://api.vid.me/userByUsername?username=%s' % user_name,
	255	user_name)['user']['user_id']
	256
eb2533ec S	257	return self.playlist_result(
	258	self._entries(user_id, user_name), user_id,
	259	'%s - %s' % (user_name, self._TITLE))
	260
	261
	262	class VidmeUserIE(VidmeListBaseIE):
d97b0e32	263	IE_NAME = 'vidme:user'
0b4a8eb3	264	_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]\|$)'
eb2533ec S	265	_API_ITEM = 'list'
eb2533ec S	266	_TITLE = 'Videos'
0b4a8eb3	267	_TESTS = [{
0b4a8eb3	268	'url': 'https://vid.me/MasakoX',
eb2533ec	269	'info_dict': {
0b4a8eb3	270	'id': '16112341',
0b4a8eb3	271	'title': 'MasakoX - %s' % _TITLE,
eb2533ec	272	},
0b4a8eb3	273	'playlist_mincount': 191,
	274	}, {
	275	'url': 'https://vid.me/unsQuare_netWork',
bc35f075	276	'only_matching': True,
0b4a8eb3	277	}]
eb2533ec S	278
	279
	280	class VidmeUserLikesIE(VidmeListBaseIE):
d97b0e32	281	IE_NAME = 'vidme:user:likes'
0b4a8eb3	282	_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
eb2533ec S	283	_API_ITEM = 'likes'
eb2533ec S	284	_TITLE = 'Likes'
0b4a8eb3	285	_TESTS = [{
eb2533ec S	286	'url': 'https://vid.me/ErinAlexis/likes',
	287	'info_dict': {
	288	'id': '6483530',
	289	'title': 'ErinAlexis - %s' % _TITLE,
	290	},
	291	'playlist_mincount': 415,
0b4a8eb3	292	}, {
0b4a8eb3	293	'url': 'https://vid.me/Kaleidoscope-Ish/likes',
bc35f075	294	'only_matching': True,
0b4a8eb3	295	}]