[yt-dlp.git] / youtube_dl / extractor / reddit.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
)


class RedditIE(InfoExtractor):
    # Matches direct v.redd.it links; the path segment after the host is
    # captured as the video id.
    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
    _TEST = {
        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
        'url': 'https://v.redd.it/zv89llsvexdz',
        'md5': '655d06ace653ea3b87bccfb1b27ec99d',
        'info_dict': {
            'id': 'zv89llsvexdz',
            'ext': 'mp4',
            'title': 'zv89llsvexdz',
        },
        'params': {
            'format': 'bestvideo',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a v.redd.it video.

        Formats are gathered from the HLS playlist and the DASH manifest
        located under the video id; the id itself is reused as the title.
        """
        media_id = self._match_id(url)

        hls_manifest_url = 'https://v.redd.it/%s/HLSPlaylist.m3u8' % media_id
        dash_manifest_url = 'https://v.redd.it/%s/DASHPlaylist.mpd' % media_id

        # Both manifests are requested with fatal=False (presumably so that
        # one missing manifest does not abort the extraction).
        formats = self._extract_m3u8_formats(
            hls_manifest_url, media_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        dash_formats = self._extract_mpd_formats(
            dash_manifest_url, media_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)

        self._sort_formats(formats)

        info = {
            'id': media_id,
            'title': media_id,
        }
        info['formats'] = formats
        return info


class RedditRIE(InfoExtractor):
    # Matches reddit comment-thread pages. The id group stops at '/', '?',
    # '#' or '&' (same character class as RedditIE) so that a query string or
    # fragment on a slug-less URL is never captured as part of the post id.
    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
        'info_dict': {
            'id': 'zv89llsvexdz',
            'ext': 'mp4',
            'title': 'That small heart attack.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1501941939,
            'upload_date': '20170805',
            'uploader': 'Antw87',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 0,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
        'only_matching': True,
    }, {
        # query string directly after the post id
        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj?sort=top',
        'only_matching': True,
    }, {
        # imgur
        'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
        'only_matching': True,
    }, {
        # streamable
        'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
        'only_matching': True,
    }, {
        # youtube
        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a reddit post to the media URL it links to.

        Downloads the post's JSON representation, reads the first child of
        the first listing and returns a 'url_transparent' result that points
        at the post's 'url' field, carrying the post's title, thumbnail,
        creation time, author, vote/comment counts and age limit.

        Raises ExtractorError (expected) when the post's 'url' points back
        at this same reddit post, i.e. there is no external media to follow.
        """
        video_id = self._match_id(url)

        # Strip any fragment and query string before appending '.json';
        # otherwise the suffix would land inside the query/fragment and the
        # JSON endpoint would not be requested at all.
        json_url = url.partition('#')[0].partition('?')[0] + '.json'

        data = self._download_json(
            json_url, video_id)[0]['data']['children'][0]['data']

        video_url = data['url']

        # Avoid recursing into the same reddit URL
        if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
            raise ExtractorError('No media found', expected=True)

        # Identity checks on purpose: only a real boolean decides the age
        # limit; a missing or non-boolean 'over_18' leaves it unknown (None).
        over_18 = data.get('over_18')
        if over_18 is True:
            age_limit = 18
        elif over_18 is False:
            age_limit = 0
        else:
            age_limit = None

        return {
            '_type': 'url_transparent',
            'url': video_url,
            'title': data.get('title'),
            'thumbnail': data.get('thumbnail'),
            'timestamp': float_or_none(data.get('created_utc')),
            'uploader': data.get('author'),
            'like_count': int_or_none(data.get('ups')),
            'dislike_count': int_or_none(data.get('downs')),
            'comment_count': int_or_none(data.get('num_comments')),
            'age_limit': age_limit,
        }
Commit	Line	Data
0c43a481 S	1	from __future__ import unicode_literals
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	ExtractorError,
	6	int_or_none,
	7	float_or_none,
	8	)
	9
	10
	11	class RedditIE(InfoExtractor):
	12	_VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
	13	_TEST = {
	14	# from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
	15	'url': 'https://v.redd.it/zv89llsvexdz',
	16	'md5': '655d06ace653ea3b87bccfb1b27ec99d',
	17	'info_dict': {
	18	'id': 'zv89llsvexdz',
	19	'ext': 'mp4',
	20	'title': 'zv89llsvexdz',
	21	},
	22	'params': {
	23	'format': 'bestvideo',
	24	},
	25	}
	26
	27	def _real_extract(self, url):
	28	video_id = self._match_id(url)
	29
	30	formats = self._extract_m3u8_formats(
	31	'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
	32	'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
	33
	34	formats.extend(self._extract_mpd_formats(
	35	'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
	36	mpd_id='dash', fatal=False))
	37
665f42d8 S	38	self._sort_formats(formats)
665f42d8 S	39
0c43a481 S	40	return {
	41	'id': video_id,
	42	'title': video_id,
	43	'formats': formats,
	44	}
	45
	46
	47	class RedditRIE(InfoExtractor):
	48	_VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
	49	_TESTS = [{
	50	'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
	51	'info_dict': {
	52	'id': 'zv89llsvexdz',
	53	'ext': 'mp4',
	54	'title': 'That small heart attack.',
	55	'thumbnail': r're:^https?://.*\.jpg$',
	56	'timestamp': 1501941939,
	57	'upload_date': '20170805',
	58	'uploader': 'Antw87',
	59	'like_count': int,
	60	'dislike_count': int,
	61	'comment_count': int,
	62	'age_limit': 0,
	63	},
	64	'params': {
	65	'format': 'bestvideo',
	66	'skip_download': True,
	67	},
	68	}, {
	69	'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
	70	'only_matching': True,
	71	}, {
	72	# imgur
	73	'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
	74	'only_matching': True,
	75	}, {
	76	# streamable
	77	'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
	78	'only_matching': True,
	79	}, {
	80	# youtube
	81	'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
	82	'only_matching': True,
	83	}]
	84
	85	def _real_extract(self, url):
	86	video_id = self._match_id(url)
	87
	88	data = self._download_json(
	89	url + '.json', video_id)[0]['data']['children'][0]['data']
	90
	91	video_url = data['url']
	92
	93	# Avoid recursing into the same reddit URL
	94	if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
	95	raise ExtractorError('No media found', expected=True)
	96
	97	over_18 = data.get('over_18')
	98	if over_18 is True:
	99	age_limit = 18
	100	elif over_18 is False:
	101	age_limit = 0
	102	else:
	103	age_limit = None
104
105	return {
106	'_type': 'url_transparent',
107	'url': video_url,
108	'title': data.get('title'),
109	'thumbnail': data.get('thumbnail'),
110	'timestamp': float_or_none(data.get('created_utc')),
111	'uploader': data.get('author'),
112	'like_count': int_or_none(data.get('ups')),
113	'dislike_count': int_or_none(data.get('downs')),
114	'comment_count': int_or_none(data.get('num_comments')),
115	'age_limit': age_limit,
116	}