[yt-dlp.git] / youtube_dl / extractor / smotri.py

# encoding: utf-8
from __future__ import unicode_literals

import re
import json
import hashlib
import uuid

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
    int_or_none,
    unified_strdate,
)


class SmotriIE(InfoExtractor):
    """Extractor for single smotri.com videos.

    Handles both the regular watch page URLs and the SWF player URLs
    hosted on pics.smotri.com (see ``_VALID_URL``).
    """
    IE_DESC = 'Smotri.com'
    IE_NAME = 'smotri'
    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
    _NETRC_MACHINE = 'smotri'

    _TESTS = [
        # real video id 2610366
        {
            'url': 'http://smotri.com/video/view/?id=v261036632ab',
            'md5': '2a7b08249e6f5636557579c368040eb9',
            'info_dict': {
                'id': 'v261036632ab',
                'ext': 'mp4',
                'title': 'катастрофа с камер видеонаблюдения',
                'uploader': 'rbc2008',
                'uploader_id': 'rbc08',
                'upload_date': '20131118',
                'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
            },
        },
        # real video id 57591
        {
            'url': 'http://smotri.com/video/view/?id=v57591cb20',
            'md5': '830266dfc21f077eac5afd1883091bcd',
            'info_dict': {
                'id': 'v57591cb20',
                'ext': 'flv',
                'title': 'test',
                'uploader': 'Support Photofile@photofile',
                'uploader_id': 'support-photofile',
                'upload_date': '20070704',
                'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
            },
        },
        # video-password
        {
            'url': 'http://smotri.com/video/view/?id=v1390466a13c',
            'md5': 'f6331cef33cad65a0815ee482a54440b',
            'info_dict': {
                'id': 'v1390466a13c',
                'ext': 'mp4',
                'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                'uploader': 'timoxa40',
                'uploader_id': 'timoxa40',
                'upload_date': '20100404',
                'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
            },
            'params': {
                'videopassword': 'qwerty',
            },
        },
        # age limit + video-password
        {
            'url': 'http://smotri.com/video/view/?id=v15408898bcf',
            'md5': '91e909c9f0521adf5ee86fbe073aad70',
            'info_dict': {
                'id': 'v15408898bcf',
                'ext': 'flv',
                'title': 'этот ролик не покажут по ТВ',
                'uploader': 'zzxxx',
                'uploader_id': 'ueggb',
                'upload_date': '20101001',
                'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
                'age_limit': 18,
            },
            'params': {
                'videopassword': '333'
            }
        },
        # swf player
        {
            'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
            'md5': '4d47034979d9390d14acdf59c4935bc2',
            'info_dict': {
                'id': 'v9188090500',
                'ext': 'mp4',
                'title': 'Shakira - Don\'t Bother',
                'uploader': 'HannahL',
                'uploader_id': 'lisaha95',
                'upload_date': '20090331',
                'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
            },
        },
    ]

    @classmethod
    def _extract_url(cls, webpage):
        """Locate a smotri video embedded in a third-party page.

        Two embedding styles are recognised: an ``<embed>`` tag pointing at
        the SWF player, and a trio of ``video_file`` / ``video_image`` /
        ``video_id`` divs.

        Returns the URL of the embedded video as a string, or None when
        neither pattern is present in ``webpage``.
        """
        # The closing-quote backreference sits outside the ``url`` group so
        # the returned URL does not carry the trailing quote character, and
        # any number of attributes may precede ``src``.
        mobj = re.search(
            r'<embed[^>]*?\ssrc=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?)\1',
            webpage)
        if mobj is not None:
            return mobj.group('url')

        mobj = re.search(
            r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
                    <div\s+class="video_image">[^<]+</div>\s*
                    <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
        if mobj is not None:
            return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')

    def _search_meta(self, name, html, display_name=None):
        """Return the ``content`` of ``<meta itemprop="name" ... />`` in html.

        ``display_name`` is the label used when reporting a failed lookup and
        defaults to ``name``. The search is non-fatal (``fatal=False``).
        """
        if display_name is None:
            display_name = name
        return self._html_search_regex(
            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
            html, display_name, fatal=False)

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by ``url``.

        Most metadata comes from the player's JSON endpoint; the view count
        and the adult-content flag are scraped from the video web page.

        Raises ExtractorError when the JSON marks the video as not approved
        by a moderator or as erroneous (non-existent).
        """
        video_id = self._match_id(url)

        video_form = {
            'ticket': video_id,
            'video_url': '1',
            'frame_url': '1',
            'devid': 'LoadupFlashPlayer',
            'getvideoinfo': '1',
        }

        # urlencode() output is pure ASCII; Python 3's urllib only accepts
        # bytes as POST data, so encode explicitly.
        request = compat_urllib_request.Request(
            'http://smotri.com/video/view/url/bot/',
            compat_urllib_parse.urlencode(video_form).encode('ascii'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        video = self._download_json(request, video_id, 'Downloading video JSON')

        if video.get('_moderate_no') or not video.get('moderated'):
            raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)

        if video.get('error'):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
        title = video['title']
        thumbnail = video['_imgURL']
        upload_date = unified_strdate(video['added'])
        uploader = video['userNick']
        uploader_id = video['userLogin']
        duration = int_or_none(video['duration'])

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
        webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
        webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')

        # Warning if video is unavailable
        warning = self._html_search_regex(
            r'<div class="videoUnModer">(.*?)</div>', webpage,
            'warning message', default=None)
        if warning is not None:
            self._downloader.report_warning(
                'Video %s may not be available; smotri said: %s ' %
                (video_id, warning))

        # Adult content: the page shows a confirmation link whose token has
        # to be passed back to obtain the real video page.
        adult_content = 'EroConfirmText">' in webpage
        if adult_content:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
                webpage, 'confirm string')
            confirm_url = webpage_url + '&confirm=%s' % confirm_string
            webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')

        view_count = self._html_search_regex(
            'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
            webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'age_limit': 18 if adult_content else 0,
        }


class SmotriCommunityIE(InfoExtractor):
    """Playlist extractor for the videos of a smotri.com community."""
    IE_DESC = 'Smotri.com community videos'
    IE_NAME = 'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
    _TEST = {
        'url': 'http://smotri.com/community/video/kommuna',
        'info_dict': {
            'id': 'kommuna',
            'title': 'КПРФ',
        },
        'playlist_mincount': 4,
    }

    def _real_extract(self, url):
        """Build a playlist from the community's RSS export.

        Every ``link`` of the feed's channel items becomes a url_result
        entry handled by the 'Smotri' extractor; the playlist title is
        parsed out of the channel description.
        """
        community_id = re.match(self._VALID_URL, url).group('communityid')

        rss_url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
        feed = self._download_xml(rss_url, community_id, 'Downloading community RSS')

        entries = []
        for link in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link.text, 'Smotri'))

        # The description has the form: Видео сообщества "<title>"
        community_title = self._html_search_regex(
            '^Видео сообщества "([^"]+)"$',
            feed.find('./channel/description').text,
            'community title')

        return self.playlist_result(entries, community_id, community_title)


class SmotriUserIE(InfoExtractor):
    """Playlist extractor for the videos uploaded by a smotri.com user."""
    IE_DESC = 'Smotri.com user videos'
    IE_NAME = 'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
    _TESTS = [{
        'url': 'http://smotri.com/user/inspector',
        'info_dict': {
            'id': 'inspector',
            'title': 'Inspector',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Build a playlist from the user's RSS export.

        Every ``link`` of the feed's channel items becomes a url_result
        entry handled by the 'Smotri' extractor; the playlist title is the
        nickname parsed out of the channel description.
        """
        user_id = re.match(self._VALID_URL, url).group('userid')

        rss_url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
        feed = self._download_xml(rss_url, user_id, 'Downloading user RSS')

        entries = []
        for link in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link.text, 'Smotri'))

        # The description has the form: Видео режиссера <nickname>
        user_nickname = self._html_search_regex(
            '^Видео режиссера (.*)$',
            feed.find('./channel/description').text,
            'user nickname')

        return self.playlist_result(entries, user_id, user_nickname)


class SmotriBroadcastIE(InfoExtractor):
    """Extractor for live RTMP broadcasts on smotri.com."""
    IE_DESC = 'Smotri.com broadcasts'
    IE_NAME = 'smotri:broadcast'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'

    def _real_extract(self, url):
        """Extract the RTMP stream info for the broadcast at ``url``.

        Steps: download the broadcast page, log in and confirm age when the
        page asks for adult confirmation, read the broadcast ticket from the
        page, then query the JSON endpoint (sending the MD5 of the
        ``videopassword`` parameter when one was given).

        Raises ExtractorError when the broadcast does not exist, credentials
        are missing or rejected, the broadcast is password protected and the
        password is missing or wrong, the broadcast is offline, or the JSON
        response lacks an expected field.
        """
        mobj = re.match(self._VALID_URL, url)
        broadcast_id = mobj.group('broadcastid')

        broadcast_url = 'http://' + mobj.group('url')
        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')

        # broadcast_id comes straight from the URL, so escape it before
        # interpolating it into a regular expression.
        if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % re.escape(broadcast_id), broadcast_page) is not None:
            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)

        # Adult content
        adult_content = 'EroConfirmText">' in broadcast_page
        if adult_content:
            (username, password) = self._get_login_info()
            if username is None:
                raise ExtractorError(
                    'Erotic broadcasts allowed only for registered users, '
                    'use --username and --password options to provide account credentials.',
                    expected=True)

            login_form = {
                'login-hint53': '1',
                'confirm_erotic': '1',
                'login': username,
                'password': password,
            }

            # urlencode() output is pure ASCII; Python 3's urllib only
            # accepts bytes as POST data, so encode explicitly.
            request = compat_urllib_request.Request(
                broadcast_url + '/?no_redirect=1',
                compat_urllib_parse.urlencode(login_form).encode('ascii'))
            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')

            if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                raise ExtractorError('Unable to log in: bad username or password', expected=True)

        ticket = self._html_search_regex(
            r"window\.broadcast_control\.addFlashVar\('file', '([^']+)'\);",
            broadcast_page, 'broadcast ticket')

        url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket

        broadcast_password = self._downloader.params.get('videopassword', None)
        if broadcast_password:
            url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()

        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')

        # Pre-set so the KeyError handler below can consult it even when
        # '_pass_protected' itself is the missing key.
        protected_broadcast = False
        try:
            broadcast_json = json.loads(broadcast_json_page)

            protected_broadcast = broadcast_json['_pass_protected'] == 1
            if protected_broadcast and not broadcast_password:
                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)

            broadcast_offline = broadcast_json['is_play'] == 0
            if broadcast_offline:
                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

            rtmp_url = broadcast_json['_server']
            if not rtmp_url.startswith('rtmp://'):
                raise ExtractorError('Unexpected broadcast rtmp URL')

            broadcast_playpath = broadcast_json['_streamName']
            broadcast_thumbnail = broadcast_json['_imgURL']
            broadcast_title = broadcast_json['title']
            broadcast_description = broadcast_json['description']
            broadcaster_nick = broadcast_json['nick']
            broadcaster_login = broadcast_json['login']
        except KeyError:
            # A protected broadcast whose response lacks the stream fields
            # is reported as a wrong password.
            if protected_broadcast:
                raise ExtractorError('Bad broadcast password', expected=True)
            raise ExtractorError('Unexpected broadcast JSON')

        rtmp_conn = 'S:%s' % uuid.uuid4().hex

        return {
            'id': broadcast_id,
            'url': rtmp_url,
            'title': broadcast_title,
            'thumbnail': broadcast_thumbnail,
            'description': broadcast_description,
            'uploader': broadcaster_nick,
            'uploader_id': broadcaster_login,
            'age_limit': 18 if adult_content else 0,
            'ext': 'flv',
            'play_path': broadcast_playpath,
            'rtmp_live': True,
            'rtmp_conn': rtmp_conn,
        }
Commit	Line	Data
5270d8cb	1	# encoding: utf-8
ffe8f62d	2	from __future__ import unicode_literals
5270d8cb	3
	4	import re
	5	import json
	6	import hashlib
55f6597c	7	import uuid
5270d8cb	8
	9	from .common import InfoExtractor
	10	from ..utils import (
55f6597c	11	compat_urllib_parse,
55f6597c	12	compat_urllib_request,
693b8b2d	13	ExtractorError,
cb3ac1c6	14	int_or_none,
598c218f	15	unified_strdate,
5270d8cb	16	)
	17
	18
	19	class SmotriIE(InfoExtractor):
ffe8f62d	20	IE_DESC = 'Smotri.com'
ffe8f62d	21	IE_NAME = 'smotri'
598c218f	22	_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=\|pics\.smotri\.com/(?:player\|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
541cb26c	23	_NETRC_MACHINE = 'smotri'
aaebed13	24
5270d8cb	25	_TESTS = [
	26	# real video id 2610366
	27	{
ffe8f62d	28	'url': 'http://smotri.com/video/view/?id=v261036632ab',
ffe8f62d	29	'md5': '2a7b08249e6f5636557579c368040eb9',
ffe8f62d	30	'info_dict': {
cb3ac1c6 S	31	'id': 'v261036632ab',
cb3ac1c6 S	32	'ext': 'mp4',
ffe8f62d	33	'title': 'катастрофа с камер видеонаблюдения',
	34	'uploader': 'rbc2008',
	35	'uploader_id': 'rbc08',
	36	'upload_date': '20131118',
ffe8f62d	37	'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
5270d8cb	38	},
	39	},
	40	# real video id 57591
	41	{
ffe8f62d	42	'url': 'http://smotri.com/video/view/?id=v57591cb20',
ffe8f62d	43	'md5': '830266dfc21f077eac5afd1883091bcd',
ffe8f62d	44	'info_dict': {
cb3ac1c6 S	45	'id': 'v57591cb20',
cb3ac1c6 S	46	'ext': 'flv',
ffe8f62d	47	'title': 'test',
	48	'uploader': 'Support Photofile@photofile',
	49	'uploader_id': 'support-photofile',
	50	'upload_date': '20070704',
ffe8f62d	51	'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
aaebed13	52	},
5270d8cb	53	},
	54	# video-password
	55	{
ffe8f62d	56	'url': 'http://smotri.com/video/view/?id=v1390466a13c',
ffe8f62d	57	'md5': 'f6331cef33cad65a0815ee482a54440b',
ffe8f62d	58	'info_dict': {
cb3ac1c6 S	59	'id': 'v1390466a13c',
cb3ac1c6 S	60	'ext': 'mp4',
ffe8f62d	61	'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
	62	'uploader': 'timoxa40',
	63	'uploader_id': 'timoxa40',
	64	'upload_date': '20100404',
	65	'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
5270d8cb	66	},
ffe8f62d	67	'params': {
ffe8f62d	68	'videopassword': 'qwerty',
5270d8cb	69	},
	70	},
	71	# age limit + video-password
	72	{
ffe8f62d	73	'url': 'http://smotri.com/video/view/?id=v15408898bcf',
ffe8f62d	74	'md5': '91e909c9f0521adf5ee86fbe073aad70',
ffe8f62d	75	'info_dict': {
cb3ac1c6 S	76	'id': 'v15408898bcf',
cb3ac1c6 S	77	'ext': 'flv',
ffe8f62d	78	'title': 'этот ролик не покажут по ТВ',
	79	'uploader': 'zzxxx',
	80	'uploader_id': 'ueggb',
	81	'upload_date': '20101001',
	82	'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
	83	'age_limit': 18,
aaebed13	84	},
ffe8f62d	85	'params': {
ffe8f62d	86	'videopassword': '333'
5270d8cb	87	}
cb3ac1c6 S	88	},
	89	# swf player
	90	{
	91	'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
	92	'md5': '4d47034979d9390d14acdf59c4935bc2',
	93	'info_dict': {
	94	'id': 'v9188090500',
	95	'ext': 'mp4',
	96	'title': 'Shakira - Don\'t Bother',
	97	'uploader': 'HannahL',
	98	'uploader_id': 'lisaha95',
	99	'upload_date': '20090331',
cb3ac1c6 S	100	'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
	101	},
	102	},
5270d8cb	103	]
ffe8f62d	104
cb3ac1c6 S	105	@classmethod
	106	def _extract_url(cls, webpage):
	107	mobj = re.search(
	108	r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player\|scrubber_custom8)\.swf\?file=v.+?\1)',
	109	webpage)
	110	if mobj is not None:
	111	return mobj.group('url')
	112
	113	mobj = re.search(
	114	r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
	115	<div\s+class="video_image">[^<]+</div>\s*
	116	<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
	117	if mobj is not None:
	118	return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
	119
5270d8cb	120	def _search_meta(self, name, html, display_name=None):
	121	if display_name is None:
	122	display_name = name
	123	return self._html_search_regex(
	124	r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
	125	html, display_name, fatal=False)
aaebed13 PH	126	return self._html_search_meta(name, html, display_name)
aaebed13 PH	127
5270d8cb	128	def _real_extract(self, url):
598c218f S	129	video_id = self._match_id(url)
	130
	131	video_form = {
	132	'ticket': video_id,
	133	'video_url': '1',
	134	'frame_url': '1',
	135	'devid': 'LoadupFlashPlayer',
	136	'getvideoinfo': '1',
	137	}
5270d8cb	138
598c218f S	139	request = compat_urllib_request.Request(
	140	'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
	141	request.add_header('Content-Type', 'application/x-www-form-urlencoded')
ffe8f62d	142
598c218f	143	video = self._download_json(request, video_id, 'Downloading video JSON')
ffe8f62d	144
598c218f S	145	if video.get('_moderate_no') or not video.get('moderated'):
598c218f S	146	raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
ffe8f62d	147
598c218f S	148	if video.get('error'):
	149	raise ExtractorError('Video %s does not exist' % video_id, expected=True)
	150
	151	video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
	152	title = video['title']
	153	thumbnail = video['_imgURL']
	154	upload_date = unified_strdate(video['added'])
	155	uploader = video['userNick']
	156	uploader_id = video['userLogin']
	157	duration = int_or_none(video['duration'])
ffe8f62d	158
5270d8cb	159	# Video JSON does not provide enough meta data
5270d8cb	160	# We will extract some from the video web page instead
598c218f S	161	webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
598c218f S	162	webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
7dbf5ae5 PH	163
	164	# Warning if video is unavailable
	165	warning = self._html_search_regex(
598c218f	166	r'<div class="videoUnModer">(.*?)</div>', webpage,
ffe8f62d	167	'warning message', default=None)
7dbf5ae5 PH	168	if warning is not None:
7dbf5ae5 PH	169	self._downloader.report_warning(
ffe8f62d	170	'Video %s may not be available; smotri said: %s ' %
7dbf5ae5 PH	171	(video_id, warning))
7dbf5ae5 PH	172
5270d8cb	173	# Adult content
598c218f	174	if re.search('EroConfirmText">', webpage) is not None:
5270d8cb	175	self.report_age_confirmation()
5270d8cb	176	confirm_string = self._html_search_regex(
aaebed13	177	r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
598c218f S	178	webpage, 'confirm string')
	179	confirm_url = webpage_url + '&confirm=%s' % confirm_string
	180	webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
5270d8cb	181	adult_content = True
	182	else:
	183	adult_content = False
ffe8f62d	184
598c218f	185	view_count = self._html_search_regex(
ffe8f62d	186	'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
598c218f	187	webpage, 'view count', fatal=False, flags=re.MULTILINE\|re.DOTALL)
ffe8f62d	188
5270d8cb	189	return {
	190	'id': video_id,
	191	'url': video_url,
598c218f S	192	'title': title,
	193	'thumbnail': thumbnail,
	194	'uploader': uploader,
	195	'upload_date': upload_date,
	196	'uploader_id': uploader_id,
	197	'duration': duration,
	198	'view_count': int_or_none(view_count),
5270d8cb	199	'age_limit': 18 if adult_content else 0,
5270d8cb	200	}
5270d8cb	201
aaebed13	202
5270d8cb	203	class SmotriCommunityIE(InfoExtractor):
ffe8f62d	204	IE_DESC = 'Smotri.com community videos'
ffe8f62d	205	IE_NAME = 'smotri:community'
aaebed13	206	_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
22a6f150 PH	207	_TEST = {
	208	'url': 'http://smotri.com/community/video/kommuna',
	209	'info_dict': {
	210	'id': 'kommuna',
	211	'title': 'КПРФ',
	212	},
	213	'playlist_mincount': 4,
	214	}
5f6a1245	215
5270d8cb	216	def _real_extract(self, url):
	217	mobj = re.match(self._VALID_URL, url)
	218	community_id = mobj.group('communityid')
aaebed13	219
5270d8cb	220	url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
ffe8f62d	221	rss = self._download_xml(url, community_id, 'Downloading community RSS')
aaebed13	222
5270d8cb	223	entries = [self.url_result(video_url.text, 'Smotri')
5270d8cb	224	for video_url in rss.findall('./channel/item/link')]
aaebed13 PH	225
aaebed13 PH	226	description_text = rss.find('./channel/description').text
5270d8cb	227	community_title = self._html_search_regex(
ffe8f62d	228	'^Видео сообщества "([^"]+)"$', description_text, 'community title')
5270d8cb	229
5270d8cb	230	return self.playlist_result(entries, community_id, community_title)
aaebed13 PH	231
aaebed13 PH	232
5270d8cb	233	class SmotriUserIE(InfoExtractor):
ffe8f62d	234	IE_DESC = 'Smotri.com user videos'
ffe8f62d	235	IE_NAME = 'smotri:user'
aaebed13	236	_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
22a6f150 PH	237	_TESTS = [{
	238	'url': 'http://smotri.com/user/inspector',
	239	'info_dict': {
	240	'id': 'inspector',
	241	'title': 'Inspector',
	242	},
	243	'playlist_mincount': 9,
	244	}]
aaebed13	245
5270d8cb	246	def _real_extract(self, url):
aaebed13	247	mobj = re.match(self._VALID_URL, url)
5270d8cb	248	user_id = mobj.group('userid')
aaebed13	249
5270d8cb	250	url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
ffe8f62d	251	rss = self._download_xml(url, user_id, 'Downloading user RSS')
aaebed13	252
5270d8cb	253	entries = [self.url_result(video_url.text, 'Smotri')
5270d8cb	254	for video_url in rss.findall('./channel/item/link')]
aaebed13 PH	255
aaebed13 PH	256	description_text = rss.find('./channel/description').text
5270d8cb	257	user_nickname = self._html_search_regex(
ffe8f62d	258	'^Видео режиссера (.*)$', description_text,
ffe8f62d	259	'user nickname')
5270d8cb	260
5270d8cb	261	return self.playlist_result(entries, user_id, user_nickname)
55f6597c	262
	263
	264	class SmotriBroadcastIE(InfoExtractor):
ffe8f62d	265	IE_DESC = 'Smotri.com broadcasts'
ffe8f62d	266	IE_NAME = 'smotri:broadcast'
55f6597c	267	_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
	268
	269	def _real_extract(self, url):
	270	mobj = re.match(self._VALID_URL, url)
	271	broadcast_id = mobj.group('broadcastid')
	272
	273	broadcast_url = 'http://' + mobj.group('url')
ffe8f62d	274	broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
55f6597c	275
ffe8f62d	276	if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
ffe8f62d	277	raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
55f6597c	278
55f6597c	279	# Adult content
ffe8f62d	280	if re.search('EroConfirmText">', broadcast_page) is not None:
55f6597c	281
	282	(username, password) = self._get_login_info()
	283	if username is None:
ffe8f62d	284	raise ExtractorError('Erotic broadcasts allowed only for registered users, '
	285	'use --username and --password options to provide account credentials.', expected=True)
	286
	287	login_form = {
	288	'login-hint53': '1',
	289	'confirm_erotic': '1',
	290	'login': username,
	291	'password': password,
55f6597c	292	}
ffe8f62d	293
ffe8f62d	294	request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
55f6597c	295	request.add_header('Content-Type', 'application/x-www-form-urlencoded')
ffe8f62d	296	broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
55f6597c	297
ffe8f62d	298	if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
ffe8f62d	299	raise ExtractorError('Unable to log in: bad username or password', expected=True)
55f6597c	300
	301	adult_content = True
	302	else:
	303	adult_content = False
	304
	305	ticket = self._html_search_regex(
ffe8f62d	306	'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
ffe8f62d	307	broadcast_page, 'broadcast ticket')
55f6597c	308
	309	url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
	310
	311	broadcast_password = self._downloader.params.get('videopassword', None)
	312	if broadcast_password:
	313	url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
	314
ffe8f62d	315	broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
55f6597c	316
	317	try:
	318	broadcast_json = json.loads(broadcast_json_page)
	319
	320	protected_broadcast = broadcast_json['_pass_protected'] == 1
	321	if protected_broadcast and not broadcast_password:
ffe8f62d	322	raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
55f6597c	323
	324	broadcast_offline = broadcast_json['is_play'] == 0
	325	if broadcast_offline:
ffe8f62d	326	raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
55f6597c	327
	328	rtmp_url = broadcast_json['_server']
	329	if not rtmp_url.startswith('rtmp://'):
ffe8f62d	330	raise ExtractorError('Unexpected broadcast rtmp URL')
55f6597c	331
	332	broadcast_playpath = broadcast_json['_streamName']
	333	broadcast_thumbnail = broadcast_json['_imgURL']
	334	broadcast_title = broadcast_json['title']
	335	broadcast_description = broadcast_json['description']
	336	broadcaster_nick = broadcast_json['nick']
	337	broadcaster_login = broadcast_json['login']
	338	rtmp_conn = 'S:%s' % uuid.uuid4().hex
	339	except KeyError:
	340	if protected_broadcast:
ffe8f62d	341	raise ExtractorError('Bad broadcast password', expected=True)
ffe8f62d	342	raise ExtractorError('Unexpected broadcast JSON')
55f6597c	343
	344	return {
	345	'id': broadcast_id,
	346	'url': rtmp_url,
	347	'title': broadcast_title,
	348	'thumbnail': broadcast_thumbnail,
	349	'description': broadcast_description,
	350	'uploader': broadcaster_nick,
	351	'uploader_id': broadcaster_login,
	352	'age_limit': 18 if adult_content else 0,
	353	'ext': 'flv',
	354	'play_path': broadcast_playpath,
	355	'rtmp_live': True,
	356	'rtmp_conn': rtmp_conn
693b8b2d	357	}