[yt-dlp.git] / youtube_dl / extractor / smotri.py

# encoding: utf-8
from __future__ import unicode_literals

import re
import json
import hashlib
import uuid

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    unified_strdate,
)


class SmotriIE(InfoExtractor):
    """Extractor for single smotri.com videos.

    Accepts both the regular watch page (``smotri.com/video/view/?id=...``)
    and the SWF player URLs served from ``pics.smotri.com``.
    """

    IE_DESC = 'Smotri.com'
    IE_NAME = 'smotri'
    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
    _NETRC_MACHINE = 'smotri'

    _TESTS = [
        # real video id 2610366
        {
            'url': 'http://smotri.com/video/view/?id=v261036632ab',
            'md5': '2a7b08249e6f5636557579c368040eb9',
            'info_dict': {
                'id': 'v261036632ab',
                'ext': 'mp4',
                'title': 'катастрофа с камер видеонаблюдения',
                'uploader': 'rbc2008',
                'uploader_id': 'rbc08',
                'upload_date': '20131118',
                'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
            },
        },
        # real video id 57591
        {
            'url': 'http://smotri.com/video/view/?id=v57591cb20',
            'md5': '830266dfc21f077eac5afd1883091bcd',
            'info_dict': {
                'id': 'v57591cb20',
                'ext': 'flv',
                'title': 'test',
                'uploader': 'Support Photofile@photofile',
                'uploader_id': 'support-photofile',
                'upload_date': '20070704',
                'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
            },
        },
        # video-password
        {
            'url': 'http://smotri.com/video/view/?id=v1390466a13c',
            'md5': 'f6331cef33cad65a0815ee482a54440b',
            'info_dict': {
                'id': 'v1390466a13c',
                'ext': 'mp4',
                'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                'uploader': 'timoxa40',
                'uploader_id': 'timoxa40',
                'upload_date': '20100404',
                'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
            },
            'params': {
                'videopassword': 'qwerty',
            },
            'skip': 'Video is not approved by moderator',
        },
        # age limit + video-password
        {
            'url': 'http://smotri.com/video/view/?id=v15408898bcf',
            'md5': '91e909c9f0521adf5ee86fbe073aad70',
            'info_dict': {
                'id': 'v15408898bcf',
                'ext': 'flv',
                'title': 'этот ролик не покажут по ТВ',
                'uploader': 'zzxxx',
                'uploader_id': 'ueggb',
                'upload_date': '20101001',
                'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
                'age_limit': 18,
            },
            'params': {
                'videopassword': '333'
            },
            'skip': 'Video is not approved by moderator',
        },
        # swf player
        {
            'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
            'md5': '4d47034979d9390d14acdf59c4935bc2',
            'info_dict': {
                'id': 'v9188090500',
                'ext': 'mp4',
                'title': 'Shakira - Don\'t Bother',
                'uploader': 'HannahL',
                'uploader_id': 'lisaha95',
                'upload_date': '20090331',
                'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
            },
        },
    ]

    @classmethod
    def _extract_url(cls, webpage):
        """Locate an embedded smotri video inside *webpage*.

        Returns the URL of the SWF player or of the watch page, or None when
        neither of the two known embed patterns is present.
        """
        # Flash <embed>. Any attributes may precede src, and the closing quote
        # (\1) is kept outside the "url" group so it is not part of the result.
        mobj = re.search(
            r'<embed\b[^>]*?\ssrc=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?)\1',
            webpage)
        if mobj is not None:
            return mobj.group('url')

        # Block of <div>s carrying the download URL, the image and the video id
        mobj = re.search(
            r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
                    <div\s+class="video_image">[^<]+</div>\s*
                    <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
        if mobj is not None:
            return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')

        return None

    def _search_meta(self, name, html, display_name=None):
        """Return the content of ``<meta itemprop="NAME" content="..." />``.

        *display_name* is the label handed to the regex helper and defaults to
        *name*. The lookup is performed with ``fatal=False``.
        """
        if display_name is None:
            display_name = name
        return self._html_search_regex(
            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
            html, display_name, fatal=False)

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Raises ExtractorError when the service reports an error, when the
        video has not passed moderation, or when no media URL is returned.
        """
        video_id = self._match_id(url)

        video_form = {
            'ticket': video_id,
            'video_url': '1',
            'frame_url': '1',
            'devid': 'LoadupFlashPlayer',
            'getvideoinfo': '1',
        }

        # POST data has to be bytes on Python 3; the urlencoded form is ASCII
        request = compat_urllib_request.Request(
            'http://smotri.com/video/view/url/bot/',
            compat_urllib_parse.urlencode(video_form).encode('utf-8'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        video = self._download_json(request, video_id, 'Downloading video JSON')

        # Look at the explicit error flag first: a failed lookup presumably
        # carries no 'moderated' field and would otherwise be misreported as
        # a moderation problem.
        if video.get('error'):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        if video.get('_moderate_no') or not video.get('moderated'):
            raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)

        video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
        if not video_url:
            raise ExtractorError('Unable to extract video URL for %s' % video_id)

        title = video['title']
        # Everything below is optional metadata; a missing key must not abort
        thumbnail = video.get('_imgURL')
        added = video.get('added')
        upload_date = unified_strdate(added) if added else None
        uploader = video.get('userNick')
        uploader_id = video.get('userLogin')
        duration = int_or_none(video.get('duration'))

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
        webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
        webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')

        # Warning if video is unavailable
        warning = self._html_search_regex(
            r'<div class="videoUnModer">(.*?)</div>', webpage,
            'warning message', default=None)
        if warning is not None:
            self._downloader.report_warning(
                'Video %s may not be available; smotri said: %s ' %
                (video_id, warning))

        # Adult content: follow the confirmation link to get the real page
        adult_content = 'EroConfirmText">' in webpage
        if adult_content:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
                webpage, 'confirm string')
            confirm_url = webpage_url + '&confirm=%s' % confirm_string
            webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')

        view_count = self._html_search_regex(
            'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
            webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'age_limit': 18 if adult_content else 0,
        }


class SmotriCommunityIE(InfoExtractor):
    """Playlist extractor for the videos of a smotri.com community."""

    IE_DESC = 'Smotri.com community videos'
    IE_NAME = 'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
    _TEST = {
        'url': 'http://smotri.com/community/video/kommuna',
        'info_dict': {
            'id': 'kommuna',
            'title': 'КПРФ',
        },
        'playlist_mincount': 4,
    }

    def _real_extract(self, url):
        """Return a playlist built from the community's RSS export."""
        # The community slug from the URL is also used as the playlist id
        community_id = re.match(self._VALID_URL, url).group('communityid')

        feed = self._download_xml(
            'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
            community_id, 'Downloading community RSS')

        # Each RSS item links to a watch page handled by the 'Smotri' extractor
        entries = []
        for link in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link.text, 'Smotri'))

        # The channel description wraps the community title in double quotes
        channel_description = feed.find('./channel/description').text
        community_title = self._html_search_regex(
            '^Видео сообщества "([^"]+)"$', channel_description, 'community title')

        return self.playlist_result(entries, community_id, community_title)


class SmotriUserIE(InfoExtractor):
    """Playlist extractor for the videos uploaded by a smotri.com user."""

    IE_DESC = 'Smotri.com user videos'
    IE_NAME = 'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
    _TESTS = [{
        'url': 'http://smotri.com/user/inspector',
        'info_dict': {
            'id': 'inspector',
            'title': 'Inspector',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Return a playlist built from the user's RSS export."""
        # The login taken from the URL is also used as the playlist id
        user_id = re.match(self._VALID_URL, url).group('userid')

        feed = self._download_xml(
            'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
            user_id, 'Downloading user RSS')

        # Each RSS item links to a watch page handled by the 'Smotri' extractor
        entries = []
        for link in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link.text, 'Smotri'))

        # The nickname is whatever follows the fixed prefix in the description
        channel_description = feed.find('./channel/description').text
        user_nickname = self._html_search_regex(
            '^Видео режиссера (.*)$', channel_description,
            'user nickname')

        return self.playlist_result(entries, user_id, user_nickname)


class SmotriBroadcastIE(InfoExtractor):
    """Extractor for live broadcasts at ``smotri.com/live/<login>``."""

    IE_DESC = 'Smotri.com broadcasts'
    IE_NAME = 'smotri:broadcast'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'

    def _real_extract(self, url):
        """Build the RTMP info dict for the broadcast addressed by *url*.

        Raises ExtractorError when the broadcast does not exist, requires
        credentials or a password that were not supplied (or were rejected),
        is offline, or when the returned JSON lacks an expected field.
        """
        mobj = re.match(self._VALID_URL, url)
        broadcast_id = mobj.group('broadcastid')

        broadcast_url = 'http://' + mobj.group('url')
        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')

        # The id comes straight from the URL, so escape it before it is
        # interpolated into a regular expression
        if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % re.escape(broadcast_id), broadcast_page) is not None:
            raise ExtractorError(
                'Broadcast %s does not exist' % broadcast_id, expected=True)

        # Adult content
        adult_content = 'EroConfirmText">' in broadcast_page
        if adult_content:
            (username, password) = self._get_login_info()
            if username is None:
                raise ExtractorError(
                    'Erotic broadcasts allowed only for registered users, '
                    'use --username and --password options to provide account credentials.',
                    expected=True)

            login_form = {
                'login-hint53': '1',
                'confirm_erotic': '1',
                'login': username,
                'password': password,
            }

            # POST data has to be bytes on Python 3; the urlencoded form is ASCII
            request = compat_urllib_request.Request(
                broadcast_url + '/?no_redirect=1',
                compat_urllib_parse.urlencode(login_form).encode('utf-8'))
            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
            broadcast_page = self._download_webpage(
                request, broadcast_id, 'Logging in and confirming age')

            if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                raise ExtractorError('Unable to log in: bad username or password', expected=True)

        ticket = self._html_search_regex(
            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
            broadcast_page, 'broadcast ticket')

        json_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket

        # The broadcast password is sent as the hex MD5 digest of its UTF-8 bytes
        broadcast_password = self._downloader.params.get('videopassword', None)
        if broadcast_password:
            json_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()

        broadcast_json = self._download_json(
            json_url, broadcast_id, 'Downloading broadcast JSON')

        # Bound before the try block so the KeyError handler can always read
        # it, even when '_pass_protected' itself is the missing key
        protected_broadcast = False
        try:
            protected_broadcast = broadcast_json['_pass_protected'] == 1
            if protected_broadcast and not broadcast_password:
                raise ExtractorError(
                    'This broadcast is protected by a password, use the --video-password option',
                    expected=True)

            broadcast_offline = broadcast_json['is_play'] == 0
            if broadcast_offline:
                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

            rtmp_url = broadcast_json['_server']
            # Non-greedy so that an optional trailing slash is really dropped
            # from the application path
            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+?)/?$', rtmp_url)
            if not mobj:
                raise ExtractorError('Unexpected broadcast rtmp URL')

            broadcast_playpath = broadcast_json['_streamName']
            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
            broadcast_thumbnail = broadcast_json['_imgURL']
            broadcast_title = self._live_title(broadcast_json['title'])
            broadcast_description = broadcast_json['description']
            broadcaster_nick = broadcast_json['nick']
            broadcaster_login = broadcast_json['login']
            rtmp_conn = 'S:%s' % uuid.uuid4().hex
        except KeyError:
            # A protected broadcast with fields missing is reported as a
            # rejected password; anything else is an unexpected response
            if protected_broadcast:
                raise ExtractorError('Bad broadcast password', expected=True)
            raise ExtractorError('Unexpected broadcast JSON')

        return {
            'id': broadcast_id,
            'url': rtmp_url,
            'title': broadcast_title,
            'thumbnail': broadcast_thumbnail,
            'description': broadcast_description,
            'uploader': broadcaster_nick,
            'uploader_id': broadcaster_login,
            'age_limit': 18 if adult_content else 0,
            'ext': 'flv',
            'play_path': broadcast_playpath,
            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
            'app': broadcast_app,
            'rtmp_live': True,
            'rtmp_conn': rtmp_conn,
            'is_live': True,
        }
Commit	Line	Data
5270d8cb	1	# encoding: utf-8
ffe8f62d	2	from __future__ import unicode_literals
5270d8cb	3
	4	import re
	5	import json
	6	import hashlib
55f6597c	7	import uuid
5270d8cb	8
5270d8cb	9	from .common import InfoExtractor
1cc79574	10	from ..compat import (
55f6597c	11	compat_urllib_parse,
55f6597c	12	compat_urllib_request,
1cc79574 PH	13	)
1cc79574 PH	14	from ..utils import (
693b8b2d	15	ExtractorError,
cb3ac1c6	16	int_or_none,
598c218f	17	unified_strdate,
5270d8cb	18	)
	19
	20
	21	class SmotriIE(InfoExtractor):
ffe8f62d	22	IE_DESC = 'Smotri.com'
ffe8f62d	23	IE_NAME = 'smotri'
598c218f	24	_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=\|pics\.smotri\.com/(?:player\|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
541cb26c	25	_NETRC_MACHINE = 'smotri'
aaebed13	26
5270d8cb	27	_TESTS = [
	28	# real video id 2610366
	29	{
ffe8f62d	30	'url': 'http://smotri.com/video/view/?id=v261036632ab',
ffe8f62d	31	'md5': '2a7b08249e6f5636557579c368040eb9',
ffe8f62d	32	'info_dict': {
cb3ac1c6 S	33	'id': 'v261036632ab',
cb3ac1c6 S	34	'ext': 'mp4',
ffe8f62d	35	'title': 'катастрофа с камер видеонаблюдения',
	36	'uploader': 'rbc2008',
	37	'uploader_id': 'rbc08',
	38	'upload_date': '20131118',
ffe8f62d	39	'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
5270d8cb	40	},
	41	},
	42	# real video id 57591
	43	{
ffe8f62d	44	'url': 'http://smotri.com/video/view/?id=v57591cb20',
ffe8f62d	45	'md5': '830266dfc21f077eac5afd1883091bcd',
ffe8f62d	46	'info_dict': {
cb3ac1c6 S	47	'id': 'v57591cb20',
cb3ac1c6 S	48	'ext': 'flv',
ffe8f62d	49	'title': 'test',
	50	'uploader': 'Support Photofile@photofile',
	51	'uploader_id': 'support-photofile',
	52	'upload_date': '20070704',
ffe8f62d	53	'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
aaebed13	54	},
5270d8cb	55	},
	56	# video-password
	57	{
ffe8f62d	58	'url': 'http://smotri.com/video/view/?id=v1390466a13c',
ffe8f62d	59	'md5': 'f6331cef33cad65a0815ee482a54440b',
ffe8f62d	60	'info_dict': {
cb3ac1c6 S	61	'id': 'v1390466a13c',
cb3ac1c6 S	62	'ext': 'mp4',
ffe8f62d	63	'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
	64	'uploader': 'timoxa40',
	65	'uploader_id': 'timoxa40',
	66	'upload_date': '20100404',
	67	'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
5270d8cb	68	},
ffe8f62d	69	'params': {
ffe8f62d	70	'videopassword': 'qwerty',
5270d8cb	71	},
544dec62	72	'skip': 'Video is not approved by moderator',
5270d8cb	73	},
	74	# age limit + video-password
	75	{
ffe8f62d	76	'url': 'http://smotri.com/video/view/?id=v15408898bcf',
ffe8f62d	77	'md5': '91e909c9f0521adf5ee86fbe073aad70',
ffe8f62d	78	'info_dict': {
cb3ac1c6 S	79	'id': 'v15408898bcf',
cb3ac1c6 S	80	'ext': 'flv',
ffe8f62d	81	'title': 'этот ролик не покажут по ТВ',
	82	'uploader': 'zzxxx',
	83	'uploader_id': 'ueggb',
	84	'upload_date': '20101001',
	85	'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
	86	'age_limit': 18,
aaebed13	87	},
ffe8f62d	88	'params': {
ffe8f62d	89	'videopassword': '333'
544dec62 S	90	},
544dec62 S	91	'skip': 'Video is not approved by moderator',
cb3ac1c6 S	92	},
	93	# swf player
	94	{
	95	'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
	96	'md5': '4d47034979d9390d14acdf59c4935bc2',
	97	'info_dict': {
	98	'id': 'v9188090500',
	99	'ext': 'mp4',
	100	'title': 'Shakira - Don\'t Bother',
	101	'uploader': 'HannahL',
	102	'uploader_id': 'lisaha95',
	103	'upload_date': '20090331',
cb3ac1c6 S	104	'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
	105	},
	106	},
5270d8cb	107	]
ffe8f62d	108
cb3ac1c6 S	109	@classmethod
	110	def _extract_url(cls, webpage):
	111	mobj = re.search(
	112	r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player\|scrubber_custom8)\.swf\?file=v.+?\1)',
	113	webpage)
	114	if mobj is not None:
	115	return mobj.group('url')
	116
	117	mobj = re.search(
	118	r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
	119	<div\s+class="video_image">[^<]+</div>\s*
	120	<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
	121	if mobj is not None:
	122	return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
	123
5270d8cb	124	def _search_meta(self, name, html, display_name=None):
	125	if display_name is None:
	126	display_name = name
	127	return self._html_search_regex(
	128	r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
	129	html, display_name, fatal=False)
aaebed13 PH	130	return self._html_search_meta(name, html, display_name)
aaebed13 PH	131
5270d8cb	132	def _real_extract(self, url):
598c218f S	133	video_id = self._match_id(url)
	134
	135	video_form = {
	136	'ticket': video_id,
	137	'video_url': '1',
	138	'frame_url': '1',
	139	'devid': 'LoadupFlashPlayer',
	140	'getvideoinfo': '1',
	141	}
5270d8cb	142
598c218f S	143	request = compat_urllib_request.Request(
	144	'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
	145	request.add_header('Content-Type', 'application/x-www-form-urlencoded')
ffe8f62d	146
598c218f	147	video = self._download_json(request, video_id, 'Downloading video JSON')
ffe8f62d	148
598c218f S	149	if video.get('_moderate_no') or not video.get('moderated'):
598c218f S	150	raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
ffe8f62d	151
598c218f S	152	if video.get('error'):
	153	raise ExtractorError('Video %s does not exist' % video_id, expected=True)
	154
	155	video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
	156	title = video['title']
	157	thumbnail = video['_imgURL']
	158	upload_date = unified_strdate(video['added'])
	159	uploader = video['userNick']
	160	uploader_id = video['userLogin']
	161	duration = int_or_none(video['duration'])
ffe8f62d	162
5270d8cb	163	# Video JSON does not provide enough meta data
5270d8cb	164	# We will extract some from the video web page instead
598c218f S	165	webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
598c218f S	166	webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
7dbf5ae5 PH	167
	168	# Warning if video is unavailable
	169	warning = self._html_search_regex(
598c218f	170	r'<div class="videoUnModer">(.*?)</div>', webpage,
ffe8f62d	171	'warning message', default=None)
7dbf5ae5 PH	172	if warning is not None:
7dbf5ae5 PH	173	self._downloader.report_warning(
ffe8f62d	174	'Video %s may not be available; smotri said: %s ' %
7dbf5ae5 PH	175	(video_id, warning))
7dbf5ae5 PH	176
5270d8cb	177	# Adult content
598c218f	178	if re.search('EroConfirmText">', webpage) is not None:
5270d8cb	179	self.report_age_confirmation()
5270d8cb	180	confirm_string = self._html_search_regex(
aaebed13	181	r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
598c218f S	182	webpage, 'confirm string')
	183	confirm_url = webpage_url + '&confirm=%s' % confirm_string
	184	webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
5270d8cb	185	adult_content = True
	186	else:
	187	adult_content = False
ffe8f62d	188
598c218f	189	view_count = self._html_search_regex(
ffe8f62d	190	'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
2514d263	191	webpage, 'view count', fatal=False, flags=re.MULTILINE \| re.DOTALL)
ffe8f62d	192
5270d8cb	193	return {
	194	'id': video_id,
	195	'url': video_url,
598c218f S	196	'title': title,
	197	'thumbnail': thumbnail,
	198	'uploader': uploader,
	199	'upload_date': upload_date,
	200	'uploader_id': uploader_id,
	201	'duration': duration,
	202	'view_count': int_or_none(view_count),
5270d8cb	203	'age_limit': 18 if adult_content else 0,
5270d8cb	204	}
5270d8cb	205
aaebed13	206
5270d8cb	207	class SmotriCommunityIE(InfoExtractor):
ffe8f62d	208	IE_DESC = 'Smotri.com community videos'
ffe8f62d	209	IE_NAME = 'smotri:community'
aaebed13	210	_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
22a6f150 PH	211	_TEST = {
	212	'url': 'http://smotri.com/community/video/kommuna',
	213	'info_dict': {
	214	'id': 'kommuna',
	215	'title': 'КПРФ',
	216	},
	217	'playlist_mincount': 4,
	218	}
5f6a1245	219
5270d8cb	220	def _real_extract(self, url):
	221	mobj = re.match(self._VALID_URL, url)
	222	community_id = mobj.group('communityid')
aaebed13	223
5270d8cb	224	url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
ffe8f62d	225	rss = self._download_xml(url, community_id, 'Downloading community RSS')
aaebed13	226
5270d8cb	227	entries = [self.url_result(video_url.text, 'Smotri')
5270d8cb	228	for video_url in rss.findall('./channel/item/link')]
aaebed13 PH	229
aaebed13 PH	230	description_text = rss.find('./channel/description').text
5270d8cb	231	community_title = self._html_search_regex(
ffe8f62d	232	'^Видео сообщества "([^"]+)"$', description_text, 'community title')
5270d8cb	233
5270d8cb	234	return self.playlist_result(entries, community_id, community_title)
aaebed13 PH	235
aaebed13 PH	236
5270d8cb	237	class SmotriUserIE(InfoExtractor):
ffe8f62d	238	IE_DESC = 'Smotri.com user videos'
ffe8f62d	239	IE_NAME = 'smotri:user'
aaebed13	240	_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
22a6f150 PH	241	_TESTS = [{
	242	'url': 'http://smotri.com/user/inspector',
	243	'info_dict': {
	244	'id': 'inspector',
	245	'title': 'Inspector',
	246	},
	247	'playlist_mincount': 9,
	248	}]
aaebed13	249
5270d8cb	250	def _real_extract(self, url):
aaebed13	251	mobj = re.match(self._VALID_URL, url)
5270d8cb	252	user_id = mobj.group('userid')
aaebed13	253
5270d8cb	254	url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
ffe8f62d	255	rss = self._download_xml(url, user_id, 'Downloading user RSS')
aaebed13	256
5270d8cb	257	entries = [self.url_result(video_url.text, 'Smotri')
5270d8cb	258	for video_url in rss.findall('./channel/item/link')]
aaebed13 PH	259
aaebed13 PH	260	description_text = rss.find('./channel/description').text
5270d8cb	261	user_nickname = self._html_search_regex(
ffe8f62d	262	'^Видео режиссера (.*)$', description_text,
ffe8f62d	263	'user nickname')
5270d8cb	264
5270d8cb	265	return self.playlist_result(entries, user_id, user_nickname)
55f6597c	266
	267
	268	class SmotriBroadcastIE(InfoExtractor):
ffe8f62d	269	IE_DESC = 'Smotri.com broadcasts'
ffe8f62d	270	IE_NAME = 'smotri:broadcast'
55f6597c	271	_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
	272
	273	def _real_extract(self, url):
	274	mobj = re.match(self._VALID_URL, url)
	275	broadcast_id = mobj.group('broadcastid')
	276
	277	broadcast_url = 'http://' + mobj.group('url')
ffe8f62d	278	broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
55f6597c	279
ffe8f62d	280	if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
a81bbebf S	281	raise ExtractorError(
a81bbebf S	282	'Broadcast %s does not exist' % broadcast_id, expected=True)
55f6597c	283
55f6597c	284	# Adult content
ffe8f62d	285	if re.search('EroConfirmText">', broadcast_page) is not None:
55f6597c	286
	287	(username, password) = self._get_login_info()
	288	if username is None:
a81bbebf S	289	raise ExtractorError(
	290	'Erotic broadcasts allowed only for registered users, '
	291	'use --username and --password options to provide account credentials.',
	292	expected=True)
ffe8f62d	293
	294	login_form = {
	295	'login-hint53': '1',
	296	'confirm_erotic': '1',
	297	'login': username,
	298	'password': password,
55f6597c	299	}
ffe8f62d	300
a81bbebf S	301	request = compat_urllib_request.Request(
a81bbebf S	302	broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
55f6597c	303	request.add_header('Content-Type', 'application/x-www-form-urlencoded')
a81bbebf S	304	broadcast_page = self._download_webpage(
a81bbebf S	305	request, broadcast_id, 'Logging in and confirming age')
55f6597c	306
ffe8f62d	307	if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
ffe8f62d	308	raise ExtractorError('Unable to log in: bad username or password', expected=True)
55f6597c	309
	310	adult_content = True
	311	else:
	312	adult_content = False
	313
	314	ticket = self._html_search_regex(
a81bbebf	315	r"window\.broadcast_control\.addFlashVar\('file'\s,\s'([^']+)'\)",
ffe8f62d	316	broadcast_page, 'broadcast ticket')
55f6597c	317
	318	url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
	319
	320	broadcast_password = self._downloader.params.get('videopassword', None)
	321	if broadcast_password:
	322	url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
	323
a81bbebf S	324	broadcast_json_page = self._download_webpage(
a81bbebf S	325	url, broadcast_id, 'Downloading broadcast JSON')
55f6597c	326
	327	try:
	328	broadcast_json = json.loads(broadcast_json_page)
	329
	330	protected_broadcast = broadcast_json['_pass_protected'] == 1
	331	if protected_broadcast and not broadcast_password:
a81bbebf S	332	raise ExtractorError(
	333	'This broadcast is protected by a password, use the --video-password option',
	334	expected=True)
55f6597c	335
	336	broadcast_offline = broadcast_json['is_play'] == 0
	337	if broadcast_offline:
ffe8f62d	338	raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
55f6597c	339
55f6597c	340	rtmp_url = broadcast_json['_server']
a81bbebf S	341	mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
a81bbebf S	342	if not mobj:
ffe8f62d	343	raise ExtractorError('Unexpected broadcast rtmp URL')
55f6597c	344
55f6597c	345	broadcast_playpath = broadcast_json['_streamName']
a81bbebf	346	broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
55f6597c	347	broadcast_thumbnail = broadcast_json['_imgURL']
a81bbebf	348	broadcast_title = self._live_title(broadcast_json['title'])
55f6597c	349	broadcast_description = broadcast_json['description']
	350	broadcaster_nick = broadcast_json['nick']
	351	broadcaster_login = broadcast_json['login']
	352	rtmp_conn = 'S:%s' % uuid.uuid4().hex
	353	except KeyError:
	354	if protected_broadcast:
ffe8f62d	355	raise ExtractorError('Bad broadcast password', expected=True)
ffe8f62d	356	raise ExtractorError('Unexpected broadcast JSON')
55f6597c	357
	358	return {
	359	'id': broadcast_id,
	360	'url': rtmp_url,
	361	'title': broadcast_title,
	362	'thumbnail': broadcast_thumbnail,
	363	'description': broadcast_description,
	364	'uploader': broadcaster_nick,
	365	'uploader_id': broadcaster_login,
	366	'age_limit': 18 if adult_content else 0,
	367	'ext': 'flv',
	368	'play_path': broadcast_playpath,
a81bbebf S	369	'player_url': 'http://pics.smotri.com/broadcast_play.swf',
a81bbebf S	370	'app': broadcast_app,
55f6597c	371	'rtmp_live': True,
a81bbebf S	372	'rtmp_conn': rtmp_conn,
a81bbebf S	373	'is_live': True,
693b8b2d	374	}