[yt-dlp.git] / yt_dlp / extractor / scte.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    decode_packed_codes,
    ExtractorError,
    urlencode_postdata,
)


class SCTEBaseIE(InfoExtractor):
    """Common base for the SCTE extractors; provides the sign-in flow."""

    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    _NETRC_MACHINE = 'scte'

    def _perform_login(self, username, password):
        """Sign in at ``_LOGIN_URL`` with the given credentials.

        Returns without posting anything when the sign-in page already
        shows a logged-in session. Raises ExtractorError when the response
        to the credentials POST contains neither the ``|pageRedirect|``
        marker nor a logged-in marker.
        """
        # Either of these patterns in a page is taken to mean "logged in".
        session_markers = (r'class=["\']welcome\b', r'>Sign Out<')

        def shows_session(html):
            for marker in session_markers:
                if re.search(marker, html):
                    return True
            return False

        signin_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')

        # already logged in
        if shows_session(signin_page):
            return

        # Start from the page's hidden inputs, then add the credential
        # fields on top of them.
        form = self._hidden_inputs(signin_page)
        form['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName'] = username
        form['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword'] = password
        form['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe'] = 'on'

        reply = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form))

        # Success: the reply either announces a redirect or already shows
        # the logged-in markers.
        if '|pageRedirect|' in reply or shows_session(reply):
            return

        # Prefer the error text shown on the page, if any could be found.
        message = self._html_search_regex(
            r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
            reply, 'error message', default=None)
        if not message:
            raise ExtractorError('Unable to log in')
        raise ExtractorError('Unable to login: %s' % message, expected=True)


class SCTEIE(SCTEBaseIE):
    """Extractor for a single SCORM module page on learning.scte.org.

    The result is a playlist with one entry per ``.mp4`` asset listed in
    the module's ``mobile/data.js``.
    """

    # The "?" that starts the query string is escaped so that it is matched
    # literally instead of making the preceding "p" optional.
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        """Build a playlist of the mp4 assets of the SCORM module at *url*.

        Downloads the module page, derives the content base URL from the
        ``context-<digits>`` marker found in it, fetches and decodes
        ``mobile/data.js`` and parses the XML passed to ``CreateData(...)``.
        Assets without a ``.mp4`` URL or without a ``video_<id>_`` part in
        the URL are skipped.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        # The third argument is the field name used in error messages; it
        # must describe the field, not carry the video id.
        context_id = self._search_regex(
            r'context-(\d+)', webpage, 'context id')
        # NOTE(review): the "content/8/" path segment is hard-coded;
        # confirm it is the same for every module.
        content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
        context = decode_packed_codes(self._download_webpage(
            '%smobile/data.js' % content_base, video_id))

        # The XML document is embedded as a double-quoted string argument;
        # escaped single quotes are restored before parsing.
        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                # Asset URLs are relative to the module's content base.
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)


class SCTECourseIE(SCTEBaseIE):
    """Extractor for course and subcourse pages on learning.scte.org.

    The result is a playlist of the SCORM module and subcourse links found
    on the page, each delegated to the matching extractor.
    """

    # The "?" that starts the query string is escaped so that it is matched
    # literally instead of making the preceding "p" optional.
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the module and subcourse links of the course at *url*.

        Every ``<a href>`` pointing at a ``mod/scorm`` or ``mod/subcourse``
        view page becomes a playlist entry; a link equal to *url* itself is
        skipped. The title is optional and may be ``None``.
        """
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        # Group 1 captures the opening quote of the href; the negative
        # lookahead keeps the URL from running past the matching quote.
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php\?(?:(?!\1).)*?
                            \bid=\d+
                        )
                    ''',
                webpage):
            item_url = mobj.group('url')
            # Skip a link back to the page currently being extracted.
            if item_url == url:
                continue
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)
Commit	Line	Data
20218040 S	1	from __future__ import unicode_literals
	2
	3	import re
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	decode_packed_codes,
	8	ExtractorError,
	9	urlencode_postdata,
	10	)
	11
	12
	13	class SCTEBaseIE(InfoExtractor):
	14	_LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
	15	_NETRC_MACHINE = 'scte'
	16
52efa4b3	17	def _perform_login(self, username, password):
20218040 S	18	login_popup = self._download_webpage(
	19	self._LOGIN_URL, None, 'Downloading login popup')
	20
	21	def is_logged(webpage):
	22	return any(re.search(p, webpage) for p in (
	23	r'class=["\']welcome\b', r'>Sign Out<'))
	24
	25	# already logged in
	26	if is_logged(login_popup):
	27	return
	28
	29	login_form = self._hidden_inputs(login_popup)
	30
	31	login_form.update({
	32	'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
	33	'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
	34	'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
	35	})
	36
	37	response = self._download_webpage(
	38	self._LOGIN_URL, None, 'Logging in',
	39	data=urlencode_postdata(login_form))
	40
	41	if '\|pageRedirect\|' not in response and not is_logged(response):
	42	error = self._html_search_regex(
	43	r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
	44	response, 'error message', default=None)
	45	if error:
	46	raise ExtractorError('Unable to login: %s' % error, expected=True)
	47	raise ExtractorError('Unable to log in')
	48
	49
	50	class SCTEIE(SCTEBaseIE):
	51	_VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
	52	_TESTS = [{
	53	'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
	54	'info_dict': {
	55	'title': 'Introduction to DOCSIS Engineering Professional',
	56	'id': '31484',
	57	},
	58	'playlist_count': 5,
	59	'skip': 'Requires account credentials',
	60	}]
	61
	62	def _real_extract(self, url):
	63	video_id = self._match_id(url)
	64
	65	webpage = self._download_webpage(url, video_id)
	66
	67	title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
	68
	69	context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
	70	content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
	71	context = decode_packed_codes(self._download_webpage(
	72	'%smobile/data.js' % content_base, video_id))
	73
	74	data = self._parse_xml(
	75	self._search_regex(
	76	r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
	77	video_id)
	78
	79	entries = []
	80	for asset in data.findall('.//asset'):
	81	asset_url = asset.get('url')
82	if not asset_url or not asset_url.endswith('.mp4'):
83	continue
84	asset_id = self._search_regex(
85	r'video_([^_]+)_', asset_url, 'asset id', default=None)
86	if not asset_id:
87	continue
88	entries.append({
89	'id': asset_id,
90	'title': title,
91	'url': content_base + asset_url,
92	})
93
94	return self.playlist_result(entries, video_id, title)
95
96
97	class SCTECourseIE(SCTEBaseIE):
98	_VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
99	_TESTS = [{
100	'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
101	'only_matching': True,
102	}, {
103	'url': 'https://learning.scte.org/course/view.php?id=3639',
104	'only_matching': True,
105	}, {
106	'url': 'https://learning.scte.org/course/view.php?id=3073',
107	'only_matching': True,
108	}]
109
110	def _real_extract(self, url):
111	course_id = self._match_id(url)
112
113	webpage = self._download_webpage(url, course_id)
114
115	title = self._search_regex(
116	r'<h1>(.+?)</h1>', webpage, 'title', default=None)
117
118	entries = []
119	for mobj in re.finditer(
120	r'''(?x)
121	<a[^>]+
122	href=(["\'])
123	(?P<url>
124	https?://learning\.scte\.org/mod/
125	(?P<kind>scorm\|subcourse)/view\.php?(?:(?!\1).)*?
126	\bid=\d+
127	)
128	''',
129	webpage):
130	item_url = mobj.group('url')
131	if item_url == url:
132	continue
133	ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
134	else SCTECourseIE.ie_key())
135	entries.append(self.url_result(item_url, ie=ie))
136
137	return self.playlist_result(entries, course_id, title)