[yt-dlp.git] / youtube_dl / extractor / streamango.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    js_to_json,
)


class StreamangoIE(InfoExtractor):
    """Extractor for streamango.com ``/f/<id>`` and ``/embed/<id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
        'md5': 'e992787515a182f55e38fc97588d802a',
        'info_dict': {
            'id': 'clapasobsptpkdfe',
            'ext': 'mp4',
            'title': '20170315_150006.mp4',
        }
    }, {
        # no og:title
        'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
        'info_dict': {
            'id': 'foqebrpftarclpob',
            'ext': 'mp4',
            'title': 'foqebrpftarclpob',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'gone',
    }, {
        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page and build the info dict for the video at *url*.

        Every ``{... src: fn('<payload>', <number>) ...}`` object literal found
        in the page is decoded into a media URL and turned into a format.

        Raises ExtractorError (expected) when no format could be extracted and
        the page carries an error message.
        """
        def decrypt_src(encoded, val):
            """Decode the obfuscated *encoded* payload into a string.

            The payload is read in groups of four symbols looked up in a
            reversed base64-like alphabet; the first character of every
            group is additionally XOR-ed with the integer *val*.

            Returns the decoded string, or None when the payload (after
            stripping foreign characters) is not made of complete groups.
            """
            ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
            encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
            # A trailing partial group would run past the end of the string
            # (IndexError). Report the payload as undecodable instead so the
            # caller can skip this format rather than abort the extraction.
            if len(encoded) % 4:
                return None
            chars = []
            for i in range(0, len(encoded), 4):
                sm = [ALPHABET.index(c) for c in encoded[i:i + 4]]
                chars.append(compat_chr(
                    ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val))
                # NOTE(review): 0x40 is the index of 'A' in ALPHABET, not of
                # '='; kept exactly as before -- presumably mirrors the
                # site's own decoder, confirm before changing.
                if sm[2] != 0x40:
                    chars.append(compat_chr(
                        ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)))
                if sm[3] != 0x40:
                    chars.append(compat_chr(
                        ((sm[2] & 0x3) << 0x6) | sm[3]))
            return ''.join(chars)

        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Fall back to the video id when the page has no og:title
        title = self._og_search_title(webpage, default=video_id)

        formats = []
        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
            # Locate the `src: fn(...)` entry; it is a function call and so
            # cannot be part of the JSON parsed below
            mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
            if mobj is None:
                continue

            # Drop the src entry and parse the rest of the object literal
            format_ = format_.replace(mobj.group(0), '')

            video = self._parse_json(
                format_, video_id, transform_source=js_to_json,
                fatal=False) or {}

            # Pull the quoted payload and the numeric key out of the call
            mobj = re.search(
                r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
                mobj.group(1))
            if mobj is None:
                continue

            src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
            if not src:
                # Empty or undecodable payload
                continue

            ext = determine_ext(src, default_ext=None)
            if video.get('type') == 'application/dash+xml' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': src,
                    'ext': ext or 'mp4',
                    'width': int_or_none(video.get('width')),
                    'height': int_or_none(video.get('height')),
                    'tbr': int_or_none(video.get('bitrate')),
                })

        if not formats:
            # Prefer the message shown by the site, if any
            error = self._search_regex(
                r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
                'error', default=None)
            if not error and '>Sorry' in webpage:
                error = 'Video %s is not available' % video_id
            if error:
                raise ExtractorError(error, expected=True)

        self._sort_formats(formats)

        return {
            'id': video_id,
            # NOTE(review): this is the page URL, not a media URL; kept
            # unchanged for backward compatibility.
            'url': url,
            'title': title,
            'formats': formats,
        }
Commit	Line	Data
4db79fa1 ME	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
80682962 S	4	import re
80682962 S	5
4db79fa1	6	from .common import InfoExtractor
c106237d	7	from ..compat import compat_chr
80682962 S	8	from ..utils import (
80682962 S	9	determine_ext,
ff274e3c	10	ExtractorError,
80682962 S	11	int_or_none,
	12	js_to_json,
	13	)
4db79fa1 ME	14
	15
	16	class StreamangoIE(InfoExtractor):
80682962	17	_VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f\|embed)/(?P<id>[^/?#&]+)'
4db79fa1 ME	18	_TESTS = [{
	19	'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
	20	'md5': 'e992787515a182f55e38fc97588d802a',
	21	'info_dict': {
	22	'id': 'clapasobsptpkdfe',
	23	'ext': 'mp4',
	24	'title': '20170315_150006.mp4',
4db79fa1	25	}
eb703e53	26	}, {
1508da30	27	# no og:title
eb703e53 LS	28	'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
	29	'info_dict': {
	30	'id': 'foqebrpftarclpob',
	31	'ext': 'mp4',
	32	'title': 'foqebrpftarclpob',
1508da30 S	33	},
	34	'params': {
	35	'skip_download': True,
	36	},
ece12e63	37	'skip': 'gone',
4db79fa1 ME	38	}, {
	39	'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
	40	'only_matching': True,
	41	}]
	42
	43	def _real_extract(self, url):
c106237d S	44	def decrypt_src(encoded, val):
	45	ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
	46	encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
	47	decoded = ''
6e72ea47	48	sm = [None] * 4
6e72ea47	49	i = 0
c106237d	50	str_len = len(encoded)
6e72ea47	51	while i < str_len:
6e72ea47	52	for j in range(4):
c106237d	53	sm[j % 4] = ALPHABET.index(encoded[i])
6e72ea47	54	i += 1
c106237d S	55	char_code = ((sm[0] << 0x2) \| (sm[1] >> 0x4)) ^ val
	56	decoded += compat_chr(char_code)
	57	if sm[2] != 0x40:
	58	char_code = ((sm[1] & 0xf) << 0x4) \| (sm[2] >> 0x2)
	59	decoded += compat_chr(char_code)
	60	if sm[3] != 0x40:
	61	char_code = ((sm[2] & 0x3) << 0x6) \| sm[3]
	62	decoded += compat_chr(char_code)
	63	return decoded
6e72ea47	64
4db79fa1	65	video_id = self._match_id(url)
80682962	66
4db79fa1 ME	67	webpage = self._download_webpage(url, video_id)
4db79fa1 ME	68
eb703e53	69	title = self._og_search_title(webpage, default=video_id)
4db79fa1	70
80682962	71	formats = []
c106237d S	72	for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
c106237d S	73	mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
6e72ea47	74	if mobj is None:
6e72ea47	75	continue
c106237d	76
6e72ea47	77	format_ = format_.replace(mobj.group(0), '')
6e72ea47	78
80682962	79	video = self._parse_json(
c106237d S	80	format_, video_id, transform_source=js_to_json,
c106237d S	81	fatal=False) or {}
6e72ea47	82
c106237d S	83	mobj = re.search(
	84	r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
	85	mobj.group(1))
6e72ea47	86	if mobj is None:
80682962	87	continue
6e72ea47	88
6e72ea47	89	src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
c106237d S	90	if not src:
	91	continue
	92
80682962 S	93	ext = determine_ext(src, default_ext=None)
	94	if video.get('type') == 'application/dash+xml' or ext == 'mpd':
	95	formats.extend(self._extract_mpd_formats(
	96	src, video_id, mpd_id='dash', fatal=False))
	97	else:
	98	formats.append({
	99	'url': src,
	100	'ext': ext or 'mp4',
	101	'width': int_or_none(video.get('width')),
	102	'height': int_or_none(video.get('height')),
	103	'tbr': int_or_none(video.get('bitrate')),
	104	})
ff274e3c S	105
	106	if not formats:
	107	error = self._search_regex(
	108	r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
	109	'error', default=None)
	110	if not error and '>Sorry' in webpage:
	111	error = 'Video %s is not available' % video_id
	112	if error:
	113	raise ExtractorError(error, expected=True)
	114
4db79fa1 ME	115	self._sort_formats(formats)
	116
	117	return {
	118	'id': video_id,
	119	'url': url,
	120	'title': title,
	121	'formats': formats,
	122	}