[yt-dlp.git] / youtube_dl / extractor / openload.py

# coding: utf-8
from __future__ import unicode_literals, division

from .common import InfoExtractor
from ..compat import (
    compat_chr,
    compat_ord,
)
from ..utils import (
    determine_ext,
    ExtractorError,
)


class OpenloadIE(InfoExtractor):
    """Extractor for videos hosted on openload.co / openload.io.

    Both the ``/f/<id>`` and ``/embed/<id>`` URL forms are accepted; the
    embed page is always the one that gets downloaded and parsed.
    """

    # The dot after "openload" is escaped so that it only matches a literal
    # '.', and '-' is placed last in the id character class so that it is
    # unambiguously a literal rather than a range operator.
    _VALID_URL = r'https://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
        'info_dict': {
            'id': 'kUEfGclsU9o',
            'ext': 'mp4',
            'title': 'skyrim_no-audio_1080.mp4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
        'only_matching': True,
    }, {
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
        'only_matching': True,
    }, {
        'url': 'https://openload.co/f/_-ztPaZtMhM/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
        # for title and ext
        'url': 'https://openload.co/embed/Sxz5sADo82g/',
        'only_matching': True,
    }]

    @staticmethod
    def _decode_hidden_url(enc_data):
        """Decode the obfuscated stream token taken from the embed page.

        Every character whose code point lies in 33..126 is mapped to
        ``((code + 14) % 94) + 33``, i.e. rotated by 47 positions inside
        that 94-character range; other characters are left unchanged.
        The code point of the very last character is then incremented
        by one.

        The algorithm was written by @yokrysty and declared to be freely
        used in youtube-dl.
        See https://github.com/rg3/youtube-dl/issues/10408

        :param enc_data: the obfuscated text of the "hiddenurl" element
        :return: the decoded token as a string
        """
        last_idx = len(enc_data) - 1
        decoded_chars = []
        for idx, c in enumerate(enc_data):
            j = compat_ord(c)
            if 33 <= j <= 126:
                j = ((j + 14) % 94) + 33
            if idx == last_idx:
                # The final character is shifted by one extra position.
                j += 1
            decoded_chars.append(compat_chr(j))
        return ''.join(decoded_chars)

    def _real_extract(self, url):
        """Build the info dict for the Openload video referenced by *url*.

        Raises ExtractorError (marked as expected) when the embed page
        reports that the file is missing or was deleted by its owner.
        """
        video_id = self._match_id(url)
        # Always fetch the embed page, whichever URL form was supplied.
        webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)

        if 'File not found' in webpage or 'deleted by the owner' in webpage:
            raise ExtractorError('File not found', expected=True)

        enc_data = self._html_search_regex(
            r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data')

        video_url = 'https://openload.co/stream/%s?mime=true' % self._decode_hidden_url(enc_data)

        # Title sources, in order of preference: OpenGraph title, the
        # <span class="title"> element, then the "description" meta tag
        # (the last lookup is fatal, so a title is always obtained).
        title = self._og_search_title(webpage, default=None) or self._search_regex(
            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
            'title', default=None) or self._html_search_meta(
            'description', webpage, 'title', fatal=True)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
            # Seems all videos have extensions in their titles
            'ext': determine_ext(title),
        }
Commit	Line	Data
2bfeee69	1	# coding: utf-8
c1decda5	2	from __future__ import unicode_literals, division
2bfeee69	3
2bfeee69	4	from .common import InfoExtractor
18b62161 YCH	5	from ..compat import (
	6	compat_chr,
	7	compat_ord,
	8	)
9e3c2f1d	9	from ..utils import (
594b0c4c	10	determine_ext,
9e3c2f1d YCH	11	ExtractorError,
9e3c2f1d YCH	12	)
2bfeee69 YCH	13
	14
	15	class OpenloadIE(InfoExtractor):
21efee5f	16	_VALID_URL = r'https://openload.(?:co\|io)/(?:f\|embed)/(?P<id>[a-zA-Z0-9-_]+)'
2bfeee69	17
9e3c2f1d	18	_TESTS = [{
2bfeee69 YCH	19	'url': 'https://openload.co/f/kUEfGclsU9o',
	20	'md5': 'bf1c059b004ebc7a256f89408e65c36e',
	21	'info_dict': {
	22	'id': 'kUEfGclsU9o',
	23	'ext': 'mp4',
	24	'title': 'skyrim_no-audio_1080.mp4',
9e3c2f1d	25	'thumbnail': 're:^https?://.*\.jpg$',
2bfeee69	26	},
9e3c2f1d YCH	27	}, {
	28	'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
	29	'only_matching': True,
	30	}, {
	31	'url': 'https://openload.io/f/ZAn6oz-VZGE/',
	32	'only_matching': True,
21efee5f N	33	}, {
	34	'url': 'https://openload.co/f/_-ztPaZtMhM/',
	35	'only_matching': True,
e9063b5d S	36	}, {
	37	# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
	38	# for title and ext
	39	'url': 'https://openload.co/embed/Sxz5sADo82g/',
	40	'only_matching': True,
9e3c2f1d	41	}]
2bfeee69	42
2bfeee69 YCH	43	def _real_extract(self, url):
2bfeee69 YCH	44	video_id = self._match_id(url)
18b62161	45	webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
9e3c2f1d	46
18b62161	47	if 'File not found' in webpage or 'deleted by the owner' in webpage:
9e3c2f1d YCH	48	raise ExtractorError('File not found', expected=True)
9e3c2f1d YCH	49
18b62161 YCH	50	# The following decryption algorithm is written by @yokrysty and
	51	# declared to be freely used in youtube-dl
	52	# See https://github.com/rg3/youtube-dl/issues/10408
	53	enc_data = self._html_search_regex(
	54	r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data')
c1decda5	55
18b62161	56	video_url_chars = []
c1decda5	57
40eec6b1	58	for idx, c in enumerate(enc_data):
18b62161 YCH	59	j = compat_ord(c)
	60	if j >= 33 and j <= 126:
	61	j = ((j + 14) % 94) + 33
40eec6b1	62	if idx == len(enc_data) - 1:
98908bcf	63	j += 1
18b62161	64	video_url_chars += compat_chr(j)
c1decda5	65
18b62161	66	video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
2bfeee69	67
a1394b82 S	68	title = self._og_search_title(webpage, default=None) or self._search_regex(
	69	r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
	70	'title', default=None) or self._html_search_meta(
	71	'description', webpage, 'title', fatal=True)
	72
2bfeee69 YCH	73	return {
2bfeee69 YCH	74	'id': video_id,
a1394b82	75	'title': title,
c83a3522	76	'thumbnail': self._og_search_thumbnail(webpage, default=None),
2bfeee69	77	'url': video_url,
c1decda5 YCH	78	# Seems all videos have extensions in their titles
c1decda5 YCH	79	'ext': determine_ext(title),
2bfeee69	80	}