[yt-dlp.git] / youtube_dl / extractor / openload.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    determine_ext,
    ExtractorError,
)


class OpenloadIE(InfoExtractor):
    """Extractor for videos hosted on openload.co, openload.io and oload.tv.

    Both the ``/f/<id>`` and ``/embed/<id>`` URL forms are accepted; the embed
    page is always the one downloaded and parsed.
    """

    _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'

    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
        'info_dict': {
            'id': 'kUEfGclsU9o',
            'ext': 'mp4',
            'title': 'skyrim_no-audio_1080.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://openload.co/embed/rjC09fkPLYs',
        'info_dict': {
            'id': 'rjC09fkPLYs',
            'ext': 'mp4',
            'title': 'movie.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': {
                'en': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': True,  # test subtitles only
        },
    }, {
        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
        'only_matching': True,
    }, {
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
        'only_matching': True,
    }, {
        'url': 'https://openload.co/f/_-ztPaZtMhM/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
        # for title and ext
        'url': 'https://openload.co/embed/Sxz5sADo82g/',
        'only_matching': True,
    }, {
        'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every embed URL found in an ``<iframe src=...>`` of webpage.

        The scheme is optional in the match, so scheme-less
        ``openload.co/embed/...`` sources are returned as found.
        """
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
            webpage)

    @staticmethod
    def _decode_stream_path(ol_id):
        """Decode the obfuscated ID from the embed page into the stream path.

        ol_id is a string of hex digit pairs with a 20-digit (10 byte) key
        embedded in it.  The key offset is derived from the first character,
        clamped to at least 2 and at most ``len(ol_id) - 22``.  With the key
        removed, each remaining pair is XORed with 137 and with the key byte
        at its pair index modulo 10 to yield one character of the result.

        Raises ExtractorError if ol_id is too short to hold a full key or
        contains a pair that is not valid hexadecimal.
        """
        key_start = min(max(2, ord(ol_id[0]) - 50), len(ol_id) - 22)
        key_hex = ol_id[key_start:key_start + 20]
        # NOTE: this drops every occurrence of the key substring, not only the
        # slice it was cut from; kept as is to preserve existing behaviour.
        payload_hex = ol_id.replace(key_hex, '')

        try:
            key_bytes = [
                int(key_hex[pos:pos + 2], 16)
                for pos in range(0, len(key_hex), 2)]
            # Floor division keeps the pair index an int on Python 2 and 3
            # alike (plain `/` yields a float on Python 3).
            return ''.join(
                compat_chr(
                    int(payload_hex[pos:pos + 2], 16) ^ 137 ^ key_bytes[(pos // 2) % 10])
                for pos in range(0, len(payload_hex), 2))
        except (ValueError, IndexError):
            # ValueError: a pair is not hexadecimal; IndexError: the key holds
            # fewer than the 10 bytes the payload cycles through.
            raise ExtractorError('Unable to decode openload ID')

    def _real_extract(self, url):
        """Build the info dict for the video referenced by url.

        Downloads the embed page for the video ID, decodes the stream URL from
        the obfuscated ID found in a ``<span>``, and collects title, thumbnail
        and subtitles from the same page.

        Raises ExtractorError (expected) when the page reports the file as
        missing or deleted, and ExtractorError when the obfuscated ID cannot
        be found or decoded.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)

        if 'File not found' in webpage or 'deleted by the owner' in webpage:
            raise ExtractorError('File not found', expected=True)

        ol_id = self._search_regex(
            r'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
            webpage, 'openload ID')

        video_url = 'https://openload.co/stream/%s?mime=true' % self._decode_stream_path(ol_id)

        # Title sources in order of preference: OpenGraph title, the page's
        # title <span>, then the description meta tag (fatal if missing too).
        title = (
            self._og_search_title(webpage, default=None)
            or self._search_regex(
                r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
                'title', default=None)
            or self._html_search_meta(
                'description', webpage, 'title', fatal=True))

        # Only the subtitles of the first HTML5 media entry are used; the
        # stream URL itself comes from the decoded ID above.
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        subtitles = entries[0]['subtitles'] if entries else None

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
            # Seems all videos have extensions in their titles
            'ext': determine_ext(title, 'mp4'),
            'subtitles': subtitles,
        }
Commit	Line	Data
2bfeee69	1	# coding: utf-8
6c20a0bb	2	from __future__ import unicode_literals
95ad9ce5	3
17f8deeb S	4	import re
17f8deeb S	5
2bfeee69	6	from .common import InfoExtractor
6c20a0bb	7	from ..compat import compat_chr
9e3c2f1d	8	from ..utils import (
594b0c4c	9	determine_ext,
9e3c2f1d YCH	10	ExtractorError,
9e3c2f1d YCH	11	)
2bfeee69 YCH	12
	13
	14	class OpenloadIE(InfoExtractor):
5c32a5be	15	_VALID_URL = r'https?://(?:openload\.(?:co\|io)\|oload\.tv)/(?:f\|embed)/(?P<id>[a-zA-Z0-9-_]+)'
2bfeee69	16
9e3c2f1d	17	_TESTS = [{
2bfeee69 YCH	18	'url': 'https://openload.co/f/kUEfGclsU9o',
	19	'md5': 'bf1c059b004ebc7a256f89408e65c36e',
	20	'info_dict': {
	21	'id': 'kUEfGclsU9o',
	22	'ext': 'mp4',
	23	'title': 'skyrim_no-audio_1080.mp4',
ec85ded8	24	'thumbnail': r're:^https?://.*\.jpg$',
2bfeee69	25	},
0711995b YCH	26	}, {
	27	'url': 'https://openload.co/embed/rjC09fkPLYs',
	28	'info_dict': {
	29	'id': 'rjC09fkPLYs',
	30	'ext': 'mp4',
	31	'title': 'movie.mp4',
ec85ded8	32	'thumbnail': r're:^https?://.*\.jpg$',
0711995b YCH	33	'subtitles': {
	34	'en': [{
	35	'ext': 'vtt',
	36	}],
	37	},
	38	},
	39	'params': {
	40	'skip_download': True, # test subtitles only
	41	},
9e3c2f1d YCH	42	}, {
	43	'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
	44	'only_matching': True,
	45	}, {
	46	'url': 'https://openload.io/f/ZAn6oz-VZGE/',
	47	'only_matching': True,
21efee5f N	48	}, {
	49	'url': 'https://openload.co/f/_-ztPaZtMhM/',
	50	'only_matching': True,
e9063b5d S	51	}, {
	52	# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
	53	# for title and ext
	54	'url': 'https://openload.co/embed/Sxz5sADo82g/',
	55	'only_matching': True,
5c32a5be YCH	56	}, {
	57	'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
	58	'only_matching': True,
9e3c2f1d	59	}]
2bfeee69	60
17f8deeb S	61	@staticmethod
	62	def _extract_urls(webpage):
	63	return re.findall(
	64	r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co\|io)\|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
	65	webpage)
	66
2bfeee69 YCH	67	def _real_extract(self, url):
2bfeee69 YCH	68	video_id = self._match_id(url)
18b62161	69	webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
9e3c2f1d	70
18b62161	71	if 'File not found' in webpage or 'deleted by the owner' in webpage:
9e3c2f1d YCH	72	raise ExtractorError('File not found', expected=True)
9e3c2f1d YCH	73
6c20a0bb	74	ol_id = self._search_regex(
39e7277e	75	'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
6c20a0bb	76	webpage, 'openload ID')
c1decda5	77
da92da4b	78	video_url_chars = []
	79
	80	first_char = ord(ol_id[0])
b08cc749	81	key = first_char - 50
da92da4b	82	maxKey = max(2, key)
b08cc749	83	key = min(maxKey, len(ol_id) - 22)
b08cc749	84	t = ol_id[key:key + 20]
da92da4b	85
	86	hashMap = {}
	87	v = ol_id.replace(t, "")
	88	h = 0
	89
	90	while h < len(t):
	91	f = t[h:h + 2]
	92	i = int(f, 16)
	93	hashMap[h / 2] = i
	94	h += 2
	95
	96	h = 0
	97
	98	while h < len(v):
	99	B = v[h:h + 2]
	100	i = int(B, 16)
b08cc749	101	index = (h / 2) % 10
da92da4b	102	A = hashMap[index]
b08cc749	103	i = i ^ 137
da92da4b	104	i = i ^ A
	105	video_url_chars.append(compat_chr(i))
	106	h += 2
	107
	108	video_url = 'https://openload.co/stream/%s?mime=true'
	109	video_url = video_url % (''.join(video_url_chars))
2bfeee69	110
a1394b82 S	111	title = self._og_search_title(webpage, default=None) or self._search_regex(
	112	r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
	113	'title', default=None) or self._html_search_meta(
	114	'description', webpage, 'title', fatal=True)
	115
0711995b YCH	116	entries = self._parse_html5_media_entries(url, webpage, video_id)
	117	subtitles = entries[0]['subtitles'] if entries else None
	118
	119	info_dict = {
2bfeee69	120	'id': video_id,
a1394b82	121	'title': title,
c83a3522	122	'thumbnail': self._og_search_thumbnail(webpage, default=None),
2bfeee69	123	'url': video_url,
c1decda5	124	# Seems all videos have extensions in their titles
c1fa3f46	125	'ext': determine_ext(title, 'mp4'),
0711995b	126	'subtitles': subtitles,
2bfeee69	127	}
0711995b	128	return info_dict