[yt-dlp.git] / youtube_dl / extractor / openload.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    determine_ext,
    ExtractorError,
)


class OpenloadIE(InfoExtractor):
    """Extractor for openload.co / openload.io / oload.tv file and embed pages.

    The embed page carries a ``<span>`` whose id ends in ``x`` and whose text
    is a run of decimal digits.  That digit string is decoded into the path
    component of the ``https://openload.co/stream/`` URL (see
    ``_decode_stream_path``).
    """

    _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'

    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
        'info_dict': {
            'id': 'kUEfGclsU9o',
            'ext': 'mp4',
            'title': 'skyrim_no-audio_1080.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://openload.co/embed/rjC09fkPLYs',
        'info_dict': {
            'id': 'rjC09fkPLYs',
            'ext': 'mp4',
            'title': 'movie.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': {
                'en': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': True,  # test subtitles only
        },
    }, {
        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
        'only_matching': True,
    }, {
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
        'only_matching': True,
    }, {
        'url': 'https://openload.co/f/_-ztPaZtMhM/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
        # for title and ext
        'url': 'https://openload.co/embed/Sxz5sADo82g/',
        'only_matching': True,
    }, {
        'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
        'only_matching': True,
    }]

    @staticmethod
    def _decode_stream_path(ol_id):
        """Decode the obfuscated digit string into the stream URL path.

        Layout of ``ol_id``: the first two digits are a key; every following
        group of five digits holds a three-digit value and a two-digit
        multiplier.  Each group yields the character with code point
        ``value - key * multiplier``.  For example ``'0510401'`` has key 5
        and one group (104, 01), giving ``chr(104 - 5 * 1) == 'c'``.

        Raises ValueError if a group is truncated so that one of its parts
        is empty, or if a computed code point is not a valid character.
        """
        key = int(ol_id[:2])
        chars = []
        for pos in range(2, len(ol_id), 5):
            value = int(ol_id[pos:pos + 3])
            multiplier = int(ol_id[pos + 3:pos + 5])
            chars.append(compat_chr(value - key * multiplier))
        return ''.join(chars)

    def _real_extract(self, url):
        """Build the info dict for an Openload video.

        Always fetches the embed page on openload.co for the matched video
        id, regardless of which host/path form ``url`` used.

        Raises ExtractorError when the page reports the file as missing or
        when the obfuscated stream ID cannot be decoded.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)

        if 'File not found' in webpage or 'deleted by the owner' in webpage:
            raise ExtractorError('File not found', expected=True)

        ol_id = self._search_regex(
            r'<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>',
            webpage, 'openload ID')

        try:
            urlcode = self._decode_stream_path(ol_id)
        except ValueError:
            # A malformed digit string means the page layout or the
            # obfuscation scheme changed; report it as an extraction failure
            # instead of letting a bare ValueError escape.
            raise ExtractorError('Unable to decode openload ID')

        video_url = 'https://openload.co/stream/' + urlcode

        # Title fallbacks, in order: og:title, the <span class="title">
        # element, then the "description" meta tag (fatal if that is
        # missing too).
        title = self._og_search_title(webpage, default=None) or self._search_regex(
            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
            'title', default=None) or self._html_search_meta(
            'description', webpage, 'title', fatal=True)

        # Only the subtitles of the first HTML5 media entry are used; the
        # stream URL itself comes from the decoded ID above.
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        subtitles = entries[0]['subtitles'] if entries else None

        info_dict = {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
            # Seems all videos have extensions in their titles
            'ext': determine_ext(title),
            'subtitles': subtitles,
        }
        return info_dict
Commit	Line	Data
2bfeee69	1	# coding: utf-8
6c20a0bb	2	from __future__ import unicode_literals
95ad9ce5	3
2bfeee69	4	from .common import InfoExtractor
6c20a0bb	5	from ..compat import compat_chr
9e3c2f1d	6	from ..utils import (
594b0c4c	7	determine_ext,
9e3c2f1d YCH	8	ExtractorError,
9e3c2f1d YCH	9	)
2bfeee69 YCH	10
	11
	12	class OpenloadIE(InfoExtractor):
5c32a5be	13	_VALID_URL = r'https?://(?:openload\.(?:co\|io)\|oload\.tv)/(?:f\|embed)/(?P<id>[a-zA-Z0-9-_]+)'
2bfeee69	14
9e3c2f1d	15	_TESTS = [{
2bfeee69 YCH	16	'url': 'https://openload.co/f/kUEfGclsU9o',
	17	'md5': 'bf1c059b004ebc7a256f89408e65c36e',
	18	'info_dict': {
	19	'id': 'kUEfGclsU9o',
	20	'ext': 'mp4',
	21	'title': 'skyrim_no-audio_1080.mp4',
ec85ded8	22	'thumbnail': r're:^https?://.*\.jpg$',
2bfeee69	23	},
0711995b YCH	24	}, {
	25	'url': 'https://openload.co/embed/rjC09fkPLYs',
	26	'info_dict': {
	27	'id': 'rjC09fkPLYs',
	28	'ext': 'mp4',
	29	'title': 'movie.mp4',
ec85ded8	30	'thumbnail': r're:^https?://.*\.jpg$',
0711995b YCH	31	'subtitles': {
	32	'en': [{
	33	'ext': 'vtt',
	34	}],
	35	},
	36	},
	37	'params': {
	38	'skip_download': True, # test subtitles only
	39	},
9e3c2f1d YCH	40	}, {
	41	'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
	42	'only_matching': True,
	43	}, {
	44	'url': 'https://openload.io/f/ZAn6oz-VZGE/',
	45	'only_matching': True,
21efee5f N	46	}, {
	47	'url': 'https://openload.co/f/_-ztPaZtMhM/',
	48	'only_matching': True,
e9063b5d S	49	}, {
	50	# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
	51	# for title and ext
	52	'url': 'https://openload.co/embed/Sxz5sADo82g/',
	53	'only_matching': True,
5c32a5be YCH	54	}, {
	55	'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
	56	'only_matching': True,
9e3c2f1d	57	}]
2bfeee69	58
2bfeee69 YCH	59	def _real_extract(self, url):
2bfeee69 YCH	60	video_id = self._match_id(url)
18b62161	61	webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
9e3c2f1d	62
18b62161	63	if 'File not found' in webpage or 'deleted by the owner' in webpage:
9e3c2f1d YCH	64	raise ExtractorError('File not found', expected=True)
9e3c2f1d YCH	65
6c20a0bb YCH	66	ol_id = self._search_regex(
	67	'<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>',
	68	webpage, 'openload ID')
c1decda5	69
6c20a0bb YCH	70	first_two_chars = int(float(ol_id[0:][:2]))
	71	urlcode = ''
	72	num = 2
95ad9ce5	73
6c20a0bb YCH	74	while num < len(ol_id):
	75	urlcode += compat_chr(int(float(ol_id[num:][:3])) -
	76	first_two_chars * int(float(ol_id[num + 3:][:2])))
	77	num += 5
95ad9ce5	78
6c20a0bb	79	video_url = 'https://openload.co/stream/' + urlcode
2bfeee69	80
a1394b82 S	81	title = self._og_search_title(webpage, default=None) or self._search_regex(
	82	r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
	83	'title', default=None) or self._html_search_meta(
	84	'description', webpage, 'title', fatal=True)
	85
0711995b YCH	86	entries = self._parse_html5_media_entries(url, webpage, video_id)
	87	subtitles = entries[0]['subtitles'] if entries else None
	88
	89	info_dict = {
2bfeee69	90	'id': video_id,
a1394b82	91	'title': title,
c83a3522	92	'thumbnail': self._og_search_thumbnail(webpage, default=None),
2bfeee69	93	'url': video_url,
c1decda5 YCH	94	# Seems all videos have extensions in their titles
c1decda5 YCH	95	'ext': determine_ext(title),
0711995b	96	'subtitles': subtitles,
2bfeee69	97	}
0711995b	98	return info_dict