[yt-dlp.git] / yt_dlp / extractor / bigflix.py

import base64
import re
import urllib.parse

from .common import InfoExtractor


class BigflixIE(InfoExtractor):
    # Extractor for bigflix.com pages whose URL ends in a numeric video id.
    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build its info dict.

        The title is taken from the ``pagetitle`` div, the description from
        the ``description`` meta tag, and the formats from every
        ``ContentURL_<height>p...=<value>`` pair found in the page, plus the
        ``file=<value>`` entry when it points to a URL not already collected.

        Returns a dict with the keys ``id``, ``title``, ``description`` and
        ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # The scraped value is percent-unquoted, base64-decoded and then
            # read as UTF-8. Return None instead of raising on a malformed
            # value so that one bad entry does not abort the extraction of
            # the remaining formats.
            try:
                return base64.b64decode(urllib.parse.unquote(
                    quoted_b64_url)).decode('utf-8')
            except ValueError:
                # binascii.Error (bad base64) and UnicodeDecodeError (bad
                # UTF-8) are both subclasses of ValueError.
                self.report_warning(
                    f'Unable to decode media URL {quoted_b64_url!r}', video_id)
                return None

        formats = []
        for height, encoded_url in re.findall(
                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
            video_url = decode_url(encoded_url)
            if not video_url:
                # Undecodable or empty URL: nothing usable for this height.
                continue
            f = {
                'url': video_url,
                'format_id': f'{height}p',
                'height': int(height),
            }
            if video_url.startswith('rtmp'):
                f['ext'] = 'flv'
            formats.append(f)

        # The page may additionally expose a single "file" URL; add it only
        # when it is not one of the height-tagged URLs collected above.
        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            if video_url and all(f['url'] != video_url for f in formats):
                formats.append({
                    'url': video_url,
                })

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
Commit	Line	Data
add96eb9	1	import base64
6e99d576	2	import re
add96eb9	3	import urllib.parse
0a899a14 VV	4
0a899a14 VV	5	from .common import InfoExtractor
0a899a14 VV	6
	7
	8	class BigflixIE(InfoExtractor):
6e99d576 S	9	_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
6e99d576 S	10	_TESTS = [{
a9bbd26f	11	# 2 formats
6e99d576 S	12	'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
	13	'info_dict': {
	14	'id': '16070',
	15	'ext': 'mp4',
	16	'title': 'Madarasapatinam',
7f8b92e3	17	'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
6e99d576 S	18	'formats': 'mincount:2',
	19	},
	20	'params': {
	21	'skip_download': True,
add96eb9	22	},
a9bbd26f S	23	}, {
	24	# multiple formats
	25	'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
	26	'only_matching': True,
6e99d576	27	}]
0a899a14 VV	28
	29	def _real_extract(self, url):
	30	video_id = self._match_id(url)
	31
	32	webpage = self._download_webpage(url, video_id)
	33
	34	title = self._html_search_regex(
	35	r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
	36	webpage, 'title')
	37
6e99d576	38	def decode_url(quoted_b64_url):
add96eb9	39	return base64.b64decode(urllib.parse.unquote(
cf282071	40	quoted_b64_url)).decode('utf-8')
a9bbd26f S	41
	42	formats = []
	43	for height, encoded_url in re.findall(
7e8a800f	44	r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
a9bbd26f S	45	video_url = decode_url(encoded_url)
	46	f = {
	47	'url': video_url,
add96eb9	48	'format_id': f'{height}p',
a9bbd26f S	49	'height': int(height),
	50	}
	51	if video_url.startswith('rtmp'):
	52	f['ext'] = 'flv'
	53	formats.append(f)
6e99d576	54
a9bbd26f S	55	file_url = self._search_regex(
	56	r'file=([^&]+)', webpage, 'video url', default=None)
	57	if file_url:
	58	video_url = decode_url(file_url)
	59	if all(f['url'] != video_url for f in formats):
	60	formats.append({
	61	'url': decode_url(file_url),
	62	})
6e99d576	63
0a899a14 VV	64	description = self._html_search_meta('description', webpage)
	65
	66	return {
	67	'id': video_id,
	68	'title': title,
0a899a14	69	'description': description,
add96eb9	70	'formats': formats,
0a899a14	71	}