[yt-dlp.git] / yt_dlp / extractor / bigflix.py

import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_urllib_parse_unquote,
)


class BigflixIE(InfoExtractor):
    """Extractor for video pages on bigflix.com.

    The trailing numeric path component of the page URL is used as the
    video id.  Stream URLs are read from the page, where they appear as
    URL-quoted, base64-encoded parameter values.
    """

    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single Bigflix page.

        Formats are collected from every ``ContentURL_<height>p...=<value>``
        parameter found in the page, plus the generic ``file=<value>``
        parameter when it points at a URL not already collected.

        Returns a dict with the keys ``id``, ``title``, ``description`` and
        ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # Undo the percent-quoting first, then base64-decode the result
            # and interpret the bytes as UTF-8 text.
            return compat_b64decode(compat_urllib_parse_unquote(
                quoted_b64_url)).decode('utf-8')

        def build_format(video_url, height=None):
            # Single place that turns a decoded URL into a format dict, so
            # the rtmp -> flv hint is applied to every format alike.
            f = {'url': video_url}
            if height is not None:
                f['format_id'] = '%sp' % height
                f['height'] = int(height)
            if video_url.startswith('rtmp'):
                f['ext'] = 'flv'
            return f

        formats = []
        # URLs already emitted; used to skip a duplicate `file=` fallback.
        seen_urls = set()
        for height, encoded_url in re.findall(
                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
            video_url = decode_url(encoded_url)
            seen_urls.add(video_url)
            formats.append(build_format(video_url, height))

        # The `file=` parameter is optional (default=None); it is only added
        # when it is not one of the ContentURL_* URLs collected above.
        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            # Decode once and reuse the result for both the duplicate check
            # and the format entry.
            video_url = decode_url(file_url)
            if video_url not in seen_urls:
                formats.append(build_format(video_url))

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
Commit	Line	Data
6e99d576	1	import re
0a899a14 VV	2
0a899a14 VV	3	from .common import InfoExtractor
cf282071 S	4	from ..compat import (
	5	compat_b64decode,
	6	compat_urllib_parse_unquote,
	7	)
0a899a14 VV	8
	9
	10	class BigflixIE(InfoExtractor):
6e99d576 S	11	_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
6e99d576 S	12	_TESTS = [{
a9bbd26f	13	# 2 formats
6e99d576 S	14	'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
	15	'info_dict': {
	16	'id': '16070',
	17	'ext': 'mp4',
	18	'title': 'Madarasapatinam',
7f8b92e3	19	'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
6e99d576 S	20	'formats': 'mincount:2',
	21	},
	22	'params': {
	23	'skip_download': True,
	24	}
a9bbd26f S	25	}, {
	26	# multiple formats
	27	'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
	28	'only_matching': True,
6e99d576	29	}]
0a899a14 VV	30
	31	def _real_extract(self, url):
	32	video_id = self._match_id(url)
	33
	34	webpage = self._download_webpage(url, video_id)
	35
	36	title = self._html_search_regex(
	37	r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
	38	webpage, 'title')
	39
6e99d576	40	def decode_url(quoted_b64_url):
cf282071 S	41	return compat_b64decode(compat_urllib_parse_unquote(
cf282071 S	42	quoted_b64_url)).decode('utf-8')
a9bbd26f S	43
	44	formats = []
	45	for height, encoded_url in re.findall(
7e8a800f	46	r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
a9bbd26f S	47	video_url = decode_url(encoded_url)
	48	f = {
	49	'url': video_url,
	50	'format_id': '%sp' % height,
	51	'height': int(height),
	52	}
	53	if video_url.startswith('rtmp'):
	54	f['ext'] = 'flv'
	55	formats.append(f)
6e99d576	56
a9bbd26f S	57	file_url = self._search_regex(
	58	r'file=([^&]+)', webpage, 'video url', default=None)
	59	if file_url:
	60	video_url = decode_url(file_url)
	61	if all(f['url'] != video_url for f in formats):
	62	formats.append({
	63	'url': decode_url(file_url),
	64	})
6e99d576	65
0a899a14 VV	66	description = self._html_search_meta('description', webpage)
	67
	68	return {
	69	'id': video_id,
	70	'title': title,
0a899a14	71	'description': description,
6e99d576	72	'formats': formats
0a899a14	73	}