[yt-dlp.git] / youtube_dl / extractor / bigflix.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_urllib_parse_unquote,
)


class BigflixIE(InfoExtractor):
    """Extractor for video pages on bigflix.com.

    The numeric video id is taken from the last path segment of the URL.
    Media URLs are read from the page source, where they appear as
    percent-quoted, base64-encoded values.
    """

    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at ``url`` and build the info dict for its video.

        Returns a dict with the keys 'id', 'title', 'description' and
        'formats'.  Formats come from every ``ContentURL_<height>p...=``
        parameter found in the page, plus the generic ``file=`` parameter
        when it points at a URL not already collected.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # Undo the percent-quoting first, then the base64 layer, and
            # interpret the resulting bytes as UTF-8 text.
            return compat_b64decode(compat_urllib_parse_unquote(
                quoted_b64_url)).decode('utf-8')

        formats = []
        for height, encoded_url in re.findall(
                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
            video_url = decode_url(encoded_url)
            f = {
                'url': video_url,
                'format_id': '%sp' % height,
                'height': int(height),
            }
            # Set the container explicitly for RTMP URLs.
            if video_url.startswith('rtmp'):
                f['ext'] = 'flv'
            formats.append(f)

        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            # Only add the generic URL if no per-height entry already uses
            # it; reuse the decoded value instead of decoding a second time.
            if all(f['url'] != video_url for f in formats):
                formats.append({
                    'url': video_url,
                })

        self._sort_formats(formats)

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats
        }
Commit	Line	Data
0a899a14 VV	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
6e99d576	4	import re
0a899a14 VV	5
0a899a14 VV	6	from .common import InfoExtractor
cf282071 S	7	from ..compat import (
	8	compat_b64decode,
	9	compat_urllib_parse_unquote,
	10	)
0a899a14 VV	11
	12
	13	class BigflixIE(InfoExtractor):
6e99d576 S	14	_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
6e99d576 S	15	_TESTS = [{
a9bbd26f	16	# 2 formats
6e99d576 S	17	'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
	18	'info_dict': {
	19	'id': '16070',
	20	'ext': 'mp4',
	21	'title': 'Madarasapatinam',
7f8b92e3	22	'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
6e99d576 S	23	'formats': 'mincount:2',
	24	},
	25	'params': {
	26	'skip_download': True,
	27	}
a9bbd26f S	28	}, {
	29	# multiple formats
	30	'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
	31	'only_matching': True,
6e99d576	32	}]
0a899a14 VV	33
	34	def _real_extract(self, url):
	35	video_id = self._match_id(url)
	36
	37	webpage = self._download_webpage(url, video_id)
	38
	39	title = self._html_search_regex(
	40	r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
	41	webpage, 'title')
	42
6e99d576	43	def decode_url(quoted_b64_url):
cf282071 S	44	return compat_b64decode(compat_urllib_parse_unquote(
cf282071 S	45	quoted_b64_url)).decode('utf-8')
a9bbd26f S	46
	47	formats = []
	48	for height, encoded_url in re.findall(
7e8a800f	49	r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
a9bbd26f S	50	video_url = decode_url(encoded_url)
	51	f = {
	52	'url': video_url,
	53	'format_id': '%sp' % height,
	54	'height': int(height),
	55	}
	56	if video_url.startswith('rtmp'):
	57	f['ext'] = 'flv'
	58	formats.append(f)
6e99d576	59
a9bbd26f S	60	file_url = self._search_regex(
	61	r'file=([^&]+)', webpage, 'video url', default=None)
	62	if file_url:
	63	video_url = decode_url(file_url)
	64	if all(f['url'] != video_url for f in formats):
	65	formats.append({
	66	'url': decode_url(file_url),
	67	})
6e99d576	68
a9bbd26f	69	self._sort_formats(formats)
0a899a14 VV	70
	71	description = self._html_search_meta('description', webpage)
	72
	73	return {
	74	'id': video_id,
	75	'title': title,
0a899a14	76	'description': description,
6e99d576	77	'formats': formats
0a899a14	78	}