[yt-dlp.git] / youtube_dl / extractor / cammodels.py

from __future__ import unicode_literals
from .common import InfoExtractor
from .common import ExtractorError
import json
import re
from ..utils import int_or_none


class CamModelsIE(InfoExtractor):
    """Information extractor for live model streams on cammodels.com.

    The model page embeds a ``manifestUrlRoot`` value.  Appending
    ``<video id>.json`` to it yields a JSON manifest that lists the
    available stream encodings, from which the formats are built.
    """

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
    # Needed because the server doesn't return links to video URLs if a
    # browser-like User-Agent is not used.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
    }
    # Format groups read from the manifest, in the order they are emitted.
    _MANIFEST_FORMAT_NAMES = ('mp4-rtmp', 'mp4-hls')
    # Used only by the fallback path to pull RTMP(S) links out of the raw
    # manifest text when its structure is not the expected one.
    _RTMP_URL_RE = r'(?P<id>rtmps?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))'

    def _real_extract(self, url):
        """Extract the stream formats for the model page at *url*.

        Returns an info dict with ``id``, ``title`` and ``formats``.

        Raises ExtractorError when the page carries no manifest link (model
        offline, in a private show, or page layout changed) or when the
        manifest contains no usable stream links.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id, headers=self._HEADERS)
        manifest_url_root = self._html_search_regex(
            r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
            webpage, 'manifest', default=None, fatal=False)
        if not manifest_url_root:
            self._raise_unavailable(webpage, video_id)

        manifest = self._download_json(
            manifest_url_root + video_id + '.json',
            video_id,
            'Downloading links to streams.',
            'Link to stream URLs was found, but we couldn\'t access it.',
            headers=self._HEADERS)

        try:
            formats = self._formats_from_manifest(manifest)
        except (KeyError, TypeError, AttributeError):
            # If they change the JSON format, then fallback to parsing out
            # RTMP links via regex.  A changed layout may surface as a
            # missing key or as a value of an unexpected type, so all three
            # exception types are treated the same way.
            formats = self._formats_from_rtmp_links(manifest, video_id)

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._live_title(video_id),
            'formats': formats
        }

    def _raise_unavailable(self, webpage, video_id):
        """Raise an ExtractorError explaining why no manifest link was found.

        The error is marked as expected when the page shows the offline or
        private-show notice, and as unexpected otherwise.
        """
        offline = self._html_search_regex(
            r'(?P<id>I\'m offline, but let\'s stay connected!)',
            webpage, 'offline indicator', default=None, fatal=False)
        private = self._html_search_regex(
            r'(?P<id>I’m in a private show right now)',
            webpage, 'private show indicator', default=None, fatal=False)
        if offline:
            message = 'This user is currently offline, so nothing can be downloaded.'
        elif private:
            message = 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.'
        else:
            message = 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'
        raise ExtractorError(
            message,
            expected=bool(offline or private),
            video_id=video_id)

    def _formats_from_manifest(self, manifest):
        """Build the format list from the manifest's structured layout.

        Raises KeyError, TypeError or AttributeError when the manifest does
        not have the expected ``formats -> <name> -> encodings`` structure;
        the caller uses that as the signal to switch to the regex fallback.
        """
        formats = []
        for format_name in self._MANIFEST_FORMAT_NAMES:
            for encoding in manifest['formats'][format_name]['encodings']:
                width = encoding.get('videoWidth')
                # Suffix the width only when it is known, so a missing value
                # does not produce an id ending in "None".  %-formatting is
                # used instead of str() to stay unicode-safe on Python 2.
                if width is None:
                    format_id = format_name
                else:
                    format_id = '%s%s' % (format_name, width)
                formats.append({
                    'ext': 'mp4',
                    'url': encoding['location'],
                    'width': int_or_none(width),
                    'height': int_or_none(encoding.get('videoHeight')),
                    'vbr': int_or_none(encoding.get('videoKbps')),
                    'abr': int_or_none(encoding.get('audioKbps')),
                    'format_id': format_id
                })
        return formats

    def _formats_from_rtmp_links(self, manifest, video_id):
        """Fallback: scrape RTMP links out of the serialized manifest.

        Raises ExtractorError when the manifest contains no such link.
        """
        # Materialize the matches: re.finditer() returns an iterator, which
        # is always truthy, so it cannot be tested for emptiness directly.
        stream_urls = [
            match.group('id')
            for match in re.finditer(self._RTMP_URL_RE, json.dumps(manifest))]
        if not stream_urls:
            raise ExtractorError(
                'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
                expected=False,
                video_id=video_id)
        return [{
            'ext': 'mp4',
            'url': stream_url,
            # Last path component of the link; positional separator because
            # str.split() accepts no keyword arguments on Python 2.
            'format_id': stream_url.split('/')[-1]
        } for stream_url in stream_urls]
Commit	Line	Data
2a49d019	1	from __future__ import unicode_literals
	2	from .common import InfoExtractor
	3	from .common import ExtractorError
	4	import json
	5	import re
	6	from ..utils import int_or_none
	7
	8
	9	class CamModelsIE(InfoExtractor):
	10	_VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
	11	_HEADERS = {
	12	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
	13	# Needed because server doesn't return links to video URLs if a browser-like User-Agent is not used
	14	}
	15
	16	def _real_extract(self, url):
	17	video_id = self._match_id(url)
	18	webpage = self._download_webpage(
	19	url,
	20	video_id,
	21	headers=self._HEADERS)
	22	manifest_url_root = self._html_search_regex(
	23	r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
	24	webpage,
	25	'manifest',
	26	None,
	27	False)
	28	if not manifest_url_root:
	29	offline = self._html_search_regex(
	30	r'(?P<id>I\'m offline, but let\'s stay connected!)',
	31	webpage,
	32	'offline indicator',
	33	None,
	34	False)
	35	private = self._html_search_regex(
	36	r'(?P<id>I’m in a private show right now)',
	37	webpage,
	38	'private show indicator',
	39	None,
	40	False)
	41	err = 'This user is currently offline, so nothing can be downloaded.' if offline \
	42	else 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' if private \
	43	else 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'
	44	raise ExtractorError(
	45	err,
	46	expected=True if offline or private else False,
	47	video_id=video_id
	48	)
	49	manifest_url = manifest_url_root + video_id + '.json'
	50	manifest = self._download_json(
	51	manifest_url,
	52	video_id,
	53	'Downloading links to streams.',
	54	'Link to stream URLs was found, but we couldn\'t access it.',
	55	headers=self._HEADERS)
	56	try:
	57	formats = []
	58	for fmtName in ['mp4-rtmp', 'mp4-hls']:
	59	for encoding in manifest['formats'][fmtName]['encodings']:
	60	formats.append({
	61	'ext': 'mp4',
	62	'url': encoding['location'],
	63	'width': int_or_none(encoding.get('videoWidth')),
	64	'height': int_or_none(encoding.get('videoHeight')),
65	'vbr': int_or_none(encoding.get('videoKbps')),
66	'abr': int_or_none(encoding.get('audioKbps')),
67	'format_id': fmtName + str(encoding.get('videoWidth'))
68	})
69	# If they change the JSON format, then fallback to parsing out RTMP links via regex.
70	except KeyError:
71	manifest_json = json.dumps(manifest)
72	manifest_links = re.finditer(
73	r'(?P<id>rtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))',
74	manifest_json)
75	if not manifest_links:
76	raise ExtractorError(
77	'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
78	expected=False,
79	video_id=video_id)
80	formats = []
81	for manifest_link in manifest_links:
82	url = manifest_link.group('id')
83	formats.append({
84	'ext': 'mp4',
85	'url': url,
86	'format_id': url.split(sep='/')[-1]
87	})
88	self._sort_formats(formats)
89	return {
90	'id': video_id,
91	'title': self._live_title(video_id),
92	'formats': formats
93	}