]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/cammodels.py
[cammodels] Add extractor
[yt-dlp.git] / youtube_dl / extractor / cammodels.py
CommitLineData
2a49d019 1from __future__ import unicode_literals
2from .common import InfoExtractor
3from .common import ExtractorError
4import json
5import re
6from ..utils import int_or_none
7
8
class CamModelsIE(InfoExtractor):
    """Extractor for live streams served from cammodels.com room pages.

    The room page embeds a ``manifestUrlRoot`` value; appending the room id
    and ``.json`` to it yields a JSON manifest that lists the available
    stream encodings.
    """

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
    _HEADERS = {
        # Needed because server doesn't return links to video URLs if a
        # browser-like User-Agent is not used.
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
    }

    def _real_extract(self, url):
        """Return the info dict (id, live title, formats) for a room URL.

        Raises:
            ExtractorError: if the page exposes no manifest link (room is
                offline, in a private show, or the page layout changed), or
                if the manifest contains no usable stream links.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id, headers=self._HEADERS)

        manifest_url_root = self._html_search_regex(
            r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
            webpage, 'manifest', default=None, fatal=False)
        if not manifest_url_root:
            self._raise_unavailable(webpage, video_id)

        manifest = self._download_json(
            manifest_url_root + video_id + '.json',
            video_id,
            'Downloading links to streams.',
            'Link to stream URLs was found, but we couldn\'t access it.',
            headers=self._HEADERS)

        try:
            formats = self._formats_from_manifest(manifest)
        except (KeyError, TypeError):
            # If they change the JSON format, then fallback to parsing out
            # RTMP links via regex. TypeError covers a changed layout where a
            # dict was replaced by a list/None rather than a key going missing.
            formats = self._formats_from_rtmp_links(manifest, video_id)

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._live_title(video_id),
            'formats': formats
        }

    def _raise_unavailable(self, webpage, video_id):
        """Raise an ExtractorError explaining why no manifest link was found."""
        offline = self._html_search_regex(
            r'(?P<id>I\'m offline, but let\'s stay connected!)',
            webpage, 'offline indicator', default=None, fatal=False)
        private = self._html_search_regex(
            r'(?P<id>I’m in a private show right now)',
            webpage, 'private show indicator', default=None, fatal=False)

        if offline:
            message = 'This user is currently offline, so nothing can be downloaded.'
        elif private:
            message = 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.'
        else:
            message = 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'

        # Offline/private rooms are normal site states, not extractor bugs,
        # so only those two cases are flagged as expected.
        raise ExtractorError(
            message,
            expected=bool(offline or private),
            video_id=video_id)

    @staticmethod
    def _formats_from_manifest(manifest):
        """Build format dicts from the structured manifest.

        Raises KeyError/TypeError if the manifest does not have the
        ``formats -> <name> -> encodings -> [{location, ...}]`` layout.
        """
        formats = []
        for format_name in ('mp4-rtmp', 'mp4-hls'):
            for encoding in manifest['formats'][format_name]['encodings']:
                formats.append({
                    'ext': 'mp4',
                    'url': encoding['location'],
                    'width': int_or_none(encoding.get('videoWidth')),
                    'height': int_or_none(encoding.get('videoHeight')),
                    'vbr': int_or_none(encoding.get('videoKbps')),
                    'abr': int_or_none(encoding.get('audioKbps')),
                    'format_id': format_name + str(encoding.get('videoWidth'))
                })
        return formats

    @staticmethod
    def _formats_from_rtmp_links(manifest, video_id):
        """Fallback: scrape RTMP links out of the re-serialized manifest.

        Raises:
            ExtractorError: if no link is found in the manifest at all.
        """
        # NOTE(review): 'rtmp?' makes the 'p' optional; 'rtmps?' may have been
        # intended. Pattern left unchanged pending confirmation.
        # The matches are collected into a list because re.finditer returns
        # an iterator, which is truthy even when there are no matches.
        stream_urls = [
            match.group('id')
            for match in re.finditer(
                r'(?P<id>rtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))',
                json.dumps(manifest))
        ]
        if not stream_urls:
            raise ExtractorError(
                'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
                expected=False,
                video_id=video_id)
        return [{
            'ext': 'mp4',
            'url': stream_url,
            # Positional separator: str.split() takes no keyword arguments
            # on Python 2.
            'format_id': stream_url.split('/')[-1]
        } for stream_url in stream_urls]