]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/comcarcoff.py
[comcarcoff] Adapt c62159ea91a04ef82560472b254aef1cc9f70a11
[yt-dlp.git] / youtube_dl / extractor / comcarcoff.py
CommitLineData
dc5596ff
PH
1# encoding: utf-8
2import json
3
4from .common import InfoExtractor
5from ..utils import parse_iso8601
6
7
class ComCarCoffIE(InfoExtractor):
    """Extractor for episode pages on comediansincarsgettingcoffee.com.

    The episode metadata is read from a JSON document embedded in the page
    inside a ``<script type="application/json" id="videoData">`` element.
    """

    # Both http and https links are accepted.  The id group may match the
    # empty string (a URL with nothing after the host name), which
    # _real_extract handles by substituting a fixed display id.
    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
    _TESTS = [{
        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
        'info_dict': {
            'id': 'miranda-sings-happy-thanksgiving-miranda',
            'ext': 'mp4',
            'upload_date': '20141127',
            'timestamp': 1417107600,
            'title': 'Happy Thanksgiving Miranda',
            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
        },
        'params': {
            'skip_download': 'requires ffmpeg',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video that is active on the page at *url*.

        Returns a dict with the keys ``id``, ``display_id``, ``title``,
        ``description``, ``timestamp``, ``thumbnails`` and ``formats``.

        Raises ``KeyError`` when the embedded JSON lacks one of the entries
        that are read unconditionally (``activeVideo``/``video``, ``videos``,
        ``mediaUrl``, ``title``), and ``ValueError`` when the embedded text is
        not valid JSON.  Errors raised by the inherited download/search
        helpers are not handled here.
        """
        # An empty slug is a legal match of _VALID_URL; use the host name
        # as the display id in that case.
        display_id = self._match_id(url) or 'comediansincarsgettingcoffee.com'
        webpage = self._download_webpage(url, display_id)

        full_data = json.loads(self._search_regex(
            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
            webpage, 'full data json'))

        # The page names its active video; that name indexes the 'videos' map.
        video_id = full_data['activeVideo']['video']
        video_data = full_data['videos'][video_id]

        # Thumbnails are optional metadata: keep whichever of the two known
        # images is present (thumb first, then poster) instead of failing
        # the whole extraction when one of them is missing.
        images = video_data.get('images') or {}
        thumbnails = [
            {'url': images[key]}
            for key in ('thumb', 'poster')
            if images.get(key)
        ]

        formats = self._extract_m3u8_formats(
            video_data['mediaUrl'], video_id, ext='mp4')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': video_data['title'],
            'description': video_data.get('description'),
            'timestamp': parse_iso8601(video_data.get('pubDate')),
            'thumbnails': thumbnails,
            'formats': formats,
        }