jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/democracynow.py
[democracynow] Fix _TESTS
[yt-dlp.git] / youtube_dl / extractor / democracynow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 from .common import InfoExtractor
6
7
class DemocracynowIE(InfoExtractor):
    """Information extractor for democracynow.org pages.

    The page is expected to embed a ``<script type="text/json">`` element
    holding a JSON object.  The media URLs are read from its ``file``,
    ``audio`` and ``video`` keys, the subtitles from ``caption_file``, and an
    optional start offset from ``start``.
    """

    # The dot in the host name is escaped and the host must be followed by
    # '/', '?', '#' or the end of the string, so look-alike hosts such as
    # "democracynowXorg" or "democracynow.org.example.com" are rejected.
    # The 'id' group is always a string (possibly empty for the home page).
    _VALID_URL = r'https?://(?:www\.)?democracynow\.org(?:/|(?=[?#])|$)(?P<id>[^\?]*)'
    IE_NAME = 'democracynow'
    _TESTS = [{
        'url': 'http://www.democracynow.org/shows/2015/7/3',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': 'July 03, 2015 - Democracy Now!',
            'description': 'A daily independent global news hour with Amy Goodman & Juan Gonz\xe1lez "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
            'uploader': 'Democracy Now',
            'upload_date': None,
        },
    }, {
        'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
            'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
            'uploader': 'Democracy Now',
            'upload_date': None,
        },
    }]

    # JSON keys that may carry a subtitle URL.  'chapter_file' is deliberately
    # not listed: pending vtt support that doesn't clobber the srt track.
    _SUBTITLE_KEYS = ('caption_file',)
    # JSON keys that may carry a media URL, in the order they are probed.
    _MEDIA_KEYS = ('file', 'audio', 'video')

    @staticmethod
    def _absolute_url(base_host, media_url):
        """Return media_url unchanged if it is absolute, else prefix base_host."""
        if re.match(r'^https?://', media_url):
            return media_url
        return base_host + media_url

    def _real_extract(self, url):
        """Build the info dict for a democracynow.org page.

        url -- page URL, already matched against _VALID_URL.

        Returns a dict with the keys 'id', 'title', 'description',
        'uploader', 'subtitles' and 'formats'.
        """
        # An empty id means the site root was requested.
        display_id = self._match_id(url) or 'home'
        # Scheme + host only; stop at '?'/'#' as well as '/' so a query string
        # on a path-less URL is not glued onto relative media paths.
        base_host = re.search(r'^(.+?://[^/?#]+)', url).group(1)
        webpage = self._download_webpage(url, display_id)
        description = self._og_search_description(webpage)

        jstr = self._search_regex(r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json')
        js = self._parse_json(jstr, display_id)

        subtitles = {}
        for key in self._SUBTITLE_KEYS:
            sub_url = js.get(key)
            if not sub_url:
                continue
            sub_url = self._absolute_url(base_host, sub_url)
            ext_match = re.search(r'\.([^\.]+)$', sub_url)
            if ext_match is None:
                # No file extension to report; skip instead of crashing.
                continue
            subtitles['eng'] = [{
                'ext': ext_match.group(1),
                'url': sub_url,
            }]

        video_id = None
        formats = []
        start = js.get('start')
        for key in self._MEDIA_KEYS:
            media_url = js.get(key)
            if not media_url:
                continue
            media_url = self._absolute_url(base_host, media_url)
            parts = re.search(r'/(?P<dir>[^/]+)/(?:dn)?(?P<fn>[^/]+?)\.(?P<ext>[^\.\?]+)(?P<hasparams>\?|$)', media_url)
            if parts is None:
                # URL does not look like .../<dir>/<name>.<ext>; the format id
                # and extension cannot be derived, so skip it.
                continue
            if video_id is None:
                # The first usable media file name (minus a leading "dn")
                # doubles as the video id.
                video_id = parts.group('fn')
            if start is not None:
                separator = '&' if parts.group('hasparams') == '?' else '?'
                media_url = media_url + separator + 'start=' + str(start)
            formats.append({
                'format_id': parts.group('dir'),
                'ext': parts.group('ext'),
                'url': media_url,
            })
        self._sort_formats(formats)

        return {
            # Fall back to the page id if no media URL could be parsed.
            'id': video_id or display_id,
            'title': js.get('title'),
            'description': description,
            'uploader': 'Democracy Now',
            'subtitles': subtitles,
            'formats': formats,
        }