]>
Commit | Line | Data |
---|---|---|
8e7a9016 JJ |
1 | from __future__ import unicode_literals |
2 | ||
8e7a9016 JJ |
3 | import json |
4 | import re | |
5 | ||
8e2ec955 PH |
6 | from .common import InfoExtractor |
7 | from ..utils import ( | |
8 | int_or_none, | |
9 | parse_iso8601, | |
10 | ) | |
11 | ||
12 | ||
8e7a9016 JJ |
class DRBonanzaIE(InfoExtractor):
    """Extractor for assets served under ``dr.dk/bonanza``.

    The asset page embeds a JSON object per asset. When the URL carries an
    ``assetId`` the JSON object mentioning that id is used; otherwise the
    first playlist passed to ``bonanzaFunctions.newPlaylist`` is used.
    """

    _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'

    _TESTS = [{
        'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
        'info_dict': {
            'id': '65517',
            'ext': 'mp4',
            'title': 'Talkshowet - Leonard Cohen',
            'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
            'timestamp': 1295537932,
            'upload_date': '20110120',
            'duration': 3664,
        },
        'params': {
            'skip_download': True,  # requires rtmp
        },
    }, {
        'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
        'md5': '6dfe039417e76795fb783c52da3de11d',
        'info_dict': {
            'id': '59410',
            'ext': 'mp3',
            'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
            'description': 'md5:501e5a195749480552e214fbbed16c4e',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
            'timestamp': 1223274900,
            'upload_date': '20081006',
            'duration': 7369,
        },
    }]

    # Video file types that are turned into formats, mapped to the
    # 'preference' value stored on the resulting format.
    _VIDEO_PREFERENCES = {
        'VideoHigh': -1,
        'VideoMid': -2,
        'VideoLow': -3,
    }

    @staticmethod
    def _parse_filename_info(location):
        """Return format fields encoded in the file name of ``location``.

        Recognises ``/<digits>_<W>x<H>x<bitrate>K.<ext>`` and
        ``/<digits>_<bitrate>K.<ext>`` at the end of the URL. The result
        holds 'vbr' and 'ext', plus 'width' and 'height' when dimensions
        are present. An empty dict is returned when nothing matches.
        """
        match = re.search(
            r'/\d+_(?:(?P<width>\d+)x(?P<height>\d+)x)?(?P<bitrate>\d+)K\.(?P<ext>\w+)$',
            location)
        if not match:
            return {}
        parsed = {
            'vbr': int(match.group('bitrate')),
            'ext': match.group('ext'),
        }
        if match.group('width') is not None:
            parsed['width'] = int(match.group('width'))
            parsed['height'] = int(match.group('height'))
        return parsed

    def _build_video_format(self, media_file):
        """Build the format dict for one video entry of ``info['Files']``."""
        location = media_file['Location']
        file_type = media_file['Type']
        fmt = self._parse_filename_info(location)
        fmt.update({
            'url': location,
            'format_id': file_type.replace('Video', ''),
            'preference': self._VIDEO_PREFERENCES[file_type],
        })
        if location.startswith('rtmp'):
            fmt['rtmp_live'] = True  # --resume does not work
            if '/bonanza/' in location:
                fmt['play_path'] = location.split('/bonanza/')[1]
        return fmt

    def _build_audio_format(self, media_file):
        """Build the format dict for one audio entry of ``info['Files']``."""
        location = media_file['Location']
        fmt = self._parse_filename_info(location)
        # The stream has no video, so the bitrate parsed from the file
        # name is an audio bitrate and belongs in 'abr', not 'vbr'.
        if 'vbr' in fmt:
            fmt['abr'] = fmt.pop('vbr')
        fmt.update({
            'url': location,
            'format_id': media_file['Type'],
            'vcodec': 'none',
        })
        return fmt

    def _real_extract(self, url):
        """Download the asset page for ``url`` and return its info dict."""
        url_id = self._match_id(url)
        webpage = self._download_webpage(url, url_id)

        if url_id:
            info_json = self._html_search_regex(
                r'({.*?%s.*})' % url_id, webpage, 'json')
        else:
            # Just fetch the first video on that page
            info_json = self._html_search_regex(
                r'bonanzaFunctions\.newPlaylist\(({.*})\)', webpage, 'json')
        info = json.loads(info_json)

        # '%s' keeps the id a text string on Python 2 as well, where
        # str() would produce a byte string despite unicode_literals.
        asset_id = '%s' % info['AssetId']
        title = info['Title'].rstrip(' \'\"-,.:;!?')
        duration = int_or_none(info.get('Duration'), scale=1000)
        # First published online. "FirstPublished" contains the date for original airing.
        timestamp = parse_iso8601(
            re.sub(r'\.\d+$', '', info['Created']))

        media_type = info['Type']
        formats = []
        # Default to None so assets without a 'Thumb' file do not fail
        # with an unbound local when the result is assembled.
        thumbnail = None
        for media_file in info['Files']:
            file_type = media_file['Type']
            if file_type == 'Thumb':
                thumbnail = media_file['Location']
            elif media_type == 'Video' and file_type in self._VIDEO_PREFERENCES:
                formats.append(self._build_video_format(media_file))
            elif media_type == 'Audio' and file_type == 'Audio':
                formats.append(self._build_audio_format(media_file))

        description = '%s\n%s\n%s\n' % (
            info['Description'], info['Actors'], info['Colophon'])

        self._sort_formats(formats)

        # Slug: lower-cased title with spaces turned into dashes, other
        # non-word characters dropped, then the asset id appended.
        slug = re.sub(r'[^\w\d-]', '', title.lower().replace(' ', '-'))
        display_id = re.sub(r'-+', '-', slug + '-' + asset_id)

        return {
            'id': asset_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
        }