]>
Commit | Line | Data |
---|---|---|
8244288d | 1 | # coding: utf-8 |
e7916255 | 2 | from __future__ import unicode_literals |
8244288d | 3 | |
99afb3dd JMF |
4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
c5f51551 | 7 | from ..compat import compat_str |
86916dae S |
8 | from ..utils import ( |
9 | ExtractorError, | |
10 | unified_strdate, | |
c5f51551 | 11 | HEADRequest, |
57ce8a6d | 12 | int_or_none, |
86916dae | 13 | ) |
99afb3dd JMF |
14 | |
15 | ||
class WatIE(InfoExtractor):
    """Extractor for wat.tv videos.

    Matches either a ``wat:<id>`` pseudo-URL or a ``wat.tv/video/...`` page
    URL whose trailing path component carries the id.
    """

    _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
    IE_NAME = 'wat.tv'
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
            'info_dict': {
                'id': '11713067',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                'upload_date': '20140819',
                'duration': 120,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
            'expected_warnings': ['HTTP Error 404'],
        },
        {
            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
            'md5': 'b16574df2c3cd1a36ca0098f2a791925',
            'info_dict': {
                'id': '11713075',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'upload_date': '20140816',
            },
            'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
        },
    ]

    # (video bitrate, width, height) triples used to build the direct HTTP
    # fallback formats in _real_extract.
    _FORMATS = (
        (200, 416, 234),
        (400, 480, 270),
        (600, 640, 360),
        (1200, 640, 360),
        (1800, 960, 540),
        (2500, 1280, 720),
    )

    def _real_extract(self, url):
        """Return the info dict for a single video, or a playlist result.

        A playlist result is returned when the first chapter of the media
        refers to a different video id than the requested one.
        """
        matched_id = self._match_id(url)
        # An all-digit id longer than 6 characters is used as-is; anything
        # else is decoded as a base-36 number.
        if matched_id.isdigit() and len(matched_id) > 6:
            video_id = matched_id
        else:
            video_id = compat_str(int(matched_id, 36))

        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        video_data = self._download_json(
            'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
        video_info = video_data['media']

        # An error description is only reported as a warning; extraction
        # carries on regardless.
        error_desc = video_info.get('error_desc')
        if error_desc:
            self.report_warning(
                '%s returned error: %s' % (self.IE_NAME, error_desc))

        def chapter_video_id(chapter):
            # The part of 'tc_start' before the first '-' is compared
            # against the video id below.
            return chapter['tc_start'].split('-')[0]

        chapters = video_info['chapters']
        if not chapters:
            first_chapter = video_info
        else:
            first_chapter = chapters[0]
            if chapter_video_id(first_chapter) != video_id:
                self.to_screen('Multipart video detected')
                entries = [
                    self.url_result('wat:%s' % chapter_video_id(part))
                    for part in chapters]
                return self.playlist_result(entries, video_id, video_info['title'])
            # Otherwise we can continue and extract just one part, we have
            # to use the video id for getting the video url

        title = first_chapter['title']

        # NOTE(review): this helper is not called anywhere in this method;
        # it is kept so the block's contents stay unchanged.
        def extract_url(path_template, url_type):
            req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
            head = self._request_webpage(
                HEADRequest(req_url), video_id,
                'Extracting %s url' % url_type, fatal=False)
            if not head:
                return None
            red_url = head.geturl()
            # Only a URL that differs from the requested one is returned.
            return red_url if req_url != red_url else None

        def remove_bitrate_limit(manifest_url):
            # Strip any max_bitrate=/min_bitrate= query parameter.
            return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)

        def alt_urls(manifest_url):
            # Two variants of the manifest URL: one with a plain '.ism/'
            # segment and one with 'ssm.ism/'.
            return [
                re.sub(r'(?:wdv|ssm)?\.ism/', variant + '.ism/', manifest_url)
                for variant in ('', 'ssm')]

        formats = []
        try:
            manifest_urls = self._download_json(
                'http://www.wat.tv/get/webhtml/' + video_id, video_id)

            hls_manifest = manifest_urls.get('hls')
            if hls_manifest:
                for m3u8_alt_url in alt_urls(remove_bitrate_limit(hls_manifest)):
                    formats.extend(self._extract_m3u8_formats(
                        m3u8_alt_url, video_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                    # The HDS manifest URL is derived from the HLS one.
                    f4m_url = m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m')
                    formats.extend(self._extract_f4m_formats(
                        f4m_url, video_id, f4m_id='hds', fatal=False))

            dash_manifest = manifest_urls.get('mpd')
            if dash_manifest:
                for mpd_alt_url in alt_urls(remove_bitrate_limit(dash_manifest)):
                    formats.extend(self._extract_mpd_formats(
                        mpd_alt_url, video_id, mpd_id='dash', fatal=False))

            # NOTE(review): presumably this raises ExtractorError when no
            # formats were found, which would also trigger the fallback
            # below - confirm against InfoExtractor._sort_formats.
            self._sort_formats(formats)
        except ExtractorError:
            # Fallback: probe direct HTTP MP4 URLs, one per _FORMATS entry.
            abr = 64
            for vbr, width, height in self._FORMATS:
                format_id = 'http-%s' % (vbr + abr)
                fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (
                    video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
                if not self._is_valid_url(fmt_url, video_id, format_id):
                    continue
                formats.append({
                    'format_id': format_id,
                    'url': fmt_url,
                    'vbr': vbr,
                    'abr': abr,
                    'width': width,
                    'height': height,
                })

        date_diffusion = (
            first_chapter.get('date_diffusion')
            or video_data.get('configv4', {}).get('estatS4'))
        upload_date = unified_strdate(date_diffusion) if date_diffusion else None

        # Duration is read from the first entry of 'files', when there is one.
        files = video_info['files']
        duration = int_or_none(files[0].get('duration')) if files else None

        return {
            'id': video_id,
            'title': title,
            'thumbnail': first_chapter.get('preview'),
            'description': first_chapter.get('description'),
            'view_count': int_or_none(video_info.get('views')),
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats,
        }