]>
Commit | Line | Data |
---|---|---|
8244288d | 1 | # coding: utf-8 |
e7916255 | 2 | from __future__ import unicode_literals |
8244288d | 3 | |
99afb3dd | 4 | from .common import InfoExtractor |
c5f51551 | 5 | from ..compat import compat_str |
86916dae | 6 | from ..utils import ( |
86916dae | 7 | unified_strdate, |
c5f51551 | 8 | HEADRequest, |
57ce8a6d | 9 | int_or_none, |
86916dae | 10 | ) |
99afb3dd JMF |
11 | |
12 | ||
class WatIE(InfoExtractor):
    """Information extractor for videos hosted on wat.tv.

    Accepts either a regular ``wat.tv/video/...`` page URL or the internal
    ``wat:<id>`` shorthand, which this extractor itself emits for the
    individual parts of a multipart video.
    """

    _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
    IE_NAME = 'wat.tv'
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
            'info_dict': {
                'id': '11713067',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                'upload_date': '20140819',
                'duration': 120,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
            'expected_warnings': ['HTTP Error 404'],
        },
        {
            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
            'md5': 'b16574df2c3cd1a36ca0098f2a791925',
            'info_dict': {
                'id': '11713075',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'upload_date': '20140816',
            },
            'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
        },
    ]

    def _real_extract(self, url):
        """Extract one video, or a playlist when the video has several parts.

        Returns either an info dict (id, title, thumbnail, description,
        view_count, upload_date, duration, formats) or, when the first
        chapter belongs to a different video id, a playlist result whose
        entries are ``wat:<id>`` URLs, one per chapter.

        The title is treated as mandatory (a missing key raises KeyError);
        every other metadata field is optional and degrades to None.
        """
        video_id = self._match_id(url)
        # An all-digit id longer than six characters is used as-is; anything
        # else is interpreted as a base-36 number and converted to decimal.
        if not (video_id.isdigit() and len(video_id) > 6):
            video_id = compat_str(int(video_id, 36))

        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        video_data = self._download_json(
            'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
        video_info = video_data['media']

        # An error description is only reported as a warning; extraction
        # still goes on with whatever data was returned.
        error_desc = video_info.get('error_desc')
        if error_desc:
            self.report_warning(
                '%s returned error: %s' % (self.IE_NAME, error_desc))

        # A missing 'chapters' key is handled like an empty chapter list.
        chapters = video_info.get('chapters')
        if chapters:
            first_chapter = chapters[0]

            def video_id_for_chapter(chapter):
                # The chapter's video id is the part of 'tc_start' that
                # precedes the first dash.
                return chapter['tc_start'].split('-')[0]

            if video_id_for_chapter(first_chapter) != video_id:
                self.to_screen('Multipart video detected')
                entries = [
                    self.url_result('wat:%s' % video_id_for_chapter(chapter))
                    for chapter in chapters]
                return self.playlist_result(
                    entries, video_id, video_info['title'])
            # Otherwise we can continue and extract just one part, we have to
            # use the video id for getting the video url
        else:
            first_chapter = video_info

        title = first_chapter['title']

        formats = []
        manifest_urls = self._download_json(
            'http://www.wat.tv/get/webhtml/' + video_id, video_id)
        m3u8_url = manifest_urls.get('hls')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
        mpd_url = manifest_urls.get('mpd')
        if mpd_url:
            # The manifest is fetched from the '-ssl' variant of the host.
            formats.extend(self._extract_mpd_formats(
                mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
                video_id, mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        # Optional metadata: tolerate absent keys and explicit nulls so that
        # a missing field never aborts an otherwise successful extraction.
        date_diffusion = (
            first_chapter.get('date_diffusion')
            or (video_data.get('configv4') or {}).get('estatS4'))
        upload_date = unified_strdate(date_diffusion) if date_diffusion else None
        files = video_info.get('files')
        duration = int_or_none(files[0].get('duration')) if files else None

        return {
            'id': video_id,
            'title': title,
            'thumbnail': first_chapter.get('preview'),
            'description': first_chapter.get('description'),
            'view_count': int_or_none(video_info.get('views')),
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats,
        }