]>
Commit | Line | Data |
---|---|---|
8244288d | 1 | # coding: utf-8 |
e7916255 | 2 | from __future__ import unicode_literals |
8244288d | 3 | |
99afb3dd | 4 | import re |
a54bda3a | 5 | import hashlib |
99afb3dd JMF |
6 | |
7 | from .common import InfoExtractor | |
86916dae S |
8 | from ..utils import ( |
9 | ExtractorError, | |
10 | unified_strdate, | |
11 | ) | |
99afb3dd JMF |
12 | |
13 | ||
class WatIE(InfoExtractor):
    """Information extractor for video pages hosted on wat.tv."""

    _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
            'info_dict': {
                'id': '11713067',
                'display_id': 'soupe-figues-l-orange-aux-epices',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                'upload_date': '20140819',
                'duration': 120,
            },
        },
        {
            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
            'md5': 'fbc84e4378165278e743956d9c1bf16b',
            'info_dict': {
                'id': '11713075',
                'display_id': 'gregory-lemarchal-voix-ange',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
                'upload_date': '20140816',
                'duration': 2910,
            },
            'skip': "Ce contenu n'est pas disponible pour l'instant.",
        },
    ]

    def download_video_info(self, real_id):
        """Fetch the JSON description of a video and return its 'media' entry.

        real_id is appended to the 'contentv3' interface URL and is also used
        as the identifier passed to the download helper.
        """
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        return info['media']

    def _real_extract(self, url):
        """Extract a single video, or a playlist for a multipart video.

        Returns an info dict with the available formats when the first
        chapter belongs to the requested video; otherwise returns a playlist
        result with one URL entry per chapter.

        Raises ExtractorError (expected=True) when the video info carries an
        'error_desc' message.
        """
        def real_id_for_chapter(chapter):
            # The id is the part of 'tc_start' before the first dash.
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('short_id')
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id or short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)

        error_desc = video_info.get('error_desc')
        if error_desc:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)

        geo_list = video_info.get('geoList')
        country = geo_list[0] if geo_list else ''

        chapters = video_info['chapters']
        first_chapter = chapters[0]
        files = video_info['files']
        first_file = files[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            entries = []
            for chapter in chapters:
                # Each chapter URL is handed back to the extractor machinery,
                # so its info will be downloaded again when it is processed.
                chapter_info = self.download_video_info(real_id_for_chapter(chapter))
                entries.append(self.url_result(chapter_info['url']))
            return self.playlist_result(entries, real_id, video_info['title'])

        # Single-part video from here on.
        upload_date = None
        date_diffusion = first_chapter.get('date_diffusion')
        if date_diffusion:
            upload_date = unified_strdate(date_diffusion)

        formats = [{
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            'format_id': 'Mobile',
        }]

        # (format_id, path component used in the 'get' URL)
        fmts = [('SD', 'web')]
        if first_file.get('hasHD'):
            fmts.append(('HD', 'webhd'))

        def compute_token(param):
            # The token is '<md5 hex>/<timestamp>', where the timestamp is the
            # first '|'-separated field of the server time rendered as
            # zero-padded hex, and the digest covers magic + param + timestamp.
            timestamp = '%08x' % int(self._download_webpage(
                'http://www.wat.tv/servertime', real_id,
                'Downloading server time').split('|')[0])
            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
            digest = hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest()
            return '%s/%s' % (digest, timestamp)

        for format_id, path in fmts:
            webid = '/%s/%s' % (path, real_id)
            # The trailing False is passed positionally as in the original
            # call; a falsy result makes this format be skipped.
            video_url = self._download_webpage(
                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                real_id,
                'Downloading %s video URL' % format_id,
                'Failed to download %s video URL' % format_id,
                False)
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': format_id,
            })

        # Only the title is read as a required field; the remaining metadata
        # is looked up with .get() so that a missing optional key does not
        # abort an otherwise successful extraction.
        return {
            'id': real_id,
            'display_id': display_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter.get('preview'),
            'description': first_chapter.get('description'),
            'view_count': video_info.get('views'),
            'upload_date': upload_date,
            'duration': first_file.get('duration'),
            'formats': formats,
        }