]>
Commit | Line | Data |
---|---|---|
8244288d | 1 | # coding: utf-8 |
e7916255 | 2 | from __future__ import unicode_literals |
8244288d | 3 | |
99afb3dd | 4 | import re |
a54bda3a | 5 | import hashlib |
99afb3dd JMF |
6 | |
7 | from .common import InfoExtractor | |
c28df247 | 8 | from ..utils import unified_strdate |
99afb3dd JMF |
9 | |
10 | ||
class WatIE(InfoExtractor):
    """Information extractor for videos hosted on wat.tv.

    A video page either describes a single video, which is returned as one
    info dict with several formats, or a multipart video, which is returned
    as a playlist with one URL entry per chapter.
    """

    _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
            'info_dict': {
                'id': '11713067',
                'display_id': 'soupe-figues-l-orange-aux-epices',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                'upload_date': '20140819',
                'duration': 120,
            },
        },
        {
            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
            'md5': 'fbc84e4378165278e743956d9c1bf16b',
            'info_dict': {
                'id': '11713075',
                'display_id': 'gregory-lemarchal-voix-ange',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
                'upload_date': '20140816',
                'duration': 2910,
            },
        },
    ]

    def download_video_info(self, real_id):
        """Download the JSON description of ``real_id`` and return its 'media' entry."""
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        return info['media']

    def _real_extract(self, url):
        """Extract the video (or the playlist of chapters) found at ``url``.

        Returns a playlist result when the first chapter's id differs from
        the id found in the page, otherwise a single info dict with the
        available formats.
        """
        def real_id_for_chapter(chapter):
            # The chapter id is the part of 'tc_start' before the first dash.
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('short_id')
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id or short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)

        geo_list = video_info.get('geoList')
        country = geo_list[0] if geo_list else ''

        chapters = video_info['chapters']
        first_chapter = chapters[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            # Yes, when each chapter is later processed by WatIE, its info
            # will be downloaded again.
            chapter_urls = [
                self.download_video_info(real_id_for_chapter(chapter))['url']
                for chapter in chapters
            ]
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

        # Single-part video from here on. The file list is only needed for
        # this case, so it is looked up after the multipart branch.
        first_file = video_info['files'][0]

        upload_date = None
        if 'date_diffusion' in first_chapter:
            upload_date = unified_strdate(first_chapter['date_diffusion'])

        formats = [{
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            'format_id': 'Mobile',
        }]

        # Pairs of (format id, path component used in the request URL).
        fmts = [('SD', 'web')]
        if first_file.get('hasHD'):
            fmts.append(('HD', 'webhd'))

        def compute_token(param):
            # Token layout: md5(magic + param + timestamp) + '/' + timestamp,
            # where timestamp is the server time as 8 hex digits.
            timestamp = '%08x' % int(self._download_webpage(
                'http://www.wat.tv/servertime', real_id,
                'Downloading server time').split('|')[0])
            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)

        for format_id, path in fmts:
            webid = '/%s/%s' % (path, real_id)
            video_url = self._download_webpage(
                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                real_id,
                'Downloading %s video URL' % format_id,
                'Failed to download %s video URL' % format_id,
                False)
            # A falsy result means this format could not be resolved; skip it
            # and keep whatever formats were found so far.
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': format_id,
            })

        # Only 'id', 'title' and 'formats' are required; every other field
        # is optional metadata, so a missing key must not abort extraction.
        return {
            'id': real_id,
            'display_id': display_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter.get('preview'),
            'description': first_chapter.get('description'),
            'view_count': video_info.get('views'),
            'upload_date': upload_date,
            'duration': first_file.get('duration'),
            'formats': formats,
        }