]>
Commit | Line | Data |
---|---|---|
1 | #!/usr/bin/env python3 | |
2 | ||
3 | # Allow direct execution | |
4 | import os | |
5 | import sys | |
6 | import unittest | |
7 | ||
8 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
9 | ||
10 | ||
11 | from test.helper import FakeYDL, is_download_test, md5 | |
12 | from yt_dlp.extractor import ( | |
13 | NPOIE, | |
14 | NRKTVIE, | |
15 | PBSIE, | |
16 | CeskaTelevizeIE, | |
17 | ComedyCentralIE, | |
18 | DailymotionIE, | |
19 | DemocracynowIE, | |
20 | LyndaIE, | |
21 | RaiPlayIE, | |
22 | RTVEALaCartaIE, | |
23 | TedTalkIE, | |
24 | ThePlatformFeedIE, | |
25 | ThePlatformIE, | |
26 | VikiIE, | |
27 | VimeoIE, | |
28 | WallaIE, | |
29 | YoutubeIE, | |
30 | ) | |
31 | ||
32 | ||
@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``IE`` to the extractor class under test and ``url`` to the
    resource to extract; individual tests may reassign ``self.url`` before
    calling :meth:`getSubtitles`.
    """

    url = None
    IE = None

    def setUp(self):
        """Create a FakeYDL with the extractor registered; skip if the IE is not working."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
            print(f'Skipping: {self.IE.ie_key()} marked as not _WORKING')
            self.skipTest('IE marked as not _WORKING')

    def getInfoDict(self):
        """Return the info dict extracted for ``self.url`` without downloading."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` mapping for the requested subtitles.

        When ``requested_subtitles`` is falsy it is returned unchanged.
        Entries whose ``data`` is missing are fetched from their ``url`` and
        the decoded text is stored back on the entry.
        """
        subtitles = self.getInfoDict()['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                response = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = response.read().decode()
                finally:
                    # Release the connection even if reading/decoding fails.
                    response.close()
        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
60 | ||
61 | ||
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption extraction tests for YoutubeIE."""

    # Available subtitles for QRS8MkLhQmM:
    # Language formats
    # ru       vtt, ttml, srv3, srv2, srv1, json3
    # fr       vtt, ttml, srv3, srv2, srv1, json3
    # en       vtt, ttml, srv3, srv2, srv1, json3
    # nl       vtt, ttml, srv3, srv2, srv1, json3
    # de       vtt, ttml, srv3, srv2, srv1, json3
    # ko       vtt, ttml, srv3, srv2, srv1, json3
    # it       vtt, ttml, srv3, srv2, srv1, json3
    # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3
    # hi       vtt, ttml, srv3, srv2, srv1, json3
    # pt-BR    vtt, ttml, srv3, srv2, srv1, json3
    # es-MX    vtt, ttml, srv3, srv2, srv1, json3
    # ja       vtt, ttml, srv3, srv2, srv1, json3
    # pl       vtt, ttml, srv3, srv2, srv1, json3
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        """All 13 listed languages are returned; 'en' and 'it' match known hashes."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
        self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
        for lang in ['fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for \'{lang}\' not extracted')

    def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
        """Request subtitles in format ``fmt`` and compare the ``lang`` track's md5 to ``md5_hash``."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = fmt
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles[lang]), md5_hash)

    def test_youtube_subtitles_ttml_format(self):
        self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_subtitles_json3_format(self):
        self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')

    def _test_automatic_captions(self, url, lang):
        """Request automatic captions for ``url`` limited to ``lang`` and check a track is returned."""
        self.url = url
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = [lang]
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles[lang])

    def test_youtube_automatic_captions(self):
        # Available automatic captions for 8YoUxe5ncPo:
        # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
        # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
        # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
        # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
        # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
        # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
        # mt, ms, mr, ug, ta, my, af, sw, is, am,
        # *it*, iw, sv, ar,
        # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
        # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
        # ky, sd
        # ...
        self._test_automatic_captions('8YoUxe5ncPo', 'it')

    @unittest.skip('Video unavailable')
    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated (#4555)
        self._test_automatic_captions('Ky9eprVWzlI', 'it')

    def test_youtube_nosubtitles(self):
        """A video with only automatic captions yields no regular subtitles."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        # Available automatic captions for 8YoUxe5ncPo:
        # ...
        # 8YoUxe5ncPo has no subtitles
        self.url = '8YoUxe5ncPo'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
145 | ||
146 | ||
@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for DailymotionIE."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        """At least six languages are returned; 'en' and 'fr' match known hashes."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual reports the actual count on failure.
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for \'{lang}\' not extracted')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
169 | ||
170 | ||
@is_download_test
@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for TedTalkIE (currently skipped: IE broken)."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE

    def test_allsubtitles(self):
        """At least 28 languages are returned; 'en' and 'fr' match known hashes."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual reports the actual count on failure.
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for \'{lang}\' not extracted')
186 | ||
187 | ||
@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for VimeoIE."""

    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        """Exactly de/en/es/fr are returned; 'en' and 'fr' match known hashes."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'de', 'en', 'es', 'fr'})
        english_digest = md5(tracks['en'])
        self.assertEqual(english_digest, '386cbc9320b94e25cb364b97935e5dd1')
        french_digest = md5(tracks['fr'])
        self.assertEqual(french_digest, 'c9b69eef35bc6641c0d4da8a04f9dfac')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/68093876'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
208 | ||
209 | ||
@is_download_test
@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for WallaIE (currently skipped: IE broken)."""

    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        """Only the 'heb' track is returned and it matches the known hash."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'heb'})
        hebrew_digest = md5(tracks['heb'])
        self.assertEqual(hebrew_digest, 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
231 | ||
232 | ||
@is_download_test
@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for CeskaTelevizeIE (currently skipped: IE broken)."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        """Only the 'cs' track is returned and it is longer than 20000 characters."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        # assertGreater reports the actual length on failure.
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
254 | ||
255 | ||
@is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for LyndaIE (currently skipped: IE broken)."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, '09bbe67222259bed60deaa26997d73a7')
268 | ||
269 | ||
@is_download_test
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for NPOIE (currently skipped: IE broken)."""

    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        """Only the 'nl' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'nl'})
        digest = md5(tracks['nl'])
        self.assertEqual(digest, 'fc6435027572b63fb4ab143abd5ad3f4')
282 | ||
283 | ||
@is_download_test
@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction test using ComedyCentralIE (currently skipped: IE broken)."""

    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first item of the extracted result's ``entries`` list."""
        extracted = super().getInfoDict()
        return extracted['entries'][0]

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, '78206b8d8a0cfa9da64dc026eea48961')
299 | ||
300 | ||
@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for NRKTVIE."""

    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        """Only the 'nb-ttv' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'nb-ttv'})
        digest = md5(tracks['nb-ttv'])
        self.assertEqual(digest, '67e06ff02d0deaf975e68f6cb8f6a149')
312 | ||
313 | ||
@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for RaiPlayIE; each test sets its own URL."""

    IE = RaiPlayIE

    def test_subtitles_key(self):
        """Only the 'it' track is returned for the 2014 video and it matches the known hash."""
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'it'})
        digest = md5(tracks['it'])
        self.assertEqual(digest, 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        """Only the 'it' track is returned for the 2020 video and it matches the known hash."""
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'it'})
        digest = md5(tracks['it'])
        self.assertEqual(digest, '4b3264186fbb103508abe5311cfcb9cd')
333 | ||
334 | ||
@is_download_test
@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for VikiIE (currently skipped: IE broken - DRM only)."""

    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, '53cb083a5914b2d84ef1ab67b880d18a')
347 | ||
348 | ||
@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ThePlatformIE."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, '97e7670cbae3c4d26ae8bcc7fdd78d4b')
362 | ||
363 | ||
@is_download_test
@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ThePlatformFeedIE (currently skipped: IE broken)."""

    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the known hash."""
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, '48649a22e82b2da21c9a67a395eedade')
376 | ||
377 | ||
@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for RTVEALaCartaIE."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Marked as skipped instead of printing and returning early: the early
    # return left the assertions unreachable and reported the test as passed.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        """Only the 'es' track is returned and it matches the known hash."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
391 | ||
392 | ||
@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for DemocracynowIE."""

    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def test_allsubtitles(self):
        """The default show URL yields only an 'en' track matching the known hash."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, 'a3cc4c0b5eadd74d9974f1c1f5101045')

    def test_subtitles_in_page(self):
        """The story-page URL yields only an 'en' track with the same known hash."""
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, {'en'})
        digest = md5(tracks['en'])
        self.assertEqual(digest, 'a3cc4c0b5eadd74d9974f1c1f5101045')
412 | ||
413 | ||
@is_download_test
class TestPBSSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for PBSIE, including per-format hash checks."""

    url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'
    IE = PBSIE

    def _english_digest_for(self, fmt):
        """Request subtitles in format ``fmt`` and return the md5 of the 'en' track."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = fmt
        tracks = self.getSubtitles()
        return md5(tracks['en'])

    def test_allsubtitles(self):
        """Only the 'en' track is returned when all subtitles are requested."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), {'en'})

    def test_subtitles_dfxp_format(self):
        accepted = ['643b034254cdc3768ff1e750b6b5873b']
        self.assertIn(self._english_digest_for('dfxp'), accepted)

    def test_subtitles_vtt_format(self):
        # Two hashes are accepted for the vtt track.
        accepted = ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']
        self.assertIn(self._english_digest_for('vtt'), accepted)

    def test_subtitles_srt_format(self):
        accepted = ['2082c21b43759d9bf172931b2f2ca371']
        self.assertIn(self._english_digest_for('srt'), accepted)

    def test_subtitles_sami_format(self):
        accepted = ['4256b16ac7da6a6780fafd04294e85cd']
        self.assertIn(self._english_digest_for('sami'), accepted)
449 | ||
450 | ||
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()