]> jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
[ie/brightcove] Upgrade requests to HTTPS (#10202)
[yt-dlp.git] / test / test_subtitles.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
54007a45 2
ae84f879
JMF
3# Allow direct execution
4import os
5import sys
6import unittest
ae84f879 7
f8271158 8sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ae84f879
JMF
9
10
ac668111 11from test.helper import FakeYDL, is_download_test, md5
7a5c1cfe 12from yt_dlp.extractor import (
b9b42f2e 13 NPOIE,
f8271158 14 NRKTVIE,
a7e999be 15 PBSIE,
f8271158 16 CeskaTelevizeIE,
0af25f78 17 ComedyCentralIE,
f8271158 18 DailymotionIE,
19 DemocracynowIE,
20 LyndaIE,
c4d6fc6d 21 RaiPlayIE,
25ac63ed 22 RTVEALaCartaIE,
f8271158 23 TedTalkIE,
24 ThePlatformFeedIE,
25 ThePlatformIE,
26 VikiIE,
27 VimeoIE,
28 WallaIE,
29 YoutubeIE,
ae84f879
JMF
30)
31
32
@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``IE`` to the extractor class under test and ``url`` to
    the default URL to extract; individual tests may reassign ``self.url``.
    """

    url = None
    IE = None

    def setUp(self):
        """Build a fresh ``FakeYDL`` with an instance of ``IE`` registered.

        The test is skipped when ``IE.working()`` is false.
        """
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
            print(f'Skipping: {self.IE.ie_key()} marked as not _WORKING')
            self.skipTest('IE marked as not _WORKING')

    def getInfoDict(self):
        """Return the result of extracting ``self.url`` without downloading."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` mapping for ``self.url``.

        If ``requested_subtitles`` is falsy it is returned unchanged.
        Entries whose ``data`` is missing are fetched from their ``url``
        and decoded; the fetched text is also stored back on the entry.
        """
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                response = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = response.read().decode()
                finally:
                    # Release the connection even if reading/decoding fails.
                    response.close()
        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
ae84f879
JMF
59
60
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption checks for the YouTube extractor."""

    # Available subtitles for QRS8MkLhQmM:
    # Language formats
    # ru       vtt, ttml, srv3, srv2, srv1, json3
    # fr       vtt, ttml, srv3, srv2, srv1, json3
    # en       vtt, ttml, srv3, srv2, srv1, json3
    # nl       vtt, ttml, srv3, srv2, srv1, json3
    # de       vtt, ttml, srv3, srv2, srv1, json3
    # ko       vtt, ttml, srv3, srv2, srv1, json3
    # it       vtt, ttml, srv3, srv2, srv1, json3
    # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3
    # hi       vtt, ttml, srv3, srv2, srv1, json3
    # pt-BR    vtt, ttml, srv3, srv2, srv1, json3
    # es-MX    vtt, ttml, srv3, srv2, srv1, json3
    # ja       vtt, ttml, srv3, srv2, srv1, json3
    # pl       vtt, ttml, srv3, srv2, srv1, json3
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        """All 13 subtitle languages are returned; en/it content is pinned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
        self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
        for lang in ['fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for \'{lang}\' not extracted')

    def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
        """Request subtitles in ``fmt`` and compare the ``lang`` track's md5 to ``md5_hash``."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = fmt
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles[lang]), md5_hash)

    def test_youtube_subtitles_ttml_format(self):
        self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_subtitles_json3_format(self):
        self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')

    def _test_automatic_captions(self, url, lang):
        """Request automatic captions for ``url`` in ``lang`` and check they exist."""
        self.url = url
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = [lang]
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles[lang])

    def test_youtube_automatic_captions(self):
        # Available automatic captions for 8YoUxe5ncPo:
        # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
        # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
        # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
        # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
        # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
        # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
        # mt, ms, mr, ug, ta, my, af, sw, is, am,
        # *it*, iw, sv, ar,
        # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
        # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
        # ky, sd
        # ...
        self._test_automatic_captions('8YoUxe5ncPo', 'it')

    @unittest.skip('Video unavailable')
    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated (#4555)
        self._test_automatic_captions('Ky9eprVWzlI', 'it')

    def test_youtube_nosubtitles(self):
        """A video with only automatic captions yields no regular subtitles."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        # Available automatic captions for 8YoUxe5ncPo:
        # ...
        # 8YoUxe5ncPo has no subtitles
        self.url = '8YoUxe5ncPo'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 144
ae84f879 145
@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Dailymotion extractor."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        """At least six languages are returned; en/fr content is pinned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual reports the actual count on failure.
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for \'{lang}\' not extracted')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 168
ae84f879 169
@is_download_test
@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the TED extractor (currently skipped)."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE

    def test_allsubtitles(self):
        """At least 28 languages are returned; en/fr content is pinned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual reports the actual count on failure.
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for \'{lang}\' not extracted')
ae84f879 185
b4bcffef 186
@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Vimeo extractor."""

    IE = VimeoIE
    url = 'http://vimeo.com/76979871'

    def test_allsubtitles(self):
        """Exactly de/en/es/fr are returned; en and fr content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'de', 'en', 'es', 'fr'})
        pinned = (
            ('en', '386cbc9320b94e25cb364b97935e5dd1'),
            ('fr', 'c9b69eef35bc6641c0d4da8a04f9dfac'),
        )
        for lang, digest in pinned:
            self.assertEqual(md5(subs[lang]), digest)

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/68093876'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
4edff4cf 207
4edff4cf 208
@is_download_test
@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Walla extractor (currently skipped)."""

    IE = WallaIE
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'

    def test_allsubtitles(self):
        """Only a 'heb' track is returned and its content is pinned."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'heb'})
        self.assertEqual(md5(subs['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
7bc8780c
S
230
231
@is_download_test
@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Ceska Televize extractor (currently skipped)."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        """Only a 'cs' track is returned and it is longer than 20000 characters."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        # assertGreater reports the actual length on failure.
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
0b54a5b1
S
253
254
@is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Lynda extractor (currently skipped)."""

    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        """Only an 'en' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '09bbe67222259bed60deaa26997d73a7')
267
268
@is_download_test
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the NPO extractor (currently skipped)."""

    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        """Only an 'nl' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'nl'})
        self.assertEqual(md5(subs['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
281
282
@is_download_test
@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Comedy Central extractor (currently skipped)."""

    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'

    def getInfoDict(self):
        """Return the first entry of the extraction result's 'entries' list."""
        full_result = super().getInfoDict()
        entries = full_result['entries']
        return entries[0]

    def test_allsubtitles(self):
        """Only an 'en' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '78206b8d8a0cfa9da64dc026eea48961')
0af25f78
JMF
298
299
@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the NRK TV extractor."""

    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        """Only an 'nb-ttv' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'nb-ttv'})
        self.assertEqual(md5(subs['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
01561da1
JMF
311
312
@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the RaiPlay extractor."""

    IE = RaiPlayIE

    def _assert_only_italian(self, url, digest):
        """Extract all subtitles for ``url``; expect a single 'it' track matching ``digest``."""
        self.url = url
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'it'})
        self.assertEqual(md5(subs['it']), digest)

    def test_subtitles_key(self):
        self._assert_only_italian(
            'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
            'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        self._assert_only_italian(
            'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
            '4b3264186fbb103508abe5311cfcb9cd')
332
afbdd3ac 333
@is_download_test
@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Viki extractor (currently skipped)."""

    IE = VikiIE
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

    def test_allsubtitles(self):
        """Only an 'en' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
346
347
@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the ThePlatform extractor."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    IE = ThePlatformIE
    url = 'theplatform:JFUjUE1_ehvq'

    def test_allsubtitles(self):
        """Only an 'en' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
361
362
@is_download_test
@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the ThePlatform feed extractor (currently skipped)."""

    IE = ThePlatformFeedIE
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'

    def test_allsubtitles(self):
        """Only an 'en' track is returned and its content is pinned."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '48649a22e82b2da21c9a67a395eedade')
375
376
@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the RTVE A la carta extractor."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Marked as skipped instead of printing and returning early, so the test
    # is reported as skipped (not passed) and its body is not dead code.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        """Only an 'es' track is returned and its content is pinned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
390
391
@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the Democracy Now! extractor."""

    IE = DemocracynowIE
    url = 'http://www.democracynow.org/shows/2015/7/3'

    def _assert_only_english(self):
        """Extract all subtitles for ``self.url``; expect a single pinned 'en' track."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')

    def test_allsubtitles(self):
        self._assert_only_english()

    def test_subtitles_in_page(self):
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        self._assert_only_english()
66d041f2
YCH
411
412
@is_download_test
class TestPBSSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for the PBS extractor, per subtitle format."""

    IE = PBSIE
    url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'

    def _subtitles_in_format(self, fmt):
        """Request subtitles with 'subtitlesformat' set to ``fmt`` and return them."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = fmt
        return self.getSubtitles()

    def test_allsubtitles(self):
        """Only an 'en' track is returned when all subtitles are requested."""
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})

    def test_subtitles_dfxp_format(self):
        subs = self._subtitles_in_format('dfxp')
        self.assertIn(md5(subs['en']), ['643b034254cdc3768ff1e750b6b5873b'])

    def test_subtitles_vtt_format(self):
        subs = self._subtitles_in_format('vtt')
        accepted = ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_srt_format(self):
        subs = self._subtitles_in_format('srt')
        self.assertIn(md5(subs['en']), ['2082c21b43759d9bf172931b2f2ca371'])

    def test_subtitles_sami_format(self):
        subs = self._subtitles_in_format('sami')
        self.assertIn(md5(subs['en']), ['4256b16ac7da6a6780fafd04294e85cd'])
448
449
ae84f879
JMF
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()