]> jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
Release 2024.04.09
[yt-dlp.git] / test / test_subtitles.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
54007a45 2
ae84f879
JMF
3# Allow direct execution
4import os
5import sys
6import unittest
ae84f879 7
f8271158 8sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ae84f879
JMF
9
10
ac668111 11from test.helper import FakeYDL, is_download_test, md5
7a5c1cfe 12from yt_dlp.extractor import (
b9b42f2e 13 NPOIE,
f8271158 14 NRKTVIE,
a7e999be 15 PBSIE,
f8271158 16 CeskaTelevizeIE,
0af25f78 17 ComedyCentralIE,
f8271158 18 DailymotionIE,
19 DemocracynowIE,
20 LyndaIE,
c4d6fc6d 21 RaiPlayIE,
25ac63ed 22 RTVEALaCartaIE,
f8271158 23 TedTalkIE,
24 ThePlatformFeedIE,
25 ThePlatformIE,
26 VikiIE,
27 VimeoIE,
28 WallaIE,
29 YoutubeIE,
ae84f879
JMF
30)
31
32
@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``IE`` to the extractor class under test and ``url`` to
    the video to query (individual tests may reassign ``self.url``).
    """

    url = None
    IE = None

    def setUp(self):
        """Create a FakeYDL with the extractor registered; skip if the IE is not working."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
            print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
            self.skipTest('IE marked as not _WORKING')

    def getInfoDict(self):
        """Return the info dict extracted for ``self.url`` without downloading."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return ``{language: subtitle text}`` for the requested subtitles.

        If the info dict's ``requested_subtitles`` value is falsy it is
        returned unchanged. Entries without inline ``data`` are fetched from
        their ``url`` and the decoded text is stored back on the entry.
        """
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                response = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = response.read().decode()
                finally:
                    # Release the connection even if reading or decoding fails.
                    response.close()
        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
ae84f879
JMF
60
61
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption extraction checks for YoutubeIE."""

    # Available subtitles for QRS8MkLhQmM:
    # Language formats
    # ru       vtt, ttml, srv3, srv2, srv1, json3
    # fr       vtt, ttml, srv3, srv2, srv1, json3
    # en       vtt, ttml, srv3, srv2, srv1, json3
    # nl       vtt, ttml, srv3, srv2, srv1, json3
    # de       vtt, ttml, srv3, srv2, srv1, json3
    # ko       vtt, ttml, srv3, srv2, srv1, json3
    # it       vtt, ttml, srv3, srv2, srv1, json3
    # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3
    # hi       vtt, ttml, srv3, srv2, srv1, json3
    # pt-BR    vtt, ttml, srv3, srv2, srv1, json3
    # es-MX    vtt, ttml, srv3, srv2, srv1, json3
    # ja       vtt, ttml, srv3, srv2, srv1, json3
    # pl       vtt, ttml, srv3, srv2, srv1, json3
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        """All 13 tracks are returned; 'en' and 'it' match the recorded digests."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
        self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
        for lang in ['fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f"Subtitles for '{lang}' not extracted")

    def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
        """Request subtitles in format *fmt* and compare the *lang* track's md5 to *md5_hash*."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = fmt
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles[lang]), md5_hash)

    def test_youtube_subtitles_ttml_format(self):
        self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_subtitles_json3_format(self):
        self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')

    def _test_automatic_captions(self, url, lang):
        """Request automatic captions for *url* in *lang* and check that a track is returned."""
        self.url = url
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = [lang]
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles[lang])

    def test_youtube_automatic_captions(self):
        # Available automatic captions for 8YoUxe5ncPo:
        # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
        # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
        # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
        # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
        # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
        # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
        # mt, ms, mr, ug, ta, my, af, sw, is, am,
        # *it*, iw, sv, ar,
        # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
        # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
        # ky, sd
        # ...
        self._test_automatic_captions('8YoUxe5ncPo', 'it')

    @unittest.skip('Video unavailable')
    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated (#4555)
        self._test_automatic_captions('Ky9eprVWzlI', 'it')

    def test_youtube_nosubtitles(self):
        """A video without subtitles yields a falsy result and the expected warning."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        # Available automatic captions for 8YoUxe5ncPo:
        # ...
        # 8YoUxe5ncPo has no subtitles
        self.url = '8YoUxe5ncPo'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 145
ae84f879 146
@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for DailymotionIE."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        """At least six tracks are returned; 'en' and 'fr' match the recorded digests."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # Specific assert methods report the offending value on failure.
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f"Subtitles for '{lang}' not extracted")

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result and the expected warning."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 169
ae84f879 170
@is_download_test
@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for TedTalkIE (currently skipped: IE broken)."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE

    def test_allsubtitles(self):
        """At least 28 tracks are returned; 'en' and 'fr' match the recorded digests."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # Specific assert methods report the offending value on failure.
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), f"Subtitles for '{lang}' not extracted")
ae84f879 186
b4bcffef 187
@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for VimeoIE."""

    IE = VimeoIE
    url = 'http://vimeo.com/76979871'

    def test_allsubtitles(self):
        """Exactly de/en/es/fr are returned; 'en' and 'fr' match the recorded digests."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'de', 'en', 'es', 'fr'})
        self.assertEqual(md5(subs['en']), '386cbc9320b94e25cb364b97935e5dd1')
        self.assertEqual(md5(subs['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result and the expected warning."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/68093876'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
4edff4cf 208
4edff4cf 209
@is_download_test
@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for WallaIE (currently skipped: IE broken)."""

    IE = WallaIE
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'

    def test_allsubtitles(self):
        """Only the 'heb' track is returned and it matches the recorded digest."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'heb'})
        self.assertEqual(md5(subs['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result and the expected warning."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
7bc8780c
S
231
232
@is_download_test
@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for CeskaTelevizeIE (currently skipped: IE broken)."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        """Only the 'cs' track is returned and it is longer than 20000 characters."""
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        # assertGreater reports the actual length on failure, unlike assertTrue.
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        """A video without subtitles yields a falsy result and the expected warning."""
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
0b54a5b1
S
254
255
@is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for LyndaIE (currently skipped: IE broken)."""

    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '09bbe67222259bed60deaa26997d73a7')
268
269
@is_download_test
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for NPOIE (currently skipped: IE broken)."""

    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        """Only the 'nl' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'nl'})
        self.assertEqual(md5(subs['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
282
283
@is_download_test
@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ComedyCentralIE (currently skipped: IE broken)."""

    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'

    def getInfoDict(self):
        """Return the first item of the base result's ``entries``."""
        result = super().getInfoDict()
        return result['entries'][0]

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '78206b8d8a0cfa9da64dc026eea48961')
0af25f78
JMF
299
300
@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for NRKTVIE."""

    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        """Only the 'nb-ttv' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'nb-ttv'})
        self.assertEqual(md5(subs['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
01561da1
JMF
312
313
@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for RaiPlayIE; each test sets its own URL."""

    IE = RaiPlayIE

    def test_subtitles_key(self):
        """Only the 'it' track is returned and it matches the recorded digest."""
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'it'})
        self.assertEqual(md5(subs['it']), 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        """Only the 'it' track is returned and it matches the recorded digest."""
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'it'})
        self.assertEqual(md5(subs['it']), '4b3264186fbb103508abe5311cfcb9cd')
333
afbdd3ac 334
@is_download_test
@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for VikiIE (currently skipped: IE broken - DRM only)."""

    IE = VikiIE
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
347
348
@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ThePlatformIE."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    IE = ThePlatformIE
    url = 'theplatform:JFUjUE1_ehvq'

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
362
363
@is_download_test
@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ThePlatformFeedIE (currently skipped: IE broken)."""

    IE = ThePlatformFeedIE
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), '48649a22e82b2da21c9a67a395eedade')
376
377
@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for RTVEALaCartaIE."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Previously the test printed a message and returned early, so it was
    # reported as passed and the assertions below were unreachable. Marking it
    # skipped makes the result honest and keeps the body ready to re-enable.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        """Only the 'es' track is returned and it matches the recorded digest."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
391
392
@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for DemocracynowIE."""

    IE = DemocracynowIE
    url = 'http://www.democracynow.org/shows/2015/7/3'

    def test_allsubtitles(self):
        """Only the 'en' track is returned and it matches the recorded digest."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')

    def test_subtitles_in_page(self):
        """Same expectations as test_allsubtitles, for a different page URL."""
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})
        self.assertEqual(md5(subs['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
66d041f2
YCH
412
413
@is_download_test
class TestPBSSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for PBSIE across several subtitle formats."""

    IE = PBSIE
    url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'

    def _english_digest(self, fmt):
        """Request subtitles in format *fmt* and return md5 of the 'en' track."""
        self.DL.params.update({'writesubtitles': True, 'subtitlesformat': fmt})
        subs = self.getSubtitles()
        return md5(subs['en'])

    def test_allsubtitles(self):
        """Only the 'en' track is returned."""
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        self.assertEqual(set(subs), {'en'})

    def test_subtitles_dfxp_format(self):
        self.assertIn(self._english_digest('dfxp'), ['643b034254cdc3768ff1e750b6b5873b'])

    def test_subtitles_vtt_format(self):
        # Two digests are accepted for the vtt track.
        accepted = ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']
        self.assertIn(self._english_digest('vtt'), accepted)

    def test_subtitles_srt_format(self):
        self.assertIn(self._english_digest('srt'), ['2082c21b43759d9bf172931b2f2ca371'])

    def test_subtitles_sami_format(self):
        self.assertIn(self._english_digest('sami'), ['4256b16ac7da6a6780fafd04294e85cd'])
449
450
ae84f879
JMF
451if __name__ == '__main__':
452 unittest.main()