]> jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
[cleanup] Misc
[yt-dlp.git] / test / test_subtitles.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
ae84f879
JMF
2# Allow direct execution
3import os
4import sys
5import unittest
ae84f879 6
f8271158 7sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ae84f879 8
f8271158 9from test.helper import FakeYDL, is_download_test, md5
ae84f879 10
7a5c1cfe 11from yt_dlp.extractor import (
b9b42f2e 12 NPOIE,
f8271158 13 NRKTVIE,
a7e999be 14 PBSIE,
f8271158 15 CeskaTelevizeIE,
0af25f78 16 ComedyCentralIE,
f8271158 17 DailymotionIE,
18 DemocracynowIE,
19 LyndaIE,
c4d6fc6d 20 RaiPlayIE,
25ac63ed 21 RTVEALaCartaIE,
f8271158 22 TedTalkIE,
23 ThePlatformFeedIE,
24 ThePlatformIE,
25 VikiIE,
26 VimeoIE,
27 WallaIE,
28 YoutubeIE,
ae84f879
JMF
29)
30
31
@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``IE`` to the extractor class under test and ``url`` to the
    address to extract, either as a class attribute or inside a test method.
    """

    url = None
    IE = None

    def setUp(self):
        """Create a fresh ``FakeYDL`` and register one instance of ``IE`` on it."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the info dict extracted for ``self.url`` with ``download=False``."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` mapping for ``self.url``.

        The ``requested_subtitles`` value of the info dict is used.  When it
        is falsy it is returned unchanged.  Otherwise every entry whose
        ``data`` is missing is fetched from its ``url`` and decoded first.
        """
        subtitles = self.getInfoDict()['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                response = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = response.read().decode()
                finally:
                    # Release the response even when reading or decoding fails
                    response.close()
        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
ae84f879
JMF
56
57
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption tests run through ``YoutubeIE``."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
        self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
        for lang in ['fr', 'de']:
            # assertIsNotNone names the failing condition instead of "False is not true"
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_youtube_subtitles_ttml_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'ttml'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_no_automatic_captions(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writeautomaticsub'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'i0ZabxXmH4Y'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 113
ae84f879 114
@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``DailymotionIE``."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # Dedicated comparison assertion reports the actual count on failure
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 137
ae84f879 138
@is_download_test
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``TedTalkIE``."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # Dedicated comparison assertion reports the actual count on failure
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)
ae84f879 153
b4bcffef 154
@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``VimeoIE``."""

    IE = VimeoIE
    url = 'http://vimeo.com/76979871'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'de', 'en', 'es', 'fr'})
        # Checked in the same order as before: English first, then French
        expected_digests = (
            ('en', '8062383cf4dec168fc40a088aa6d5888'),
            ('fr', 'b6191146a6c5d3a452244d853fde6dc8'),
        )
        for lang, digest in expected_digests:
            self.assertEqual(md5(subs[lang]), digest)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        result = self.getSubtitles()
        self.assertFalse(result)
4edff4cf 175
4edff4cf 176
@is_download_test
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``WallaIE``."""

    IE = WallaIE
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'heb'})
        heb_digest = md5(subs['heb'])
        self.assertEqual(heb_digest, 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        result = self.getSubtitles()
        self.assertFalse(result)
7bc8780c
S
197
198
@is_download_test
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``CeskaTelevizeIE``."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        # Dedicated comparison assertion reports the actual length on failure
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
0b54a5b1
S
219
220
@is_download_test
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``LyndaIE``."""

    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, '09bbe67222259bed60deaa26997d73a7')
232
233
@is_download_test
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``NPOIE``."""

    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'nl'})
        nl_digest = md5(subs['nl'])
        self.assertEqual(nl_digest, 'fc6435027572b63fb4ab143abd5ad3f4')
245
246
@is_download_test
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``ComedyCentralIE``."""

    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'

    def getInfoDict(self):
        """Return the first item of the ``entries`` list of the extracted result."""
        result = super().getInfoDict()
        first_entry = result['entries'][0]
        return first_entry

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, '78206b8d8a0cfa9da64dc026eea48961')
0af25f78
JMF
261
262
@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``NRKTVIE``."""

    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'no'})
        no_digest = md5(subs['no'])
        self.assertEqual(no_digest, '544fa917d3197fcbee64634559221cc2')
01561da1
JMF
274
275
@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``RaiPlayIE``; each test sets its own URL."""

    IE = RaiPlayIE

    def test_subtitles_key(self):
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'it'})
        it_digest = md5(subs['it'])
        self.assertEqual(it_digest, 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'it'})
        it_digest = md5(subs['it'])
        self.assertEqual(it_digest, '4b3264186fbb103508abe5311cfcb9cd')
295
afbdd3ac 296
@is_download_test
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``VikiIE``."""

    IE = VikiIE
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
308
309
@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``ThePlatformIE``."""

    IE = ThePlatformIE
    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, '97e7670cbae3c4d26ae8bcc7fdd78d4b')
323
324
@is_download_test
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``ThePlatformFeedIE``."""

    IE = ThePlatformFeedIE
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, '48649a22e82b2da21c9a67a395eedade')
336
337
@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``RTVEALaCartaIE``."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Reported as skipped instead of printing a note and silently passing;
    # the assertions below are no longer unreachable code after a bare return.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
351
352
@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``DemocracynowIE``."""

    IE = DemocracynowIE
    url = 'http://www.democracynow.org/shows/2015/7/3'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, 'acaca989e24a9e45a6719c9b3d60815c')

    def test_subtitles_in_page(self):
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})
        en_digest = md5(subs['en'])
        self.assertEqual(en_digest, 'acaca989e24a9e45a6719c9b3d60815c')
372
373
@is_download_test
class TestPBSSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ``PBSIE``, one per requested subtitle format."""

    IE = PBSIE
    url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'

    def test_allsubtitles(self):
        # Turn on both subtitle options before extracting
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        subs = self.getSubtitles()
        found_langs = set(subs.keys())
        self.assertEqual(found_langs, {'en'})

    def test_subtitles_dfxp_format(self):
        self.DL.params.update(writesubtitles=True, subtitlesformat='dfxp')
        subs = self.getSubtitles()
        accepted = ['643b034254cdc3768ff1e750b6b5873b']
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_vtt_format(self):
        self.DL.params.update(writesubtitles=True, subtitlesformat='vtt')
        subs = self.getSubtitles()
        # Either of these two digests is accepted
        accepted = [
            '937a05711555b165d4c55a9667017045',
            'f49ea998d6824d94959c8152a368ff73',
        ]
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_srt_format(self):
        self.DL.params.update(writesubtitles=True, subtitlesformat='srt')
        subs = self.getSubtitles()
        accepted = ['2082c21b43759d9bf172931b2f2ca371']
        self.assertIn(md5(subs['en']), accepted)

    def test_subtitles_sami_format(self):
        self.DL.params.update(writesubtitles=True, subtitlesformat='sami')
        subs = self.getSubtitles()
        accepted = ['4256b16ac7da6a6780fafd04294e85cd']
        self.assertIn(md5(subs['en']), accepted)
409
410
ae84f879
JMF
# Run the test suite when this module is executed directly as a script
if __name__ == "__main__":
    unittest.main()