jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
[test] Use `pytest` instead of `nosetests` (#482)
[yt-dlp.git] / test / test_subtitles.py
Commit | Line | Data
cc52de43 1#!/usr/bin/env python3
a0f59cdc 2from __future__ import unicode_literals
ae84f879
JMF
3
4# Allow direct execution
5import os
6import sys
7import unittest
8sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
060ac762 10from test.helper import FakeYDL, md5, is_download_test
ae84f879
JMF
11
12
7a5c1cfe 13from yt_dlp.extractor import (
ae84f879
JMF
14 YoutubeIE,
15 DailymotionIE,
16 TEDIE,
4edff4cf 17 VimeoIE,
7bc8780c 18 WallaIE,
0b54a5b1 19 CeskaTelevizeIE,
311c3938 20 LyndaIE,
b9b42f2e 21 NPOIE,
0af25f78 22 ComedyCentralIE,
01561da1 23 NRKTVIE,
c4d6fc6d 24 RaiPlayIE,
4f7cea6c 25 VikiIE,
8807f127 26 ThePlatformIE,
f908b74f 27 ThePlatformFeedIE,
25ac63ed 28 RTVEALaCartaIE,
66d041f2 29 DemocracynowIE,
ae84f879
JMF
30)
31
32
@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle tests.

    Subclasses set ``url`` (the video to query) and ``IE`` (the extractor
    class to register), then call :meth:`getSubtitles`.
    """

    url = None
    IE = None

    def setUp(self):
        """Create a fresh ``FakeYDL`` and register an instance of ``IE`` on it."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the info dict extracted for ``self.url`` without downloading."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return the requested subtitles as a ``{key: text}`` mapping.

        When ``requested_subtitles`` is falsy it is returned unchanged.
        Entries that carry no inline ``data`` are fetched from their ``url``
        and decoded as UTF-8; the fetched text is also stored back on the
        entry under ``data``.
        """
        subtitles = self.getInfoDict()['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                response = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = response.read().decode('utf-8')
                finally:
                    # Release the connection even if reading/decoding fails.
                    response.close()
        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
ae84f879
JMF
57
58
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption extraction tests for ``YoutubeIE``."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)
        # Content of the English and Italian tracks is pinned by digest.
        self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
        self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
        for lang in ['fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_youtube_subtitles_ttml_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'ttml'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_no_automatic_captions(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writeautomaticsub'] = True
        subtitles = self.getSubtitles()
        # assertFalse matches the other "no subtitles" tests in this module.
        self.assertFalse(subtitles)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'i0ZabxXmH4Y'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertIsNotNone(subtitles['it'])

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 114
ae84f879 115
@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for ``DailymotionIE``."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual reports the actual count when the check fails.
        self.assertGreaterEqual(len(subtitles), 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
ae84f879 138
ae84f879 139
@is_download_test
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ``TEDIE``."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertGreaterEqual reports the actual count when the check fails.
        self.assertGreaterEqual(len(subtitles), 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertIsNotNone(subtitles.get(lang), 'Subtitles for \'%s\' not extracted' % lang)
ae84f879 154
b4bcffef 155
@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle checks run against ``VimeoIE``."""

    IE = VimeoIE
    url = 'http://vimeo.com/76979871'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # Exactly these four tracks are expected; en/fr content is pinned by digest.
        self.assertEqual(set(tracks.keys()), {'de', 'en', 'es', 'fr'})
        self.assertEqual(md5(tracks['en']), '8062383cf4dec168fc40a088aa6d5888')
        self.assertEqual(md5(tracks['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        self.assertFalse(self.getSubtitles())
4edff4cf 176
4edff4cf 177
@is_download_test
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle checks run against ``WallaIE``."""

    IE = WallaIE
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'heb' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'heb'})
        self.assertEqual(md5(tracks['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        self.assertFalse(self.getSubtitles())
7bc8780c
S
198
199
@is_download_test
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for ``CeskaTelevizeIE``."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        # Only the size is checked here; assertGreater reports the actual
        # length when the check fails.
        self.assertGreater(len(subtitles['cs']), 20000)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)
0b54a5b1
S
220
221
@is_download_test
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``LyndaIE``."""

    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'en' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'en'})
        self.assertEqual(md5(tracks['en']), '09bbe67222259bed60deaa26997d73a7')
233
234
@is_download_test
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``NPOIE``."""

    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'nl' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'nl'})
        self.assertEqual(md5(tracks['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
246
247
@is_download_test
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``ComedyCentralIE``."""

    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'

    def getInfoDict(self):
        """Return the first element of the ``entries`` list in the extracted result."""
        extracted = super(TestMTVSubtitles, self).getInfoDict()
        return extracted['entries'][0]

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'en' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'en'})
        self.assertEqual(md5(tracks['en']), '78206b8d8a0cfa9da64dc026eea48961')
0af25f78
JMF
262
263
@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``NRKTVIE``."""

    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'no' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'no'})
        self.assertEqual(md5(tracks['no']), '544fa917d3197fcbee64634559221cc2')
01561da1
JMF
275
276
@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle checks run against ``RaiPlayIE``; each test sets its own URL."""

    IE = RaiPlayIE

    def test_subtitles_key(self):
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'it' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'it'})
        self.assertEqual(md5(tracks['it']), 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'it' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'it'})
        self.assertEqual(md5(tracks['it']), '4b3264186fbb103508abe5311cfcb9cd')
296
afbdd3ac 297
@is_download_test
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``VikiIE``."""

    IE = VikiIE
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'en' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'en'})
        self.assertEqual(md5(tracks['en']), '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
309
310
@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``ThePlatformIE``."""

    IE = ThePlatformIE
    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'en' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'en'})
        self.assertEqual(md5(tracks['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
324
325
@is_download_test
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle check run against ``ThePlatformFeedIE``."""

    IE = ThePlatformFeedIE
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'

    def test_allsubtitles(self):
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        # A single 'en' track is expected, with pinned content digest.
        self.assertEqual(set(tracks.keys()), {'en'})
        self.assertEqual(md5(tracks['en']), '48649a22e82b2da21c9a67a395eedade')
337
338
@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for ``RTVEALaCartaIE`` (currently skipped)."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Previously this test printed a message and returned early, so it was
    # reported as *passed* and the assertions below were unreachable. The skip
    # decorator reports it as skipped and keeps the body ready to re-enable.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
352
353
@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle checks run against ``DemocracynowIE``."""

    IE = DemocracynowIE
    url = 'http://www.democracynow.org/shows/2015/7/3'

    def _assert_single_english_track(self):
        # Both tests expect the same outcome: one 'en' track with this digest.
        self.DL.params.update(writesubtitles=True, allsubtitles=True)
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), {'en'})
        self.assertEqual(md5(tracks['en']), 'acaca989e24a9e45a6719c9b3d60815c')

    def test_allsubtitles(self):
        self._assert_single_english_track()

    def test_subtitles_in_page(self):
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        self._assert_single_english_track()
373
374
ae84f879
JMF
# Run the test cases when this file is executed directly.
if __name__ == '__main__':
    unittest.main()