]> jfr.im git - yt-dlp.git/blame - test/test_subtitles.py
[instagram:user] Fix extraction (closes #16119)
[yt-dlp.git] / test / test_subtitles.py
CommitLineData
ae84f879 1#!/usr/bin/env python
a0f59cdc 2from __future__ import unicode_literals
ae84f879
JMF
3
4# Allow direct execution
5import os
6import sys
7import unittest
8sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
d0efb9ec 10from test.helper import FakeYDL, md5
ae84f879
JMF
11
12
13from youtube_dl.extractor import (
14 YoutubeIE,
15 DailymotionIE,
16 TEDIE,
4edff4cf 17 VimeoIE,
7bc8780c 18 WallaIE,
0b54a5b1 19 CeskaTelevizeIE,
311c3938 20 LyndaIE,
b9b42f2e 21 NPOIE,
0af25f78 22 ComedyCentralIE,
01561da1 23 NRKTVIE,
c4d6fc6d 24 RaiPlayIE,
4f7cea6c 25 VikiIE,
8807f127 26 ThePlatformIE,
f908b74f 27 ThePlatformFeedIE,
25ac63ed 28 RTVEALaCartaIE,
7e195d0e 29 FunnyOrDieIE,
66d041f2 30 DemocracynowIE,
ae84f879
JMF
31)
32
33
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle test cases.

    Subclasses set ``url`` to the video to extract and ``IE`` to the
    extractor class that is registered on the downloader in ``setUp``.
    """
    url = None
    IE = None

    def setUp(self):
        """Create a fresh FakeYDL and register an instance of ``IE`` on it."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the result of extracting ``self.url`` with ``download=False``."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` dict for the requested subtitles.

        When ``requested_subtitles`` is falsy it is returned unchanged.
        Tracks that carry no ``data`` are fetched from their ``url`` through
        the downloader and decoded as UTF-8; the fetched text is also stored
        back on the track under ``data``.
        """
        requested = self.getInfoDict()['requested_subtitles']
        if not requested:
            return requested
        contents = {}
        for language, track in requested.items():
            if track.get('data') is None:
                response = self.DL.urlopen(track['url'])
                track['data'] = response.read().decode('utf-8')
            contents[language] = track['data']
        return contents
ae84f879
JMF
57
58
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption checks for YoutubeIE."""
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        # Request every track; verify the track count, two checksums and
        # the presence of two further languages.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertEqual(len(tracks.keys()), 13)
        self.assertEqual(md5(tracks['en']), '3cb210999d3e021bd6c7f0ea751eab06')
        self.assertEqual(md5(tracks['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
        for code in ('fr', 'de'):
            found = tracks.get(code) is not None
            self.assertTrue(found, 'Subtitles for \'%s\' not extracted' % code)

    def test_youtube_subtitles_ttml_format(self):
        # Same video, subtitles requested in the 'ttml' format.
        options = self.DL.params
        options['writesubtitles'] = True
        options['subtitlesformat'] = 'ttml'
        tracks = self.getSubtitles()
        self.assertEqual(md5(tracks['en']), 'e306f8c42842f723447d9f63ad65df54')

    def test_youtube_subtitles_vtt_format(self):
        # Same video, subtitles requested in the 'vtt' format.
        options = self.DL.params
        options['writesubtitles'] = True
        options['subtitlesformat'] = 'vtt'
        tracks = self.getSubtitles()
        self.assertEqual(md5(tracks['en']), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_automatic_captions(self):
        # Automatic captions restricted to Italian on a different video.
        self.url = '8YoUxe5ncPo'
        options = self.DL.params
        options['writeautomaticsub'] = True
        options['subtitleslangs'] = ['it']
        tracks = self.getSubtitles()
        self.assertTrue(tracks['it'] is not None)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'Ky9eprVWzlI'
        options = self.DL.params
        options['writeautomaticsub'] = True
        options['subtitleslangs'] = ['it']
        tracks = self.getSubtitles()
        self.assertTrue(tracks['it'] is not None)

    def test_youtube_nosubtitles(self):
        # A video without subtitles must yield a falsy result; the
        # corresponding warning is registered as expected beforehand.
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertFalse(tracks)
ae84f879 107
ae84f879
JMF
108
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for DailymotionIE."""
    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        # Request every track; expect at least six languages, two known
        # checksums and three specific languages to be present.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertTrue(len(tracks.keys()) >= 6)
        self.assertEqual(md5(tracks['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(tracks['fr']), '594564ec7d588942e384e920e5341792')
        for code in ('es', 'fr', 'de'):
            found = tracks.get(code) is not None
            self.assertTrue(found, 'Subtitles for \'%s\' not extracted' % code)

    def test_nosubtitles(self):
        # A video without subtitles must yield a falsy result.
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertFalse(tracks)
ae84f879 130
ae84f879
JMF
131
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for TEDIE."""
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_allsubtitles(self):
        # Request every track; expect at least 28 languages, two known
        # checksums and three specific languages to be present.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertTrue(len(tracks.keys()) >= 28)
        self.assertEqual(md5(tracks['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(tracks['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for code in ('es', 'fr', 'de'):
            found = tracks.get(code) is not None
            self.assertTrue(found, 'Subtitles for \'%s\' not extracted' % code)
ae84f879 145
b4bcffef 146
4edff4cf
S
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for VimeoIE."""
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        # Request every track; verify the exact language set and two checksums.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['de', 'en', 'es', 'fr']))
        self.assertEqual(md5(tracks['en']), '8062383cf4dec168fc40a088aa6d5888')
        self.assertEqual(md5(tracks['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_nosubtitles(self):
        # A video without subtitles must yield a falsy result.
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertFalse(tracks)
4edff4cf 166
4edff4cf 167
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for WallaIE."""
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        # Request every track; only a 'heb' track with a known checksum
        # is expected.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['heb']))
        self.assertEqual(md5(tracks['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        # A video without subtitles must yield a falsy result.
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertFalse(tracks)
7bc8780c
S
187
188
0b54a5b1
S
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for CeskaTelevizeIE."""
    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        # Request every track; only a 'cs' track is expected and its text
        # must be longer than 20000 characters.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['cs']))
        self.assertTrue(len(tracks['cs']) > 20000)

    def test_nosubtitles(self):
        # A video without subtitles must yield a falsy result.
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertFalse(tracks)
0b54a5b1
S
208
209
311c3938
JMF
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for LyndaIE."""
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        # Request every track; only an 'en' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), '09bbe67222259bed60deaa26997d73a7')
220
221
b9b42f2e
JMF
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for NPOIE."""
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        # Request every track; only an 'nl' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['nl']))
        self.assertEqual(md5(tracks['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
232
233
0af25f78
JMF
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ComedyCentralIE."""
    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first item of the ``entries`` list in the base result."""
        extracted = super(TestMTVSubtitles, self).getInfoDict()
        return extracted['entries'][0]

    def test_allsubtitles(self):
        # Request every track; only an 'en' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), 'b9f6ca22a6acf597ec76f61749765e65')
247
248
01561da1
JMF
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for NRKTVIE."""
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        # Request every track; only a 'no' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['no']))
        self.assertEqual(md5(tracks['no']), '544fa917d3197fcbee64634559221cc2')
01561da1
JMF
259
260
c4d6fc6d
S
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for RaiPlayIE."""
    url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
    IE = RaiPlayIE

    def test_allsubtitles(self):
        # Request every track; only an 'it' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['it']))
        self.assertEqual(md5(tracks['it']), 'b1d90a98755126b61e667567a1f6680a')
271
272
4f7cea6c
JMF
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for VikiIE."""
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        # Request every track; only an 'en' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), '53cb083a5914b2d84ef1ab67b880d18a')
4f7cea6c
JMF
283
284
8807f127
JMF
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ThePlatformIE."""
    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        # Request every track; only an 'en' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
297
298
f908b74f
YCH
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for ThePlatformFeedIE."""
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE

    def test_allsubtitles(self):
        # Request every track; only an 'en' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), '48649a22e82b2da21c9a67a395eedade')
309
310
25ac63ed
JMF
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for RTVEALaCartaIE."""
    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Marked as skipped rather than printing and returning early: the
    # early return left every assertion below it unreachable and made the
    # runner report the test as passed instead of skipped.
    @unittest.skip('Only available from Spain')
    def test_allsubtitles(self):
        # Request every track; only an 'es' track with a known checksum
        # is expected.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['es']))
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
323
324
7e195d0e
S
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for FunnyOrDieIE."""
    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
    IE = FunnyOrDieIE

    def test_allsubtitles(self):
        # Request every track; only an 'en' track with a known checksum
        # is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
335
336
66d041f2
YCH
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for DemocracynowIE."""
    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def test_allsubtitles(self):
        # Request every track for the default URL; only an 'en' track with
        # a known checksum is expected.
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), 'acaca989e24a9e45a6719c9b3d60815c')

    def test_subtitles_in_page(self):
        # Same expectations as above, but for a second URL.
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        options = self.DL.params
        options['writesubtitles'] = True
        options['allsubtitles'] = True
        tracks = self.getSubtitles()
        languages = set(tracks.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(tracks['en']), 'acaca989e24a9e45a6719c9b3d60815c')
355
356
ae84f879
JMF
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()