From: Ismaël Mejía Date: Sat, 2 Nov 2013 18:50:45 +0000 (+0100) Subject: Merge branch 'ted_subtitles' X-Git-Tag: 2021.01.07~12885^2~2 X-Git-Url: https://jfr.im/git/yt-dlp.git/commitdiff_plain/38db46794f3ccfef09094db9b411e55acd4c1a3d?hp=a9a3876d55be943a7eaf505cbeb8fb862514db6c Merge branch 'ted_subtitles' --- diff --git a/Makefile b/Makefile index 85dacfa4c..c6d09932b 100644 --- a/Makefile +++ b/Makefile @@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local ifeq ($(PREFIX),/usr) - SYSCONFDIR=/etc + SYSCONFDIR=/etc else - ifeq ($(PREFIX),/usr/local) - SYSCONFDIR=/etc - else - SYSCONFDIR=$(PREFIX)/etc - endif + ifeq ($(PREFIX),/usr/local) + SYSCONFDIR=/etc + else + SYSCONFDIR=$(PREFIX)/etc + endif endif install: youtube-dl youtube-dl.1 youtube-dl.bash-completion @@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*~' \ --exclude '__pycache' \ --exclude '.git' \ + --exclude 'testdata' \ -- \ bin devscripts test youtube_dl \ CHANGELOG LICENSE README.md README.txt \ diff --git a/README.md b/README.md index 8824daee2..a2b296613 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ # OPTIONS sudo if needed) -i, --ignore-errors continue on download errors, for example to to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error occurs --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access @@ -30,7 +32,7 @@ # OPTIONS --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. - --cache-dir None Location in the filesystem where youtube-dl can + --cache-dir DIR Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache /youtube-dl . @@ -57,9 +59,10 @@ ## Video Selection: file. Record all downloaded videos in it. ## Download Options: - -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) + -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. + 50K or 4.2M) -R, --retries RETRIES number of retries (default is 10) - --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized @@ -75,7 +78,10 @@ ## Filesystem Options: %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename - extension, %(upload_date)s for the upload date + extension, %(format)s for the format description + (like "22 - 1280x720" or "HD"),%(format_id)s for + the unique id of the format (like Youtube's + itags: "137"),%(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id , %(playlist)s for the playlist the video is in, @@ -100,6 +106,7 @@ ## Filesystem Options: file modification time --write-description write video description to a .description file --write-info-json write video metadata to a .info.json file + --write-annotations write video annotations to a .annotation file --write-thumbnail write thumbnail image to disk ## Verbosity / Simulation Options: @@ -120,6 +127,8 @@ ## Verbosity / Simulation Options: -v, --verbose print various debugging information --dump-intermediate-pages print downloaded pages to debug problems(very verbose) + --write-pages Write downloaded pages to files in the current + directory ## Video Format Options: -f, --format FORMAT video format code, specifiy the order of @@ -166,6 +175,7 @@ ## Post-processing Options: processed files are overwritten by default --embed-subs embed subtitles in the video (only for mp4 videos) + --add-metadata add metadata to the files # CONFIGURATION diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in index bd10f63c2..ce893fcbe 100644 --- a/devscripts/bash-completion.in +++ b/devscripts/bash-completion.in @@ -1,4 +1,4 @@ -__youtube-dl() +__youtube_dl() { local cur prev opts COMPREPLY=() @@ -15,4 +15,4 @@ __youtube-dl() fi } -complete -F __youtube-dl youtube-dl +complete -F __youtube_dl youtube-dl diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py new file mode 100644 index 000000000..63401fe18 --- /dev/null +++ b/devscripts/check-porn.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +""" +This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check +if we are not 'age_limit' tagging some porn site +""" + +# Allow direct execution +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_testcases +from youtube_dl.utils import compat_urllib_request + +for test in get_testcases(): + try: + webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() + except: + print('\nFail: {0}'.format(test['name'])) + continue + + webpage = webpage.decode('utf8', 'replace') + + if 'porn' in webpage.lower() and ('info_dict' not in test + or 'age_limit' not in test['info_dict'] + or test['info_dict']['age_limit'] != 18): + print('\nPotential missing age_limit check: {0}'.format(test['name'])) + + elif 'porn' not in webpage.lower() and ('info_dict' in test and + 'age_limit' in test['info_dict'] and + test['info_dict']['age_limit'] == 18): + print('\nPotential false negative: {0}'.format(test['name'])) + + else: + sys.stdout.write('.') + sys.stdout.flush() + +print() diff --git a/devscripts/release.sh b/devscripts/release.sh index 796468b4b..2766174c1 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -88,10 +88,6 @@ ROOT=$(pwd) "$ROOT/devscripts/gh-pages/update-sites.py" git add *.html *.html.in update git commit -m "release $version" - git show HEAD - read -p "Is it good, can I push? (y/n) " -n 1 - if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi - echo git push "$ROOT" gh-pages git push "$ORIGIN_URL" gh-pages ) diff --git a/setup.py b/setup.py index 3b6dc2d40..aa7cfca08 100644 --- a/setup.py +++ b/setup.py @@ -8,8 +8,10 @@ try: from setuptools import setup + setuptools_available = True except ImportError: from distutils.core import setup + setuptools_available = False try: # This will create an exe that needs Microsoft Visual C++ 2008 @@ -43,13 +45,16 @@ params = py2exe_params else: params = { - 'scripts': ['bin/youtube-dl'], 'data_files': [ # Installing system-wide would require sudo... ('etc/bash_completion.d', ['youtube-dl.bash-completion']), ('share/doc/youtube_dl', ['README.txt']), ('share/man/man1/', ['youtube-dl.1']) ] } + if setuptools_available: + params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} + else: + params['scripts'] = ['bin/youtube-dl'] # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), @@ -63,6 +68,7 @@ ' YouTube.com and other video sites.', url='https://github.com/rg3/youtube-dl', author='Ricardo Garcia', + author_email='ytdl@yt-dl.org', maintainer='Philipp Hagemeister', maintainer_email='phihag@phihag.de', packages=['youtube_dl', 'youtube_dl.extractor'], diff --git a/test/helper.py b/test/helper.py index ad1b74dd3..d7bf7a828 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,22 +1,29 @@ import errno import io +import hashlib import json import os.path import re import types +import sys import youtube_dl.extractor -from youtube_dl import YoutubeDL, YoutubeDLHandler -from youtube_dl.utils import ( - compat_cookiejar, - compat_urllib_request, -) +from youtube_dl import YoutubeDL +from youtube_dl.utils import preferredencoding -youtube_dl._setup_opener(timeout=10) -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) +def global_setup(): + youtube_dl._setup_opener(timeout=10) + + +def get_params(override=None): + PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "parameters.json") + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + if override: + parameters.update(override) + return parameters def try_rm(filename): @@ -28,11 +35,26 @@ def try_rm(filename): raise +def report_warning(message): + ''' + Print the message to stderr, it will be prefixed with 'WARNING:' + If stderr is a tty file the 'WARNING:' will be colored + ''' + if sys.stderr.isatty() and os.name != 'nt': + _msg_header = u'\033[0;33mWARNING:\033[0m' + else: + _msg_header = u'WARNING:' + output = u'%s %s\n' % (_msg_header, message) + if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: + output = output.encode(preferredencoding()) + sys.stderr.write(output) + + class FakeYDL(YoutubeDL): - def __init__(self): + def __init__(self, override=None): # Different instances of the downloader can't share the same dictionary # some test set the "sublang" parameter, which would break the md5 checks. - params = dict(parameters) + params = get_params(override=override) super(FakeYDL, self).__init__(params) self.result = [] @@ -62,3 +84,6 @@ def get_testcases(): for t in getattr(ie, '_TESTS', []): t['name'] = type(ie).__name__[:-len('IE')] yield t + + +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py new file mode 100644 index 000000000..ffebb4ae5 --- /dev/null +++ b/test/test_YoutubeDL.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL + + +class YDL(FakeYDL): + def __init__(self, *args, **kwargs): + super(YDL, self).__init__(*args, **kwargs) + self.downloaded_info_dicts = [] + self.msgs = [] + + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict) + + def to_screen(self, msg): + self.msgs.append(msg) + + +class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {u'ext': u'webm', u'height': 460}, + {u'ext': u'mp4', u'height': 460}, + ] + info_dict = {u'formats': formats, u'extractor': u'test'} + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {u'ext': u'webm', u'height': 720}, + {u'ext': u'mp4', u'height': 1080}, + ] + info_dict[u'formats'] = formats + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'mp4') + + # No prefer_free_formats => keep original formats order + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {u'ext': u'webm', u'height': 720}, + {u'ext': u'flv', u'height': 720}, + ] + info_dict[u'formats'] = formats + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'flv') + + def test_format_limit(self): + formats = [ + {u'format_id': u'meh', u'url': u'http://example.com/meh'}, + {u'format_id': u'good', u'url': u'http://example.com/good'}, + {u'format_id': u'great', u'url': u'http://example.com/great'}, + {u'format_id': u'excellent', u'url': u'http://example.com/exc'}, + ] + info_dict = { + u'formats': formats, u'extractor': u'test', 'id': 'testvid'} + + ydl = YDL() + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'format_id'], u'excellent') + + ydl = YDL({'format_limit': 'good'}) + assert ydl.params['format_limit'] == 'good' + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'format_id'], u'good') + + ydl = YDL({'format_limit': 'great', 'format': 'all'}) + ydl.process_ie_result(info_dict) + self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh') + self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good') + self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great') + self.assertTrue('3' in ydl.msgs[0]) + + ydl = YDL() + ydl.params['format_limit'] = 'excellent' + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'format_id'], u'excellent') + + def test_format_selection(self): + formats = [ + {u'format_id': u'35', u'ext': u'mp4'}, + {u'format_id': u'45', u'ext': u'webm'}, + {u'format_id': u'47', u'ext': u'webm'}, + {u'format_id': u'2', u'ext': u'flv'}, + ] + info_dict = {u'formats': formats, u'extractor': u'test'} + + ydl = YDL({'format': u'20/47'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'47') + + ydl = YDL({'format': u'20/71/worst'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'35') + + ydl = YDL() + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'2') + + ydl = YDL({'format': u'webm/mp4'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'47') + + ydl = YDL({'format': u'3gp/40/mp4'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'35') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index ec3e30572..d500c6edc 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -1,14 +1,16 @@ #!/usr/bin/env python +# Allow direct execution +import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import global_setup, try_rm +global_setup() -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl import YoutubeDL -from .helper import try_rm def _download_restricted(url, filename, age): diff --git a/test/test_all_urls.py b/test/test_all_urls.py index b28ad000b..56e5f80e1 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -1,14 +1,20 @@ #!/usr/bin/env python +# Allow direct execution +import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors -from .helper import get_testcases +from test.helper import get_testcases + +from youtube_dl.extractor import ( + gen_extractors, + JustinTVIE, + YoutubeIE, +) + class TestAllURLsMatching(unittest.TestCase): def setUp(self): diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index e655d280d..ba3580ea4 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -1,18 +1,16 @@ #!/usr/bin/env python +# Allow direct execution +import os import sys import unittest -import hashlib +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import FakeYDL, global_setup, md5 +global_setup() -from youtube_dl.extractor import DailymotionIE -from youtube_dl.utils import * -from .helper import FakeYDL -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() +from youtube_dl.extractor import DailymotionIE class TestDailymotionSubtitles(unittest.TestCase): def setUp(self): @@ -24,7 +22,7 @@ def getInfoDict(self): return info_dict def getSubtitles(self): info_dict = self.getInfoDict() - return info_dict[0]['subtitles'] + return info_dict['subtitles'] def test_no_writesubtitles(self): subtitles = self.getSubtitles() self.assertEqual(subtitles, None) diff --git a/test/test_download.py b/test/test_download.py index 68da4d984..dfb04d010 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,26 +1,39 @@ #!/usr/bin/env python +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import ( + get_params, + get_testcases, + global_setup, + try_rm, + md5, + report_warning +) +global_setup() + + import hashlib import io -import os import json -import unittest -import sys import socket -import binascii - -# Allow direct execution -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import youtube_dl.YoutubeDL -from youtube_dl.utils import * - -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") +from youtube_dl.utils import ( + compat_str, + compat_urllib_error, + compat_HTTPError, + DownloadError, + ExtractorError, + UnavailableVideoError, +) RETRIES = 3 -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() - class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): self.to_stderr = self.to_screen @@ -37,18 +50,12 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -import test.helper as helper # Set up remaining global configuration -from .helper import get_testcases, try_rm defs = get_testcases() -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - class TestDownload(unittest.TestCase): maxDiff = None def setUp(self): - self.parameters = parameters self.defs = defs ### Dynamically generate tests @@ -61,15 +68,17 @@ def print_skipping(reason): if not ie._WORKING: print_skipping('IE marked as not _WORKING') return - if 'playlist' not in test_case and not test_case['file']: - print_skipping('No output file specified') - return + if 'playlist' not in test_case: + info_dict = test_case.get('info_dict', {}) + if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): + print_skipping('The output file cannot be know, the "file" ' + 'key is missing or the info_dict is incomplete') + return if 'skip' in test_case: print_skipping(test_case['skip']) return - params = self.parameters.copy() - params.update(test_case.get('params', {})) + params = get_params(test_case.get('params', {})) ydl = YoutubeDL(params) ydl.add_default_info_extractors() @@ -79,35 +88,47 @@ def _hook(status): finished_hook_called.add(status['filename']) ydl.fd.add_progress_hook(_hook) + def get_tc_filename(tc): + return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) + test_cases = test_case.get('playlist', [test_case]) - for tc in test_cases: - try_rm(tc['file']) - try_rm(tc['file'] + '.part') - try_rm(tc['file'] + '.info.json') + def try_rm_tcs_files(): + for tc in test_cases: + tc_filename = get_tc_filename(tc) + try_rm(tc_filename) + try_rm(tc_filename + '.part') + try_rm(tc_filename + '.info.json') + try_rm_tcs_files() try: - for retry in range(1, RETRIES + 1): + try_num = 1 + while True: try: ydl.download([test_case['url']]) except (DownloadError, ExtractorError) as err: - if retry == RETRIES: raise - # Check if the exception is not a network related one - if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): raise - print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) + if try_num == RETRIES: + report_warning(u'Failed due to network errors, skipping...') + return + + print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + + try_num += 1 else: break for tc in test_cases: + tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): - self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) - self.assertTrue(tc['file'] in finished_hook_called) - self.assertTrue(os.path.exists(tc['file'] + '.info.json')) + self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) + self.assertTrue(tc_filename in finished_hook_called) + self.assertTrue(os.path.exists(tc_filename + '.info.json')) if 'md5' in tc: - md5_for_file = _file_md5(tc['file']) + md5_for_file = _file_md5(tc_filename) self.assertEqual(md5_for_file, tc['md5']) - with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: + with io.open(tc_filename + '.info.json', encoding='utf-8') as infof: info_dict = json.load(infof) for (info_field, expected) in tc.get('info_dict', {}).items(): if isinstance(expected, compat_str) and expected.startswith('md5:'): @@ -128,10 +149,7 @@ def _hook(status): for key in ('id', 'url', 'title', 'ext'): self.assertTrue(key in info_dict.keys() and info_dict[key]) finally: - for tc in test_cases: - try_rm(tc['file']) - try_rm(tc['file'] + '.part') - try_rm(tc['file'] + '.info.json') + try_rm_tcs_files() return test_template diff --git a/test/test_playlists.py b/test/test_playlists.py index 108a4d63b..d6a8d56df 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -1,13 +1,16 @@ #!/usr/bin/env python # encoding: utf-8 -import sys -import unittest -import json # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, global_setup +global_setup() + from youtube_dl.extractor import ( DailymotionPlaylistIE, @@ -18,9 +21,7 @@ LivestreamIE, NHLVideocenterIE, ) -from youtube_dl.utils import * -from .helper import FakeYDL class TestPlaylists(unittest.TestCase): def assertIsPlaylist(self, info): diff --git a/test/test_utils.py b/test/test_utils.py index f2c03d421..f3fbff042 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,14 +1,15 @@ #!/usr/bin/env python +# coding: utf-8 -# Various small unit tests - +# Allow direct execution +import os import sys import unittest -import xml.etree.ElementTree +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Various small unit tests +import xml.etree.ElementTree #from youtube_dl.utils import htmlentity_transform from youtube_dl.utils import ( @@ -21,6 +22,8 @@ find_xpath_attr, get_meta_content, xpath_with_ns, + smuggle_url, + unsmuggle_url, ) if sys.version_info < (3, 0): @@ -155,5 +158,18 @@ def test_xpath_with_ns(self): self.assertEqual(find('media:song/media:author').text, u'The Author') self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + def test_smuggle_url(self): + data = {u"ö": u"ö", u"abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index ba7a9f50a..35defb895 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -1,39 +1,37 @@ #!/usr/bin/env python # coding: utf-8 -import xml.etree.ElementTree +# Allow direct execution import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import get_params, global_setup, try_rm +global_setup() + + +import io + +import xml.etree.ElementTree import youtube_dl.YoutubeDL import youtube_dl.extractor -from youtube_dl.utils import * -from .helper import try_rm - -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - params = json.load(pf) -params['writeannotations'] = True -params['skip_download'] = True -params['writeinfojson'] = False -params['format'] = 'flv' +params = get_params({ + 'writeannotations': True, + 'skip_download': True, + 'writeinfojson': False, + 'format': 'flv', +}) + + TEST_ID = 'gr51aVj-mLg' ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py index de6d5180f..a5b6f6972 100644 --- a/test/test_write_info_json.py +++ b/test/test_write_info_json.py @@ -1,37 +1,34 @@ #!/usr/bin/env python # coding: utf-8 -import json +# Allow direct execution import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import get_params, global_setup +global_setup() + + +import io +import json import youtube_dl.YoutubeDL import youtube_dl.extractor -from youtube_dl.utils import * - -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - params = json.load(pf) -params['writeinfojson'] = True -params['skip_download'] = True -params['writedescription'] = True +params = get_params({ + 'writeinfojson': True, + 'skip_download': True, + 'writedescription': True, +}) + TEST_ID = 'BaW_jenozKc' INFO_JSON_FILE = TEST_ID + '.mp4.info.json' @@ -42,6 +39,7 @@ def __init__(self, *args, **kwargs): For more information, contact phihag@phihag.de .''' + class TestInfoJSON(unittest.TestCase): def setUp(self): # Clear old files diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 0b5c79030..4b7a7847b 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -1,20 +1,26 @@ #!/usr/bin/env python +# Allow direct execution +import os import sys import unittest -import json +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, global_setup +global_setup() -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE -from youtube_dl.utils import * +from youtube_dl.extractor import ( + YoutubeUserIE, + YoutubePlaylistIE, + YoutubeIE, + YoutubeChannelIE, + YoutubeShowIE, +) -from .helper import FakeYDL class TestYoutubeLists(unittest.TestCase): - def assertIsPlaylist(self,info): + def assertIsPlaylist(self, info): """Make sure the info has '_type' set to 'playlist'""" self.assertEqual(info['_type'], 'playlist') @@ -100,7 +106,7 @@ def test_youtube_show(self): dl = FakeYDL() ie = YoutubeShowIE(dl) result = ie.extract('http://www.youtube.com/show/airdisasters') - self.assertTrue(len(result) >= 4) + self.assertTrue(len(result) >= 3) if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 5007d9a16..5e1ff5eb0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -1,14 +1,18 @@ #!/usr/bin/env python -import io -import re -import string +# Allow direct execution +import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import global_setup +global_setup() + + +import io +import re +import string from youtube_dl.extractor import YoutubeIE from youtube_dl.utils import compat_str, compat_urlretrieve diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 07850385e..00430a338 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -1,69 +1,79 @@ #!/usr/bin/env python +# Allow direct execution +import os import sys import unittest -import hashlib +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, global_setup, md5 +global_setup() -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeIE -from youtube_dl.utils import * -from .helper import FakeYDL -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class TestYoutubeSubtitles(unittest.TestCase): def setUp(self): self.DL = FakeYDL() self.url = 'QRS8MkLhQmM' + def getInfoDict(self): IE = YoutubeIE(self.DL) info_dict = IE.extract(self.url) return info_dict + def getSubtitles(self): info_dict = self.getInfoDict() - return info_dict[0]['subtitles'] + return info_dict[0]['subtitles'] + def test_youtube_no_writesubtitles(self): self.DL.params['writesubtitles'] = False subtitles = self.getSubtitles() self.assertEqual(subtitles, None) + def test_youtube_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') + def test_youtube_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') + def test_youtube_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) + def test_youtube_subtitles_sbv_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'sbv' subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') + def test_youtube_subtitles_vtt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'vtt' subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') + def test_youtube_list_subtitles(self): self.DL.expect_warning(u'Video doesn\'t have automatic captions') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) + def test_youtube_automatic_captions(self): self.url = '8YoUxe5ncPo' self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() self.assertTrue(subtitles['it'] is not None) + def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'sAjKT8FhjI8' @@ -71,6 +81,7 @@ def test_youtube_nosubtitles(self): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) + def test_youtube_multiple_langs(self): self.url = 'QRS8MkLhQmM' self.DL.params['writesubtitles'] = True diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 039e01498..13b56ede5 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -2,9 +2,15 @@ import subprocess import sys import time -import datetime -from .utils import * + +from .utils import ( + compat_subprocess_get_DEVNULL, + encodeFilename, + PostProcessingError, + shell_quote, + subtitles_filename, +) class PostProcessor(object): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c8054544a..7f73ea360 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -91,7 +91,7 @@ class YoutubeDL(object): downloadarchive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. - + The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, @@ -216,10 +216,10 @@ def report_warning(self, message): If stderr is a tty file the 'WARNING:' will be colored ''' if sys.stderr.isatty() and os.name != 'nt': - _msg_header=u'\033[0;33mWARNING:\033[0m' + _msg_header = u'\033[0;33mWARNING:\033[0m' else: - _msg_header=u'WARNING:' - warning_message=u'%s %s' % (_msg_header,message) + _msg_header = u'WARNING:' + warning_message = u'%s %s' % (_msg_header, message) self.to_stderr(warning_message) def report_error(self, message, tb=None): @@ -234,19 +234,6 @@ def report_error(self, message, tb=None): error_message = u'%s %s' % (_msg_header, message) self.trouble(error_message, tb) - def slow_down(self, start_time, byte_counter): - """Sleep if the download speed is over the rate limit.""" - rate_limit = self.params.get('ratelimit', None) - if rate_limit is None or byte_counter == 0: - return - now = time.time() - elapsed = now - start_time - if elapsed <= 0.0: - return - speed = float(byte_counter) / elapsed - if speed > rate_limit: - time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) - def report_writedescription(self, descfn): """ Report that the description file is being written """ self.to_screen(u'[info] Writing video description to: ' + descfn) @@ -285,16 +272,18 @@ def prepare_filename(self, info_dict): autonumber_size = 5 autonumber_templ = u'%0' + str(autonumber_size) + u'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads - if template_dict['playlist_index'] is not None: + if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] - sanitize = lambda k,v: sanitize_filename( + sanitize = lambda k, v: sanitize_filename( u'NA' if v is None else compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k==u'id')) - template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) + is_id=(k == u'id')) + template_dict = dict((k, sanitize(k, v)) + for k, v in template_dict.items()) - filename = self.params['outtmpl'] % template_dict + tmpl = os.path.expanduser(self.params['outtmpl']) + filename = tmpl % template_dict return filename except KeyError as err: self.report_error(u'Erroneous output template') @@ -328,14 +317,14 @@ def _match_entry(self, info_dict): return (u'%(title)s has already been recorded in archive' % info_dict) return None - + def extract_info(self, url, download=True, ie_key=None, extra_info={}): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. extra_info is a dict containing the extra values to add to each result ''' - + if ie_key: ies = [self.get_info_extractor(ie_key)] else: @@ -377,7 +366,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}): raise else: self.report_error(u'no suitable InfoExtractor: %s' % url) - + def process_ie_result(self, ie_result, download=True, extra_info={}): """ Take the result of the ie(may be modified) and resolve all unresolved @@ -390,13 +379,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': ie_result.update(extra_info) - if 'playlist' not in ie_result: - # It isn't part of a playlist - ie_result['playlist'] = None - ie_result['playlist_index'] = None - if download: - self.process_info(ie_result) - return ie_result + return self.process_video_result(ie_result) elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist @@ -407,7 +390,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}): elif result_type == 'playlist': # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) - self.to_screen(u'[download] Downloading playlist: %s' % playlist) + self.to_screen(u'[download] Downloading playlist: %s' % playlist) playlist_results = [] @@ -425,12 +408,12 @@ def process_ie_result(self, ie_result, download=True, extra_info={}): self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) - for i,entry in enumerate(entries,1): - self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) + for i, entry in enumerate(entries, 1): + self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries)) extra = { - 'playlist': playlist, - 'playlist_index': i + playliststart, - } + 'playlist': playlist, + 'playlist_index': i + playliststart, + } if not 'extractor' in entry: # We set the extractor, if it's an url it will be set then to # the new extractor, but if it's already a video we must make @@ -454,6 +437,107 @@ def _fixup(r): else: raise Exception('Invalid result type: %s' % result_type) + def select_format(self, format_spec, available_formats): + if format_spec == 'best' or format_spec is None: + return available_formats[-1] + elif format_spec == 'worst': + return available_formats[0] + else: + extensions = [u'mp4', u'flv', u'webm', u'3gp'] + if format_spec in extensions: + filter_f = lambda f: f['ext'] == format_spec + else: + filter_f = lambda f: f['format_id'] == format_spec + matches = list(filter(filter_f, available_formats)) + if matches: + return matches[-1] + return None + + def process_video_result(self, info_dict, download=True): + assert info_dict.get('_type', 'video') == 'video' + + if 'playlist' not in info_dict: + # It isn't part of a playlist + info_dict['playlist'] = None + info_dict['playlist_index'] = None + + # This extractors handle format selection themselves + if info_dict['extractor'] in [u'youtube', u'Youku']: + if download: + self.process_info(info_dict) + return info_dict + + # We now pick which formats have to be downloaded + if info_dict.get('formats') is None: + # There's only one format available + formats = [info_dict] + else: + formats = info_dict['formats'] + + # We check that all the formats have the format and format_id fields + for (i, format) in enumerate(formats): + if format.get('format_id') is None: + format['format_id'] = compat_str(i) + if format.get('format') is None: + format['format'] = u'{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', + ) + # Automatically determine file extension if missing + if 'ext' not in format: + format['ext'] = determine_ext(format['url']) + + if self.params.get('listformats', None): + self.list_formats(info_dict) + return + + format_limit = self.params.get('format_limit', None) + if format_limit: + formats = list(takewhile_inclusive( + lambda f: f['format_id'] != format_limit, formats + )) + if self.params.get('prefer_free_formats'): + def _free_formats_key(f): + try: + ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext']) + except ValueError: + ext_ord = -1 + # We only compare the extension if they have the same height and width + return (f.get('height'), f.get('width'), ext_ord) + formats = sorted(formats, key=_free_formats_key) + + req_format = self.params.get('format', 'best') + if req_format is None: + req_format = 'best' + formats_to_download = [] + # The -1 is for supporting YoutubeIE + if req_format in ('-1', 'all'): + formats_to_download = formats + else: + # We can accept formats requestd in the format: 34/5/best, we pick + # the first that is available, starting from left + req_formats = req_format.split('/') + for rf in req_formats: + selected_format = self.select_format(rf, formats) + if selected_format is not None: + formats_to_download = [selected_format] + break + if not formats_to_download: + raise ExtractorError(u'requested format not available', + expected=True) + + if download: + if len(formats_to_download) > 1: + self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) + for format in formats_to_download: + new_info = dict(info_dict) + new_info.update(format) + self.process_info(new_info) + # We update the info dict with the best quality format (backwards compatibility) + info_dict.update(formats_to_download[-1]) + return info_dict + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -491,9 +575,9 @@ def process_info(self, info_dict): if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath compat_print(info_dict['url'] + info_dict.get('play_path', u'')) - if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: compat_print(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and 'description' in info_dict: + if self.params.get('forcedescription', False) and info_dict.get('description') is not None: compat_print(info_dict['description']) if self.params.get('forcefilename', False) and filename is not None: compat_print(filename) @@ -529,20 +613,20 @@ def process_info(self, info_dict): if self.params.get('writeannotations', False): try: - annofn = filename + u'.annotations.xml' - self.report_writeannotations(annofn) - with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: - annofile.write(info_dict['annotations']) + annofn = filename + u'.annotations.xml' + self.report_writeannotations(annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning(u'There are no annotations to write.') except (OSError, IOError): - self.report_error(u'Cannot write annotations file: ' + annofn) - return + self.report_error(u'Cannot write annotations file: ' + annofn) + return subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) - if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: + if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['subtitles'] @@ -564,7 +648,7 @@ def process_info(self, info_dict): infofn = filename + u'.info.json' self.report_writeinfojson(infofn) try: - json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) + json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle']) write_json_file(json_info_dict, encodeFilename(infofn)) except (OSError, IOError): self.report_error(u'Cannot write metadata to JSON file ' + infofn) @@ -634,7 +718,7 @@ def post_process(self, filename, ie_info): keep_video = None for pp in self._pps: try: - keep_video_wish,new_info = pp.run(info) + keep_video_wish, new_info = pp.run(info) if keep_video_wish is not None: if keep_video_wish: keep_video = keep_video_wish @@ -672,3 +756,38 @@ def record_download_archive(self, info_dict): vid_id = info_dict['extractor'] + u' ' + info_dict['id'] with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + u'\n') + + @staticmethod + def format_resolution(format, default='unknown'): + if format.get('_resolution') is not None: + return format['_resolution'] + if format.get('height') is not None: + if format.get('width') is not None: + res = u'%sx%s' % (format['width'], format['height']) + else: + res = u'%sp' % format['height'] + else: + res = default + return res + + def list_formats(self, info_dict): + def line(format): + return (u'%-15s%-10s%-12s%s' % ( + format['format_id'], + format['ext'], + self.format_resolution(format), + format.get('format_note', ''), + ) + ) + + formats = info_dict.get('formats', [info_dict]) + formats_s = list(map(line, formats)) + if len(formats) > 1: + formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' + formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' + + header_line = line({ + 'format_id': u'format code', 'ext': u'extension', + '_resolution': u'resolution', 'format_note': u'note'}) + self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, u"\n".join(formats_s))) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index fb1270ea2..48ffcbf8e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -31,6 +31,7 @@ 'Huarong Huo', 'Ismael Mejía', 'Steffan \'Ruirize\' James', + 'Andras Elso', ) __license__ = 'Public Domain' @@ -46,17 +47,43 @@ import socket import subprocess import sys -import warnings +import traceback import platform -from .utils import * +from .utils import ( + compat_cookiejar, + compat_print, + compat_str, + compat_urllib_request, + DateRange, + decodeOption, + determine_ext, + DownloadError, + get_cachedir, + make_HTTPS_handler, + MaxDownloadsReached, + platform_name, + preferredencoding, + SameFileError, + std_headers, + write_string, + YoutubeDLHandler, +) from .update import update_self from .version import __version__ -from .FileDownloader import * +from .FileDownloader import ( + FileDownloader, +) from .extractor import gen_extractors from .YoutubeDL import YoutubeDL -from .PostProcessor import * +from .PostProcessor import ( + FFmpegMetadataPP, + FFmpegVideoConvertor, + FFmpegExtractAudioPP, + FFmpegEmbedSubtitlePP, +) + def parseOpts(overrideArguments=None): def _readOptions(filename_bytes): @@ -106,7 +133,7 @@ def _find_term_columns(): def _hide_login_info(opts): opts = list(opts) - for private_opt in ['-p', '--password', '-u', '--username']: + for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: try: i = opts.index(private_opt) opts[i+1] = '' @@ -152,6 +179,9 @@ def _hide_login_info(opts): action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) + general.add_option('--abort-on-error', + action='store_false', dest='ignoreerrors', + help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -169,7 +199,7 @@ def _hide_login_info(opts): general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( - '--cache-dir', dest='cachedir', default=get_cachedir(), + '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', @@ -208,7 +238,7 @@ def _hide_login_info(opts): video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', + action='store', dest='format', metavar='FORMAT', default='best', help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') @@ -240,11 +270,11 @@ def _hide_login_info(opts): help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') downloader.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) downloader.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024") downloader.add_option('--no-resize-buffer', action='store_true', dest='noresizebuffer', help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) @@ -286,6 +316,9 @@ def _hide_login_info(opts): verbosity.add_option('--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, help='print downloaded pages to debug problems(very verbose)') + verbosity.add_option('--write-pages', + action='store_true', dest='write_pages', default=False, + help='Write downloaded pages to files in the current directory') verbosity.add_option('--youtube-print-sig-code', action='store_true', dest='youtube_print_sig_code', default=False, help=optparse.SUPPRESS_HELP) @@ -305,7 +338,10 @@ def _hide_login_info(opts): help=('output filename template. Use %(title)s to get the title, ' '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(autonumber)s to get an automatically incremented number, ' - '%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), ' + '%(ext)s for the filename extension, ' + '%(format)s for the format description (like "22 - 1280x720" or "HD"),' + '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),' + '%(upload_date)s for the upload date (YYYYMMDD), ' '%(extractor)s for the provider (youtube, metacafe, etc), ' '%(id)s for the video id , %(playlist)s for the playlist the video is in, ' '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' @@ -619,6 +655,7 @@ def _real_main(argv=None): 'prefer_free_formats': opts.prefer_free_formats, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, + 'write_pages': opts.write_pages, 'test': opts.test, 'keepvideo': opts.keepvideo, 'min_filesize': opts.min_filesize, @@ -688,7 +725,7 @@ def _real_main(argv=None): if opts.cookiefile is not None: try: jar.save() - except (IOError, OSError) as err: + except (IOError, OSError): sys.exit(u'ERROR: unable to save cookie jar') sys.exit(retcode) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 748f12e5a..bcf1cce7f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -72,6 +72,7 @@ from .jukebox import JukeboxIE from .justintv import JustinTVIE from .kankan import KankanIE +from .keezmovies import KeezMoviesIE from .kickstarter import KickStarterIE from .keek import KeekIE from .liveleak import LiveLeakIE @@ -82,6 +83,7 @@ from .mixcloud import MixcloudIE from .mtv import MTVIE from .muzu import MuzuTVIE +from .myspace import MySpaceIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .naver import NaverIE @@ -94,6 +96,7 @@ from .orf import ORFIE from .pbs import PBSIE from .photobucket import PhotobucketIE +from .pornhub import PornHubIE from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE @@ -102,22 +105,27 @@ from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE +from .rutube import RutubeIE from .sina import SinaIE from .slashdot import SlashdotIE from .slideshare import SlideshareIE from .sohu import SohuIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .southparkstudios import SouthParkStudiosIE +from .spankwire import SpankwireIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE from .statigram import StatigramIE from .steam import SteamIE +from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE +from .techtalks import TechTalksIE from .ted import TEDIE from .tf1 import TF1IE from .thisav import ThisAVIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .tube8 import Tube8IE from .tudou import TudouIE from .tumblr import TumblrIE from .tutv import TutvIE @@ -134,7 +142,9 @@ from .videopremium import VideoPremiumIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE +from .vk import VKIE from .wat import WatIE +from .websurg import WeBSurgIE from .weibo import WeiboIE from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 82a785a19..b99d4b966 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor): IE_NAME = u'AddAnime' _TEST = { u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', - u'file': u'24MR3YO5SAS9.flv', - u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1', + u'file': u'24MR3YO5SAS9.mp4', + u'md5': u'72954ea10bc979ab5e2eb288b21425a0', u'info_dict': { u"description": u"One Piece 606", u"title": u"One Piece 606" @@ -31,7 +31,8 @@ def _real_extract(self, url): video_id = mobj.group('video_id') webpage = self._download_webpage(url, video_id) except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError): + if not isinstance(ee.cause, compat_HTTPError) or \ + ee.cause.code != 503: raise redir_webpage = ee.cause.read().decode('utf-8') @@ -60,16 +61,26 @@ def _real_extract(self, url): note=u'Confirming after redirect') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r"var normal_video_file = '(.*?)';", - webpage, u'video file URL') + formats = [] + for format_id in ('normal', 'hq'): + rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) + video_url = self._search_regex(rex, webpage, u'video file URLx', + fatal=False) + if not video_url: + continue + formats.append({ + 'format_id': format_id, + 'url': video_url, + }) + if not formats: + raise ExtractorError(u'Cannot find any video format!') video_title = self._og_search_title(webpage) video_description = self._og_search_description(webpage) return { '_type': 'video', 'id': video_id, - 'url': video_url, - 'ext': 'flv', + 'formats': formats, 'title': video_title, 'description': video_description } diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 5ee8a67b1..e10c74c11 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -158,7 +158,9 @@ def _extract_from_webpage(self, webpage, video_id, lang): 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - formats = player_info['VSR'].values() + all_formats = player_info['VSR'].values() + # Some formats use the m3u8 protocol + all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) def _match_lang(f): if f.get('versionCode') is None: return True @@ -170,16 +172,36 @@ def _match_lang(f): regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url - formats = filter(_match_lang, formats) - # Some formats use the m3u8 protocol - formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) + formats = filter(_match_lang, all_formats) + formats = list(formats) # in python3 filter returns an iterator + if not formats: + # Some videos are only available in the 'Originalversion' + # they aren't tagged as being in French or German + if all(f['versionCode'] == 'VO' for f in all_formats): + formats = all_formats + else: + raise ExtractorError(u'The formats list is empty') # We order the formats by quality - formats = sorted(formats, key=lambda f: int(f.get('height',-1))) + if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: + sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) + else: + sort_key = lambda f: int(f.get('height',-1)) + formats = sorted(formats, key=sort_key) # Prefer videos without subtitles in the same language formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None) # Pick the best quality def _format(format_info): + quality = format_info['quality'] + m_quality = re.match(r'\w*? - (\d*)p', quality) + if m_quality is not None: + quality = m_quality.group(1) + if format_info.get('versionCode') is not None: + format_id = u'%s-%s' % (quality, format_info['versionCode']) + else: + format_id = quality info = { + 'format_id': format_id, + 'format_note': format_info.get('versionLibelle'), 'width': format_info.get('width'), 'height': format_info.get('height'), } @@ -192,8 +214,6 @@ def _format(format_info): info['ext'] = determine_ext(info['url']) return info info_dict['formats'] = [_format(f) for f in formats] - # TODO: Remove when #980 has been merged - info_dict.update(info_dict['formats'][-1]) return info_dict @@ -207,7 +227,7 @@ class ArteTVCreativeIE(ArteTVPlus7IE): u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', u'file': u'050489-002.mp4', u'info_dict': { - u'title': u'Agentur Amateur #2 - Corporate Design', + u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design', }, } diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 745212f2f..1392f382a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -53,6 +53,8 @@ def _build_brighcove_url(cls, object_str): # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 object_str = re.sub(r'(', lambda m: m.group(1) + '/>', object_str) + # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + object_str = object_str.replace(u'<--', u'