]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Merge remote-tracking branch 'origin/master'
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
c496ca96
PH
4import datetime
5import email.utils
f45c185f 6import errno
d77c3dfd 7import gzip
03f9daab 8import io
f4bfd65f 9import json
d77c3dfd
FV
10import locale
11import os
c496ca96 12import platform
d77c3dfd 13import re
c496ca96 14import socket
d77c3dfd 15import sys
01951dda 16import traceback
d77c3dfd 17import zlib
d77c3dfd 18
01ba00ca 19try:
59ae15a5 20 import urllib.request as compat_urllib_request
01ba00ca 21except ImportError: # Python 2
59ae15a5 22 import urllib2 as compat_urllib_request
01ba00ca
PH
23
24try:
59ae15a5 25 import urllib.error as compat_urllib_error
01ba00ca 26except ImportError: # Python 2
59ae15a5 27 import urllib2 as compat_urllib_error
01ba00ca
PH
28
29try:
59ae15a5 30 import urllib.parse as compat_urllib_parse
01ba00ca 31except ImportError: # Python 2
59ae15a5 32 import urllib as compat_urllib_parse
01ba00ca 33
799c0763
PH
34try:
35 from urllib.parse import urlparse as compat_urllib_parse_urlparse
36except ImportError: # Python 2
37 from urlparse import urlparse as compat_urllib_parse_urlparse
38
6543f0dc
JMF
39try:
40 import urllib.parse as compat_urlparse
41except ImportError: # Python 2
42 import urlparse as compat_urlparse
43
01ba00ca 44try:
59ae15a5 45 import http.cookiejar as compat_cookiejar
01ba00ca 46except ImportError: # Python 2
59ae15a5 47 import cookielib as compat_cookiejar
01ba00ca 48
3e669f36 49try:
59ae15a5 50 import html.entities as compat_html_entities
9f37a959 51except ImportError: # Python 2
59ae15a5 52 import htmlentitydefs as compat_html_entities
3e669f36 53
a8156c1d 54try:
59ae15a5 55 import html.parser as compat_html_parser
9f37a959 56except ImportError: # Python 2
59ae15a5 57 import HTMLParser as compat_html_parser
a8156c1d 58
348d0a7a 59try:
59ae15a5 60 import http.client as compat_http_client
9f37a959 61except ImportError: # Python 2
59ae15a5 62 import httplib as compat_http_client
348d0a7a 63
2eabb802 64try:
0e283428 65 from urllib.error import HTTPError as compat_HTTPError
2eabb802
PH
66except ImportError: # Python 2
67 from urllib2 import HTTPError as compat_HTTPError
68
5910e210
PH
# compat_subprocess_get_DEVNULL() returns something usable as a "discard
# output" target for subprocess calls.
try:
    # Preferred: reuse the subprocess.DEVNULL constant when it can be imported.
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    # Fallback: every call opens os.devnull for writing and returns the file
    # object (nothing in this module closes it).
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
74
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Runs of consecutive escapes are collected as bytes and decoded
        together with the given encoding/errors, so multi-byte sequences
        are decoded as a unit. None for either argument falls back to
        'utf-8' / 'replace'.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            # No '%' at all: nothing to decode.
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # Python 2 only: str.decode('hex') turns two hex digits into one byte.
                # NOTE(review): a non-hex pair may raise TypeError rather than
                # ValueError on Python 2 - confirm the except clause covers it.
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                # Not a valid escape: keep the text literally, '%' included.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. '+' is turned into a space and
        %xx escapes are decoded via _unquote(); results are coerced to
        unicode. Fields with an empty value are dropped unless
        keep_blank_values is set. With strict_parsing, a field without '='
        raises ValueError instead of being skipped.
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values.

        Values of repeated names are appended in the order they appear.
        Arguments are passed through to _parse_qsl().
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                        encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
348d0a7a 153
# compat_str: the text (unicode) string type of the running interpreter.
try:
    compat_str = unicode # Python 2
except NameError:
    # The name 'unicode' does not exist: fall back to str.
    compat_str = str

# compat_chr: code point -> one-character text string.
try:
    compat_chr = unichr # Python 2
except NameError:
    # The name 'unichr' does not exist: fall back to chr.
    compat_chr = chr
3e669f36 163
b31756c1
FV
def compat_ord(c):
    """Return the integer value of c.

    A plain int (what indexing a bytes object yields on Python 3) is handed
    back untouched; anything else is passed to ord().
    """
    return c if type(c) is int else ord(c)
167
468e2e92
FV
# This is not clearly defined otherwise
# (the type of a compiled pattern, obtained from an actual compiled object)
compiled_regex_type = type(re.compile(''))

# Standard request headers. YoutubeDLHandler.http_request() sets every one of
# these on each request it processes.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 178
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or the reported codec cannot actually encode text,
    'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; an unknown/unusable name raises here.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (locale.Error, LookupError, UnicodeError, ...),
        # but no longer a bare except, so KeyboardInterrupt and SystemExit
        # propagate instead of being silently turned into 'UTF-8'.
        pref = 'UTF-8'

    return pref
d77c3dfd 192
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s (Python 3: print() takes text directly)."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print the text string s, encoded for the terminal.

        Characters the preferred encoding cannot represent are written as
        XML character references.
        """
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
d77c3dfd 200
f4bfd65f
PH
# json.dump writes to a character stream on Python 3.x,
# but expects a bytestream on Python 2.x.
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as out:
            json.dump(obj, out)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as out:
            json.dump(obj, out)
211
59ae56fa
PH
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element matching xpath whose attribute key equals val """
        # Manual scan instead of an attribute predicate in the path expression.
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are spliced into the expression, so only a restricted
        # alphabet is accepted for each.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z@\s]*$', val)
        return node.find(xpath + u"[@%s='%s']" % (key, val))
225
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities are looked up in the name2codepoint table; numeric
    references are accepted in decimal (#47) and hexadecimal (#x2F / #X2F)
    form. Anything else, including numeric references that do not map to a
    valid code point, is returned unchanged as '&entity;'.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # The whole entity must be a numeric reference. Hex digits a-f have to be
    # matched explicitly: with a plain \d+ the text '#x2F' would be cut after
    # 'x2' and decoded as code point 0x2.
    mobj = re.match(u'#([xX][0-9a-fA-F]+|[0-9]+)$', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            base = 16
            numstr = numstr[1:]
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Number outside the range of valid code points: keep it literal.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
d77c3dfd 250
# Monkeypatch: replace the module-level start-tag pattern of the HTML parser
# module with this one, so that every parser built on it (AttrParser below)
# uses it.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
43e8fafd
ND
class AttrParser(compat_html_parser.HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute

    Usage: construct with the attribute name and value to look for, call
    loads(html), then get_result() to obtain the text between the matching
    opening tag and its closing tag (or None).
    """
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # Once complete: [tag name, start position, end position], where the
        # positions are the (line, column) tuples returned by getpos().
        self.result = None
        # True while the parser is inside the matched element.
        self.started = False
        # Per-tag-name count of currently open tags seen since the match.
        self.depth = {}
        # The full document passed to loads(); needed to cut out the result.
        self.html = None
        # True between the matching start tag and the next parser event.
        self.watch_startpos = False
        # Number of parse errors skipped so far (see error()).
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by skipping a line and parsing on.

        Gives up (raises HTMLParseError) after more than 10 errors, or as
        soon as an error occurs inside the matched element.
        NOTE(review): HTMLParseError may not exist in newer Python 3
        releases of html.parser - confirm the supported versions.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def loads(self, html):
        """Parse the whole document html in one go."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # A nested tag directly after the matched start tag also marks
            # the beginning of the content.
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            # Found the element we are looking for (a later match restarts
            # the result).
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The matched element is closed once the count of open tags
            # carrying its name is back to zero.
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other parser event is routed to find_startpos, so whichever comes
    # first after the matched start tag records the start position.
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped content of the matched element.

        Returns None when no element matched or when its start or end
        position was never recorded.
        """
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        # Line numbers are used as 1-based here: keep start line .. end line.
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        # Drop everything before the start column on the first line.
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end on the same line: the end column has to be
            # shifted by what was just cut off the front.
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        # Cut the last line at the end column (for a single line this no
        # longer shortens it).
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
3b024e17
PH
# Hack for https://github.com/rg3/youtube-dl/issues/662
# On interpreters older than 2.7.3, parse_endtag is overridden so that the
# literal text </scr'+'ipt> is stepped over (its length is added to the
# position) instead of being handed to the stock end-tag parser; every other
# end tag goes to HTMLParser.parse_endtag unchanged.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
9e6dd238
FV
323
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals id in the passed HTML document"""
    # Thin convenience wrapper around the generic attribute lookup.
    return get_element_by_attribute(attribute="id", value=id, html=html)
327
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag carrying attribute=value in the passed HTML document

    Returns whatever AttrParser.get_result() reports, i.e. None when nothing
    was isolated.
    """
    finder = AttrParser(attribute, value)
    try:
        finder.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: a parse failure just ends the search; whatever was
        # collected up to that point is still used below.
        pass
    return finder.get_result()
9e6dd238
FV
336
337
def clean_html(html):
    """Turn an HTML snippet into readable plain text"""
    # Source newlines carry no meaning in HTML; real line breaks come from
    # <br> and from paragraph boundaries. The substitutions run in order.
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> / <br /> (with surrounding whitespace) -> newline
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        # </p><p ...> -> newline
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip all remaining tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities, then trim
    return unescapeHTML(text).strip()
9e6dd238
FV
349
350
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The special name u'-' stands for standard output (its binary buffer
    where one exists).

    It returns the tuple (stream, definitive_file_name).

    Raises IOError/OSError when the file cannot be opened: immediately for
    permission errors (EACCES), otherwise after the sanitized name has been
    tried as well (or when sanitizing would not change the name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final path component is rewritten: running the
        # substitution over the directory part would also replace its
        # separators and a drive colon, yielding a path that cannot exist.
        head, tail = os.path.split(filename)
        alt_tail = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail)
        if alt_tail == tail:
            # Nothing to sanitize; re-raise the original error.
            raise
        alt_filename = os.path.join(head, alt_tail)
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
d77c3dfd
FV
384
385
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 393
def sanitize_filename(s, restricted=False, is_id=False):
    """Make a string safe to use as part of a filename.

    With restricted set, a stricter subset of characters is allowed
    (non-ASCII characters, whitespace and most punctuation become '_').
    Set is_id when s is not an arbitrary string but an ID that should be
    kept if possible: the cleanup of underscores is skipped then.
    """
    def replace_insane(char):
        code = ord(char)
        # '?' and control characters are dropped entirely.
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    result = u''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends.
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result if result else '_'
d77c3dfd
FV
425
def orderedSet(iterable):
    """ Return a list of the elements of iterable without duplicates, in first-seen order """
    unique = []
    for candidate in iterable:
        # Equality-based membership on the list: elements need not be hashable.
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
d77c3dfd
FV
433
def unescapeHTML(s):
    """
    Replace every &...; sequence in s by the result of htmlentity_transform.

    @param s a string (must be a text string, not bytes)
    """
    assert type(s) == type(u'')

    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
d77c3dfd
FV
442
def encodeFilename(s):
    """
    Return s in the form the file APIs of this interpreter/platform expect.

    @param s The name of the file (a text string)
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass u'' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Everything else gets bytes in the filesystem encoding; characters that
    # cannot be represented are dropped.
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        fs_encoding = 'utf-8'
    return s.encode(fs_encoding, 'ignore')
d77c3dfd 464
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as a text string.

    None is passed through; bytes are decoded with the preferred encoding.
    """
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())

        assert isinstance(optval, compat_str)
    return optval
1c256f70 473
4539dd30
PH
def formatSeconds(secs):
    """Format a duration given in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: hours appear from 3600 seconds on,
    minutes from 60 seconds on.
    """
    # Inclusive bounds: exactly one hour is '1:00:00' (not '60:00') and
    # exactly one minute is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
481
ea6d901e
PH
def make_HTTPS_handler(opts):
    """Build the HTTPS handler for the urllib opener.

    opts must provide a no_check_certificate attribute; when it is true,
    certificates are not verified (it is only consulted on Python >= 3.2).
    """
    if sys.version_info < (3,2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.set_default_verify_paths()

    if opts.no_check_certificate:
        verify_mode = ssl.CERT_NONE
    else:
        verify_mode = ssl.CERT_REQUIRED
    context.verify_mode = verify_mode
    return compat_urllib_request.HTTPSHandler(context=context)
ea6d901e 495
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        The error also counts as expected when it is created while a network
        error, a socket timeout or an UnavailableVideoError is being handled.
        """
        current_exc_info = sys.exc_info()
        expected_causes = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if current_exc_info[0] in expected_causes:
            expected = True
        if not expected:
            # Unexpected errors ask the user to file a bug report.
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info() # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is not None:
            return u''.join(traceback.format_tb(self.traceback))
        return None
517
1c256f70 518
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
530
531
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
539
540
class PostProcessingError(Exception):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to indicate
    an error in the postprocessing task.
    """
    def __init__(self, msg):
        # The message is additionally kept as a plain attribute.
        setattr(self, 'msg', msg)
d77c3dfd
FV
549
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
553
554
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
562
563
class ContentTooShortError(Exception):
    """Content Too Short exception.

    FileDownloader objects may raise this exception when a file they
    download is smaller than what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Class-level defaults; both values are byte counts.
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 578
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Likewise, a "Youtubedl-user-agent" header on the original request
    replaces the User-agent header and is removed before the real request
    is made.

    The same request/response processing is registered for HTTPS.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, with or without a zlib header."""
        try:
            # Negative window bits: raw deflate stream without zlib header.
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Otherwise retry as a regular zlib stream.
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response object carrying the status code.

        When addinfourl has no getcode attribute, the three-argument
        constructor is used and the code is set on the instance afterwards.
        """
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Apply the standard headers and the Youtubedl-* pseudo headers to req."""
        # Every header of std_headers is set on the request; a value already
        # present under the same name is replaced.
        for h,v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        # The pseudo headers are looked up with only the first letter
        # capitalized (the form in which urllib's add_header stores names).
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        """Return resp with a gzip- or deflate-encoded body decompressed.

        The whole body is read into memory. The replacement response keeps
        the original headers, url, code and msg.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; the first length
                # that decompresses wins.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the very first error.
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic goes through exactly the same processing.
    https_request = http_request
    https_response = http_response
bf50b038
JMF
659
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Commas and a trailing UTC offset (' +0100', ' -0500') are ignored.
    Returns None when date_str is None or matches none of the known
    formats.
    """
    if date_str is None:
        return None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
    for expression in format_expressions:
        try:
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Not this format (or not representable): try the next one.
            # Only ValueError is expected here; anything else is a real bug
            # and must not be swallowed.
            continue
    return None
674
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url.

    The guess is whatever follows the last '.' before the query string; it
    is returned only if it is purely alphanumeric, otherwise default_ext is
    returned.
    """
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 681
d4051a8e
JMF
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return u'.'.join((stem, sub_lang, sub_format))
684
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if date_str has neither format.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence, which newer Python versions warn about.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta takes plural keyword names ('days', 'weeks').
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
710
class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str.

        A missing start/end leaves that side of the range open (earliest /
        latest representable date). Raises ValueError if start lies after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        probe = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= probe <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
736
737
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A bytes result is decoded with the preferred encoding first.
    name = name.decode(preferredencoding()) if isinstance(name, bytes) else name

    assert isinstance(name, compat_str)
    return name