]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[addanime] improve
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
f45c185f 4import errno
d77c3dfd 5import gzip
03f9daab 6import io
f4bfd65f 7import json
d77c3dfd
FV
8import locale
9import os
10import re
11import sys
01951dda 12import traceback
d77c3dfd 13import zlib
d77c3dfd 14import email.utils
d5979c5d 15import socket
bd558525 16import datetime
d77c3dfd 17
01ba00ca 18try:
59ae15a5 19 import urllib.request as compat_urllib_request
01ba00ca 20except ImportError: # Python 2
59ae15a5 21 import urllib2 as compat_urllib_request
01ba00ca
PH
22
23try:
59ae15a5 24 import urllib.error as compat_urllib_error
01ba00ca 25except ImportError: # Python 2
59ae15a5 26 import urllib2 as compat_urllib_error
01ba00ca
PH
27
28try:
59ae15a5 29 import urllib.parse as compat_urllib_parse
01ba00ca 30except ImportError: # Python 2
59ae15a5 31 import urllib as compat_urllib_parse
01ba00ca 32
799c0763
PH
33try:
34 from urllib.parse import urlparse as compat_urllib_parse_urlparse
35except ImportError: # Python 2
36 from urlparse import urlparse as compat_urllib_parse_urlparse
37
6543f0dc
JMF
38try:
39 import urllib.parse as compat_urlparse
40except ImportError: # Python 2
41 import urlparse as compat_urlparse
42
01ba00ca 43try:
59ae15a5 44 import http.cookiejar as compat_cookiejar
01ba00ca 45except ImportError: # Python 2
59ae15a5 46 import cookielib as compat_cookiejar
01ba00ca 47
3e669f36 48try:
59ae15a5 49 import html.entities as compat_html_entities
9f37a959 50except ImportError: # Python 2
59ae15a5 51 import htmlentitydefs as compat_html_entities
3e669f36 52
a8156c1d 53try:
59ae15a5 54 import html.parser as compat_html_parser
9f37a959 55except ImportError: # Python 2
59ae15a5 56 import HTMLParser as compat_html_parser
a8156c1d 57
348d0a7a 58try:
59ae15a5 59 import http.client as compat_http_client
9f37a959 60except ImportError: # Python 2
59ae15a5 61 import httplib as compat_http_client
348d0a7a 62
2eabb802
PH
try:
    # Python 3: HTTPError lives in urllib.error (there is no "http.error"
    # module, so importing from it always failed and then the Python 2
    # fallback below failed as well).
    from urllib.error import HTTPError as compat_HTTPError
except ImportError:  # Python 2
    from urllib2 import HTTPError as compat_HTTPError
67
5910e210
PH
68try:
69 from subprocess import DEVNULL
70 compat_subprocess_get_DEVNULL = lambda: DEVNULL
71except ImportError:
72 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
73
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Consecutive escapes are collected as bytes and decoded together with
        the given encoding/errors, so multi-byte characters survive.
        Malformed escapes are left in the output verbatim.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except (TypeError, ValueError):
                # Python 2's 'hex' codec signals non-hex digits and odd-length
                # input with TypeError, not ValueError; treat both as
                # "not an escape" and keep the text literally.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) unicode pairs.

        Fields are separated by '&' or ';'. Raises ValueError for a field
        without '=' when strict_parsing is set.
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
348d0a7a 152
3e669f36 153try:
59ae15a5 154 compat_str = unicode # Python 2
3e669f36 155except NameError:
59ae15a5 156 compat_str = str
3e669f36
PH
157
158try:
59ae15a5 159 compat_chr = unichr # Python 2
3e669f36 160except NameError:
59ae15a5 161 compat_chr = chr
3e669f36 162
b31756c1
FV
def compat_ord(c):
    """Return the code of c; values that are already plain ints pass through.

    Indexing bytes yields ints on Python 3 but one-character strings on
    Python 2, so callers can use this on either.
    """
    return c if type(c) is int else ord(c)
166
468e2e92
FV
167# This is not clearly defined otherwise
168compiled_regex_type = type(re.compile(''))
169
3e669f36 170std_headers = {
59ae15a5
PH
171 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
172 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
173 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
174 'Accept-Encoding': 'gzip, deflate',
175 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 176}
f427df17 177
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    cannot be queried or names a codec Python cannot encode with, 'UTF-8'
    is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; an unknown name raises LookupError here.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberate best-effort fallback, but do not swallow
        # KeyboardInterrupt/SystemExit like a bare except would.
        pref = 'UTF-8'

    return pref
d77c3dfd 191
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s (must be unicode text, not bytes)."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded in the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
d77c3dfd 199
f4bfd65f
PH
# json.dump writes bytes on Python 2.x but text on Python 3.x, so the
# output file has to be opened in the matching mode.
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as out:
            json.dump(obj, out)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as out:
            json.dump(obj, out)
210
59ae56fa
PH
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """Return the first element matching xpath whose attribute key equals val.

        Manual scan used on interpreters older than 2.7; returns None when
        no element matches.
        """
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the expression unescaped, so
        # only a conservative character set is accepted.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z@\s]*$', val)
        predicate = u"[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
224
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities and decimal/hexadecimal character references are
    decoded; anything else (unknown names, malformed or out-of-range
    numbers) is returned in its literal '&...;' form.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric reference. The whole entity must match: hex references may
    # contain a-f digits, and trailing junk must not be silently dropped.
    mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|[0-9]+)$', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            base = 16
            numstr = numstr[1:]
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Code point outside the range the platform can represent;
            # fall through and keep the text as written.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
d77c3dfd 249
a8156c1d 250compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
43e8fafd
ND
class AttrParser(compat_html_parser.HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""

    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # result grows to [tag, start_pos, end_pos]; positions are the
        # (line, column) tuples returned by getpos().
        self.result = None
        self.started = False
        # Per-tag nesting counters, maintained only while inside the match.
        self.depth = {}
        self.html = None
        self.watch_startpos = False
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by skipping a line, up to a limit.

        Raises HTMLParseError once too many errors occurred or when the
        error happens inside the element being captured.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        current_line = self.getpos()[0]
        remaining_lines = self.html.split('\n')[current_line:]  # skip one line
        self.rawdata = '\n'.join(remaining_lines)
        self.error_count += 1
        self.goahead(1)

    def loads(self, html):
        """Parse the complete document html."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attr_map = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attr_map and attr_map[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The capture ends when the matched tag's own nesting returns to zero.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if not self.watch_startpos:
            return
        self.watch_startpos = False
        self.result.append(self.getpos())

    # Any content event following the opening tag marks the start position.
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped inner text of the matched element, or None.

        None is returned when no element matched or when its start/end
        positions were not both recorded.
        """
        if self.result is None or len(self.result) != 3:
            return None
        start_line, start_col = self.result[1][0], self.result[1][1]
        end_line, end_col = self.result[2][0], self.result[2][1]
        selected = self.html.split('\n')[start_line - 1:end_line]
        selected[0] = selected[0][start_col:]
        if len(selected) == 1:
            # Start and end share a line: the end column must be made
            # relative to the already-trimmed line.
            selected[-1] = selected[-1][:end_col - start_col]
        selected[-1] = selected[-1][:end_col]
        return '\n'.join(selected).strip()
3b024e17
PH
316# Hack for https://github.com/rg3/youtube-dl/issues/662
317if sys.version_info < (2, 7, 3):
318 AttrParser.parse_endtag = (lambda self, i:
319 i + len("</scr'+'ipt>")
320 if self.rawdata[i:].startswith("</scr'+'ipt>")
321 else compat_html_parser.HTMLParser.parse_endtag(self, i))
9e6dd238
FV
322
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals id in the given HTML document.

    Thin convenience wrapper around get_element_by_attribute().
    """
    return get_element_by_attribute(attribute="id", value=id, html=html)
326
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document

    Parse errors are ignored; whatever the parser captured before the
    error (possibly nothing, i.e. None) is returned.
    """
    attr_parser = AttrParser(attribute, value)
    try:
        attr_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: fall through with the partial parser state.
        pass
    return attr_parser.get_result()
9e6dd238
FV
335
336
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Line breaks in the markup are dropped, <br> tags and paragraph
    boundaries become newlines, all remaining tags are removed and HTML
    entities are decoded. Returns None when html is None, so the result of
    a failed (optional) extraction can be passed straight through.
    """
    if html is None:
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
9e6dd238
FV
348
349
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it retries once
    with the characters that are forbidden on win32 replaced by '#' in the
    final path component. If that also fails (or changes nothing) the
    exception is raised, like the standard open() function would.
    Permission errors (EACCES) are never retried.

    The special filename u'-' selects standard output (its binary buffer
    where available).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the last component is rewritten: replacing separators or a
        # drive colon in the directory part would point somewhere else.
        head, tail = os.path.split(filename)
        alt_filename = os.path.join(
            head, re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
383
384
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 392
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def _clean_char(ch):
        # Map one input character to its replacement (possibly empty).
        code = ord(ch)
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted:
            if ch in '!&\'()[]{}$;`^,#' or ch.isspace():
                return '_'
            if code > 127:
                return '_'
        return ch

    cleaned = u''.join(_clean_char(ch) for ch in s)
    if is_id:
        # IDs are kept as close to the original as possible.
        return cleaned

    # Collapse runs of underscores and trim them from both ends.
    while '__' in cleaned:
        cleaned = cleaned.replace('__', '_')
    cleaned = cleaned.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    return cleaned if cleaned else '_'
d77c3dfd
FV
424
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of each element, in input
    order. Elements are compared with ==, so they need not be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd
FV
432
def unescapeHTML(s):
    """
    Replace every '&...;' sequence in s using htmlentity_transform().

    @param s a string (unicode text, not bytes)
    """
    assert type(s) == type(u'')

    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
d77c3dfd
FV
441
def encodeFilename(s):
    """
    Return s in the form expected by the file APIs of the running platform.

    @param s The name of the file (unicode text)
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass u'' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Elsewhere on Python 2, encode with the filesystem encoding and drop
    # characters it cannot represent.
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        fs_encoding = 'utf-8'
    return s.encode(fs_encoding, 'ignore')
d77c3dfd 463
8271226a
PH
def decodeOption(optval):
    """Return optval as text; None passes through unchanged.

    Byte strings are decoded with the preferred encoding.
    """
    if optval is None:
        return None
    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 472
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that can hold the value is used. The boundaries are
    inclusive, so exactly one hour is '1:00:00' (not '60:00') and exactly
    one minute is '1:00' (not '60').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
480
ea6d901e
PH
def make_HTTPS_handler(opts):
    """Build the HTTPS handler for urllib openers.

    On Python 3.2+ the handler gets an SSL context that loads the default
    CA paths and verifies certificates unless opts.no_check_certificate is
    set. Older interpreters get a plain HTTPSHandler.
    """
    if sys.version_info >= (3, 2):
        import ssl
        ssl_context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        ssl_context.set_default_verify_paths()

        if opts.no_check_certificate:
            ssl_context.verify_mode = ssl.CERT_NONE
        else:
            ssl_context.verify_mode = ssl.CERT_REQUIRED
        return compat_urllib_request.HTTPSHandler(context=ssl_context)

    # Python's 2.x handler is very simplistic
    return compat_urllib_request.HTTPSHandler()
ea6d901e 494
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause is stored unchanged on the instance.
        """
        # Network-level failures and unavailable videos that are being
        # handled right now are never treated as bugs.
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is None:
            return None
        frames = traceback.format_tb(self.traceback)
        return u''.join(frames)
516
1c256f70 517
class DownloadError(Exception):
    """Download Error exception.

    Raised by FileDownloader objects that are not configured to continue
    on errors; the exception carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
529
530
class SameFileError(Exception):
    """Same File exception.

    Raised by FileDownloader objects when they detect that several
    downloads would end up in the same file on disk.
    """
d77c3dfd
FV
538
539
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        """msg is the error description; it is also available as self.msg."""
        # Initialise the base class so that str(e), e.args and tracebacks
        # actually show the message instead of being empty.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd
FV
548
class MaxDownloadsReached(Exception):
    """Signals that the --max-downloads limit has been reached."""
d77c3dfd
FV
552
553
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
561
562
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        """downloaded and expected are the received and announced sizes in bytes."""
        # Give the base class a message so str(e) and tracebacks are not empty.
        super(ContentTooShortError, self).__init__(
            'Downloaded %s bytes, expected %s bytes' % (downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 577
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class adds the standard
    headers to every HTTP request and transparently decodes gzipped and
    deflated responses. To avoid compression for one request, give it the
    HTTP header "Youtubedl-No-Compression"; the header is removed before
    the real request is made. Likewise, "Youtubedl-user-agent" overrides
    the User-agent for one request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data, trying raw deflate first and zlib-wrapped data second."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response, setting the status code on old versions too."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        # Older addinfourl takes no code argument; attach it afterwards.
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    def http_request(self, req):
        # The standard headers replace whatever the request already carries.
        for name, value in std_headers.items():
            req.headers.pop(name, None)
            req.add_header(name, value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            req.headers['User-agent'] = req.headers.pop('Youtubedl-user-agent')
        return req

    def http_response(self, req, resp):
        original = resp
        content_encoding = resp.headers.get('Content-encoding', '')
        if content_encoding == 'gzip':
            body = gzip.GzipFile(fileobj=io.BytesIO(original.read()), mode='r')
        elif content_encoding == 'deflate':
            body = io.BytesIO(self.deflate(original.read()))
        else:
            # Not compressed (or unknown encoding): hand back untouched.
            return original
        decoded = self.addinfourl_wrapper(body, original.headers, original.url, original.code)
        decoded.msg = original.msg
        return decoded

    https_request = http_request
    https_response = http_response
bf50b038
JMF
643
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    The input is tried against a fixed list of known date layouts; the
    first one that parses wins. Returns None if none of them matches.
    """
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Layout does not fit (or the date cannot be formatted); try
            # the next one. Other exceptions are real bugs and propagate.
            continue
        break
    return upload_date
658
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url.

    The text after the last '.' of the part before any '?' is returned if
    it is purely alphanumeric; otherwise default_ext is returned.
    """
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    if not re.match(r'^[A-Za-z0-9]+$', candidate):
        return default_ext
    return candidate
73e79f2a 665
d4051a8e
JMF
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return stem + u'.' + sub_lang + u'.' + sub_format
668
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days. Raises
    ValueError if the string matches neither form.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a normal string literal is an invalid escape.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta keyword arguments are plural ('days', 'weeks').
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
694
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str.

        A missing bound defaults to the earliest/latest representable
        date. Raises ValueError if start lies after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date is parsed like the bounds.
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())