]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Deprecate --only-sub
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Standard-library modules used throughout this file.
import datetime
import email.utils
import gzip
import io
import json
import locale
import os
import re
import sys
import traceback
import zlib

# Each alias below resolves to the Python 3 module when it is importable and
# falls back to the Python 2 module of the same purpose otherwise.
try:
    import urllib.request as compat_urllib_request
except ImportError:  # Python 2
    import urllib2 as compat_urllib_request

try:
    import urllib.error as compat_urllib_error
except ImportError:  # Python 2
    import urllib2 as compat_urllib_error

try:
    import urllib.parse as compat_urllib_parse
except ImportError:  # Python 2
    import urllib as compat_urllib_parse

try:
    from urllib.parse import urlparse as compat_urllib_parse_urlparse
except ImportError:  # Python 2
    from urlparse import urlparse as compat_urllib_parse_urlparse

try:
    import http.cookiejar as compat_cookiejar
except ImportError:  # Python 2
    import cookielib as compat_cookiejar

try:
    import html.entities as compat_html_entities
except ImportError:  # Python 2
    import htmlentitydefs as compat_html_entities

try:
    import html.parser as compat_html_parser
except ImportError:  # Python 2
    import HTMLParser as compat_html_parser

try:
    import http.client as compat_http_client
except ImportError:  # Python 2
    import httplib as compat_http_client

# subprocess.DEVNULL is not importable on older interpreters; there the
# fallback opens os.devnull for writing on every call.
try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')

try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode."""
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except (TypeError, ValueError):
                # Python 2's hex codec signals malformed input (odd length or
                # non-hex digits) with TypeError, so both types mean "not a
                # valid escape": keep the text verbatim.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) unicode pairs."""
        _coerce_result = unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result

# Text type: `unicode` where that builtin exists (Python 2), otherwise `str`.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str

# Code point -> one-character text string, matching compat_str.
try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr

# Default HTTP request headers.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; an unknown or unusable name raises here.
        u'TEST'.encode(pref)
    except Exception:
        # Any failure to obtain or use the locale's encoding falls back to
        # UTF-8. KeyboardInterrupt/SystemExit are deliberately not swallowed.
        pref = 'UTF-8'

    return pref

if sys.version_info < (3, 0):
    def compat_print(s):
        """Print text s, encoded for the terminal.

        Characters the preferred encoding cannot represent are written as
        XML character references.
        """
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        """Print text s; s must be a text (not bytes) string."""
        assert isinstance(s, type(u''))
        print(s)

# In Python 2.x, json.dump expects a bytestream.
# In Python 3.x, it writes to a character stream
if sys.version_info < (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as f:
            json.dump(obj, f)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text mode)."""
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)

def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric entity: '#' followed by decimal digits, or by 'x'/'X' and
    # hexadecimal digits (so that e.g. '#xe9' is recognised).
    mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            codepoint = int(numstr[1:], 16)
        else:
            codepoint = int(numstr, 10)
        try:
            return compat_chr(codepoint)
        except (ValueError, OverflowError):
            # Code point outside the representable range: keep the literal.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)

compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix


class AttrParser(compat_html_parser.HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""

    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start_pos, end_pos] once a matching element has been
        # seen completely; positions are (line, column) pairs from getpos().
        self.result = None
        self.started = False
        # Number of currently open tags per tag name, counted from the
        # matching start tag on.
        self.depth = {}
        self.html = None
        self.watch_startpos = False
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)

    def error(self, message):
        """Skip the offending line and resume, giving up after repeated errors."""
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:])  # skip one line
        self.error_count += 1
        self.goahead(1)

    def loads(self, html):
        """Parse the complete document html."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if tag not in self.depth:
                self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            # An element with no content at all reaches its end tag without
            # any other handler having recorded the start position.
            self.find_startpos(None)
            if tag in self.depth:
                self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the matching element's tags.

        Returns None when no matching element was found or when its start
        and end positions were not both recorded.
        """
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        (start_line, start_col), (end_line, end_col) = self.result[1:]
        # getpos() line numbers are 1-based.
        lines = self.html.split('\n')[start_line - 1:end_line]
        if len(lines) == 1:
            lines[0] = lines[0][start_col:end_col]
        else:
            lines[0] = lines[0][start_col:]
            lines[-1] = lines[-1][:end_col]
        return '\n'.join(lines).strip()

# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    def _parse_endtag_workaround(self, i):
        """Step over the literal "</scr'+'ipt>"; defer anything else to HTMLParser."""
        token = "</scr'+'ipt>"
        if self.rawdata.startswith(token, i):
            return i + len(token)
        return compat_html_parser.HTMLParser.parse_endtag(self, i)

    AttrParser.parse_endtag = _parse_endtag_workaround

def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute("id", id, html)

def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    parser = AttrParser(attribute, value)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: use whatever the parser collected before the error.
        pass
    return parser.get_result()


def clean_html(html):
    """Clean an HTML snippet into a readable string

    None is returned unchanged.
    """
    if html is None:
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename u'-' selects standard output instead of a file.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the underlying byte stream where sys.stdout has one.
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError):
        # In case of error, try to remove win32 forbidden chars
        filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)

        # An exception here should be caught in the caller
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is None:
        return None
    return email.utils.mktime_tz(timetuple)

def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        # Per-character replacement; returns the text to emit for char.
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores and trim them from both ends.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result

def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of each element, in the
    original order. Elements are compared with ==, so they need not be
    hashable.
    """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

def unescapeHTML(s):
    """Replace the HTML entities in s by the characters they stand for.

    @param s a string
    """
    assert isinstance(s, type(u''))

    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
    return result

def encodeFilename(s):
    """Return s in the form expected by the file system functions.

    @param s The name of the file
    """

    assert isinstance(s, type(u''))

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s
    else:
        encoding = sys.getfilesystemencoding()
        if encoding is None:
            encoding = 'utf-8'
        # Characters the encoding cannot represent are dropped.
        return s.encode(encoding, 'ignore')

def decodeOption(optval):
    """Return the option value optval as text.

    None is returned unchanged; byte strings are decoded with the
    preferred encoding.
    """
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval

def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: one hour or more includes the hour
    field, one minute or more the minute field.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs

def make_HTTPS_handler(opts):
    """Build the HTTPS handler to install in the URL opener.

    opts.no_check_certificate selects whether server certificates are
    verified.
    """
    if sys.version_info < (3, 2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.set_default_verify_paths()

    # check_hostname does not exist on every supported interpreter, hence
    # the hasattr() guards. It has to be off before verification can be
    # disabled, and verification has to be on before it can be enabled.
    if opts.no_check_certificate:
        if hasattr(context, 'check_hostname'):
            context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
        if hasattr(context, 'check_hostname'):
            # Without this the certificate chain is validated but not
            # matched against the host being contacted.
            context.check_hostname = True
    return compat_urllib_request.HTTPSHandler(context=context)

class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None):
        """ tb, if given, is the original traceback (so that it can be printed out). """
        super(ExtractorError, self).__init__(msg)
        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception

    def format_traceback(self):
        """Return the stored traceback as text, or None if there is none."""
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))


class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass


class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg

class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass


class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(downloaded, expected)
        self.downloaded = downloaded
        self.expected = expected

class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, with or without a zlib header."""
        try:
            # Negative window bits: raw stream without zlib header.
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, also where it takes no code argument."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Apply the standard headers and the Youtubedl-* pseudo headers to req."""
        for h, v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            # Per-request override of the User-agent header.
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        """Return resp, wrapped so that gzip/deflate bodies read decompressed."""
        old_resp = resp
        content_encoding = resp.headers.get('Content-encoding', '')
        # gzip
        if content_encoding == 'gzip':
            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        elif content_encoding == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response

def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str matches none of the known formats.
    """
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
    for expression in format_expressions:
        try:
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Not in this format; try the next one.
            continue
    return None

def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # A bad approximation? timedelta has no month or year unit, so both
        # are converted to a fixed number of days.
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()

class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest/latest representable date.
        Raises ValueError if start is after end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if isinstance(date, datetime.datetime):
            # datetime objects cannot be ordered against plain dates;
            # compare on the calendar day.
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())