]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Merge remote-tracking branch 'jaimeMF/SearchIE'
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
f45c185f 4import errno
d77c3dfd 5import gzip
03f9daab 6import io
f4bfd65f 7import json
d77c3dfd
FV
8import locale
9import os
10import re
11import sys
01951dda 12import traceback
d77c3dfd 13import zlib
d77c3dfd 14import email.utils
921a1455 15import json
bd558525 16import datetime
d77c3dfd 17
01ba00ca 18try:
59ae15a5 19 import urllib.request as compat_urllib_request
01ba00ca 20except ImportError: # Python 2
59ae15a5 21 import urllib2 as compat_urllib_request
01ba00ca
PH
22
23try:
59ae15a5 24 import urllib.error as compat_urllib_error
01ba00ca 25except ImportError: # Python 2
59ae15a5 26 import urllib2 as compat_urllib_error
01ba00ca
PH
27
28try:
59ae15a5 29 import urllib.parse as compat_urllib_parse
01ba00ca 30except ImportError: # Python 2
59ae15a5 31 import urllib as compat_urllib_parse
01ba00ca 32
799c0763
PH
33try:
34 from urllib.parse import urlparse as compat_urllib_parse_urlparse
35except ImportError: # Python 2
36 from urlparse import urlparse as compat_urllib_parse_urlparse
37
01ba00ca 38try:
59ae15a5 39 import http.cookiejar as compat_cookiejar
01ba00ca 40except ImportError: # Python 2
59ae15a5 41 import cookielib as compat_cookiejar
01ba00ca 42
3e669f36 43try:
59ae15a5 44 import html.entities as compat_html_entities
9f37a959 45except ImportError: # Python 2
59ae15a5 46 import htmlentitydefs as compat_html_entities
3e669f36 47
a8156c1d 48try:
59ae15a5 49 import html.parser as compat_html_parser
9f37a959 50except ImportError: # Python 2
59ae15a5 51 import HTMLParser as compat_html_parser
a8156c1d 52
348d0a7a 53try:
59ae15a5 54 import http.client as compat_http_client
9f37a959 55except ImportError: # Python 2
59ae15a5 56 import httplib as compat_http_client
348d0a7a 57
5910e210
PH
try:
    from subprocess import DEVNULL
except ImportError:
    # Older interpreters have no subprocess.DEVNULL: hand out a freshly
    # opened, writable handle on the platform's null device instead.
    def compat_subprocess_get_DEVNULL():
        return open(os.path.devnull, 'w')
else:
    # subprocess.DEVNULL is available: simply return the constant.
    def compat_subprocess_get_DEVNULL():
        return DEVNULL
63
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Runs of consecutive escapes are collected as bytes and decoded
        together with the given encoding/errors policy, so multi-byte
        sequences survive. Malformed escapes are kept literally.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except (ValueError, TypeError):
                # Python 2's hex codec signals non-hex digits and odd-length
                # input with TypeError, not ValueError; both mean "this was
                # not a valid escape", so keep the text verbatim.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) unicode pairs.

        Fields are separated by '&' or ';'. Fields without a value are
        dropped unless keep_blank_values is set; with strict_parsing a
        field lacking '=' raises ValueError.
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping names to lists of values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                        encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
348d0a7a 142
# Text type: ``unicode`` where that builtin exists (Python 2), else ``str``.
try:
    unicode
except NameError:
    compat_str = str
else:
    compat_str = unicode # Python 2

# Code point -> one-character text string: ``unichr`` on Python 2, else ``chr``.
try:
    unichr
except NameError:
    compat_chr = chr
else:
    compat_chr = unichr # Python 2
3e669f36 152
# Default HTTP request headers (a Firefox-like User-Agent plus matching
# Accept-* headers).
std_headers = dict((
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0'),
    ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Accept-Language', 'en-us,en;q=0.5'),
))
f427df17 160
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or reports a codec that cannot actually encode text,
    'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported codec really exists and works.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (locale.Error, LookupError, ...), but unlike a
        # bare ``except:`` it lets KeyboardInterrupt/SystemExit through.
        pref = 'UTF-8'

    return pref
d77c3dfd 174
if sys.version_info >= (3,0):
    def compat_print(s):
        """Print the text string s; s must be exactly the unicode type."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded with the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
d77c3dfd 182
f4bfd65f
PH
def write_json_file(obj, fn):
    """Serialize obj as JSON into the file named fn."""
    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3,0):
        stream = open(fn, 'wb')
    else:
        stream = open(fn, 'w', encoding='utf-8')
    with stream:
        json.dump(obj, stream)
193
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities and decimal/hexadecimal numeric references are
    resolved; anything else (including numeric references outside the
    valid code point range) is returned unchanged as '&...;'.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric reference. Hex references must accept the digits a-f as well,
    # otherwise e.g. '#x2F' would be cut short and decoded as chr(2).
    mobj = re.match(u'#([xX][0-9a-fA-F]+|[0-9]+)\\Z', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            codepoint = int(numstr[1:], 16)
        else:
            codepoint = int(numstr, 10)
        try:
            return compat_chr(codepoint)
        except (ValueError, OverflowError):
            # Not a representable code point: fall through to the literal.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
d77c3dfd 218
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix


class AttrParser(compat_html_parser.HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute

    After loads(), get_result() returns the text found between the opening
    tag whose attribute equals the requested value and its closing tag.
    """
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start_pos, end_pos] once the element is fully seen
        self.result = None
        # True while we are inside the matched element
        self.started = False
        # Per-tag nesting counters, maintained while inside the element
        self.depth = {}
        self.html = None
        # True until the position right after the opening tag is recorded
        self.watch_startpos = False
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)

    def error(self, message):
        """Skip the offending line and resume, unless that is hopeless.

        Gives up (raising HTMLParseError) after more than 10 errors or
        when the error happens inside the matched element.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        current_line = self.getpos()[0]
        remaining = self.html.split('\n')[current_line:]
        self.rawdata = '\n'.join(remaining) # skip one line
        self.error_count += 1
        self.goahead(1)

    def loads(self, html):
        """Parse the complete document html."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The element ends when its own tag is balanced again
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped content of the matched element, or None."""
        if self.result is None or len(self.result) != 3:
            return None
        start_line, start_col = self.result[1][0], self.result[1][1]
        end_line, end_col = self.result[2][0], self.result[2][1]
        lines = self.html.split('\n')[start_line - 1:end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            # Start and end share a line: the end column has to be taken
            # relative to the already truncated line.
            lines[-1] = lines[-1][:end_col - start_col]
        lines[-1] = lines[-1][:end_col]
        return '\n'.join(lines).strip()

# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    # Step over a literal "</scr'+'ipt>" instead of parsing it as an end tag.
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
9e6dd238
FV
291
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is just an attribute lookup on the "id" attribute.
    attribute = "id"
    return get_element_by_attribute(attribute, id, html)
295
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    finder = AttrParser(attribute, value)
    try:
        finder.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: whatever was collected before the parse error is
        # still evaluated below.
        pass
    content = finder.get_result()
    return content
9e6dd238
FV
304
305
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Source newlines carry no meaning; only <br> and paragraph breaks do.
    text = html.replace('\n', ' ')
    substitutions = (
        # Newline vs <br />
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    text = unescapeHTML(text)
    return text.strip()
9e6dd238
FV
317
318
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly (replacing characters that are forbidden on win32
    in the last path component by '#') and opens that name instead. If that
    fails too, or nothing could be changed, the exception is raised like the
    standard open() function would. Permission errors (EACCES) are never
    retried.

    The filename u'-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final component is rewritten: the directory part must
        # keep its separators (and a possible drive letter) intact.
        dirname, basename = os.path.split(filename)
        alt_basename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', basename)
        if alt_basename == basename:
            # Nothing to tweak, report the original failure
            raise
        alt_filename = os.path.join(dirname, alt_basename)
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
d77c3dfd
FV
352
353
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 361
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        # Question marks and control characters are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally replaces shell-unfriendly
        # punctuation, whitespace and everything outside ASCII
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    result = u''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs are kept as close to the original as possible
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub('__+', '_', result)
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result or '_'
d77c3dfd
FV
393
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of every element, in the
    original order. Elements are compared by equality and need not be
    hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd
FV
401
def unescapeHTML(s):
    """
    Replace every '&...;' entity in s through htmlentity_transform.

    @param s a string
    """
    assert type(s) == type(u'')

    entity_pattern = u'(?u)&(.+?);'
    return re.sub(entity_pattern, htmlentity_transform, s)
d77c3dfd
FV
410
def encodeFilename(s):
    """
    Return s in the form expected by the file APIs of the running
    interpreter: unchanged where Unicode names are accepted, otherwise
    encoded with the filesystem encoding.

    @param s The name of the file
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API
    python3 = sys.version_info >= (3, 0)
    if python3:
        return s

    # Pass u'' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    # Characters the encoding cannot represent are silently dropped
    return s.encode(encoding, 'ignore')
d77c3dfd 432
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as text.

    None is passed through; byte strings are decoded with the preferred
    encoding; anything else must already be a text string.
    """
    if optval is None:
        return None
    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 441
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: durations of an hour or more
    include the hour field, durations of a minute or more the minute field.
    """
    # The bounds are inclusive so that exactly one hour is '1:00:00'
    # (not '60:00') and exactly one minute is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
449
ea6d901e
PH
def make_HTTPS_handler(opts):
    """Build the HTTPS handler for the opener.

    On Python >= 3.2 the handler gets an SSL context that uses the default
    verify paths and either requires certificates or, when
    opts.no_check_certificate is set, does not check them at all.
    """
    if sys.version_info >= (3,2):
        import ssl
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.set_default_verify_paths()

        if opts.no_check_certificate:
            context.verify_mode = ssl.CERT_NONE
        else:
            context.verify_mode = ssl.CERT_REQUIRED
        return compat_urllib_request.HTTPSHandler(context=context)

    # Python's 2.x handler is very simplistic
    return compat_urllib_request.HTTPSHandler()
463
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None):
        """ tb, if given, is the original traceback (so that it can be printed out). """
        super(ExtractorError, self).__init__(msg)
        self.traceback = tb
        # Whatever exception is being handled while this error is created
        self.exc_info = sys.exc_info() # preserve original exception

    def format_traceback(self):
        """Return the stored traceback as one string, or None without one."""
        tb = self.traceback
        return None if tb is None else u''.join(traceback.format_tb(tb))
476
1c256f70 477
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. The exception carries the
    appropriate error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that callers can report the underlying cause
        self.exc_info = exc_info
d77c3dfd
FV
489
490
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
498
499
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        """ msg describes the failure; it is also available as self.msg. """
        # Initialize the base class as well, so that str(exc) and exc.args
        # carry the message instead of being empty.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd
FV
508
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
512
513
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
521
522
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        """ downloaded and expected are the received and announced sizes. """
        # Give the exception a readable message; without the base class
        # call str(exc) would be empty.
        super(ContentTooShortError, self).__init__(
            'Downloaded %s bytes, expected %s bytes' % (downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 537
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed in an OpenerDirector, this class adds the standard
    headers to every HTTP(S) request and transparently decodes gzipped and
    deflated responses. To avoid compression for a particular request, the
    calling code only has to include the HTTP header
    "Youtubedl-No-Compression"; it is removed before the real request is
    made. Likewise, a "Youtubedl-User-Agent" header replaces the standard
    User-Agent and is then removed.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data sent as raw deflate or as a zlib stream."""
        try:
            # Negative window bits: raw deflate data without zlib header
            inflated = zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            inflated = zlib.decompress(data)
        return inflated

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response that carries the status code."""
        factory = compat_urllib_request.addinfourl
        if not hasattr(factory, 'getcode'):
            # This addinfourl takes no code argument: set it afterwards
            ret = factory(stream, headers, url)
            ret.code = code
            return ret
        return factory(stream, headers, url, code)

    def http_request(self, req):
        headers = req.headers
        for name, value in std_headers.items():
            if name in headers:
                del headers[name]
            req.add_header(name, value)
        if 'Youtubedl-no-compression' in headers:
            headers.pop('Accept-encoding', None)
            del headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in headers:
            headers.pop('User-agent', None)
            headers['User-agent'] = headers['Youtubedl-user-agent']
            del headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        old_resp = resp

        def rewrap(stream):
            # Same headers, URL, code and message; only the body differs
            wrapped = self.addinfourl_wrapper(stream, old_resp.headers, old_resp.url, old_resp.code)
            wrapped.msg = old_resp.msg
            return wrapped

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            compressed = io.BytesIO(resp.read())
            resp = rewrap(gzip.GzipFile(fileobj=compressed, mode='r'))
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            resp = rewrap(io.BytesIO(self.deflate(resp.read())))
        return resp

    https_request = http_request
    https_response = http_response
bf50b038
JMF
603
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Commas and a trailing numeric UTC offset are ignored. Returns None if
    date_str matches none of the known date formats.
    """
    #Replace commas
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
    for expression in format_expressions:
        try:
            # The first format that parses wins; no need to try the rest
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Raised when the string does not match this format
            continue
    return None
618
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9]+(day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if date_str has neither format.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        #A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keyword names ('days', 'weeks')
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
644
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start or end leaves the range open on that side.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day,day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not a date object is parsed as a date string
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds