]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Implement search_regex from #847
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
f45c185f 4import errno
d77c3dfd 5import gzip
03f9daab 6import io
f4bfd65f 7import json
d77c3dfd
FV
8import locale
9import os
10import re
11import sys
01951dda 12import traceback
d77c3dfd 13import zlib
d77c3dfd 14import email.utils
921a1455 15import json
bd558525 16import datetime
d77c3dfd 17
01ba00ca 18try:
59ae15a5 19 import urllib.request as compat_urllib_request
01ba00ca 20except ImportError: # Python 2
59ae15a5 21 import urllib2 as compat_urllib_request
01ba00ca
PH
22
23try:
59ae15a5 24 import urllib.error as compat_urllib_error
01ba00ca 25except ImportError: # Python 2
59ae15a5 26 import urllib2 as compat_urllib_error
01ba00ca
PH
27
28try:
59ae15a5 29 import urllib.parse as compat_urllib_parse
01ba00ca 30except ImportError: # Python 2
59ae15a5 31 import urllib as compat_urllib_parse
01ba00ca 32
799c0763
PH
33try:
34 from urllib.parse import urlparse as compat_urllib_parse_urlparse
35except ImportError: # Python 2
36 from urlparse import urlparse as compat_urllib_parse_urlparse
37
01ba00ca 38try:
59ae15a5 39 import http.cookiejar as compat_cookiejar
01ba00ca 40except ImportError: # Python 2
59ae15a5 41 import cookielib as compat_cookiejar
01ba00ca 42
3e669f36 43try:
59ae15a5 44 import html.entities as compat_html_entities
9f37a959 45except ImportError: # Python 2
59ae15a5 46 import htmlentitydefs as compat_html_entities
3e669f36 47
a8156c1d 48try:
59ae15a5 49 import html.parser as compat_html_parser
9f37a959 50except ImportError: # Python 2
59ae15a5 51 import HTMLParser as compat_html_parser
a8156c1d 52
348d0a7a 53try:
59ae15a5 54 import http.client as compat_http_client
9f37a959 55except ImportError: # Python 2
59ae15a5 56 import httplib as compat_http_client
348d0a7a 57
5910e210
PH
58try:
59 from subprocess import DEVNULL
60 compat_subprocess_get_DEVNULL = lambda: DEVNULL
61except ImportError:
62 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
63
# Prefer the Python 3 standard-library parse_qs; on ImportError (Python 2)
# fall back to the hand-copied implementation below.
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %XX escapes in *string* by the characters they encode.

        Consecutive escapes are collected as raw bytes and decoded together
        with *encoding* / *errors*, so multi-byte sequences survive.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            # No '%' at all: nothing to decode.
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # NOTE(review): str.decode('hex') presumably only exists on
                # Python 2 strings, which matches this branch - confirm.
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                # Not a valid escape: keep the text, including the '%', verbatim.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse the query string *qs* into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. Blank values are dropped unless
        keep_blank_values is set; a field without '=' raises ValueError when
        strict_parsing is set and is otherwise skipped (or kept with an empty
        value if keep_blank_values is set).
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                # '+' stands for a space and is replaced before percent-decoding.
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse *qs* into a dict mapping each name to the list of its values.

        Values of repeated names are appended in the order they appear.
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                        encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
348d0a7a 142
3e669f36 143try:
59ae15a5 144 compat_str = unicode # Python 2
3e669f36 145except NameError:
59ae15a5 146 compat_str = str
3e669f36
PH
147
148try:
59ae15a5 149 compat_chr = unichr # Python 2
3e669f36 150except NameError:
59ae15a5 151 compat_chr = chr
3e669f36 152
b31756c1
FV
def compat_ord(c):
    """Return c unchanged when it is exactly an int, otherwise ord(c)."""
    return c if type(c) is int else ord(c)
156
# Default HTTP request headers. YoutubeDLHandler.http_request adds every
# entry of this dict to each request it processes.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 164
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or the reported encoding cannot encode a simple test
    string, 'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec; an unknown or unusable name raises here.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (locale.Error, LookupError, UnicodeError, ...),
        # but no longer swallows KeyboardInterrupt / SystemExit.
        pref = 'UTF-8'

    return pref
d77c3dfd 178
# compat_print(s): print a unicode string on either major Python version.
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s (must be exactly the unicode text type)."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded in the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
d77c3dfd 186
f4bfd65f
PH
# json.dump writes to a character stream in Python 3.x,
# but expects a bytestream in Python 2.x.
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as out:
            json.dump(obj, out)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as out:
            json.dump(obj, out)
197
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Returns the decoded character for known named entities and for numeric
    entities (decimal '#38' or hexadecimal '#x26'); any other entity is
    returned in its literal '&...;' form.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric entity. The hexadecimal alternative must accept a-f digits;
    # a plain \d+ would leave entities such as '#x2F' undecoded.
    mobj = re.match(u'(?u)#(x[0-9a-fA-F]+|\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            # Turn 'x2F' into '0x2F' so int() accepts it with base 16.
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
d77c3dfd 222
# Override the html parser module's module-level start-tag regex with the
# pattern below (original note: "backport bugfix").
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
43e8fafd
ND
class AttrParser(compat_html_parser.HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        """Look for the first tag whose `attribute` equals `value`."""
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start position, end position] once fully matched.
        self.result = None
        # True from the matching start tag until that tag is closed again.
        self.started = False
        # Per tag name: number of currently open tags seen since the match.
        self.depth = {}
        # The complete document, kept for error recovery and get_result().
        self.html = None
        # True while the start position still has to be recorded.
        self.watch_startpos = False
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by dropping input and parsing on.

        Raises HTMLParseError once more than 10 errors were seen or when the
        wanted tag has already been entered.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        # NOTE(review): goahead() is an HTMLParser internal; presumably this
        # resumes parsing on the shortened rawdata - confirm.
        self.goahead(1)

    def loads(self, html):
        """Parse the complete document `html`."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # First tag after the matching one: record the start position.
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The matched tag itself is closed: store the end position.
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other parser event records the start position in the same way.
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the text between the recorded start and end positions.

        Returns None when no matching tag was found or when its start or end
        position was never recorded.
        """
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        # Positions are (line, column) pairs; the line number is 1-based here.
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end share a line: the end column has to be shifted by
            # the characters already cut off at the front.
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
3b024e17
PH
# Hack for https://github.com/rg3/youtube-dl/issues/662
# On interpreters older than 2.7.3, step over the literal text "</scr'+'ipt>"
# instead of handing it to the regular end-tag parser.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
9e6dd238
FV
295
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals the given ID.

    Thin wrapper around get_element_by_attribute(); returns whatever that
    function returns for the attribute "id".
    """
    return get_element_by_attribute(attribute="id", value=id, html=html)
299
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag in html whose attribute equals value.

    Parse errors are ignored; the result is whatever the parser had collected
    at that point (None when no complete match exists).
    """
    finder = AttrParser(attribute, value)
    try:
        finder.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: fall through and use what was parsed so far.
        pass
    return finder.get_result()
9e6dd238
FV
308
309
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Source newlines carry no meaning in HTML; only <br> and paragraph
    # boundaries become line breaks.
    text = html.replace('\n', ' ')
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),        # <br> / <br />
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),  # </p><p ...>
        ('<.*?>', ''),                           # any remaining tag
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Finally decode the HTML entities and trim surrounding whitespace.
    return unescapeHTML(text).strip()
9e6dd238
FV
321
322
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it replaces the
    characters that are forbidden on win32 in the last path component and
    tries once more; if that fails too, the exception is raised like in the
    standard open() function. The special name u'-' stands for standard
    output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission problem is not fixed by renaming the file.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final component is rewritten: the character class contains
        # the path separators, so applying it to the directory part would
        # destroy the directory structure.
        head, tail = os.path.split(filename)
        alt_filename = os.path.join(
            head, re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
356
357
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 365
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def _clean(ch):
        # Map one character to its replacement (possibly empty or 2 chars).
        code = ord(ch)
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    cleaned = u''.join(_clean(ch) for ch in s)
    if is_id:
        # IDs are kept as close to the input as possible: no post-processing.
        return cleaned

    # Collapse runs of underscores and trim them from both ends.
    while '__' in cleaned:
        cleaned = cleaned.replace('__', '_')
    cleaned = cleaned.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    return cleaned if cleaned else '_'
d77c3dfd
FV
397
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of every element, in the
    original order. Elements are compared with ==, so they do not have to be
    hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd
FV
405
def unescapeHTML(s):
    """
    Replace every '&...;' sequence in s using htmlentity_transform.

    @param s a string (must be exactly the unicode text type)
    """
    assert type(s) == type(u'')

    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
d77c3dfd
FV
414
def encodeFilename(s):
    """
    Return s in the form the file APIs of the running interpreter expect.

    @param s The name of the file
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass u'' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Everything else gets bytes in the filesystem encoding; characters that
    # cannot be encoded are dropped.
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        fs_encoding = 'utf-8'
    return s.encode(fs_encoding, 'ignore')
d77c3dfd 436
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as text.

    None is passed through; bytes are decoded with the preferred encoding.
    """
    if optval is None:
        return optval

    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 445
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that can hold the value is used; exactly one hour is
    '1:00:00' and exactly one minute is '1:00'.
    """
    # >= (not >) so the boundary values switch to the longer form.
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
453
ea6d901e
PH
def make_HTTPS_handler(opts):
    """Build the HTTPS handler for the opener.

    opts must provide the boolean attribute no_check_certificate; when it is
    set, server certificates are not verified (Python 3.2 and newer only).
    """
    if sys.version_info < (3,2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.set_default_verify_paths()

    if opts.no_check_certificate:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    return compat_urllib_request.HTTPSHandler(context=context)
467
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None):
        """ tb, if given, is the original traceback (so that it can be printed out). """
        super(ExtractorError, self).__init__(msg)
        self.traceback = tb
        # Whatever exception is being handled right now (if any) is kept too.
        self.exc_info = sys.exc_info()  # preserve original exception

    def format_traceback(self):
        """Return the stored traceback as one string, or None without one."""
        tb = self.traceback
        return None if tb is None else u''.join(traceback.format_tb(tb))
480
1c256f70 481
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Stored untouched for whoever handles the error.
        self.exc_info = exc_info
d77c3dfd
FV
493
494
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
502
503
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        # The message is also available as the plain attribute .msg.
        self.msg = msg
d77c3dfd
FV
512
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
516
517
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
525
526
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Class-level defaults; both sizes are in bytes.
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 541
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, with or without a zlib header."""
        try:
            # Negative window bits: raw stream without header and checksum.
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response that carries the status code.

        When addinfourl has no getcode attribute, the code is not passed to
        the constructor but set as an attribute afterwards.
        """
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Add the standard headers and apply the Youtubedl-* pseudo headers."""
        for h,v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        # Pseudo header: send the request without Accept-encoding.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        # Pseudo header: its value replaces the User-agent header.
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        """Return resp with a gzip or deflate encoded body decompressed.

        The whole body is read into memory to do so. The headers, url, code
        and msg of the original response are carried over.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
bf50b038
JMF
607
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    Commas and a trailing numeric UTC offset are dropped before the string
    is tried against the known date formats. Returns None when no format
    matches; if several match, the last one in the list wins.
    """
    upload_date = None
    #Replace commas
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Format does not fit; only this expected failure is ignored.
            pass
    return upload_date
622
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    The result is a datetime.date. Months and years are approximated as 30
    and 365 days. Raises ValueError for a string in neither format.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        #A bad aproximation?
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta takes plural keyword names ('days', 'weeks').
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
648
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start or end leaves the range open on that side (earliest /
        latest representable date). Raises ValueError if start is after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not a date object is parsed like the constructor
        # arguments.
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())