]> jfr.im git - yt-dlp.git/blob - youtube_dl/utils.py
[ustream] Simplify channel extraction
[yt-dlp.git] / youtube_dl / utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import datetime
5 import email.utils
6 import errno
7 import gzip
8 import io
9 import json
10 import locale
11 import os
12 import platform
13 import re
14 import socket
15 import sys
16 import traceback
17 import zlib
18
19 try:
20 import urllib.request as compat_urllib_request
21 except ImportError: # Python 2
22 import urllib2 as compat_urllib_request
23
24 try:
25 import urllib.error as compat_urllib_error
26 except ImportError: # Python 2
27 import urllib2 as compat_urllib_error
28
29 try:
30 import urllib.parse as compat_urllib_parse
31 except ImportError: # Python 2
32 import urllib as compat_urllib_parse
33
34 try:
35 from urllib.parse import urlparse as compat_urllib_parse_urlparse
36 except ImportError: # Python 2
37 from urlparse import urlparse as compat_urllib_parse_urlparse
38
39 try:
40 import urllib.parse as compat_urlparse
41 except ImportError: # Python 2
42 import urlparse as compat_urlparse
43
44 try:
45 import http.cookiejar as compat_cookiejar
46 except ImportError: # Python 2
47 import cookielib as compat_cookiejar
48
49 try:
50 import html.entities as compat_html_entities
51 except ImportError: # Python 2
52 import htmlentitydefs as compat_html_entities
53
54 try:
55 import html.parser as compat_html_parser
56 except ImportError: # Python 2
57 import HTMLParser as compat_html_parser
58
59 try:
60 import http.client as compat_http_client
61 except ImportError: # Python 2
62 import httplib as compat_http_client
63
64 try:
65 from urllib.error import HTTPError as compat_HTTPError
66 except ImportError: # Python 2
67 from urllib2 import HTTPError as compat_HTTPError
68
# subprocess.DEVNULL only exists on Python >= 3.3; older interpreters get a
# freshly opened handle on the platform's null device instead.
try:
    from subprocess import DEVNULL
except ImportError:
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
else:
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
74
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string with the characters they encode.

        Runs of consecutive escapes are collected as bytes and decoded
        together with the given encoding/errors, so multi-byte characters
        survive. Malformed escapes are left in the output verbatim.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except (ValueError, TypeError):
                # Python 2's hex codec signals odd-length or non-hexadecimal
                # input with TypeError (not ValueError), so both must be
                # caught to keep a malformed escape as literal text.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) unicode pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless keep_blank_values is set; with strict_parsing a
        field without '=' raises ValueError.
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
153
# Text (unicode) string type: ``unicode`` on Python 2, ``str`` on Python 3.
try:
    unicode
except NameError:
    compat_str = str
else:
    compat_str = unicode  # Python 2

# Code point -> one-character text string: ``unichr`` on Python 2, ``chr`` on 3.
try:
    unichr
except NameError:
    compat_chr = chr
else:
    compat_chr = unichr  # Python 2
163
def compat_ord(c):
    """Return the integer value of c.

    Plain ints are returned unchanged (indexing bytes yields ints on
    Python 3); anything else is passed to ord().
    """
    return c if type(c) is int else ord(c)
167
# The class of compiled pattern objects has no portable public name,
# so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default headers; YoutubeDLHandler.http_request adds them to every request.
std_headers = {
    'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset':
        'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding':
        'gzip, deflate',
    'Accept-Language':
        'en-us,en;q=0.5',
}
178
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or the reported encoding cannot encode a simple test
    string (for example because the codec is unknown), 'UTF-8' is
    returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported codec actually exists and is usable.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (locale and codec failures vary by platform),
        # but no longer swallows KeyboardInterrupt/SystemExit.
        pref = 'UTF-8'

    return pref
192
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s; print() handles the encoding itself."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded in the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
200
# json.dump produces byte strings on Python 2 and text on Python 3, so the
# target file has to be opened in the matching mode.
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as json_file:
            json.dump(obj, json_file)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file named fn (binary mode)."""
        with open(fn, 'wb') as json_file:
            json.dump(obj, json_file)
211
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """Find the first element matching xpath whose attribute key equals val.

        Scans the candidates by hand instead of using an attribute
        predicate in the path expression. Returns None when nothing matches.
        """
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the path expression, so only
        # allow characters that cannot break out of the predicate.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        predicate = u"[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
225
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity body
    (the text between '&' and ';').

    Named entities are looked up in the standard entity table; numeric
    references may be decimal (#47) or hexadecimal (#x2F, #X2f). Anything
    that cannot be resolved is returned unchanged as '&...;'.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric character reference. Hexadecimal digits must be accepted after
    # the 'x' marker, and the whole entity has to match so trailing garbage
    # is not silently dropped.
    mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|[0-9]+)$', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            base = 16
            numstr = numstr[1:]
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Code point outside the range supported by this interpreter;
            # fall through and keep the literal text.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
250
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that remembers the document it was fed.

    Subclasses can consult self.html (the full source text) after or
    during parsing.
    """
    def __init__(self):
        # Was misspelled '__init', so it never ran as the constructor and
        # self.html stayed undefined until loads() was called.
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        """Store html on the parser, then parse it completely."""
        self.html = html
        self.feed(html)
        self.close()
261
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start_pos, end_pos] once the element is fully seen.
        self.result = None
        # True while the parser is inside the requested element.
        self.started = False
        # Open-tag counters per tag name, tracked while inside the element.
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Skip the offending line and resume, unless too many errors
        occurred or the requested element is already being read."""
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The element is complete once its own tag is balanced again.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Any kind of content following the opening tag marks the start position.
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the start and end positions,
        or None if the element was not found or not closed."""
        if self.result is None or len(self.result) != 3:
            return None
        start_pos, end_pos = self.result[1], self.result[2]
        # Positions are (1-based line, 0-based column) pairs.
        lines = self.html.split('\n')[start_pos[0]-1:end_pos[0]]
        lines[0] = lines[0][start_pos[1]:]
        if len(lines) == 1:
            # The first line was already cut at the start column, so the
            # end column has to be shifted by the same amount.
            lines[-1] = lines[-1][:end_pos[1]-start_pos[1]]
        lines[-1] = lines[-1][:end_pos[1]]
        return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    def _parse_endtag_skipping_split_script(self, i):
        """Step over a literal "</scr'+'ipt>" instead of parsing it as an
        end tag; everything else goes to the stock implementation."""
        if self.rawdata[i:].startswith("</scr'+'ipt>"):
            return i + len("</scr'+'ipt>")
        return compat_html_parser.HTMLParser.parse_endtag(self, i)

    AttrParser.parse_endtag = _parse_endtag_skipping_split_script
327
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals the given ID in the passed HTML document"""
    return get_element_by_attribute(attribute="id", value=id, html=html)
331
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag whose attribute has the given value.

    Parse errors are ignored; whatever the parser managed to collect up to
    that point is returned (None if the tag was not found).
    """
    attr_parser = AttrParser(attribute, value)
    try:
        attr_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep the partial result.
        pass
    return attr_parser.get_result()
340
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that picks out the content attribute of the meta
    tag with the specified name attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag == 'meta':
            attr_map = dict(attrs)
            if attr_map.get('name') == self.name:
                # A later matching tag overrides an earlier one.
                self.result = attr_map.get('content')

    def get_result(self):
        """Return the content found so far, or None."""
        return self.result
361
def get_meta_content(name, html):
    """
    Return the content attribute of the meta tag whose name attribute is
    name, or None if there is none. Parse errors are ignored.
    """
    meta_parser = MetaParser(name)
    try:
        meta_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep whatever was found before the error.
        pass
    return meta_parser.get_result()
372
373
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Source newlines carry no meaning in HTML; real line breaks come from
    # <br> tags and paragraph boundaries.
    flattened = html.replace('\n', ' ')
    with_breaks = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', flattened)
    with_breaks = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', with_breaks)
    # Drop all remaining tags, then decode the entities
    text = unescapeHTML(re.sub('<.*?>', '', with_breaks))
    return text.strip()
385
386
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails for a reason other
    than a permission error, the characters that are forbidden in win32
    file names are replaced by '#' in the final path component and the
    open is retried once under that name. If the name does not change, or
    the retry fails too, the exception propagates like it would from the
    standard open() function.

    The special name u'-' stands for standard output (its binary buffer
    where available).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars. Only the
        # last path component is rewritten: touching the directory part
        # would replace its separators (and a drive colon) as well and
        # point at a different location.
        head, tail = os.path.split(filename)
        alt_tail = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail)
        if alt_tail == tail:
            raise
        alt_filename = os.path.join(head, alt_tail)
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
420
421
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
429
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        """Map one character to its replacement (possibly empty)."""
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    result = u''.join([replace_insane(char) for char in s])
    if is_id:
        # IDs are kept as close to the original as possible.
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result or '_'
461
def orderedSet(iterable):
    """ Return a list of the elements of iterable without duplicates,
    keeping the order of first appearance """
    unique = []
    for item in iterable:
        # List membership (not a set) so unhashable elements work too.
        if item in unique:
            continue
        unique.append(item)
    return unique
469
def unescapeHTML(s):
    """
    Replace every HTML entity ('&...;') in s by the text that
    htmlentity_transform returns for it.

    @param s a string
    """
    assert type(s) == type(u'')

    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
478
def encodeFilename(s):
    """
    Return s in the form the file APIs of the running interpreter expect.

    @param s The name of the file
    """

    assert type(s) == type(u'')

    # Python 3 has a Unicode API. On Python 2, Windows 2000 and up also
    # take u'' directly through the Unicode APIs.
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.version_info >= (3, 0) or (
            sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5):
        return s

    # Everything else needs bytes in the file system encoding; characters
    # it cannot represent are dropped.
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        fs_encoding = 'utf-8'
    return s.encode(fs_encoding, 'ignore')
500
def decodeOption(optval):
    """Return an option value as a text string.

    None passes through unchanged; byte strings are decoded with the
    preferred encoding.
    """
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())
        assert isinstance(optval, compat_str)
    return optval
509
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: durations below a minute are
    plain seconds, durations below an hour are 'M:SS'.
    """
    # The boundaries are inclusive, so exactly one hour is '1:00:00'
    # (not '60:00') and exactly one minute is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
517
def make_HTTPS_handler(opts):
    """Build the HTTPS handler to install in the URL opener.

    On Python >= 3.2 the handler gets an SSL context using the default
    verify paths; certificates are checked unless
    opts.no_check_certificate is set.
    """
    if sys.version_info >= (3, 2):
        import ssl
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.set_default_verify_paths()

        if opts.no_check_certificate:
            context.verify_mode = ssl.CERT_NONE
        else:
            context.verify_mode = ssl.CERT_REQUIRED
        return compat_urllib_request.HTTPSHandler(context=context)

    # Python's 2.x handler is very simplistic
    return compat_urllib_request.HTTPSHandler()
531
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        active_exc_info = sys.exc_info()

        # Errors raised while handling a network failure or an unavailable
        # video are never treated as bugs.
        expected_causes = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if active_exc_info[0] in expected_causes:
            expected = True
        if not expected:
            msg += u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = active_exc_info  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Return the stored traceback as text, or None if there is none."""
        if self.traceback is not None:
            return u''.join(traceback.format_tb(self.traceback))
        return None
553
554
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
566
567
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
575
576
class PostProcessingError(Exception):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to signal
    an error in the postprocessing task.
    """
    def __init__(self, msg):
        # The message is kept on the instance for callers to read.
        self.msg = msg
585
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
589
590
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
598
599
class ContentTooShortError(Exception):
    """Content Too Short exception.

    FileDownloader objects may raise this exception when a downloaded
    file is smaller than what the server announced first, which
    indicates that the connection was probably interrupted.
    """
    # Number of bytes actually received
    downloaded = None
    # Number of bytes the server announced
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
614
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class automatically adds
    the standard headers to every HTTP request and transparently decodes
    gzipped and deflated responses from web servers. To avoid compression
    in a particular request, the calling code only has to add the HTTP
    header "Youtubedl-No-Compression" to the original request; it is
    removed before the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, raw or wrapped in a zlib container."""
        try:
            # Raw deflate stream (no zlib header)
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, setting the status code by hand
        on versions whose constructor does not accept it."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    @staticmethod
    def _gunzip(content):
        """Return the decompressed bytes of the gzip data in content."""
        gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            return gz.read()
        except IOError as original_ioerror:
            # There may be junk add the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            for trim in range(1, 1024):
                try:
                    gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                    return gz.read()
                except IOError:
                    continue
            raise original_ioerror

    def http_request(self, req):
        """Apply the standard headers and resolve the Youtubedl-* pseudo headers."""
        for name, value in std_headers.items():
            req.headers.pop(name, None)
            req.add_header(name, value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            req.headers.pop('User-agent', None)
            req.headers['User-agent'] = req.headers.pop('Youtubedl-user-agent')
        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decoded copy if its body is gzip or
        deflate encoded."""
        old_resp = resp

        def rewrap(stream):
            # New response object with the original metadata.
            new_resp = self.addinfourl_wrapper(stream, old_resp.headers, old_resp.url, old_resp.code)
            new_resp.msg = old_resp.msg
            return new_resp

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            resp = rewrap(io.BytesIO(self._gunzip(resp.read())))
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            resp = rewrap(io.BytesIO(self.deflate(resp.read())))
        return resp

    https_request = http_request
    https_response = http_response
695
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    date_str is tried against a list of known date formats after commas
    and a trailing UTC offset have been removed. Returns None if none of
    the formats matches.
    """
    #Replace commas
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
    for expression in format_expressions:
        try:
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Not this format (or a date strftime cannot render); only
            # this failure is expected, anything else should surface.
            continue
    return None
710
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url.

    The guess is whatever follows the last '.' before the query string;
    it is only accepted if it is purely alphanumeric, otherwise
    default_ext is returned.
    """
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    return default_ext
717
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by
    '.<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return u'.'.join([stem, sub_lang, sub_format])
720
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Relative months and years are approximated as 30 and 365 days.
    Raises ValueError if date_str matches neither form.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        #A bad aproximation?
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta takes plural keyword names (days, weeks)
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
746
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound leaves that side of the range open"""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
772
773
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte string result: decode it with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
782
783
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return list(map(ord, bs))
791
792
def intlist_to_bytes(xs):
    """Return a byte string built from the list of byte values xs."""
    if not xs:
        return b''
    if not isinstance(chr(0), bytes):  # Python 3
        return bytes(xs)
    # Python 2: chr() already produces one-byte strings
    return ''.join(map(chr, xs))