2 # -*- coding: utf-8 -*-
4 from __future__
import unicode_literals
32 import xml
.etree
.ElementTree
42 compat_socket_create_connection
,
46 compat_urllib_parse_urlparse
,
47 compat_urllib_request
,
53 # This is not clearly defined otherwise
54 compiled_regex_type
= type(re
.compile(''))
57 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
58 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
59 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
60 'Accept-Encoding': 'gzip, deflate',
61 'Accept-Language': 'en-us,en;q=0.5',
65 def preferredencoding():
66 """Get preferred encoding.
68 Returns the best encoding scheme for the system, based on
69 locale.getpreferredencoding() and some further tweaks.
72 pref
= locale
.getpreferredencoding()
80 def write_json_file(obj
, fn
):
81 """ Encode obj as JSON and write it to fn, atomically if possible """
83 fn
= encodeFilename(fn
)
84 if sys
.version_info
< (3, 0) and sys
.platform
!= 'win32':
85 encoding
= get_filesystem_encoding()
86 # os.path.basename returns a bytes object, but NamedTemporaryFile
87 # will fail if the filename contains non ascii characters unless we
88 # use a unicode object
89 path_basename
= lambda f
: os
.path
.basename(fn
).decode(encoding
)
90 # the same for os.path.dirname
91 path_dirname
= lambda f
: os
.path
.dirname(fn
).decode(encoding
)
93 path_basename
= os
.path
.basename
94 path_dirname
= os
.path
.dirname
98 'prefix': path_basename(fn
) + '.',
99 'dir': path_dirname(fn
),
103 # In Python 2.x, json.dump expects a bytestream.
104 # In Python 3.x, it writes to a character stream
105 if sys
.version_info
< (3, 0):
113 tf
= tempfile
.NamedTemporaryFile(**args
)
118 if sys
.platform
== 'win32':
119 # Need to remove existing file on Windows, else os.rename raises
120 # WindowsError or FileExistsError.
125 os
.rename(tf
.name
, fn
)
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """Return the first element matching xpath[@key=val] under *node*."""
        # Restrict key/val so the string interpolation below cannot
        # corrupt the XPath predicate.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
142 def find_xpath_attr(node
, xpath
, key
, val
):
143 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
144 # .//node does not match if a node is a direct child of . !
145 if isinstance(xpath
, compat_str
):
146 xpath
= xpath
.encode('ascii')
148 for f
in node
.findall(xpath
):
149 if f
.attrib
.get(key
) == val
:
153 # On python2.6 the xml.etree.ElementTree.Element methods don't support
154 # the namespace parameter
157 def xpath_with_ns(path
, ns_map
):
158 components
= [c
.split(':') for c
in path
.split('/')]
162 replaced
.append(c
[0])
165 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
166 return '/'.join(replaced
)
169 def xpath_text(node
, xpath
, name
=None, fatal
=False):
170 if sys
.version_info
< (2, 7): # Crazy 2.6
171 xpath
= xpath
.encode('ascii')
174 if n
is None or n
.text
is None:
176 name
= xpath
if name
is None else name
177 raise ExtractorError('Could not find XML element %s' % name
)
def get_element_by_id(id, html):
    """Return the inner content of the tag whose id attribute equals *id*."""
    # Thin convenience wrapper around the generic attribute-based lookup.
    return get_element_by_attribute("id", id, html)
188 def get_element_by_attribute(attribute
, value
, html
):
189 """Return the content of the tag with the specified attribute in the passed HTML document"""
191 m
= re
.search(r
'''(?xs)
193 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
195 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
199 ''' % (re
.escape(attribute
), re
.escape(value
)), html
)
203 res
= m
.group('content')
205 if res
.startswith('"') or res
.startswith("'"):
208 return unescapeHTML(res
)
211 def clean_html(html
):
212 """Clean an HTML snippet into a readable string"""
214 if html
is None: # Convenience for sanitizing descriptions etc.
218 html
= html
.replace('\n', ' ')
219 html
= re
.sub(r
'\s*<\s*br\s*/?\s*>\s*', '\n', html
)
220 html
= re
.sub(r
'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html
)
222 html
= re
.sub('<.*?>', '', html
)
223 # Replace html entities
224 html
= unescapeHTML(html
)
228 def sanitize_open(filename
, open_mode
):
229 """Try to open the given filename, and slightly tweak it if this fails.
231 Attempts to open the given filename. If this fails, it tries to change
232 the filename slightly, step by step, until it's either able to open it
233 or it fails and raises a final exception, like the standard open()
236 It returns the tuple (stream, definitive_file_name).
240 if sys
.platform
== 'win32':
242 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
243 return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
)
244 stream
= open(encodeFilename(filename
), open_mode
)
245 return (stream
, filename
)
246 except (IOError, OSError) as err
:
247 if err
.errno
in (errno
.EACCES
,):
250 # In case of error, try to remove win32 forbidden chars
251 alt_filename
= os
.path
.join(
252 re
.sub('[/<>:"\\|\\\\?\\*]', '#', path_part
)
253 for path_part
in os
.path
.split(filename
)
255 if alt_filename
== filename
:
258 # An exception here should be caught in the caller
259 stream
= open(encodeFilename(filename
), open_mode
)
260 return (stream
, alt_filename
)
263 def timeconvert(timestr
):
264 """Convert RFC 2822 defined time string into system timestamp"""
266 timetuple
= email
.utils
.parsedate_tz(timestr
)
267 if timetuple
is not None:
268 timestamp
= email
.utils
.mktime_tz(timetuple
)
272 def sanitize_filename(s
, restricted
=False, is_id
=False):
273 """Sanitizes a string so it could be used as part of a filename.
274 If restricted is set, use a stricter subset of allowed characters.
275 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
277 def replace_insane(char
):
278 if char
== '?' or ord(char
) < 32 or ord(char
) == 127:
281 return '' if restricted
else '\''
283 return '_-' if restricted
else ' -'
284 elif char
in '\\/|*<>':
286 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace()):
288 if restricted
and ord(char
) > 127:
293 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
294 result
= ''.join(map(replace_insane
, s
))
296 while '__' in result
:
297 result
= result
.replace('__', '_')
298 result
= result
.strip('_')
299 # Common case of "Foreign band name - English song title"
300 if restricted
and result
.startswith('-_'):
307 def orderedSet(iterable
):
308 """ Remove all duplicates from the input iterable """
316 def _htmlentity_transform(entity
):
317 """Transforms an HTML entity to a character."""
318 # Known non-numeric HTML entity
319 if entity
in compat_html_entities
.name2codepoint
:
320 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
322 mobj
= re
.match(r
'#(x?[0-9]+)', entity
)
324 numstr
= mobj
.group(1)
325 if numstr
.startswith('x'):
327 numstr
= '0%s' % numstr
330 return compat_chr(int(numstr
, base
))
332 # Unknown entity in name, return its literal representation
333 return ('&%s;' % entity
)
339 assert type(s
) == compat_str
342 r
'&([^;]+);', lambda m
: _htmlentity_transform(m
.group(1)), s
)
345 def encodeFilename(s
, for_subprocess
=False):
347 @param s The name of the file
350 assert type(s
) == compat_str
352 # Python 3 has a Unicode API
353 if sys
.version_info
>= (3, 0):
356 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
357 # Pass '' directly to use Unicode APIs on Windows 2000 and up
358 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
359 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
360 if not for_subprocess
:
363 # For subprocess calls, encode with locale encoding
364 # Refer to http://stackoverflow.com/a/9951851/35070
365 encoding
= preferredencoding()
367 encoding
= sys
.getfilesystemencoding()
370 return s
.encode(encoding
, 'ignore')
def encodeArgument(s):
    """Encode *s* for use as a subprocess argument (via encodeFilename)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings: promote to text first.
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
382 def decodeOption(optval
):
385 if isinstance(optval
, bytes):
386 optval
= optval
.decode(preferredencoding())
388 assert isinstance(optval
, compat_str
)
392 def formatSeconds(secs
):
394 return '%d:%02d:%02d' % (secs
// 3600, (secs
% 3600) // 60, secs
% 60)
396 return '%d:%02d' % (secs
// 60, secs
% 60)
401 def make_HTTPS_handler(params
, **kwargs
):
402 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
403 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
404 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
405 if opts_no_check_certificate
:
406 context
.check_hostname
= False
407 context
.verify_mode
= ssl
.CERT_NONE
409 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
412 # (create_default_context present but HTTPSHandler has no context=)
415 if sys
.version_info
< (3, 2):
416 return YoutubeDLHTTPSHandler(params
, **kwargs
)
418 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
419 context
.verify_mode
= (ssl
.CERT_NONE
420 if opts_no_check_certificate
421 else ssl
.CERT_REQUIRED
)
422 context
.set_default_verify_paths()
423 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
426 class ExtractorError(Exception):
427 """Error during info extraction."""
429 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
430 """ tb, if given, is the original traceback (so that it can be printed out).
431 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
434 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
436 if video_id
is not None:
437 msg
= video_id
+ ': ' + msg
439 msg
+= ' (caused by %r)' % cause
441 if ytdl_is_updateable():
442 update_cmd
= 'type youtube-dl -U to update'
444 update_cmd
= 'see https://yt-dl.org/update on how to update'
445 msg
+= '; please report this issue on https://yt-dl.org/bug .'
446 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
447 msg
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
448 super(ExtractorError
, self
).__init
__(msg
)
451 self
.exc_info
= sys
.exc_info() # preserve original exception
453 self
.video_id
= video_id
455 def format_traceback(self
):
456 if self
.traceback
is None:
458 return ''.join(traceback
.format_tb(self
.traceback
))
# Raised when no suitable extractor exists for the given URL.
class UnsupportedError(ExtractorError):
    def __init__(self, url):
        # expected=True marks this as a normal condition, not a youtube-dl bug.
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Raised when a required regular expression did not match."""
class DownloadError(Exception):
    """Raised when a download cannot proceed.

    FileDownloader objects throw this when they are not configured to
    continue on errors; the message carries the failure details.
    """

    def __init__(self, msg, exc_info=None):
        """exc_info, if given, is the original exception triple that caused
        the trouble (as returned by sys.exc_info())."""
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
496 class PostProcessingError(Exception):
497 """Post Processing exception.
499 This exception may be raised by PostProcessor's .run() method to
500 indicate an error in the postprocessing task.
503 def __init__(self
, msg
):
class MaxDownloadsReached(Exception):
    """Raised once the --max-downloads limit has been reached."""
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(Exception):
    """Raised when a downloaded file is smaller than the server announced.

    This usually means the connection was interrupted before the full
    payload arrived.
    """

    def __init__(self, downloaded, expected):
        # Byte counts: what actually arrived vs. what the server promised.
        self.downloaded = downloaded
        self.expected = expected
537 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
538 hc
= http_class(*args
, **kwargs
)
539 source_address
= ydl_handler
._params
.get('source_address')
540 if source_address
is not None:
541 sa
= (source_address
, 0)
542 if hasattr(hc
, 'source_address'): # Python 2.7+
543 hc
.source_address
= sa
545 def _hc_connect(self
, *args
, **kwargs
):
546 sock
= compat_socket_create_connection(
547 (self
.host
, self
.port
), self
.timeout
, sa
)
549 self
.sock
= ssl
.wrap_socket(
550 sock
, self
.key_file
, self
.cert_file
,
551 ssl_version
=ssl
.PROTOCOL_TLSv1
)
554 hc
.connect
= functools
.partial(_hc_connect
, hc
)
559 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
560 """Handler for HTTP requests and responses.
562 This class, when installed with an OpenerDirector, automatically adds
563 the standard headers to every HTTP request and handles gzipped and
564 deflated responses from web servers. If compression is to be avoided in
565 a particular request, the original request in the program code only has
566 to include the HTTP header "Youtubedl-No-Compression", which will be
567 removed before making the real request.
569 Part of this code was copied from:
571 http://techknack.net/python-urllib2-handlers/
573 Andrew Rowls, the author of that code, agreed to release it to the
577 def __init__(self
, params
, *args
, **kwargs
):
578 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
579 self
._params
= params
581 def http_open(self
, req
):
582 return self
.do_open(functools
.partial(
583 _create_http_connection
, self
, compat_http_client
.HTTPConnection
, False),
589 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
591 return zlib
.decompress(data
)
594 def addinfourl_wrapper(stream
, headers
, url
, code
):
595 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
596 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
597 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
601 def http_request(self
, req
):
602 for h
, v
in std_headers
.items():
603 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
604 # The dict keys are capitalized because of this bug by urllib
605 if h
.capitalize() not in req
.headers
:
607 if 'Youtubedl-no-compression' in req
.headers
:
608 if 'Accept-encoding' in req
.headers
:
609 del req
.headers
['Accept-encoding']
610 del req
.headers
['Youtubedl-no-compression']
612 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
613 # Python 2.6 is brain-dead when it comes to fragments
614 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
615 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
619 def http_response(self
, req
, resp
):
622 if resp
.headers
.get('Content-encoding', '') == 'gzip':
623 content
= resp
.read()
624 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
626 uncompressed
= io
.BytesIO(gz
.read())
627 except IOError as original_ioerror
:
628 # There may be junk add the end of the file
629 # See http://stackoverflow.com/q/4928560/35070 for details
630 for i
in range(1, 1024):
632 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
633 uncompressed
= io
.BytesIO(gz
.read())
638 raise original_ioerror
639 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
640 resp
.msg
= old_resp
.msg
642 if resp
.headers
.get('Content-encoding', '') == 'deflate':
643 gz
= io
.BytesIO(self
.deflate(resp
.read()))
644 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
645 resp
.msg
= old_resp
.msg
648 https_request
= http_request
649 https_response
= http_response
652 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
653 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
654 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
655 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
656 self
._params
= params
658 def https_open(self
, req
):
660 if hasattr(self
, '_context'): # python > 2.6
661 kwargs
['context'] = self
._context
662 if hasattr(self
, '_check_hostname'): # python 3.x
663 kwargs
['check_hostname'] = self
._check
_hostname
664 return self
.do_open(functools
.partial(
665 _create_http_connection
, self
, self
._https
_conn
_class
, True),
669 def parse_iso8601(date_str
, delimiter
='T'):
670 """ Return a UNIX timestamp from the given date """
676 r
'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
679 timezone
= datetime
.timedelta()
681 date_str
= date_str
[:-len(m
.group(0))]
682 if not m
.group('sign'):
683 timezone
= datetime
.timedelta()
685 sign
= 1 if m
.group('sign') == '+' else -1
686 timezone
= datetime
.timedelta(
687 hours
=sign
* int(m
.group('hours')),
688 minutes
=sign
* int(m
.group('minutes')))
689 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
690 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
691 return calendar
.timegm(dt
.timetuple())
694 def unified_strdate(date_str
, day_first
=True):
695 """Return a string with the date in the format YYYYMMDD"""
701 date_str
= date_str
.replace(',', ' ')
702 # %z (UTC offset) is only supported in python>=3.2
703 date_str
= re
.sub(r
' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str
)
704 # Remove AM/PM + timezone
705 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
707 format_expressions
= [
712 '%b %dst %Y %I:%M%p',
713 '%b %dnd %Y %I:%M%p',
714 '%b %dth %Y %I:%M%p',
720 '%Y-%m-%d %H:%M:%S.%f',
723 '%Y-%m-%dT%H:%M:%SZ',
724 '%Y-%m-%dT%H:%M:%S.%fZ',
725 '%Y-%m-%dT%H:%M:%S.%f0Z',
727 '%Y-%m-%dT%H:%M:%S.%f',
731 format_expressions
.extend([
738 format_expressions
.extend([
744 for expression
in format_expressions
:
746 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
749 if upload_date
is None:
750 timetuple
= email
.utils
.parsedate_tz(date_str
)
752 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
756 def determine_ext(url
, default_ext
='unknown_video'):
759 guess
= url
.partition('?')[0].rpartition('.')[2]
760 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name of the form <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
770 def date_from_str(date_str
):
772 Return a datetime object from a string in the format YYYYMMDD or
773 (now|today)[+-][0-9](day|week|month|year)(s)?"""
774 today
= datetime
.date
.today()
775 if date_str
in ('now', 'today'):
777 if date_str
== 'yesterday':
778 return today
- datetime
.timedelta(days
=1)
779 match
= re
.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
)
780 if match
is not None:
781 sign
= match
.group('sign')
782 time
= int(match
.group('time'))
785 unit
= match
.group('unit')
786 # A bad aproximation?
794 delta
= datetime
.timedelta(**{unit: time}
)
796 return datetime
.datetime
.strptime(date_str
, "%Y%m%d").date()
799 def hyphenate_date(date_str
):
801 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
802 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
803 if match
is not None:
804 return '-'.join(match
.groups())
809 class DateRange(object):
810 """Represents a time interval between two dates"""
812 def __init__(self
, start
=None, end
=None):
813 """start and end must be strings in the format accepted by date"""
814 if start
is not None:
815 self
.start
= date_from_str(start
)
817 self
.start
= datetime
.datetime
.min.date()
819 self
.end
= date_from_str(end
)
821 self
.end
= datetime
.datetime
.max.date()
822 if self
.start
> self
.end
:
823 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
827 """Returns a range that only contains the given day"""
830 def __contains__(self
, date
):
831 """Check if the date is in the range"""
832 if not isinstance(date
, datetime
.date
):
833 date
= date_from_str(date
)
834 return self
.start
<= date
<= self
.end
837 return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat())
841 """ Returns the platform name as a compat_str """
842 res
= platform
.platform()
843 if isinstance(res
, bytes):
844 res
= res
.decode(preferredencoding())
846 assert isinstance(res
, compat_str
)
850 def _windows_write_string(s
, out
):
851 """ Returns True if the string was written using special methods,
852 False if it has yet to be written out."""
853 # Adapted from http://stackoverflow.com/a/3259271/35070
856 import ctypes
.wintypes
864 fileno
= out
.fileno()
865 except AttributeError:
866 # If the output stream doesn't have a fileno, it's virtual
868 except io
.UnsupportedOperation
:
869 # Some strange Windows pseudo files?
871 if fileno
not in WIN_OUTPUT_IDS
:
874 GetStdHandle
= ctypes
.WINFUNCTYPE(
875 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
876 (b
"GetStdHandle", ctypes
.windll
.kernel32
))
877 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
879 WriteConsoleW
= ctypes
.WINFUNCTYPE(
880 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
881 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
882 ctypes
.wintypes
.LPVOID
)((b
"WriteConsoleW", ctypes
.windll
.kernel32
))
883 written
= ctypes
.wintypes
.DWORD(0)
885 GetFileType
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)((b
"GetFileType", ctypes
.windll
.kernel32
))
886 FILE_TYPE_CHAR
= 0x0002
887 FILE_TYPE_REMOTE
= 0x8000
888 GetConsoleMode
= ctypes
.WINFUNCTYPE(
889 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
890 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
891 (b
"GetConsoleMode", ctypes
.windll
.kernel32
))
892 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
894 def not_a_console(handle
):
895 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
897 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
898 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
903 def next_nonbmp_pos(s
):
905 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
906 except StopIteration:
910 count
= min(next_nonbmp_pos(s
), 1024)
913 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
915 raise OSError('Failed to write string')
916 if not count
: # We just wrote a non-BMP character
917 assert written
.value
== 2
920 assert written
.value
> 0
921 s
= s
[written
.value
:]
925 def write_string(s
, out
=None, encoding
=None):
928 assert type(s
) == compat_str
930 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
931 if _windows_write_string(s
, out
):
934 if ('b' in getattr(out
, 'mode', '') or
935 sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
936 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
938 elif hasattr(out
, 'buffer'):
939 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
940 byt
= s
.encode(enc
, 'ignore')
941 out
.buffer.write(byt
)
947 def bytes_to_intlist(bs
):
950 if isinstance(bs
[0], int): # Python 3
953 return [ord(c
) for c
in bs
]
956 def intlist_to_bytes(xs
):
959 return struct_pack('%dB' % len(xs
), *xs
)
962 # Cross-platform file locking
963 if sys
.platform
== 'win32':
964 import ctypes
.wintypes
967 class OVERLAPPED(ctypes
.Structure
):
969 ('Internal', ctypes
.wintypes
.LPVOID
),
970 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
971 ('Offset', ctypes
.wintypes
.DWORD
),
972 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
973 ('hEvent', ctypes
.wintypes
.HANDLE
),
976 kernel32
= ctypes
.windll
.kernel32
977 LockFileEx
= kernel32
.LockFileEx
978 LockFileEx
.argtypes
= [
979 ctypes
.wintypes
.HANDLE
, # hFile
980 ctypes
.wintypes
.DWORD
, # dwFlags
981 ctypes
.wintypes
.DWORD
, # dwReserved
982 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
983 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
984 ctypes
.POINTER(OVERLAPPED
) # Overlapped
986 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
987 UnlockFileEx
= kernel32
.UnlockFileEx
988 UnlockFileEx
.argtypes
= [
989 ctypes
.wintypes
.HANDLE
, # hFile
990 ctypes
.wintypes
.DWORD
, # dwReserved
991 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
992 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
993 ctypes
.POINTER(OVERLAPPED
) # Overlapped
995 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
996 whole_low
= 0xffffffff
997 whole_high
= 0x7fffffff
999 def _lock_file(f
, exclusive
):
1000 overlapped
= OVERLAPPED()
1001 overlapped
.Offset
= 0
1002 overlapped
.OffsetHigh
= 0
1003 overlapped
.hEvent
= 0
1004 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
1005 handle
= msvcrt
.get_osfhandle(f
.fileno())
1006 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
1007 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1008 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
1010 def _unlock_file(f
):
1011 assert f
._lock
_file
_overlapped
_p
1012 handle
= msvcrt
.get_osfhandle(f
.fileno())
1013 if not UnlockFileEx(handle
, 0,
1014 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1015 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
1020 def _lock_file(f
, exclusive
):
1021 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
1023 def _unlock_file(f
):
1024 fcntl
.flock(f
, fcntl
.LOCK_UN
)
1027 class locked_file(object):
1028 def __init__(self
, filename
, mode
, encoding
=None):
1029 assert mode
in ['r', 'a', 'w']
1030 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
1033 def __enter__(self
):
1034 exclusive
= self
.mode
!= 'r'
1036 _lock_file(self
.f
, exclusive
)
1042 def __exit__(self
, etype
, value
, traceback
):
1044 _unlock_file(self
.f
)
1051 def write(self
, *args
):
1052 return self
.f
.write(*args
)
1054 def read(self
, *args
):
1055 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when unknown."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
1063 def shell_quote(args
):
1065 encoding
= get_filesystem_encoding()
1067 if isinstance(a
, bytes):
1068 # We may get a filename encoded with 'encodeFilename'
1069 a
= a
.decode(encoding
)
1070 quoted_args
.append(pipes
.quote(a
))
1071 return ' '.join(quoted_args
)
1074 def takewhile_inclusive(pred
, seq
):
1075 """ Like itertools.takewhile, but include the latest evaluated element
1076 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    sdata = compat_urllib_parse.urlencode(payload)
    return '%s#%s' % (url, sdata)
1091 def unsmuggle_url(smug_url
, default
=None):
1092 if '#__youtubedl_smuggle' not in smug_url
:
1093 return smug_url
, default
1094 url
, _
, sdata
= smug_url
.rpartition('#')
1095 jsond
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0]
1096 data
= json
.loads(jsond
)
1100 def format_bytes(bytes):
1103 if type(bytes) is str:
1104 bytes = float(bytes)
1108 exponent
= int(math
.log(bytes, 1024.0))
1109 suffix
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
]
1110 converted
= float(bytes) / float(1024 ** exponent
)
1111 return '%.2f%s' % (converted
, suffix
)
1114 def parse_filesize(s
):
1118 # The lower-case forms are of course incorrect and inofficial,
1119 # but we support those too
1157 units_re
= '|'.join(re
.escape(u
) for u
in _UNIT_TABLE
)
1159 r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re
, s
)
1163 num_str
= m
.group('num').replace(',', '.')
1164 mult
= _UNIT_TABLE
[m
.group('unit')]
1165 return int(float(num_str
) * mult
)
1168 def get_term_width():
1169 columns
= compat_getenv('COLUMNS', None)
1174 sp
= subprocess
.Popen(
1176 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
1177 out
, err
= sp
.communicate()
1178 return int(out
.split()[1])
1184 def month_by_name(name
):
1185 """ Return the number of a month by (locale-independently) English name """
1188 'January', 'February', 'March', 'April', 'May', 'June',
1189 'July', 'August', 'September', 'October', 'November', 'December']
1191 return ENGLISH_NAMES
.index(name
) + 1
1196 def fix_xml_ampersands(xml_str
):
1197 """Replace all the '&' by '&' in XML"""
1199 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1204 def setproctitle(title
):
1205 assert isinstance(title
, compat_str
)
1207 libc
= ctypes
.cdll
.LoadLibrary("libc.so.6")
1210 title_bytes
= title
.encode('utf-8')
1211 buf
= ctypes
.create_string_buffer(len(title_bytes
))
1212 buf
.value
= title_bytes
1214 libc
.prctl(15, buf
, 0, 0, 0)
1215 except AttributeError:
1216 return # Strange libc, just skip this
1219 def remove_start(s
, start
):
1220 if s
.startswith(start
):
1221 return s
[len(start
):]
1225 def remove_end(s
, end
):
1227 return s
[:-len(end
)]
def url_basename(url):
    """Return the last path segment of *url* (query and fragment excluded)."""
    # urlparse(...).path carries only the path component of the URL.
    url_path = compat_urlparse.urlparse(url).path
    return url_path.strip('/').split('/')[-1]
1236 class HEADRequest(compat_urllib_request
.Request
):
1237 def get_method(self
):
1241 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
1244 v
= getattr(v
, get_attr
, None)
1247 return default
if v
is None else (int(v
) * invscale
// scale
)
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, returning *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
1254 def str_to_int(int_str
):
1255 """ A more relaxed version of int_or_none """
1258 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert *v* to float and rescale by invscale/scale.

    Returns *default* when v is None.
    """
    if v is None:
        return default
    return float(v) * invscale / scale
1266 def parse_duration(s
):
1267 if not isinstance(s
, compat_basestring
):
1275 (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
1276 (?P<only_hours>[0-9.]+)\s*(?:hours?)|
1280 (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
1281 (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
1283 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1285 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
1290 if m
.group('only_mins'):
1291 return float_or_none(m
.group('only_mins'), invscale
=60)
1292 if m
.group('only_hours'):
1293 return float_or_none(m
.group('only_hours'), invscale
=60 * 60)
1295 res
+= int(m
.group('secs'))
1297 res
+= int(m
.group('mins')) * 60
1298 if m
.group('hours'):
1299 res
+= int(m
.group('hours')) * 60 * 60
1301 res
+= int(m
.group('days')) * 24 * 60 * 60
1303 res
+= float(m
.group('ms'))
def prepend_extension(filename, ext):
    """Insert *ext* before the real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    stem, real_ext = os.path.splitext(filename)
    return '%s.%s%s' % (stem, ext, real_ext)
1312 def check_executable(exe
, args
=[]):
1313 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1314 args can be a list of arguments for a short output (like -version) """
1316 subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate()
1322 def get_exe_version(exe
, args
=['--version'],
1323 version_re
=None, unrecognized
='present'):
1324 """ Returns the version of the specified executable,
1325 or False if the executable is not present """
1327 out
, _
= subprocess
.Popen(
1329 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate()
1332 if isinstance(out
, bytes): # Python 2.x
1333 out
= out
.decode('ascii', 'ignore')
1334 return detect_exe_version(out
, version_re
, unrecognized
)
1337 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
1338 assert isinstance(output
, compat_str
)
1339 if version_re
is None:
1340 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
1341 m
= re
.search(version_re
, output
)
1348 class PagedList(object):
1350 # This is only useful for tests
1351 return len(self
.getslice())
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages on demand via pagefunc(pagenum)."""

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset into the first interesting page.
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Cutoff inside the last interesting page (None = whole page).
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
class InAdvancePagedList(PagedList):
    """Paged list where the total page count is known in advance."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Elements to drop from the first fetched page.
        skip_elems = start - start_page * self._pagesize
        # How many elements are still wanted (None = all remaining).
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page contains the last wanted element; truncate
                    # and stop fetching further pages.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escapes in *s* into real characters."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a unicode string must be UTF-8-encoded before quoting.
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    # Each component is escaped separately so that structural characters
    # (slashes, '?', '#') keep their meaning; the final .geturl() reassembles
    # the escaped URL (it was missing and the parse result was returned raw).
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
try:
    # Probe whether struct accepts a (unicode) str format.
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # Modern interpreters: use struct directly.
    struct_pack = struct.pack
    struct_unpack = struct.unpack
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping comments and blanks."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that editors may have prepended to the first line.
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        # Lines starting with these characters are treated as comments.
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data and return it as ASCII bytes (POST body)."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
try:
    # Element.iter exists on Python 2.7+ / 3.x.
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
def parse_xml(s):
    """Parse the XML string *s* into an ElementTree element, ignoring doctypes."""
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    # The 'parser' keyword only exists on Python 2.7+.
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
    return tree
def parse_age_limit(s):
    """Parse an age limit like '18' or '18+' into an int.

    Falls back to the US_RATINGS table for rating names; returns None for
    None input (guard was missing) or unknown values.
    """
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)
def strip_jsonp(code):
    """Strip a JSONP wrapper (callback name, parens, trailing ';'/comments)."""
    return re.sub(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code):
    """Convert a JavaScript object literal into (close-to) valid JSON."""
    def fix_kv(m):
        v = m.group(0)
        # Bare JS keywords map 1:1 to JSON.
        if v in ('true', 'false', 'null'):
            return v
        # Already a double-quoted string: leave untouched.
        if v.startswith('"'):
            return v
        if v.startswith("'"):
            # Re-escape a single-quoted string as double-quoted.
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop trailing commas before closing brackets.
    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
    return res
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown quality ids sort below every known one.
            return -1
    return q
# Default output filename template, used when the user supplies no -o/--output
# template (percent-style fields are filled from the info dict).
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        # Truncate so the result (including the ellipses) fits in *length*.
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Turn a version string like '2015.01.23-4' into a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
def is_outdated_version(version, limit, assume_new=True):
    """Return True if *version* is strictly older than *limit*.

    Empty or unparseable versions are decided by *assume_new*: treated as
    up to date when True, as outdated when False.
    """
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    # Updateable when running from the zip bundle or a frozen (py2exe) build.
    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = (shlex_quote(a) for a in args)
    return ' '.join(quoted)
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a URL handle's response headers."""
    try:
        url_handle.headers
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    # Prefer the filename advertised in Content-Disposition, if any.
    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    # Fall back to the MIME subtype (e.g. 'video/mp4' -> 'mp4').
    return getheader('Content-Type').split("/")[1]
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Known byte-order marks mapped to their encodings; longer BOMs first so
    # e.g. UTF-32-LE is not mistaken for UTF-16-LE.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Determine the download protocol for *info_dict* (explicit field,
    URL scheme prefix, or extension-based heuristics)."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    # Manifest-style formats are identified by extension.
    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Widest cell per column decides that column's padding.
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
1684 def _match_one(filter_part, dct):
1685 COMPARISON_OPERATORS = {
1693 operator_rex = re.compile(r'''(?x)\s*
1695 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1697 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
1698 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
1701 ''' % '|
'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
1702 m = operator_rex.search(filter_part)
1704 op = COMPARISON_OPERATORS[m.group('op
')]
1705 if m.group('strval
') is not None:
1706 if m.group('op
') not in ('=', '!='):
1708 'Operator
%s does
not support string values
!' % m.group('op
'))
1709 comparison_value = m.group('strval
')
1712 comparison_value = int(m.group('intval
'))
1714 comparison_value = parse_filesize(m.group('intval
'))
1715 if comparison_value is None:
1716 comparison_value = parse_filesize(m.group('intval
') + 'B
')
1717 if comparison_value is None:
1719 'Invalid integer value
%r in filter part
%r' % (
1720 m.group('intval
'), filter_part))
1721 actual_value = dct.get(m.group('key
'))
1722 if actual_value is None:
1723 return m.group('none_inclusive
')
1724 return op(actual_value, comparison_value)
1727 '': lambda v: v is not None,
1728 '!': lambda v: v is None,
1730 operator_rex = re.compile(r'''(?x)\s*
1731 (?P<op>%s)\s*(?P<key>[a-z_]+)
1733 ''' % '|
'.join(map(re.escape, UNARY_OPERATORS.keys())))
1734 m = operator_rex.search(filter_part)
1736 op = UNARY_OPERATORS[m.group('op
')]
1737 actual_value = dct.get(m.group('key
'))
1738 return op(actual_value)
1740 raise ValueError('Invalid
filter part
%r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # '&' separates sub-filters; every one of them must hold.
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None when the info dict passes
    *filter_str*, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func