]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Ignore BOM in batch files (Fixes #2450)
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
62e609ab 4import contextlib
e3946f98 5import ctypes
c496ca96
PH
6import datetime
7import email.utils
f45c185f 8import errno
d77c3dfd 9import gzip
b7ab0590 10import itertools
03f9daab 11import io
f4bfd65f 12import json
d77c3dfd 13import locale
02dbf93f 14import math
d77c3dfd 15import os
4eb7f1d1 16import pipes
c496ca96 17import platform
d77c3dfd 18import re
13ebea79 19import ssl
c496ca96 20import socket
b53466e1 21import struct
1c088fa8 22import subprocess
d77c3dfd 23import sys
01951dda 24import traceback
d77c3dfd 25import zlib
d77c3dfd 26
01ba00ca 27try:
59ae15a5 28 import urllib.request as compat_urllib_request
01ba00ca 29except ImportError: # Python 2
59ae15a5 30 import urllib2 as compat_urllib_request
01ba00ca
PH
31
32try:
59ae15a5 33 import urllib.error as compat_urllib_error
01ba00ca 34except ImportError: # Python 2
59ae15a5 35 import urllib2 as compat_urllib_error
01ba00ca
PH
36
37try:
59ae15a5 38 import urllib.parse as compat_urllib_parse
01ba00ca 39except ImportError: # Python 2
59ae15a5 40 import urllib as compat_urllib_parse
01ba00ca 41
799c0763
PH
42try:
43 from urllib.parse import urlparse as compat_urllib_parse_urlparse
44except ImportError: # Python 2
45 from urlparse import urlparse as compat_urllib_parse_urlparse
46
6543f0dc
JMF
47try:
48 import urllib.parse as compat_urlparse
49except ImportError: # Python 2
50 import urlparse as compat_urlparse
51
01ba00ca 52try:
59ae15a5 53 import http.cookiejar as compat_cookiejar
01ba00ca 54except ImportError: # Python 2
59ae15a5 55 import cookielib as compat_cookiejar
01ba00ca 56
3e669f36 57try:
59ae15a5 58 import html.entities as compat_html_entities
9f37a959 59except ImportError: # Python 2
59ae15a5 60 import htmlentitydefs as compat_html_entities
3e669f36 61
a8156c1d 62try:
59ae15a5 63 import html.parser as compat_html_parser
9f37a959 64except ImportError: # Python 2
59ae15a5 65 import HTMLParser as compat_html_parser
a8156c1d 66
348d0a7a 67try:
59ae15a5 68 import http.client as compat_http_client
9f37a959 69except ImportError: # Python 2
59ae15a5 70 import httplib as compat_http_client
348d0a7a 71
2eabb802 72try:
0e283428 73 from urllib.error import HTTPError as compat_HTTPError
2eabb802
PH
74except ImportError: # Python 2
75 from urllib2 import HTTPError as compat_HTTPError
76
e0df6211
PH
77try:
78 from urllib.request import urlretrieve as compat_urlretrieve
79except ImportError: # Python 2
80 from urllib import urlretrieve as compat_urlretrieve
81
82
5910e210
PH
83try:
84 from subprocess import DEVNULL
85 compat_subprocess_get_DEVNULL = lambda: DEVNULL
86except ImportError:
87 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
88
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Runs of consecutive escapes are collected as bytes and decoded
        together with the given encoding and error handler, so multi-byte
        sequences are decoded as one unit.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            # No percent sign at all: nothing to unquote
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # Python 2 only: str.decode('hex') turns two hex digits into one byte
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                # Not a valid escape: keep the percent sign and the text literally
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless keep_blank_values is set. With strict_parsing, a field
        without '=' raises ValueError.
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                # '+' encodes a space in query strings; undo that before unquoting
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values.

        Values of repeated names are appended in the order they appear.
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                        encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
348d0a7a 167
3e669f36 168try:
59ae15a5 169 compat_str = unicode # Python 2
3e669f36 170except NameError:
59ae15a5 171 compat_str = str
3e669f36
PH
172
173try:
59ae15a5 174 compat_chr = unichr # Python 2
3e669f36 175except NameError:
59ae15a5 176 compat_chr = chr
3e669f36 177
f7300c5c
JMF
178try:
179 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
180except ImportError: # Python 2.6
181 from xml.parsers.expat import ExpatError as compat_xml_parse_error
182
b31756c1
FV
def compat_ord(c):
    """Return the integer value of c.

    Indexing a bytes object yields ints on Python 3 and one-character
    strings on Python 2; this accepts either and always returns an int.
    """
    return c if type(c) is int else ord(c)
186
468e2e92
FV
187# This is not clearly defined otherwise
188compiled_regex_type = type(re.compile(''))
189
3e669f36 190std_headers = {
ae8f7871 191 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
59ae15a5
PH
192 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
193 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
194 'Accept-Encoding': 'gzip, deflate',
195 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 196}
f427df17 197
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or the reported encoding is unknown or cannot encode a
    plain ASCII test string, 'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the codec exists and is usable
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (any locale/codec failure), but unlike a bare
        # except it does not swallow KeyboardInterrupt or SystemExit.
        pref = 'UTF-8'

    return pref
d77c3dfd 211
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s (Python 3: stdout takes text directly)."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print the text string s, encoded with the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
d77c3dfd 219
f4bfd65f
PH
220# In Python 2.x, json.dump expects a bytestream.
221# In Python 3.x, it writes to a character stream
if sys.version_info >= (3, 0):
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file fn (text mode, UTF-8)."""
        with open(fn, 'w', encoding='utf-8') as out:
            json.dump(obj, out)
else:
    def write_json_file(obj, fn):
        """Serialize obj as JSON into the file fn (binary mode)."""
        with open(fn, 'wb') as out:
            json.dump(obj, out)
230
59ae56fa
PH
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element matching xpath whose attribute key equals val """
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are spliced into the expression, so only plain
        # characters are accepted
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
        predicate = u"[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
244
d7e66d39
JMF
245# On python2.6 the xml.etree.ElementTree.Element methods don't support
246# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    ns_map maps each prefix to its namespace URI. Steps without a prefix
    are left untouched.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, local_name = pieces
        return '{%s}%s' % (ns_map[prefix], local_name)

    return '/'.join([expand(step) for step in path.split('/')])
257
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must be the entity text
    without the leading '&' and the trailing ';'.

    Named entities and decimal (&#47;) or hexadecimal (&#x2F;) character
    references are decoded. Anything else, including references to code
    points that do not exist, is returned in its literal '&...;' form.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hex references need the full hex digit set; matching only \d would
    # silently truncate e.g. '#x2F' to '#x2'.
    mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|[0-9]+)$', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Code point outside the valid range: fall through and keep
            # the entity as written
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
d77c3dfd 282
a8156c1d 283compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
a921f407
JMF
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that keeps a reference to the document it was fed.

    Subclasses can read the complete source text from self.html, which is
    None until loads() has been called.
    """
    def __init__(self):
        # The method was previously misspelled "__init", which is
        # name-mangled and therefore never ran as the constructor.
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        """Remember html, feed it to the parser and finish parsing."""
        self.html = html
        self.feed(html)
        self.close()
293
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # Once complete: [tag name, position after the opening tag,
        # position of the closing tag]; see get_result()
        self.result = None
        # True while inside the element being extracted
        self.started = False
        # Number of currently open tags per tag name, counted from the match on
        self.depth = {}
        # True between the matching start tag and the next parser event
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Try to recover from a parse error by skipping ahead.

        Gives up (raises HTMLParseError) after more than 10 errors or when
        the error occurs inside the element being extracted.
        """
        # NOTE(review): HTMLParseError is presumably only available on older
        # Python versions - confirm before relying on this path
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # A nested tag is the first event after the match: record the
            # start position now
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The extracted element ends when its own tag name is balanced again
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other kind of parser event also marks the start position
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the text between the recorded start and end positions.

        Returns None when no matching element was found or when its start
        or end position was not recorded.
        """
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        # Positions are (line, column) pairs as returned by getpos();
        # lines are counted from 1
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end on the same line: the end column has to be
            # shifted by the characters already cut off at the front
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
3b024e17
PH
353# Hack for https://github.com/rg3/youtube-dl/issues/662
354if sys.version_info < (2, 7, 3):
355 AttrParser.parse_endtag = (lambda self, i:
356 i + len("</scr'+'ipt>")
357 if self.rawdata[i:].startswith("</scr'+'ipt>")
358 else compat_html_parser.HTMLParser.parse_endtag(self, i))
9e6dd238
FV
359
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals id in the passed HTML document"""
    return get_element_by_attribute(attribute="id", value=id, html=html)
363
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag whose attribute equals value in the passed HTML document.

    Parse errors are ignored; whatever was extracted before the error is
    returned (None if nothing complete was found).
    """
    extractor = AttrParser(attribute, value)
    try:
        extractor.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: fall through and return what we have
        pass
    return extractor.get_result()
9e6dd238 372
a921f407
JMF
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that picks the content attribute of the meta tag
    with the specified name attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag == 'meta':
            attr_map = dict(attrs)
            if attr_map.get('name') == self.name:
                # A later matching meta tag overrides an earlier one
                self.result = attr_map.get('content')

    def get_result(self):
        return self.result
393
def get_meta_content(name, html):
    """
    Return the content attribute of the meta tag whose name attribute is name.

    Parse errors are ignored; the value found so far (or None) is returned.
    """
    meta_parser = MetaParser(name)
    try:
        meta_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep whatever was found before the error
        pass
    return meta_parser.get_result()
404
9e6dd238
FV
405
def clean_html(html):
    """Clean an HTML snippet into a readable string

    <br> tags and </p><p> boundaries become newlines, all other tags are
    removed, and HTML entities are decoded. None is passed through
    unchanged, so optional fields can be cleaned without a prior check.
    """
    if html is None:
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
9e6dd238
FV
417
418
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails for a reason other
    than missing permissions, the characters that are forbidden in win32
    file names are replaced by '#' in every path component (the drive and
    the path separators are kept) and the open is retried once with the
    altered name. If that fails too, or if there was nothing to replace,
    the exception is raised, like the standard open() function would.

    The filename '-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Split on the separators (kept at the odd indices thanks to the
        # capturing group) so that only the components are rewritten.
        separators = os.sep + (os.altsep or '')
        drive, rest = os.path.splitdrive(filename)
        pieces = re.split(u'([%s])' % re.escape(separators), rest)
        alt_filename = drive + u''.join(
            piece if index % 2 else re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', piece)
            for index, piece in enumerate(pieces))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
452
453
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None if the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 461
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be
    kept if possible: the cleanup of repeated, leading and trailing
    underscores is then skipped.
    """
    def _substitute(ch):
        code = ord(ch)
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted:
            if ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127:
                return '_'
        return ch

    cleaned = u''.join([_substitute(ch) for ch in s])
    if is_id:
        return cleaned

    while '__' in cleaned:
        cleaned = cleaned.replace('__', '_')
    cleaned = cleaned.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    return cleaned if cleaned else '_'
d77c3dfd
FV
493
def orderedSet(iterable):
    """ Return a list with the elements of iterable, duplicates removed, first occurrences kept in order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd
FV
501
def unescapeHTML(s):
    """
    Replace every '&...;' entity in s using htmlentity_transform.

    @param s a (text) string
    """
    assert type(s) == type(u'')

    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
d77c3dfd 510
8bf48f23
PH
511
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form the file APIs of the running Python expect.

    @param s The name of the file
    @param for_subprocess Set if the result is passed to a subprocess
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass u'' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows:
        if not for_subprocess:
            return s
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()

    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
538
d77c3dfd 539
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as text.

    None is passed through; byte strings are decoded with the preferred
    encoding.
    """
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())
        assert isinstance(optval, compat_str)
    return optval
1c256f70 548
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that can hold the value is used, so exactly one hour
    is '1:00:00' and exactly one minute is '1:00'.
    """
    # >= rather than >: with a strict comparison the boundary values were
    # rendered in the next shorter form ('60:00' and '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
556
a0ddb8a2
PH
557
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Create the HTTPS handler to install into the URL opener.

    opts_no_check_certificate: if true, server certificates are not
        verified (only honoured on Python >= 3.2; the legacy branch below
        does not look at it).
    kwargs: passed on to the handler constructor.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                # Try SSLv3 first and fall back to protocol negotiation
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    else:
        # Negotiate the protocol version with the server instead of pinning
        # SSLv3: SSLv3 is insecure, and ssl.PROTOCOL_SSLv3 does not even
        # exist in ssl modules built without SSLv3 support.
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
ea6d901e 591
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        Errors raised while a network error or an UnavailableVideoError is being handled
        are always treated as expected. cause is stored unchanged in self.cause.
        """
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if not expected:
            # Unexpected errors get a request for a bug report appended
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none."""
        tb = self.traceback
        if tb is None:
            return None
        return u''.join(traceback.format_tb(tb))
613
1c256f70 614
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error raised when a regular expression did not match"""
618
619
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, describes the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
d77c3dfd
FV
631
632
class SameFileError(Exception):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
640
641
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        # Initialise the base class as well, so that str(exc), exc.args and
        # tracebacks show the message instead of being empty.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd
FV
650
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
654
655
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
663
664
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Give the base class a message, so that str(exc) and tracebacks
        # are informative instead of empty.
        super(ContentTooShortError, self).__init__(
            u'Downloaded %s bytes, expected %s bytes' % (downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 679
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Likewise, a "Youtubedl-user-agent" header replaces the User-agent
    header and is removed before the request is sent.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data, with or without the zlib header."""
        try:
            # Negative window bits: raw deflate stream without header
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response object that carries the status code."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        # addinfourl without getcode: the constructor takes no code
        # argument, so set the attribute afterwards
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Set the standard headers and apply the Youtubedl-* pseudo headers."""
        # Every header in std_headers is (re)set to its standard value
        for h,v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        """Return resp with a gzip or deflate body transparently decompressed."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the first error
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic is processed in exactly the same way
    https_request = http_request
    https_response = http_response
bf50b038 760
5de90176 761
bf50b038
JMF
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD

    A number of common date formats are tried first, then RFC 2822 parsing.
    Returns None if date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    upload_date = None
    #Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            continue
        # The first matching format decides; no need to try the others
        break
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
796
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url.

    The extension is the text after the last dot of the part before the
    first '?'. If that is not purely alphanumeric, default_ext is returned.
    """
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 803
d4051a8e
JMF
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return u'.'.join((stem, sub_lang, sub_format))
806
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months are counted as 30 days and years as 365 days. Raises ValueError
    if date_str has neither format."""
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a normal string literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta knows neither months nor years, so approximate them in days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta keyword arguments are plural: days, weeks
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
832
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Strings in any other format are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
841
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates (both inclusive)"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str.
        A missing start or end leaves the range open on that side."""
        self.start = (datetime.datetime.min.date() if start is None
                      else date_from_str(start))
        self.end = (datetime.datetime.max.date() if end is None
                    else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if isinstance(date, datetime.date):
            candidate = date
        else:
            candidate = date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        first, last = self.start.isoformat(), self.end.isoformat()
        return '%s - %s' % (first, last)
c496ca96
PH
867
868
def platform_name():
    """ Returns the platform name (platform.platform()) as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
877
878
7459e3a2
PH
def write_string(s, out=None):
    """Write the text string s to out (default: sys.stderr) and flush it.

    The text is encoded with the preferred encoding first when out is a
    binary stream or when running on Python 2.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    if ('b' in getattr(stream, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    try:
        stream.write(s)
    except UnicodeEncodeError:
        # In Windows shells, this can fail even when the codec is just charmap!?
        # See https://wiki.python.org/moin/PrintFails#Issue
        if not (sys.platform == 'win32' and hasattr(stream, 'encoding')):
            raise
        # Drop whatever the stream's encoding cannot represent and retry
        s = s.encode(stream.encoding, 'ignore').decode(stream.encoding)
        stream.write(s)

    stream.flush()
899
900
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints."""
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3, one-character strings on Python 2
    return list(bs) if isinstance(bs[0], int) else [ord(ch) for ch in bs]
908
c257baff 909
def intlist_to_bytes(xs):
    """Return a byte string made of the byte values in the list xs."""
    if not xs:
        return b''
    running_python2 = isinstance(chr(0), bytes)
    if not running_python2:
        return bytes(xs)
    return ''.join([chr(x) for x in xs])
c38b1e77
PH
917
918
def get_cachedir(params=None):
    """Return the directory used for cached data.

    params: optional dict of options; its 'cachedir' entry, if present,
        is returned as is. Otherwise the result is the 'youtube-dl'
        directory below $XDG_CACHE_HOME (default: ~/.cache).
    """
    # None instead of a mutable {} default, which would be shared by all calls
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
c1c9a79c
PH
923
924
925# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes declaration of the OVERLAPPED structure that LockFileEx and
    # UnlockFileEx take as their last argument
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high part of the number of bytes to lock, starting at offset 0
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file passes it back
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one
        # (presumably LOCKFILE_EXCLUSIVE_LOCK - see the LockFileEx documentation)
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Lock the open file f with fcntl.lockf (exclusive or shared)."""
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the lock taken by _lock_file."""
        fcntl.lockf(f, fcntl.LOCK_UN)
988
989
class locked_file(object):
    """Context manager wrapping a file that is locked while the block runs.

    The file is opened with io.open() on construction.  Entering the
    context locks it (shared for mode 'r', exclusive for 'a' and 'w');
    leaving the context unlocks and closes it.  Only iteration, read() and
    write() are forwarded to the underlying file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 implementation of _lock_file raises OSError, which
            # is not an IOError on Python 2; catch both so the file never
            # stays open after a failed lock attempt.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Close the file even if unlocking failed
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1019
1020
def shell_quote(args):
    """Return the arguments quoted for a POSIX shell and joined by spaces.

    args is an iterable of strings; bytes items (for instance file names
    produced by 'encodeFilename') are decoded with the file system
    encoding, falling back to UTF-8 when that is unknown.
    """
    # pipes.quote is undocumented and the pipes module is gone in recent
    # Python 3 releases; shlex.quote is the supported equivalent there.
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'

    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(quote(a))
    return u' '.join(quoted_args)
9d4660ca
PH
1032
1033
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also yield the element that ends the
    run, i.e. the first element e for which pred(e) is false """
    for item in seq:
        yield item
        if pred(item):
            continue
        break
1041
1042
9d4660ca
PH
def smuggle_url(url, data):
    """ Append data, JSON- and URL-encoded, to url as a fragment so it can
    be carried along for internal use. """

    payload = json.dumps(data)
    fragment = compat_urllib_parse.urlencode({u'__youtubedl_smuggle': payload})
    return url + u'#' + fragment
1049
1050
def unsmuggle_url(smug_url, default=None):
    """ Split a URL built by smuggle_url into (url, data).

    URLs without smuggled data are returned as (smug_url, default). """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition(u'#')
        payload = compat_parse_qs(fragment)[u'__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f
PH
1058
1059
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. u'1.50KiB'.

    bytes may be a number, a numeric string or None; None yields u'N/A'.
    Values too large for the biggest suffix are expressed in that suffix,
    values smaller than one byte are expressed in bytes, and negative
    values keep their sign.
    """
    if bytes is None:
        return u'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']
    magnitude = abs(bytes)  # math.log is undefined for negative numbers
    if magnitude == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(magnitude, 1024.0))
        # math.log is inexact and may land just below an exact power of 1024
        if magnitude >= 1024 ** (exponent + 1):
            exponent += 1
        # Stay inside the suffix table (tiny fractions give a negative
        # exponent, huge values one past the last suffix)
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffixes[exponent])
f53c966a 1072
1c088fa8 1073
f53c966a
JMF
def str_to_int(int_str):
    """Parse an integer written with ',' or '.' as digit group separators."""
    separators = re.compile(r'[,\.]')
    digits = separators.sub(u'', int_str)
    return int(digits)
1c088fa8
PH
1077
1078
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable takes precedence; otherwise (or when
    its value is not a number) the output of `stty size` is consulted.
    """
    columns = os.environ.get('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            pass  # Unusable value, fall back to asking stty

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only: stty may be missing, stdin may not be a
        # terminal, the output may be unparsable.  KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        pass
    return None
caefb1de
PH
1093
1094
def month_by_name(name):
    """ Map a full English month name to its number (1-12), independently
    of the locale; unknown names give None """

    months = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    if name not in months:
        return None
    return months.index(name) + 1
18258362
JMF
1105
1106
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'.

    Ampersands that already start one of the predefined entities (amp, lt,
    gt, apos, quot) or a decimal/hexadecimal character reference with at
    least one digit are left untouched.
    """
    return re.sub(
        # Character references may have any number of digits (e.g.
        # &#x1F600;) but never zero of them (&#; is not a reference).
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]+;|#[0-9]+;)',
        u'&amp;',
        xml_str)
e3946f98
PH
1113
1114
def setproctitle(title):
    """Try to set the name of the current process to title.

    title must be a compat_str.  This is a best-effort operation: nothing
    happens when libc.so.6 cannot be loaded or does not offer prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    # Size the buffer from the encoded bytes, not from the number of
    # characters: non-ASCII characters take several bytes in UTF-8.
    # create_string_buffer() appends the terminating NUL itself.
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1128
1129
def remove_start(s, start):
    """Return s without the prefix start; s is unchanged if it lacks it."""
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1134
1135
def url_basename(url):
    """Return the last segment of the path component of url."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip(u'/')
    return trimmed_path.rpartition(u'/')[2]
aa94a6d3
PH
1139
1140
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports the HEAD method."""

    def get_method(self):
        return "HEAD"
7217e148
PH
1144
1145
dd27fd17
PH
def int_or_none(v, scale=1):
    """Return int(v) floor-divided by scale, or None when v is None."""
    if v is None:
        return v
    return int(v) // scale
608d11f5
PH
1148
1149
def parse_duration(s):
    """Convert a duration such as '1:02:03', '3m4s' or '45' to seconds.

    Returns None when s is None or does not match the expected format.
    """
    if s is None:
        return None

    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
    if m is None:
        return None

    total = int(m.group('secs'))
    # Minutes and hours are optional parts of the pattern
    for group, factor in (('mins', 60), ('hours', 60 * 60)):
        value = m.group(group)
        if value:
            total += int(value) * factor
    return total
91d7d0b3
JMF
1164
1165
def prepend_extension(filename, ext):
    """Insert ext in front of the extension of filename ('a.mp4' -> 'a.ext.mp4')."""
    parts = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(parts[0], ext, parts[1])
d70ad093
PH
1169
1170
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be any iterable of arguments for a short output (like -version).
    Returns False if the binary could not be started. """
    # Copy into a list so tuples and other iterables work too, and so no
    # mutable default argument is shared between calls
    cmd = [exe] + list(args or [])
    try:
        subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1179
1180
class PagedList(object):
    """List-like view over items that are fetched one page at a time.

    pagefunc(pagenum) is called with a zero-based page number and must
    return an iterable with the items of that page.  Every page is
    expected to hold pagesize items; a shorter page is taken to be the
    last one.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """Return the items with indices start <= i < end as a list.

        end=None means "up to the last item".  Only the pages overlapping
        the requested range are fetched.
        """
        res = []
        # An empty range needs no page at all.  Without this guard the end
        # offset computed below wraps around to a full page.
        if end is not None and end <= start:
            return res

        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            page_results = list(self._pagefunc(pagenum))

            # Offset of the first wanted item inside this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset one past the last wanted item inside this page, or
            # None if the range extends beyond this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1226
1227
def uppercase_escape(s):
    """Replace each \\UXXXXXXXX escape (eight hex digits) in s by the
    character with that code point."""
    def _decode(match):
        return compat_chr(int(match.group(1), base=16))

    return re.sub(r'\\U([0-9a-fA-F]{8})', _decode, s)
b53466e1
PH
1232
# struct_pack / struct_unpack: variants of struct.pack / struct.unpack that
# accept a text format spec on every supported Python version.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        return struct.pack(
            spec.encode('ascii') if isinstance(spec, compat_str) else spec,
            *args)

    def struct_unpack(spec, *args):
        return struct.unpack(
            spec.encode('ascii') if isinstance(spec, compat_str) else spec,
            *args)
else:
    # Text specs work natively, so the stdlib functions are used directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1249
1250
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file and close it.

    batch_fd is an open file (or any closable iterable of lines); bytes
    lines are decoded as UTF-8.  A leading byte order mark is dropped,
    surrounding whitespace is stripped, and empty lines as well as lines
    starting with '#', ';' or ']' are skipped.  Returns a list of URLs.
    """
    # The UTF-8 BOM shows up as U+FEFF when the data was decoded as UTF-8,
    # and as the three characters EF BB BF when the file was opened with a
    # single-byte encoding such as latin-1.
    BOMS = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]