youtube_dlc/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import io
  20 import itertools
  21 import json
  22 import locale
  23 import math
  24 import operator
  25 import os
  26 import platform
  27 import random
  28 import re
  29 import socket
  30 import ssl
  31 import subprocess
  32 import sys
  33 import tempfile
  34 import time
  35 import traceback
  36 import xml.etree.ElementTree
  37 import zlib
  38
  39 from .compat import (
  40     compat_HTMLParseError,
  41     compat_HTMLParser,
  42     compat_basestring,
  43     compat_chr,
  44     compat_cookiejar,
  45     compat_ctypes_WINFUNCTYPE,
  46     compat_etree_fromstring,
  47     compat_expanduser,
  48     compat_html_entities,
  49     compat_html_entities_html5,
  50     compat_http_client,
  51     compat_integer_types,
  52     compat_kwargs,
  53     compat_os_name,
  54     compat_parse_qs,
  55     compat_shlex_quote,
  56     compat_str,
  57     compat_struct_pack,
  58     compat_struct_unpack,
  59     compat_urllib_error,
  60     compat_urllib_parse,
  61     compat_urllib_parse_urlencode,
  62     compat_urllib_parse_urlparse,
  63     compat_urllib_parse_urlunparse,
  64     compat_urllib_parse_quote,
  65     compat_urllib_parse_quote_plus,
  66     compat_urllib_parse_unquote_plus,
  67     compat_urllib_request,
  68     compat_urlparse,
  69     compat_xpath,
  70 )
  71
  72 from .socks import (
  73     ProxyType,
  74     sockssocket,
  75 )
  76
  77
  78 def register_socks_protocols():
  79     # "Register" SOCKS protocols
  80     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  81     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  82     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  83         if scheme not in compat_urlparse.uses_netloc:
  84             compat_urlparse.uses_netloc.append(scheme)
  85
  86
  87 # This is not clearly defined otherwise
  88 compiled_regex_type = type(re.compile(''))
  89
  90
  91 def random_user_agent():
  92     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  93     _CHROME_VERSIONS = (
  94         '74.0.3729.129',
  95         '76.0.3780.3',
  96         '76.0.3780.2',
  97         '74.0.3729.128',
  98         '76.0.3780.1',
  99         '76.0.3780.0',
 100         '75.0.3770.15',
 101         '74.0.3729.127',
 102         '74.0.3729.126',
 103         '76.0.3779.1',
 104         '76.0.3779.0',
 105         '75.0.3770.14',
 106         '74.0.3729.125',
 107         '76.0.3778.1',
 108         '76.0.3778.0',
 109         '75.0.3770.13',
 110         '74.0.3729.124',
 111         '74.0.3729.123',
 112         '73.0.3683.121',
 113         '76.0.3777.1',
 114         '76.0.3777.0',
 115         '75.0.3770.12',
 116         '74.0.3729.122',
 117         '76.0.3776.4',
 118         '75.0.3770.11',
 119         '74.0.3729.121',
 120         '76.0.3776.3',
 121         '76.0.3776.2',
 122         '73.0.3683.120',
 123         '74.0.3729.120',
 124         '74.0.3729.119',
 125         '74.0.3729.118',
 126         '76.0.3776.1',
 127         '76.0.3776.0',
 128         '76.0.3775.5',
 129         '75.0.3770.10',
 130         '74.0.3729.117',
 131         '76.0.3775.4',
 132         '76.0.3775.3',
 133         '74.0.3729.116',
 134         '75.0.3770.9',
 135         '76.0.3775.2',
 136         '76.0.3775.1',
 137         '76.0.3775.0',
 138         '75.0.3770.8',
 139         '74.0.3729.115',
 140         '74.0.3729.114',
 141         '76.0.3774.1',
 142         '76.0.3774.0',
 143         '75.0.3770.7',
 144         '74.0.3729.113',
 145         '74.0.3729.112',
 146         '74.0.3729.111',
 147         '76.0.3773.1',
 148         '76.0.3773.0',
 149         '75.0.3770.6',
 150         '74.0.3729.110',
 151         '74.0.3729.109',
 152         '76.0.3772.1',
 153         '76.0.3772.0',
 154         '75.0.3770.5',
 155         '74.0.3729.108',
 156         '74.0.3729.107',
 157         '76.0.3771.1',
 158         '76.0.3771.0',
 159         '75.0.3770.4',
 160         '74.0.3729.106',
 161         '74.0.3729.105',
 162         '75.0.3770.3',
 163         '74.0.3729.104',
 164         '74.0.3729.103',
 165         '74.0.3729.102',
 166         '75.0.3770.2',
 167         '74.0.3729.101',
 168         '75.0.3770.1',
 169         '75.0.3770.0',
 170         '74.0.3729.100',
 171         '75.0.3769.5',
 172         '75.0.3769.4',
 173         '74.0.3729.99',
 174         '75.0.3769.3',
 175         '75.0.3769.2',
 176         '75.0.3768.6',
 177         '74.0.3729.98',
 178         '75.0.3769.1',
 179         '75.0.3769.0',
 180         '74.0.3729.97',
 181         '73.0.3683.119',
 182         '73.0.3683.118',
 183         '74.0.3729.96',
 184         '75.0.3768.5',
 185         '75.0.3768.4',
 186         '75.0.3768.3',
 187         '75.0.3768.2',
 188         '74.0.3729.95',
 189         '74.0.3729.94',
 190         '75.0.3768.1',
 191         '75.0.3768.0',
 192         '74.0.3729.93',
 193         '74.0.3729.92',
 194         '73.0.3683.117',
 195         '74.0.3729.91',
 196         '75.0.3766.3',
 197         '74.0.3729.90',
 198         '75.0.3767.2',
 199         '75.0.3767.1',
 200         '75.0.3767.0',
 201         '74.0.3729.89',
 202         '73.0.3683.116',
 203         '75.0.3766.2',
 204         '74.0.3729.88',
 205         '75.0.3766.1',
 206         '75.0.3766.0',
 207         '74.0.3729.87',
 208         '73.0.3683.115',
 209         '74.0.3729.86',
 210         '75.0.3765.1',
 211         '75.0.3765.0',
 212         '74.0.3729.85',
 213         '73.0.3683.114',
 214         '74.0.3729.84',
 215         '75.0.3764.1',
 216         '75.0.3764.0',
 217         '74.0.3729.83',
 218         '73.0.3683.113',
 219         '75.0.3763.2',
 220         '75.0.3761.4',
 221         '74.0.3729.82',
 222         '75.0.3763.1',
 223         '75.0.3763.0',
 224         '74.0.3729.81',
 225         '73.0.3683.112',
 226         '75.0.3762.1',
 227         '75.0.3762.0',
 228         '74.0.3729.80',
 229         '75.0.3761.3',
 230         '74.0.3729.79',
 231         '73.0.3683.111',
 232         '75.0.3761.2',
 233         '74.0.3729.78',
 234         '74.0.3729.77',
 235         '75.0.3761.1',
 236         '75.0.3761.0',
 237         '73.0.3683.110',
 238         '74.0.3729.76',
 239         '74.0.3729.75',
 240         '75.0.3760.0',
 241         '74.0.3729.74',
 242         '75.0.3759.8',
 243         '75.0.3759.7',
 244         '75.0.3759.6',
 245         '74.0.3729.73',
 246         '75.0.3759.5',
 247         '74.0.3729.72',
 248         '73.0.3683.109',
 249         '75.0.3759.4',
 250         '75.0.3759.3',
 251         '74.0.3729.71',
 252         '75.0.3759.2',
 253         '74.0.3729.70',
 254         '73.0.3683.108',
 255         '74.0.3729.69',
 256         '75.0.3759.1',
 257         '75.0.3759.0',
 258         '74.0.3729.68',
 259         '73.0.3683.107',
 260         '74.0.3729.67',
 261         '75.0.3758.1',
 262         '75.0.3758.0',
 263         '74.0.3729.66',
 264         '73.0.3683.106',
 265         '74.0.3729.65',
 266         '75.0.3757.1',
 267         '75.0.3757.0',
 268         '74.0.3729.64',
 269         '73.0.3683.105',
 270         '74.0.3729.63',
 271         '75.0.3756.1',
 272         '75.0.3756.0',
 273         '74.0.3729.62',
 274         '73.0.3683.104',
 275         '75.0.3755.3',
 276         '75.0.3755.2',
 277         '73.0.3683.103',
 278         '75.0.3755.1',
 279         '75.0.3755.0',
 280         '74.0.3729.61',
 281         '73.0.3683.102',
 282         '74.0.3729.60',
 283         '75.0.3754.2',
 284         '74.0.3729.59',
 285         '75.0.3753.4',
 286         '74.0.3729.58',
 287         '75.0.3754.1',
 288         '75.0.3754.0',
 289         '74.0.3729.57',
 290         '73.0.3683.101',
 291         '75.0.3753.3',
 292         '75.0.3752.2',
 293         '75.0.3753.2',
 294         '74.0.3729.56',
 295         '75.0.3753.1',
 296         '75.0.3753.0',
 297         '74.0.3729.55',
 298         '73.0.3683.100',
 299         '74.0.3729.54',
 300         '75.0.3752.1',
 301         '75.0.3752.0',
 302         '74.0.3729.53',
 303         '73.0.3683.99',
 304         '74.0.3729.52',
 305         '75.0.3751.1',
 306         '75.0.3751.0',
 307         '74.0.3729.51',
 308         '73.0.3683.98',
 309         '74.0.3729.50',
 310         '75.0.3750.0',
 311         '74.0.3729.49',
 312         '74.0.3729.48',
 313         '74.0.3729.47',
 314         '75.0.3749.3',
 315         '74.0.3729.46',
 316         '73.0.3683.97',
 317         '75.0.3749.2',
 318         '74.0.3729.45',
 319         '75.0.3749.1',
 320         '75.0.3749.0',
 321         '74.0.3729.44',
 322         '73.0.3683.96',
 323         '74.0.3729.43',
 324         '74.0.3729.42',
 325         '75.0.3748.1',
 326         '75.0.3748.0',
 327         '74.0.3729.41',
 328         '75.0.3747.1',
 329         '73.0.3683.95',
 330         '75.0.3746.4',
 331         '74.0.3729.40',
 332         '74.0.3729.39',
 333         '75.0.3747.0',
 334         '75.0.3746.3',
 335         '75.0.3746.2',
 336         '74.0.3729.38',
 337         '75.0.3746.1',
 338         '75.0.3746.0',
 339         '74.0.3729.37',
 340         '73.0.3683.94',
 341         '75.0.3745.5',
 342         '75.0.3745.4',
 343         '75.0.3745.3',
 344         '75.0.3745.2',
 345         '74.0.3729.36',
 346         '75.0.3745.1',
 347         '75.0.3745.0',
 348         '75.0.3744.2',
 349         '74.0.3729.35',
 350         '73.0.3683.93',
 351         '74.0.3729.34',
 352         '75.0.3744.1',
 353         '75.0.3744.0',
 354         '74.0.3729.33',
 355         '73.0.3683.92',
 356         '74.0.3729.32',
 357         '74.0.3729.31',
 358         '73.0.3683.91',
 359         '75.0.3741.2',
 360         '75.0.3740.5',
 361         '74.0.3729.30',
 362         '75.0.3741.1',
 363         '75.0.3741.0',
 364         '74.0.3729.29',
 365         '75.0.3740.4',
 366         '73.0.3683.90',
 367         '74.0.3729.28',
 368         '75.0.3740.3',
 369         '73.0.3683.89',
 370         '75.0.3740.2',
 371         '74.0.3729.27',
 372         '75.0.3740.1',
 373         '75.0.3740.0',
 374         '74.0.3729.26',
 375         '73.0.3683.88',
 376         '73.0.3683.87',
 377         '74.0.3729.25',
 378         '75.0.3739.1',
 379         '75.0.3739.0',
 380         '73.0.3683.86',
 381         '74.0.3729.24',
 382         '73.0.3683.85',
 383         '75.0.3738.4',
 384         '75.0.3738.3',
 385         '75.0.3738.2',
 386         '75.0.3738.1',
 387         '75.0.3738.0',
 388         '74.0.3729.23',
 389         '73.0.3683.84',
 390         '74.0.3729.22',
 391         '74.0.3729.21',
 392         '75.0.3737.1',
 393         '75.0.3737.0',
 394         '74.0.3729.20',
 395         '73.0.3683.83',
 396         '74.0.3729.19',
 397         '75.0.3736.1',
 398         '75.0.3736.0',
 399         '74.0.3729.18',
 400         '73.0.3683.82',
 401         '74.0.3729.17',
 402         '75.0.3735.1',
 403         '75.0.3735.0',
 404         '74.0.3729.16',
 405         '73.0.3683.81',
 406         '75.0.3734.1',
 407         '75.0.3734.0',
 408         '74.0.3729.15',
 409         '73.0.3683.80',
 410         '74.0.3729.14',
 411         '75.0.3733.1',
 412         '75.0.3733.0',
 413         '75.0.3732.1',
 414         '74.0.3729.13',
 415         '74.0.3729.12',
 416         '73.0.3683.79',
 417         '74.0.3729.11',
 418         '75.0.3732.0',
 419         '74.0.3729.10',
 420         '73.0.3683.78',
 421         '74.0.3729.9',
 422         '74.0.3729.8',
 423         '74.0.3729.7',
 424         '75.0.3731.3',
 425         '75.0.3731.2',
 426         '75.0.3731.0',
 427         '74.0.3729.6',
 428         '73.0.3683.77',
 429         '73.0.3683.76',
 430         '75.0.3730.5',
 431         '75.0.3730.4',
 432         '73.0.3683.75',
 433         '74.0.3729.5',
 434         '73.0.3683.74',
 435         '75.0.3730.3',
 436         '75.0.3730.2',
 437         '74.0.3729.4',
 438         '73.0.3683.73',
 439         '73.0.3683.72',
 440         '75.0.3730.1',
 441         '75.0.3730.0',
 442         '74.0.3729.3',
 443         '73.0.3683.71',
 444         '74.0.3729.2',
 445         '73.0.3683.70',
 446         '74.0.3729.1',
 447         '74.0.3729.0',
 448         '74.0.3726.4',
 449         '73.0.3683.69',
 450         '74.0.3726.3',
 451         '74.0.3728.0',
 452         '74.0.3726.2',
 453         '73.0.3683.68',
 454         '74.0.3726.1',
 455         '74.0.3726.0',
 456         '74.0.3725.4',
 457         '73.0.3683.67',
 458         '73.0.3683.66',
 459         '74.0.3725.3',
 460         '74.0.3725.2',
 461         '74.0.3725.1',
 462         '74.0.3724.8',
 463         '74.0.3725.0',
 464         '73.0.3683.65',
 465         '74.0.3724.7',
 466         '74.0.3724.6',
 467         '74.0.3724.5',
 468         '74.0.3724.4',
 469         '74.0.3724.3',
 470         '74.0.3724.2',
 471         '74.0.3724.1',
 472         '74.0.3724.0',
 473         '73.0.3683.64',
 474         '74.0.3723.1',
 475         '74.0.3723.0',
 476         '73.0.3683.63',
 477         '74.0.3722.1',
 478         '74.0.3722.0',
 479         '73.0.3683.62',
 480         '74.0.3718.9',
 481         '74.0.3702.3',
 482         '74.0.3721.3',
 483         '74.0.3721.2',
 484         '74.0.3721.1',
 485         '74.0.3721.0',
 486         '74.0.3720.6',
 487         '73.0.3683.61',
 488         '72.0.3626.122',
 489         '73.0.3683.60',
 490         '74.0.3720.5',
 491         '72.0.3626.121',
 492         '74.0.3718.8',
 493         '74.0.3720.4',
 494         '74.0.3720.3',
 495         '74.0.3718.7',
 496         '74.0.3720.2',
 497         '74.0.3720.1',
 498         '74.0.3720.0',
 499         '74.0.3718.6',
 500         '74.0.3719.5',
 501         '73.0.3683.59',
 502         '74.0.3718.5',
 503         '74.0.3718.4',
 504         '74.0.3719.4',
 505         '74.0.3719.3',
 506         '74.0.3719.2',
 507         '74.0.3719.1',
 508         '73.0.3683.58',
 509         '74.0.3719.0',
 510         '73.0.3683.57',
 511         '73.0.3683.56',
 512         '74.0.3718.3',
 513         '73.0.3683.55',
 514         '74.0.3718.2',
 515         '74.0.3718.1',
 516         '74.0.3718.0',
 517         '73.0.3683.54',
 518         '74.0.3717.2',
 519         '73.0.3683.53',
 520         '74.0.3717.1',
 521         '74.0.3717.0',
 522         '73.0.3683.52',
 523         '74.0.3716.1',
 524         '74.0.3716.0',
 525         '73.0.3683.51',
 526         '74.0.3715.1',
 527         '74.0.3715.0',
 528         '73.0.3683.50',
 529         '74.0.3711.2',
 530         '74.0.3714.2',
 531         '74.0.3713.3',
 532         '74.0.3714.1',
 533         '74.0.3714.0',
 534         '73.0.3683.49',
 535         '74.0.3713.1',
 536         '74.0.3713.0',
 537         '72.0.3626.120',
 538         '73.0.3683.48',
 539         '74.0.3712.2',
 540         '74.0.3712.1',
 541         '74.0.3712.0',
 542         '73.0.3683.47',
 543         '72.0.3626.119',
 544         '73.0.3683.46',
 545         '74.0.3710.2',
 546         '72.0.3626.118',
 547         '74.0.3711.1',
 548         '74.0.3711.0',
 549         '73.0.3683.45',
 550         '72.0.3626.117',
 551         '74.0.3710.1',
 552         '74.0.3710.0',
 553         '73.0.3683.44',
 554         '72.0.3626.116',
 555         '74.0.3709.1',
 556         '74.0.3709.0',
 557         '74.0.3704.9',
 558         '73.0.3683.43',
 559         '72.0.3626.115',
 560         '74.0.3704.8',
 561         '74.0.3704.7',
 562         '74.0.3708.0',
 563         '74.0.3706.7',
 564         '74.0.3704.6',
 565         '73.0.3683.42',
 566         '72.0.3626.114',
 567         '74.0.3706.6',
 568         '72.0.3626.113',
 569         '74.0.3704.5',
 570         '74.0.3706.5',
 571         '74.0.3706.4',
 572         '74.0.3706.3',
 573         '74.0.3706.2',
 574         '74.0.3706.1',
 575         '74.0.3706.0',
 576         '73.0.3683.41',
 577         '72.0.3626.112',
 578         '74.0.3705.1',
 579         '74.0.3705.0',
 580         '73.0.3683.40',
 581         '72.0.3626.111',
 582         '73.0.3683.39',
 583         '74.0.3704.4',
 584         '73.0.3683.38',
 585         '74.0.3704.3',
 586         '74.0.3704.2',
 587         '74.0.3704.1',
 588         '74.0.3704.0',
 589         '73.0.3683.37',
 590         '72.0.3626.110',
 591         '72.0.3626.109',
 592         '74.0.3703.3',
 593         '74.0.3703.2',
 594         '73.0.3683.36',
 595         '74.0.3703.1',
 596         '74.0.3703.0',
 597         '73.0.3683.35',
 598         '72.0.3626.108',
 599         '74.0.3702.2',
 600         '74.0.3699.3',
 601         '74.0.3702.1',
 602         '74.0.3702.0',
 603         '73.0.3683.34',
 604         '72.0.3626.107',
 605         '73.0.3683.33',
 606         '74.0.3701.1',
 607         '74.0.3701.0',
 608         '73.0.3683.32',
 609         '73.0.3683.31',
 610         '72.0.3626.105',
 611         '74.0.3700.1',
 612         '74.0.3700.0',
 613         '73.0.3683.29',
 614         '72.0.3626.103',
 615         '74.0.3699.2',
 616         '74.0.3699.1',
 617         '74.0.3699.0',
 618         '73.0.3683.28',
 619         '72.0.3626.102',
 620         '73.0.3683.27',
 621         '73.0.3683.26',
 622         '74.0.3698.0',
 623         '74.0.3696.2',
 624         '72.0.3626.101',
 625         '73.0.3683.25',
 626         '74.0.3696.1',
 627         '74.0.3696.0',
 628         '74.0.3694.8',
 629         '72.0.3626.100',
 630         '74.0.3694.7',
 631         '74.0.3694.6',
 632         '74.0.3694.5',
 633         '74.0.3694.4',
 634         '72.0.3626.99',
 635         '72.0.3626.98',
 636         '74.0.3694.3',
 637         '73.0.3683.24',
 638         '72.0.3626.97',
 639         '72.0.3626.96',
 640         '72.0.3626.95',
 641         '73.0.3683.23',
 642         '72.0.3626.94',
 643         '73.0.3683.22',
 644         '73.0.3683.21',
 645         '72.0.3626.93',
 646         '74.0.3694.2',
 647         '72.0.3626.92',
 648         '74.0.3694.1',
 649         '74.0.3694.0',
 650         '74.0.3693.6',
 651         '73.0.3683.20',
 652         '72.0.3626.91',
 653         '74.0.3693.5',
 654         '74.0.3693.4',
 655         '74.0.3693.3',
 656         '74.0.3693.2',
 657         '73.0.3683.19',
 658         '74.0.3693.1',
 659         '74.0.3693.0',
 660         '73.0.3683.18',
 661         '72.0.3626.90',
 662         '74.0.3692.1',
 663         '74.0.3692.0',
 664         '73.0.3683.17',
 665         '72.0.3626.89',
 666         '74.0.3687.3',
 667         '74.0.3691.1',
 668         '74.0.3691.0',
 669         '73.0.3683.16',
 670         '72.0.3626.88',
 671         '72.0.3626.87',
 672         '73.0.3683.15',
 673         '74.0.3690.1',
 674         '74.0.3690.0',
 675         '73.0.3683.14',
 676         '72.0.3626.86',
 677         '73.0.3683.13',
 678         '73.0.3683.12',
 679         '74.0.3689.1',
 680         '74.0.3689.0',
 681         '73.0.3683.11',
 682         '72.0.3626.85',
 683         '73.0.3683.10',
 684         '72.0.3626.84',
 685         '73.0.3683.9',
 686         '74.0.3688.1',
 687         '74.0.3688.0',
 688         '73.0.3683.8',
 689         '72.0.3626.83',
 690         '74.0.3687.2',
 691         '74.0.3687.1',
 692         '74.0.3687.0',
 693         '73.0.3683.7',
 694         '72.0.3626.82',
 695         '74.0.3686.4',
 696         '72.0.3626.81',
 697         '74.0.3686.3',
 698         '74.0.3686.2',
 699         '74.0.3686.1',
 700         '74.0.3686.0',
 701         '73.0.3683.6',
 702         '72.0.3626.80',
 703         '74.0.3685.1',
 704         '74.0.3685.0',
 705         '73.0.3683.5',
 706         '72.0.3626.79',
 707         '74.0.3684.1',
 708         '74.0.3684.0',
 709         '73.0.3683.4',
 710         '72.0.3626.78',
 711         '72.0.3626.77',
 712         '73.0.3683.3',
 713         '73.0.3683.2',
 714         '72.0.3626.76',
 715         '73.0.3683.1',
 716         '73.0.3683.0',
 717         '72.0.3626.75',
 718         '71.0.3578.141',
 719         '73.0.3682.1',
 720         '73.0.3682.0',
 721         '72.0.3626.74',
 722         '71.0.3578.140',
 723         '73.0.3681.4',
 724         '73.0.3681.3',
 725         '73.0.3681.2',
 726         '73.0.3681.1',
 727         '73.0.3681.0',
 728         '72.0.3626.73',
 729         '71.0.3578.139',
 730         '72.0.3626.72',
 731         '72.0.3626.71',
 732         '73.0.3680.1',
 733         '73.0.3680.0',
 734         '72.0.3626.70',
 735         '71.0.3578.138',
 736         '73.0.3678.2',
 737         '73.0.3679.1',
 738         '73.0.3679.0',
 739         '72.0.3626.69',
 740         '71.0.3578.137',
 741         '73.0.3678.1',
 742         '73.0.3678.0',
 743         '71.0.3578.136',
 744         '73.0.3677.1',
 745         '73.0.3677.0',
 746         '72.0.3626.68',
 747         '72.0.3626.67',
 748         '71.0.3578.135',
 749         '73.0.3676.1',
 750         '73.0.3676.0',
 751         '73.0.3674.2',
 752         '72.0.3626.66',
 753         '71.0.3578.134',
 754         '73.0.3674.1',
 755         '73.0.3674.0',
 756         '72.0.3626.65',
 757         '71.0.3578.133',
 758         '73.0.3673.2',
 759         '73.0.3673.1',
 760         '73.0.3673.0',
 761         '72.0.3626.64',
 762         '71.0.3578.132',
 763         '72.0.3626.63',
 764         '72.0.3626.62',
 765         '72.0.3626.61',
 766         '72.0.3626.60',
 767         '73.0.3672.1',
 768         '73.0.3672.0',
 769         '72.0.3626.59',
 770         '71.0.3578.131',
 771         '73.0.3671.3',
 772         '73.0.3671.2',
 773         '73.0.3671.1',
 774         '73.0.3671.0',
 775         '72.0.3626.58',
 776         '71.0.3578.130',
 777         '73.0.3670.1',
 778         '73.0.3670.0',
 779         '72.0.3626.57',
 780         '71.0.3578.129',
 781         '73.0.3669.1',
 782         '73.0.3669.0',
 783         '72.0.3626.56',
 784         '71.0.3578.128',
 785         '73.0.3668.2',
 786         '73.0.3668.1',
 787         '73.0.3668.0',
 788         '72.0.3626.55',
 789         '71.0.3578.127',
 790         '73.0.3667.2',
 791         '73.0.3667.1',
 792         '73.0.3667.0',
 793         '72.0.3626.54',
 794         '71.0.3578.126',
 795         '73.0.3666.1',
 796         '73.0.3666.0',
 797         '72.0.3626.53',
 798         '71.0.3578.125',
 799         '73.0.3665.4',
 800         '73.0.3665.3',
 801         '72.0.3626.52',
 802         '73.0.3665.2',
 803         '73.0.3664.4',
 804         '73.0.3665.1',
 805         '73.0.3665.0',
 806         '72.0.3626.51',
 807         '71.0.3578.124',
 808         '72.0.3626.50',
 809         '73.0.3664.3',
 810         '73.0.3664.2',
 811         '73.0.3664.1',
 812         '73.0.3664.0',
 813         '73.0.3663.2',
 814         '72.0.3626.49',
 815         '71.0.3578.123',
 816         '73.0.3663.1',
 817         '73.0.3663.0',
 818         '72.0.3626.48',
 819         '71.0.3578.122',
 820         '73.0.3662.1',
 821         '73.0.3662.0',
 822         '72.0.3626.47',
 823         '71.0.3578.121',
 824         '73.0.3661.1',
 825         '72.0.3626.46',
 826         '73.0.3661.0',
 827         '72.0.3626.45',
 828         '71.0.3578.120',
 829         '73.0.3660.2',
 830         '73.0.3660.1',
 831         '73.0.3660.0',
 832         '72.0.3626.44',
 833         '71.0.3578.119',
 834         '73.0.3659.1',
 835         '73.0.3659.0',
 836         '72.0.3626.43',
 837         '71.0.3578.118',
 838         '73.0.3658.1',
 839         '73.0.3658.0',
 840         '72.0.3626.42',
 841         '71.0.3578.117',
 842         '73.0.3657.1',
 843         '73.0.3657.0',
 844         '72.0.3626.41',
 845         '71.0.3578.116',
 846         '73.0.3656.1',
 847         '73.0.3656.0',
 848         '72.0.3626.40',
 849         '71.0.3578.115',
 850         '73.0.3655.1',
 851         '73.0.3655.0',
 852         '72.0.3626.39',
 853         '71.0.3578.114',
 854         '73.0.3654.1',
 855         '73.0.3654.0',
 856         '72.0.3626.38',
 857         '71.0.3578.113',
 858         '73.0.3653.1',
 859         '73.0.3653.0',
 860         '72.0.3626.37',
 861         '71.0.3578.112',
 862         '73.0.3652.1',
 863         '73.0.3652.0',
 864         '72.0.3626.36',
 865         '71.0.3578.111',
 866         '73.0.3651.1',
 867         '73.0.3651.0',
 868         '72.0.3626.35',
 869         '71.0.3578.110',
 870         '73.0.3650.1',
 871         '73.0.3650.0',
 872         '72.0.3626.34',
 873         '71.0.3578.109',
 874         '73.0.3649.1',
 875         '73.0.3649.0',
 876         '72.0.3626.33',
 877         '71.0.3578.108',
 878         '73.0.3648.2',
 879         '73.0.3648.1',
 880         '73.0.3648.0',
 881         '72.0.3626.32',
 882         '71.0.3578.107',
 883         '73.0.3647.2',
 884         '73.0.3647.1',
 885         '73.0.3647.0',
 886         '72.0.3626.31',
 887         '71.0.3578.106',
 888         '73.0.3635.3',
 889         '73.0.3646.2',
 890         '73.0.3646.1',
 891         '73.0.3646.0',
 892         '72.0.3626.30',
 893         '71.0.3578.105',
 894         '72.0.3626.29',
 895         '73.0.3645.2',
 896         '73.0.3645.1',
 897         '73.0.3645.0',
 898         '72.0.3626.28',
 899         '71.0.3578.104',
 900         '72.0.3626.27',
 901         '72.0.3626.26',
 902         '72.0.3626.25',
 903         '72.0.3626.24',
 904         '73.0.3644.0',
 905         '73.0.3643.2',
 906         '72.0.3626.23',
 907         '71.0.3578.103',
 908         '73.0.3643.1',
 909         '73.0.3643.0',
 910         '72.0.3626.22',
 911         '71.0.3578.102',
 912         '73.0.3642.1',
 913         '73.0.3642.0',
 914         '72.0.3626.21',
 915         '71.0.3578.101',
 916         '73.0.3641.1',
 917         '73.0.3641.0',
 918         '72.0.3626.20',
 919         '71.0.3578.100',
 920         '72.0.3626.19',
 921         '73.0.3640.1',
 922         '73.0.3640.0',
 923         '72.0.3626.18',
 924         '73.0.3639.1',
 925         '71.0.3578.99',
 926         '73.0.3639.0',
 927         '72.0.3626.17',
 928         '73.0.3638.2',
 929         '72.0.3626.16',
 930         '73.0.3638.1',
 931         '73.0.3638.0',
 932         '72.0.3626.15',
 933         '71.0.3578.98',
 934         '73.0.3635.2',
 935         '71.0.3578.97',
 936         '73.0.3637.1',
 937         '73.0.3637.0',
 938         '72.0.3626.14',
 939         '71.0.3578.96',
 940         '71.0.3578.95',
 941         '72.0.3626.13',
 942         '71.0.3578.94',
 943         '73.0.3636.2',
 944         '71.0.3578.93',
 945         '73.0.3636.1',
 946         '73.0.3636.0',
 947         '72.0.3626.12',
 948         '71.0.3578.92',
 949         '73.0.3635.1',
 950         '73.0.3635.0',
 951         '72.0.3626.11',
 952         '71.0.3578.91',
 953         '73.0.3634.2',
 954         '73.0.3634.1',
 955         '73.0.3634.0',
 956         '72.0.3626.10',
 957         '71.0.3578.90',
 958         '71.0.3578.89',
 959         '73.0.3633.2',
 960         '73.0.3633.1',
 961         '73.0.3633.0',
 962         '72.0.3610.4',
 963         '72.0.3626.9',
 964         '71.0.3578.88',
 965         '73.0.3632.5',
 966         '73.0.3632.4',
 967         '73.0.3632.3',
 968         '73.0.3632.2',
 969         '73.0.3632.1',
 970         '73.0.3632.0',
 971         '72.0.3626.8',
 972         '71.0.3578.87',
 973         '73.0.3631.2',
 974         '73.0.3631.1',
 975         '73.0.3631.0',
 976         '72.0.3626.7',
 977         '71.0.3578.86',
 978         '72.0.3626.6',
 979         '73.0.3630.1',
 980         '73.0.3630.0',
 981         '72.0.3626.5',
 982         '71.0.3578.85',
 983         '72.0.3626.4',
 984         '73.0.3628.3',
 985         '73.0.3628.2',
 986         '73.0.3629.1',
 987         '73.0.3629.0',
 988         '72.0.3626.3',
 989         '71.0.3578.84',
 990         '73.0.3628.1',
 991         '73.0.3628.0',
 992         '71.0.3578.83',
 993         '73.0.3627.1',
 994         '73.0.3627.0',
 995         '72.0.3626.2',
 996         '71.0.3578.82',
 997         '71.0.3578.81',
 998         '71.0.3578.80',
 999         '72.0.3626.1',
1000         '72.0.3626.0',
1001         '71.0.3578.79',
1002         '70.0.3538.124',
1003         '71.0.3578.78',
1004         '72.0.3623.4',
1005         '72.0.3625.2',
1006         '72.0.3625.1',
1007         '72.0.3625.0',
1008         '71.0.3578.77',
1009         '70.0.3538.123',
1010         '72.0.3624.4',
1011         '72.0.3624.3',
1012         '72.0.3624.2',
1013         '71.0.3578.76',
1014         '72.0.3624.1',
1015         '72.0.3624.0',
1016         '72.0.3623.3',
1017         '71.0.3578.75',
1018         '70.0.3538.122',
1019         '71.0.3578.74',
1020         '72.0.3623.2',
1021         '72.0.3610.3',
1022         '72.0.3623.1',
1023         '72.0.3623.0',
1024         '72.0.3622.3',
1025         '72.0.3622.2',
1026         '71.0.3578.73',
1027         '70.0.3538.121',
1028         '72.0.3622.1',
1029         '72.0.3622.0',
1030         '71.0.3578.72',
1031         '70.0.3538.120',
1032         '72.0.3621.1',
1033         '72.0.3621.0',
1034         '71.0.3578.71',
1035         '70.0.3538.119',
1036         '72.0.3620.1',
1037         '72.0.3620.0',
1038         '71.0.3578.70',
1039         '70.0.3538.118',
1040         '71.0.3578.69',
1041         '72.0.3619.1',
1042         '72.0.3619.0',
1043         '71.0.3578.68',
1044         '70.0.3538.117',
1045         '71.0.3578.67',
1046         '72.0.3618.1',
1047         '72.0.3618.0',
1048         '71.0.3578.66',
1049         '70.0.3538.116',
1050         '72.0.3617.1',
1051         '72.0.3617.0',
1052         '71.0.3578.65',
1053         '70.0.3538.115',
1054         '72.0.3602.3',
1055         '71.0.3578.64',
1056         '72.0.3616.1',
1057         '72.0.3616.0',
1058         '71.0.3578.63',
1059         '70.0.3538.114',
1060         '71.0.3578.62',
1061         '72.0.3615.1',
1062         '72.0.3615.0',
1063         '71.0.3578.61',
1064         '70.0.3538.113',
1065         '72.0.3614.1',
1066         '72.0.3614.0',
1067         '71.0.3578.60',
1068         '70.0.3538.112',
1069         '72.0.3613.1',
1070         '72.0.3613.0',
1071         '71.0.3578.59',
1072         '70.0.3538.111',
1073         '72.0.3612.2',
1074         '72.0.3612.1',
1075         '72.0.3612.0',
1076         '70.0.3538.110',
1077         '71.0.3578.58',
1078         '70.0.3538.109',
1079         '72.0.3611.2',
1080         '72.0.3611.1',
1081         '72.0.3611.0',
1082         '71.0.3578.57',
1083         '70.0.3538.108',
1084         '72.0.3610.2',
1085         '71.0.3578.56',
1086         '71.0.3578.55',
1087         '72.0.3610.1',
1088         '72.0.3610.0',
1089         '71.0.3578.54',
1090         '70.0.3538.107',
1091         '71.0.3578.53',
1092         '72.0.3609.3',
1093         '71.0.3578.52',
1094         '72.0.3609.2',
1095         '71.0.3578.51',
1096         '72.0.3608.5',
1097         '72.0.3609.1',
1098         '72.0.3609.0',
1099         '71.0.3578.50',
1100         '70.0.3538.106',
1101         '72.0.3608.4',
1102         '72.0.3608.3',
1103         '72.0.3608.2',
1104         '71.0.3578.49',
1105         '72.0.3608.1',
1106         '72.0.3608.0',
1107         '70.0.3538.105',
1108         '71.0.3578.48',
1109         '72.0.3607.1',
1110         '72.0.3607.0',
1111         '71.0.3578.47',
1112         '70.0.3538.104',
1113         '72.0.3606.2',
1114         '72.0.3606.1',
1115         '72.0.3606.0',
1116         '71.0.3578.46',
1117         '70.0.3538.103',
1118         '70.0.3538.102',
1119         '72.0.3605.3',
1120         '72.0.3605.2',
1121         '72.0.3605.1',
1122         '72.0.3605.0',
1123         '71.0.3578.45',
1124         '70.0.3538.101',
1125         '71.0.3578.44',
1126         '71.0.3578.43',
1127         '70.0.3538.100',
1128         '70.0.3538.99',
1129         '71.0.3578.42',
1130         '72.0.3604.1',
1131         '72.0.3604.0',
1132         '71.0.3578.41',
1133         '70.0.3538.98',
1134         '71.0.3578.40',
1135         '72.0.3603.2',
1136         '72.0.3603.1',
1137         '72.0.3603.0',
1138         '71.0.3578.39',
1139         '70.0.3538.97',
1140         '72.0.3602.2',
1141         '71.0.3578.38',
1142         '71.0.3578.37',
1143         '72.0.3602.1',
1144         '72.0.3602.0',
1145         '71.0.3578.36',
1146         '70.0.3538.96',
1147         '72.0.3601.1',
1148         '72.0.3601.0',
1149         '71.0.3578.35',
1150         '70.0.3538.95',
1151         '72.0.3600.1',
1152         '72.0.3600.0',
1153         '71.0.3578.34',
1154         '70.0.3538.94',
1155         '72.0.3599.3',
1156         '72.0.3599.2',
1157         '72.0.3599.1',
1158         '72.0.3599.0',
1159         '71.0.3578.33',
1160         '70.0.3538.93',
1161         '72.0.3598.1',
1162         '72.0.3598.0',
1163         '71.0.3578.32',
1164         '70.0.3538.87',
1165         '72.0.3597.1',
1166         '72.0.3597.0',
1167         '72.0.3596.2',
1168         '71.0.3578.31',
1169         '70.0.3538.86',
1170         '71.0.3578.30',
1171         '71.0.3578.29',
1172         '72.0.3596.1',
1173         '72.0.3596.0',
1174         '71.0.3578.28',
1175         '70.0.3538.85',
1176         '72.0.3595.2',
1177         '72.0.3591.3',
1178         '72.0.3595.1',
1179         '72.0.3595.0',
1180         '71.0.3578.27',
1181         '70.0.3538.84',
1182         '72.0.3594.1',
1183         '72.0.3594.0',
1184         '71.0.3578.26',
1185         '70.0.3538.83',
1186         '72.0.3593.2',
1187         '72.0.3593.1',
1188         '72.0.3593.0',
1189         '71.0.3578.25',
1190         '70.0.3538.82',
1191         '72.0.3589.3',
1192         '72.0.3592.2',
1193         '72.0.3592.1',
1194         '72.0.3592.0',
1195         '71.0.3578.24',
1196         '72.0.3589.2',
1197         '70.0.3538.81',
1198         '70.0.3538.80',
1199         '72.0.3591.2',
1200         '72.0.3591.1',
1201         '72.0.3591.0',
1202         '71.0.3578.23',
1203         '70.0.3538.79',
1204         '71.0.3578.22',
1205         '72.0.3590.1',
1206         '72.0.3590.0',
1207         '71.0.3578.21',
1208         '70.0.3538.78',
1209         '70.0.3538.77',
1210         '72.0.3589.1',
1211         '72.0.3589.0',
1212         '71.0.3578.20',
1213         '70.0.3538.76',
1214         '71.0.3578.19',
1215         '70.0.3538.75',
1216         '72.0.3588.1',
1217         '72.0.3588.0',
1218         '71.0.3578.18',
1219         '70.0.3538.74',
1220         '72.0.3586.2',
1221         '72.0.3587.0',
1222         '71.0.3578.17',
1223         '70.0.3538.73',
1224         '72.0.3586.1',
1225         '72.0.3586.0',
1226         '71.0.3578.16',
1227         '70.0.3538.72',
1228         '72.0.3585.1',
1229         '72.0.3585.0',
1230         '71.0.3578.15',
1231         '70.0.3538.71',
1232         '71.0.3578.14',
1233         '72.0.3584.1',
1234         '72.0.3584.0',
1235         '71.0.3578.13',
1236         '70.0.3538.70',
1237         '72.0.3583.2',
1238         '71.0.3578.12',
1239         '72.0.3583.1',
1240         '72.0.3583.0',
1241         '71.0.3578.11',
1242         '70.0.3538.69',
1243         '71.0.3578.10',
1244         '72.0.3582.0',
1245         '72.0.3581.4',
1246         '71.0.3578.9',
1247         '70.0.3538.67',
1248         '72.0.3581.3',
1249         '72.0.3581.2',
1250         '72.0.3581.1',
1251         '72.0.3581.0',
1252         '71.0.3578.8',
1253         '70.0.3538.66',
1254         '72.0.3580.1',
1255         '72.0.3580.0',
1256         '71.0.3578.7',
1257         '70.0.3538.65',
1258         '71.0.3578.6',
1259         '72.0.3579.1',
1260         '72.0.3579.0',
1261         '71.0.3578.5',
1262         '70.0.3538.64',
1263         '71.0.3578.4',
1264         '71.0.3578.3',
1265         '71.0.3578.2',
1266         '71.0.3578.1',
1267         '71.0.3578.0',
1268         '70.0.3538.63',
1269         '69.0.3497.128',
1270         '70.0.3538.62',
1271         '70.0.3538.61',
1272         '70.0.3538.60',
1273         '70.0.3538.59',
1274         '71.0.3577.1',
1275         '71.0.3577.0',
1276         '70.0.3538.58',
1277         '69.0.3497.127',
1278         '71.0.3576.2',
1279         '71.0.3576.1',
1280         '71.0.3576.0',
1281         '70.0.3538.57',
1282         '70.0.3538.56',
1283         '71.0.3575.2',
1284         '70.0.3538.55',
1285         '69.0.3497.126',
1286         '70.0.3538.54',
1287         '71.0.3575.1',
1288         '71.0.3575.0',
1289         '71.0.3574.1',
1290         '71.0.3574.0',
1291         '70.0.3538.53',
1292         '69.0.3497.125',
1293         '70.0.3538.52',
1294         '71.0.3573.1',
1295         '71.0.3573.0',
1296         '70.0.3538.51',
1297         '69.0.3497.124',
1298         '71.0.3572.1',
1299         '71.0.3572.0',
1300         '70.0.3538.50',
1301         '69.0.3497.123',
1302         '71.0.3571.2',
1303         '70.0.3538.49',
1304         '69.0.3497.122',
1305         '71.0.3571.1',
1306         '71.0.3571.0',
1307         '70.0.3538.48',
1308         '69.0.3497.121',
1309         '71.0.3570.1',
1310         '71.0.3570.0',
1311         '70.0.3538.47',
1312         '69.0.3497.120',
1313         '71.0.3568.2',
1314         '71.0.3569.1',
1315         '71.0.3569.0',
1316         '70.0.3538.46',
1317         '69.0.3497.119',
1318         '70.0.3538.45',
1319         '71.0.3568.1',
1320         '71.0.3568.0',
1321         '70.0.3538.44',
1322         '69.0.3497.118',
1323         '70.0.3538.43',
1324         '70.0.3538.42',
1325         '71.0.3567.1',
1326         '71.0.3567.0',
1327         '70.0.3538.41',
1328         '69.0.3497.117',
1329         '71.0.3566.1',
1330         '71.0.3566.0',
1331         '70.0.3538.40',
1332         '69.0.3497.116',
1333         '71.0.3565.1',
1334         '71.0.3565.0',
1335         '70.0.3538.39',
1336         '69.0.3497.115',
1337         '71.0.3564.1',
1338         '71.0.3564.0',
1339         '70.0.3538.38',
1340         '69.0.3497.114',
1341         '71.0.3563.0',
1342         '71.0.3562.2',
1343         '70.0.3538.37',
1344         '69.0.3497.113',
1345         '70.0.3538.36',
1346         '70.0.3538.35',
1347         '71.0.3562.1',
1348         '71.0.3562.0',
1349         '70.0.3538.34',
1350         '69.0.3497.112',
1351         '70.0.3538.33',
1352         '71.0.3561.1',
1353         '71.0.3561.0',
1354         '70.0.3538.32',
1355         '69.0.3497.111',
1356         '71.0.3559.6',
1357         '71.0.3560.1',
1358         '71.0.3560.0',
1359         '71.0.3559.5',
1360         '71.0.3559.4',
1361         '70.0.3538.31',
1362         '69.0.3497.110',
1363         '71.0.3559.3',
1364         '70.0.3538.30',
1365         '69.0.3497.109',
1366         '71.0.3559.2',
1367         '71.0.3559.1',
1368         '71.0.3559.0',
1369         '70.0.3538.29',
1370         '69.0.3497.108',
1371         '71.0.3558.2',
1372         '71.0.3558.1',
1373         '71.0.3558.0',
1374         '70.0.3538.28',
1375         '69.0.3497.107',
1376         '71.0.3557.2',
1377         '71.0.3557.1',
1378         '71.0.3557.0',
1379         '70.0.3538.27',
1380         '69.0.3497.106',
1381         '71.0.3554.4',
1382         '70.0.3538.26',
1383         '71.0.3556.1',
1384         '71.0.3556.0',
1385         '70.0.3538.25',
1386         '71.0.3554.3',
1387         '69.0.3497.105',
1388         '71.0.3554.2',
1389         '70.0.3538.24',
1390         '69.0.3497.104',
1391         '71.0.3555.2',
1392         '70.0.3538.23',
1393         '71.0.3555.1',
1394         '71.0.3555.0',
1395         '70.0.3538.22',
1396         '69.0.3497.103',
1397         '71.0.3554.1',
1398         '71.0.3554.0',
1399         '70.0.3538.21',
1400         '69.0.3497.102',
1401         '71.0.3553.3',
1402         '70.0.3538.20',
1403         '69.0.3497.101',
1404         '71.0.3553.2',
1405         '69.0.3497.100',
1406         '71.0.3553.1',
1407         '71.0.3553.0',
1408         '70.0.3538.19',
1409         '69.0.3497.99',
1410         '69.0.3497.98',
1411         '69.0.3497.97',
1412         '71.0.3552.6',
1413         '71.0.3552.5',
1414         '71.0.3552.4',
1415         '71.0.3552.3',
1416         '71.0.3552.2',
1417         '71.0.3552.1',
1418         '71.0.3552.0',
1419         '70.0.3538.18',
1420         '69.0.3497.96',
1421         '71.0.3551.3',
1422         '71.0.3551.2',
1423         '71.0.3551.1',
1424         '71.0.3551.0',
1425         '70.0.3538.17',
1426         '69.0.3497.95',
1427         '71.0.3550.3',
1428         '71.0.3550.2',
1429         '71.0.3550.1',
1430         '71.0.3550.0',
1431         '70.0.3538.16',
1432         '69.0.3497.94',
1433         '71.0.3549.1',
1434         '71.0.3549.0',
1435         '70.0.3538.15',
1436         '69.0.3497.93',
1437         '69.0.3497.92',
1438         '71.0.3548.1',
1439         '71.0.3548.0',
1440         '70.0.3538.14',
1441         '69.0.3497.91',
1442         '71.0.3547.1',
1443         '71.0.3547.0',
1444         '70.0.3538.13',
1445         '69.0.3497.90',
1446         '71.0.3546.2',
1447         '69.0.3497.89',
1448         '71.0.3546.1',
1449         '71.0.3546.0',
1450         '70.0.3538.12',
1451         '69.0.3497.88',
1452         '71.0.3545.4',
1453         '71.0.3545.3',
1454         '71.0.3545.2',
1455         '71.0.3545.1',
1456         '71.0.3545.0',
1457         '70.0.3538.11',
1458         '69.0.3497.87',
1459         '71.0.3544.5',
1460         '71.0.3544.4',
1461         '71.0.3544.3',
1462         '71.0.3544.2',
1463         '71.0.3544.1',
1464         '71.0.3544.0',
1465         '69.0.3497.86',
1466         '70.0.3538.10',
1467         '69.0.3497.85',
1468         '70.0.3538.9',
1469         '69.0.3497.84',
1470         '71.0.3543.4',
1471         '70.0.3538.8',
1472         '71.0.3543.3',
1473         '71.0.3543.2',
1474         '71.0.3543.1',
1475         '71.0.3543.0',
1476         '70.0.3538.7',
1477         '69.0.3497.83',
1478         '71.0.3542.2',
1479         '71.0.3542.1',
1480         '71.0.3542.0',
1481         '70.0.3538.6',
1482         '69.0.3497.82',
1483         '69.0.3497.81',
1484         '71.0.3541.1',
1485         '71.0.3541.0',
1486         '70.0.3538.5',
1487         '69.0.3497.80',
1488         '71.0.3540.1',
1489         '71.0.3540.0',
1490         '70.0.3538.4',
1491         '69.0.3497.79',
1492         '70.0.3538.3',
1493         '71.0.3539.1',
1494         '71.0.3539.0',
1495         '69.0.3497.78',
1496         '68.0.3440.134',
1497         '69.0.3497.77',
1498         '70.0.3538.2',
1499         '70.0.3538.1',
1500         '70.0.3538.0',
1501         '69.0.3497.76',
1502         '68.0.3440.133',
1503         '69.0.3497.75',
1504         '70.0.3537.2',
1505         '70.0.3537.1',
1506         '70.0.3537.0',
1507         '69.0.3497.74',
1508         '68.0.3440.132',
1509         '70.0.3536.0',
1510         '70.0.3535.5',
1511         '70.0.3535.4',
1512         '70.0.3535.3',
1513         '69.0.3497.73',
1514         '68.0.3440.131',
1515         '70.0.3532.8',
1516         '70.0.3532.7',
1517         '69.0.3497.72',
1518         '69.0.3497.71',
1519         '70.0.3535.2',
1520         '70.0.3535.1',
1521         '70.0.3535.0',
1522         '69.0.3497.70',
1523         '68.0.3440.130',
1524         '69.0.3497.69',
1525         '68.0.3440.129',
1526         '70.0.3534.4',
1527         '70.0.3534.3',
1528         '70.0.3534.2',
1529         '70.0.3534.1',
1530         '70.0.3534.0',
1531         '69.0.3497.68',
1532         '68.0.3440.128',
1533         '70.0.3533.2',
1534         '70.0.3533.1',
1535         '70.0.3533.0',
1536         '69.0.3497.67',
1537         '68.0.3440.127',
1538         '70.0.3532.6',
1539         '70.0.3532.5',
1540         '70.0.3532.4',
1541         '69.0.3497.66',
1542         '68.0.3440.126',
1543         '70.0.3532.3',
1544         '70.0.3532.2',
1545         '70.0.3532.1',
1546         '69.0.3497.60',
1547         '69.0.3497.65',
1548         '69.0.3497.64',
1549         '70.0.3532.0',
1550         '70.0.3531.0',
1551         '70.0.3530.4',
1552         '70.0.3530.3',
1553         '70.0.3530.2',
1554         '69.0.3497.58',
1555         '68.0.3440.125',
1556         '69.0.3497.57',
1557         '69.0.3497.56',
1558         '69.0.3497.55',
1559         '69.0.3497.54',
1560         '70.0.3530.1',
1561         '70.0.3530.0',
1562         '69.0.3497.53',
1563         '68.0.3440.124',
1564         '69.0.3497.52',
1565         '70.0.3529.3',
1566         '70.0.3529.2',
1567         '70.0.3529.1',
1568         '70.0.3529.0',
1569         '69.0.3497.51',
1570         '70.0.3528.4',
1571         '68.0.3440.123',
1572         '70.0.3528.3',
1573         '70.0.3528.2',
1574         '70.0.3528.1',
1575         '70.0.3528.0',
1576         '69.0.3497.50',
1577         '68.0.3440.122',
1578         '70.0.3527.1',
1579         '70.0.3527.0',
1580         '69.0.3497.49',
1581         '68.0.3440.121',
1582         '70.0.3526.1',
1583         '70.0.3526.0',
1584         '68.0.3440.120',
1585         '69.0.3497.48',
1586         '69.0.3497.47',
1587         '68.0.3440.119',
1588         '68.0.3440.118',
1589         '70.0.3525.5',
1590         '70.0.3525.4',
1591         '70.0.3525.3',
1592         '68.0.3440.117',
1593         '69.0.3497.46',
1594         '70.0.3525.2',
1595         '70.0.3525.1',
1596         '70.0.3525.0',
1597         '69.0.3497.45',
1598         '68.0.3440.116',
1599         '70.0.3524.4',
1600         '70.0.3524.3',
1601         '69.0.3497.44',
1602         '70.0.3524.2',
1603         '70.0.3524.1',
1604         '70.0.3524.0',
1605         '70.0.3523.2',
1606         '69.0.3497.43',
1607         '68.0.3440.115',
1608         '70.0.3505.9',
1609         '69.0.3497.42',
1610         '70.0.3505.8',
1611         '70.0.3523.1',
1612         '70.0.3523.0',
1613         '69.0.3497.41',
1614         '68.0.3440.114',
1615         '70.0.3505.7',
1616         '69.0.3497.40',
1617         '70.0.3522.1',
1618         '70.0.3522.0',
1619         '70.0.3521.2',
1620         '69.0.3497.39',
1621         '68.0.3440.113',
1622         '70.0.3505.6',
1623         '70.0.3521.1',
1624         '70.0.3521.0',
1625         '69.0.3497.38',
1626         '68.0.3440.112',
1627         '70.0.3520.1',
1628         '70.0.3520.0',
1629         '69.0.3497.37',
1630         '68.0.3440.111',
1631         '70.0.3519.3',
1632         '70.0.3519.2',
1633         '70.0.3519.1',
1634         '70.0.3519.0',
1635         '69.0.3497.36',
1636         '68.0.3440.110',
1637         '70.0.3518.1',
1638         '70.0.3518.0',
1639         '69.0.3497.35',
1640         '69.0.3497.34',
1641         '68.0.3440.109',
1642         '70.0.3517.1',
1643         '70.0.3517.0',
1644         '69.0.3497.33',
1645         '68.0.3440.108',
1646         '69.0.3497.32',
1647         '70.0.3516.3',
1648         '70.0.3516.2',
1649         '70.0.3516.1',
1650         '70.0.3516.0',
1651         '69.0.3497.31',
1652         '68.0.3440.107',
1653         '70.0.3515.4',
1654         '68.0.3440.106',
1655         '70.0.3515.3',
1656         '70.0.3515.2',
1657         '70.0.3515.1',
1658         '70.0.3515.0',
1659         '69.0.3497.30',
1660         '68.0.3440.105',
1661         '68.0.3440.104',
1662         '70.0.3514.2',
1663         '70.0.3514.1',
1664         '70.0.3514.0',
1665         '69.0.3497.29',
1666         '68.0.3440.103',
1667         '70.0.3513.1',
1668         '70.0.3513.0',
1669         '69.0.3497.28',
1670     )
1671     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
1674 std_headers = {
1675     'User-Agent': random_user_agent(),
1676     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1677     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1678     'Accept-Encoding': 'gzip, deflate',
1679     'Accept-Language': 'en-us,en;q=0.5',
1680 }
1681
1682
1683 USER_AGENTS = {
1684     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1685 }
1686
1687
1688 NO_DEFAULT = object()
1689
1690 ENGLISH_MONTH_NAMES = [
1691     'January', 'February', 'March', 'April', 'May', 'June',
1692     'July', 'August', 'September', 'October', 'November', 'December']
1693
1694 MONTH_NAMES = {
1695     'en': ENGLISH_MONTH_NAMES,
1696     'fr': [
1697         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1698         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1699 }
1700
1701 KNOWN_EXTENSIONS = (
1702     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1703     'flv', 'f4v', 'f4a', 'f4b',
1704     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1705     'mkv', 'mka', 'mk3d',
1706     'avi', 'divx',
1707     'mov',
1708     'asf', 'wmv', 'wma',
1709     '3gp', '3g2',
1710     'mp3',
1711     'flac',
1712     'ape',
1713     'wav',
1714     'f4f', 'f4m', 'm3u8', 'smil')
1715
1716 # needed for sanitizing filenames in restricted mode
1717 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1718                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1719                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1720
1721 DATE_FORMATS = (
1722     '%d %B %Y',
1723     '%d %b %Y',
1724     '%B %d %Y',
1725     '%B %dst %Y',
1726     '%B %dnd %Y',
1727     '%B %drd %Y',
1728     '%B %dth %Y',
1729     '%b %d %Y',
1730     '%b %dst %Y',
1731     '%b %dnd %Y',
1732     '%b %drd %Y',
1733     '%b %dth %Y',
1734     '%b %dst %Y %I:%M',
1735     '%b %dnd %Y %I:%M',
1736     '%b %drd %Y %I:%M',
1737     '%b %dth %Y %I:%M',
1738     '%Y %m %d',
1739     '%Y-%m-%d',
1740     '%Y/%m/%d',
1741     '%Y/%m/%d %H:%M',
1742     '%Y/%m/%d %H:%M:%S',
1743     '%Y-%m-%d %H:%M',
1744     '%Y-%m-%d %H:%M:%S',
1745     '%Y-%m-%d %H:%M:%S.%f',
1746     '%d.%m.%Y %H:%M',
1747     '%d.%m.%Y %H.%M',
1748     '%Y-%m-%dT%H:%M:%SZ',
1749     '%Y-%m-%dT%H:%M:%S.%fZ',
1750     '%Y-%m-%dT%H:%M:%S.%f0Z',
1751     '%Y-%m-%dT%H:%M:%S',
1752     '%Y-%m-%dT%H:%M:%S.%f',
1753     '%Y-%m-%dT%H:%M',
1754     '%b %d %Y at %H:%M',
1755     '%b %d %Y at %H:%M:%S',
1756     '%B %d %Y at %H:%M',
1757     '%B %d %Y at %H:%M:%S',
1758 )
1759
1760 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1761 DATE_FORMATS_DAY_FIRST.extend([
1762     '%d-%m-%Y',
1763     '%d.%m.%Y',
1764     '%d.%m.%y',
1765     '%d/%m/%Y',
1766     '%d/%m/%y',
1767     '%d/%m/%Y %H:%M:%S',
1768 ])
1769
1770 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1771 DATE_FORMATS_MONTH_FIRST.extend([
1772     '%m-%d-%Y',
1773     '%m.%d.%Y',
1774     '%m/%d/%Y',
1775     '%m/%d/%y',
1776     '%m/%d/%Y %H:%M:%S',
1777 ])
1778
1779 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1780 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1781
1782
1783 def preferredencoding():
1784     """Get preferred encoding.
1785
1786     Returns the best encoding scheme for the system, based on
1787     locale.getpreferredencoding() and some further tweaks.
1788     """
1789     try:
1790         pref = locale.getpreferredencoding()
1791         'TEST'.encode(pref)
1792     except Exception:
1793         pref = 'UTF-8'
1794
1795     return pref
1796
1797
1798 def write_json_file(obj, fn):
1799     """ Encode obj as JSON and write it to fn, atomically if possible """
1800
1801     fn = encodeFilename(fn)
1802     if sys.version_info < (3, 0) and sys.platform != 'win32':
1803         encoding = get_filesystem_encoding()
1804         # os.path.basename returns a bytes object, but NamedTemporaryFile
1805         # will fail if the filename contains non ascii characters unless we
1806         # use a unicode object
1807         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1808         # the same for os.path.dirname
1809         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1810     else:
1811         path_basename = os.path.basename
1812         path_dirname = os.path.dirname
1813
1814     args = {
1815         'suffix': '.tmp',
1816         'prefix': path_basename(fn) + '.',
1817         'dir': path_dirname(fn),
1818         'delete': False,
1819     }
1820
1821     # In Python 2.x, json.dump expects a bytestream.
1822     # In Python 3.x, it writes to a character stream
1823     if sys.version_info < (3, 0):
1824         args['mode'] = 'wb'
1825     else:
1826         args.update({
1827             'mode': 'w',
1828             'encoding': 'utf-8',
1829         })
1830
1831     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1832
1833     try:
1834         with tf:
1835             json.dump(obj, tf)
1836         if sys.platform == 'win32':
1837             # Need to remove existing file on Windows, else os.rename raises
1838             # WindowsError or FileExistsError.
1839             try:
1840                 os.unlink(fn)
1841             except OSError:
1842                 pass
1843         try:
1844             mask = os.umask(0)
1845             os.umask(mask)
1846             os.chmod(tf.name, 0o666 & ~mask)
1847         except OSError:
1848             pass
1849         os.rename(tf.name, fn)
1850     except Exception:
1851         try:
1852             os.remove(tf.name)
1853         except OSError:
1854             pass
1855         raise
1856
1857
1858 if sys.version_info >= (2, 7):
1859     def find_xpath_attr(node, xpath, key, val=None):
1860         """ Find the xpath xpath[@key=val] """
1861         assert re.match(r'^[a-zA-Z_-]+$', key)
1862         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1863         return node.find(expr)
1864 else:
1865     def find_xpath_attr(node, xpath, key, val=None):
1866         for f in node.findall(compat_xpath(xpath)):
1867             if key not in f.attrib:
1868                 continue
1869             if val is None or f.attrib.get(key) == val:
1870                 return f
1871         return None
1872
1873 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1874 # the namespace parameter
1875
1876
1877 def xpath_with_ns(path, ns_map):
1878     components = [c.split(':') for c in path.split('/')]
1879     replaced = []
1880     for c in components:
1881         if len(c) == 1:
1882             replaced.append(c[0])
1883         else:
1884             ns, tag = c
1885             replaced.append('{%s}%s' % (ns_map[ns], tag))
1886     return '/'.join(replaced)
1887
1888
1889 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1890     def _find_xpath(xpath):
1891         return node.find(compat_xpath(xpath))
1892
1893     if isinstance(xpath, (str, compat_str)):
1894         n = _find_xpath(xpath)
1895     else:
1896         for xp in xpath:
1897             n = _find_xpath(xp)
1898             if n is not None:
1899                 break
1900
1901     if n is None:
1902         if default is not NO_DEFAULT:
1903             return default
1904         elif fatal:
1905             name = xpath if name is None else name
1906             raise ExtractorError('Could not find XML element %s' % name)
1907         else:
1908             return None
1909     return n
1910
1911
1912 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1913     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1914     if n is None or n == default:
1915         return n
1916     if n.text is None:
1917         if default is not NO_DEFAULT:
1918             return default
1919         elif fatal:
1920             name = xpath if name is None else name
1921             raise ExtractorError('Could not find XML element\'s text %s' % name)
1922         else:
1923             return None
1924     return n.text
1925
1926
1927 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1928     n = find_xpath_attr(node, xpath, key)
1929     if n is None:
1930         if default is not NO_DEFAULT:
1931             return default
1932         elif fatal:
1933             name = '%s[@%s]' % (xpath, key) if name is None else name
1934             raise ExtractorError('Could not find XML attribute %s' % name)
1935         else:
1936             return None
1937     return n.attrib[key]
1938
1939
1940 def get_element_by_id(id, html):
1941     """Return the content of the tag with the specified ID in the passed HTML document"""
1942     return get_element_by_attribute('id', id, html)
1943
1944
1945 def get_element_by_class(class_name, html):
1946     """Return the content of the first tag with the specified class in the passed HTML document"""
1947     retval = get_elements_by_class(class_name, html)
1948     return retval[0] if retval else None
1949
1950
1951 def get_element_by_attribute(attribute, value, html, escape_value=True):
1952     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1953     return retval[0] if retval else None
1954
1955
1956 def get_elements_by_class(class_name, html):
1957     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1958     return get_elements_by_attribute(
1959         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1960         html, escape_value=False)
1961
1962
1963 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1964     """Return the content of the tag with the specified attribute in the passed HTML document"""
1965
1966     value = re.escape(value) if escape_value else value
1967
1968     retlist = []
1969     for m in re.finditer(r'''(?xs)
1970         <([a-zA-Z0-9:._-]+)
1971          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1972          \s+%s=['"]?%s['"]?
1973          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1974         \s*>
1975         (?P<content>.*?)
1976         </\1>
1977     ''' % (re.escape(attribute), value), html):
1978         res = m.group('content')
1979
1980         if res.startswith('"') or res.startswith("'"):
1981             res = res[1:-1]
1982
1983         retlist.append(unescapeHTML(res))
1984
1985     return retlist
1986
1987
1988 class HTMLAttributeParser(compat_HTMLParser):
1989     """Trivial HTML parser to gather the attributes for a single element"""
1990
1991     def __init__(self):
1992         self.attrs = {}
1993         compat_HTMLParser.__init__(self)
1994
1995     def handle_starttag(self, tag, attrs):
1996         self.attrs = dict(attrs)
1997
1998
1999 def extract_attributes(html_element):
2000     """Given a string for an HTML element such as
2001     <el
2002          a="foo" B="bar" c="&98;az" d=boz
2003          empty= noval entity="&amp;"
2004          sq='"' dq="'"
2005     >
2006     Decode and return a dictionary of attributes.
2007     {
2008         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2009         'empty': '', 'noval': None, 'entity': '&',
2010         'sq': '"', 'dq': '\''
2011     }.
2012     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2013     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2014     """
2015     parser = HTMLAttributeParser()
2016     try:
2017         parser.feed(html_element)
2018         parser.close()
2019     # Older Python may throw HTMLParseError in case of malformed HTML
2020     except compat_HTMLParseError:
2021         pass
2022     return parser.attrs
2023
2024
2025 def clean_html(html):
2026     """Clean an HTML snippet into a readable string"""
2027
2028     if html is None:  # Convenience for sanitizing descriptions etc.
2029         return html
2030
2031     # Newline vs <br />
2032     html = html.replace('\n', ' ')
2033     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2034     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2035     # Strip html tags
2036     html = re.sub('<.*?>', '', html)
2037     # Replace html entities
2038     html = unescapeHTML(html)
2039     return html.strip()
2040
2041
2042 def sanitize_open(filename, open_mode):
2043     """Try to open the given filename, and slightly tweak it if this fails.
2044
2045     Attempts to open the given filename. If this fails, it tries to change
2046     the filename slightly, step by step, until it's either able to open it
2047     or it fails and raises a final exception, like the standard open()
2048     function.
2049
2050     It returns the tuple (stream, definitive_file_name).
2051     """
2052     try:
2053         if filename == '-':
2054             if sys.platform == 'win32':
2055                 import msvcrt
2056                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2057             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2058         stream = open(encodeFilename(filename), open_mode)
2059         return (stream, filename)
2060     except (IOError, OSError) as err:
2061         if err.errno in (errno.EACCES,):
2062             raise
2063
2064         # In case of error, try to remove win32 forbidden chars
2065         alt_filename = sanitize_path(filename)
2066         if alt_filename == filename:
2067             raise
2068         else:
2069             # An exception here should be caught in the caller
2070             stream = open(encodeFilename(alt_filename), open_mode)
2071             return (stream, alt_filename)
2072
2073
2074 def timeconvert(timestr):
2075     """Convert RFC 2822 defined time string into system timestamp"""
2076     timestamp = None
2077     timetuple = email.utils.parsedate_tz(timestr)
2078     if timetuple is not None:
2079         timestamp = email.utils.mktime_tz(timetuple)
2080     return timestamp
2081
2082
2083 def sanitize_filename(s, restricted=False, is_id=False):
2084     """Sanitizes a string so it could be used as part of a filename.
2085     If restricted is set, use a stricter subset of allowed characters.
2086     Set is_id if this is not an arbitrary string, but an ID that should be kept
2087     if possible.
2088     """
2089     def replace_insane(char):
2090         if restricted and char in ACCENT_CHARS:
2091             return ACCENT_CHARS[char]
2092         if char == '?' or ord(char) < 32 or ord(char) == 127:
2093             return ''
2094         elif char == '"':
2095             return '' if restricted else '\''
2096         elif char == ':':
2097             return '_-' if restricted else ' -'
2098         elif char in '\\/|*<>':
2099             return '_'
2100         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2101             return '_'
2102         if restricted and ord(char) > 127:
2103             return '_'
2104         return char
2105
2106     # Handle timestamps
2107     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2108     result = ''.join(map(replace_insane, s))
2109     if not is_id:
2110         while '__' in result:
2111             result = result.replace('__', '_')
2112         result = result.strip('_')
2113         # Common case of "Foreign band name - English song title"
2114         if restricted and result.startswith('-_'):
2115             result = result[2:]
2116         if result.startswith('-'):
2117             result = '_' + result[len('-'):]
2118         result = result.lstrip('.')
2119         if not result:
2120             result = '_'
2121     return result
2122
2123
2124 def sanitize_path(s):
2125     """Sanitizes and normalizes path on Windows"""
2126     if sys.platform != 'win32':
2127         return s
2128     drive_or_unc, _ = os.path.splitdrive(s)
2129     if sys.version_info < (2, 7) and not drive_or_unc:
2130         drive_or_unc, _ = os.path.splitunc(s)
2131     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2132     if drive_or_unc:
2133         norm_path.pop(0)
2134     sanitized_path = [
2135         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2136         for path_part in norm_path]
2137     if drive_or_unc:
2138         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2139     return os.path.join(*sanitized_path)
2140
2141
2142 def sanitize_url(url):
2143     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2144     # the number of unwanted failures due to missing protocol
2145     if url.startswith('//'):
2146         return 'http:%s' % url
2147     # Fix some common typos seen so far
2148     COMMON_TYPOS = (
2149         # https://github.com/ytdl-org/youtube-dl/issues/15649
2150         (r'^httpss://', r'https://'),
2151         # https://bx1.be/lives/direct-tv/
2152         (r'^rmtp([es]?)://', r'rtmp\1://'),
2153     )
2154     for mistake, fixup in COMMON_TYPOS:
2155         if re.match(mistake, url):
2156             return re.sub(mistake, fixup, url)
2157     return url
2158
2159
2160 def sanitized_Request(url, *args, **kwargs):
2161     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2162
2163
2164 def expand_path(s):
2165     """Expand shell variables and ~"""
2166     return os.path.expandvars(compat_expanduser(s))
2167
2168
2169 def orderedSet(iterable):
2170     """ Remove all duplicates from the input iterable """
2171     res = []
2172     for el in iterable:
2173         if el not in res:
2174             res.append(el)
2175     return res
2176
2177
2178 def _htmlentity_transform(entity_with_semicolon):
2179     """Transforms an HTML entity to a character."""
2180     entity = entity_with_semicolon[:-1]
2181
2182     # Known non-numeric HTML entity
2183     if entity in compat_html_entities.name2codepoint:
2184         return compat_chr(compat_html_entities.name2codepoint[entity])
2185
2186     # TODO: HTML5 allows entities without a semicolon. For example,
2187     # '&Eacuteric' should be decoded as 'Éric'.
2188     if entity_with_semicolon in compat_html_entities_html5:
2189         return compat_html_entities_html5[entity_with_semicolon]
2190
2191     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2192     if mobj is not None:
2193         numstr = mobj.group(1)
2194         if numstr.startswith('x'):
2195             base = 16
2196             numstr = '0%s' % numstr
2197         else:
2198             base = 10
2199         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2200         try:
2201             return compat_chr(int(numstr, base))
2202         except ValueError:
2203             pass
2204
2205     # Unknown entity in name, return its literal representation
2206     return '&%s;' % entity
2207
2208
2209 def unescapeHTML(s):
2210     if s is None:
2211         return None
2212     assert type(s) == compat_str
2213
2214     return re.sub(
2215         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2216
2217
2218 def get_subprocess_encoding():
2219     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2220         # For subprocess calls, encode with locale encoding
2221         # Refer to http://stackoverflow.com/a/9951851/35070
2222         encoding = preferredencoding()
2223     else:
2224         encoding = sys.getfilesystemencoding()
2225     if encoding is None:
2226         encoding = 'utf-8'
2227     return encoding
2228
2229
2230 def encodeFilename(s, for_subprocess=False):
2231     """
2232     @param s The name of the file
2233     """
2234
2235     assert type(s) == compat_str
2236
2237     # Python 3 has a Unicode API
2238     if sys.version_info >= (3, 0):
2239         return s
2240
2241     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2242     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2243     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2244     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2245         return s
2246
2247     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2248     if sys.platform.startswith('java'):
2249         return s
2250
2251     return s.encode(get_subprocess_encoding(), 'ignore')
2252
2253
2254 def decodeFilename(b, for_subprocess=False):
2255
2256     if sys.version_info >= (3, 0):
2257         return b
2258
2259     if not isinstance(b, bytes):
2260         return b
2261
2262     return b.decode(get_subprocess_encoding(), 'ignore')
2263
2264
2265 def encodeArgument(s):
2266     if not isinstance(s, compat_str):
2267         # Legacy code that uses byte strings
2268         # Uncomment the following line after fixing all post processors
2269         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2270         s = s.decode('ascii')
2271     return encodeFilename(s, True)
2272
2273
2274 def decodeArgument(b):
2275     return decodeFilename(b, True)
2276
2277
2278 def decodeOption(optval):
2279     if optval is None:
2280         return optval
2281     if isinstance(optval, bytes):
2282         optval = optval.decode(preferredencoding())
2283
2284     assert isinstance(optval, compat_str)
2285     return optval
2286
2287
2288 def formatSeconds(secs, delim=':'):
2289     if secs > 3600:
2290         return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
2291     elif secs > 60:
2292         return '%d%s%02d' % (secs // 60, delim, secs % 60)
2293     else:
2294         return '%d' % secs
2295
2296
2297 def make_HTTPS_handler(params, **kwargs):
2298     opts_no_check_certificate = params.get('nocheckcertificate', False)
2299     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2300         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2301         if opts_no_check_certificate:
2302             context.check_hostname = False
2303             context.verify_mode = ssl.CERT_NONE
2304         try:
2305             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2306         except TypeError:
2307             # Python 2.7.8
2308             # (create_default_context present but HTTPSHandler has no context=)
2309             pass
2310
2311     if sys.version_info < (3, 2):
2312         return YoutubeDLHTTPSHandler(params, **kwargs)
2313     else:  # Python < 3.4
2314         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2315         context.verify_mode = (ssl.CERT_NONE
2316                                if opts_no_check_certificate
2317                                else ssl.CERT_REQUIRED)
2318         context.set_default_verify_paths()
2319         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2320
2321
2322 def bug_reports_message():
2323     if ytdl_is_updateable():
2324         update_cmd = 'type  youtube-dlc -U  to update'
2325     else:
2326         update_cmd = 'see  https://github.com/pukkandan/yt-dlc  on how to update'
2327     msg = '; please report this issue on https://github.com/pukkandan/yt-dlc .'
2328     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2329     msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
2330     return msg
2331
2332
2333 class YoutubeDLError(Exception):
2334     """Base exception for YoutubeDL errors."""
2335     pass
2336
2337
2338 class ExtractorError(YoutubeDLError):
2339     """Error during info extraction."""
2340
2341     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2342         """ tb, if given, is the original traceback (so that it can be printed out).
2343         If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
2344         """
2345
2346         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2347             expected = True
2348         if video_id is not None:
2349             msg = video_id + ': ' + msg
2350         if cause:
2351             msg += ' (caused by %r)' % cause
2352         if not expected:
2353             msg += bug_reports_message()
2354         super(ExtractorError, self).__init__(msg)
2355
2356         self.traceback = tb
2357         self.exc_info = sys.exc_info()  # preserve original exception
2358         self.cause = cause
2359         self.video_id = video_id
2360
2361     def format_traceback(self):
2362         if self.traceback is None:
2363             return None
2364         return ''.join(traceback.format_tb(self.traceback))
2365
2366
2367 class UnsupportedError(ExtractorError):
2368     def __init__(self, url):
2369         super(UnsupportedError, self).__init__(
2370             'Unsupported URL: %s' % url, expected=True)
2371         self.url = url
2372
2373
2374 class RegexNotFoundError(ExtractorError):
2375     """Error when a regex didn't match"""
2376     pass
2377
2378
2379 class GeoRestrictedError(ExtractorError):
2380     """Geographic restriction Error exception.
2381
2382     This exception may be thrown when a video is not available from your
2383     geographic location due to geographic restrictions imposed by a website.
2384     """
2385
2386     def __init__(self, msg, countries=None):
2387         super(GeoRestrictedError, self).__init__(msg, expected=True)
2388         self.msg = msg
2389         self.countries = countries
2390
2391
2392 class DownloadError(YoutubeDLError):
2393     """Download Error exception.
2394
2395     This exception may be thrown by FileDownloader objects if they are not
2396     configured to continue on errors. They will contain the appropriate
2397     error message.
2398     """
2399
2400     def __init__(self, msg, exc_info=None):
2401         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2402         super(DownloadError, self).__init__(msg)
2403         self.exc_info = exc_info
2404
2405
2406 class SameFileError(YoutubeDLError):
2407     """Same File exception.
2408
2409     This exception will be thrown by FileDownloader objects if they detect
2410     multiple files would have to be downloaded to the same file on disk.
2411     """
2412     pass
2413
2414
2415 class PostProcessingError(YoutubeDLError):
2416     """Post Processing exception.
2417
2418     This exception may be raised by PostProcessor's .run() method to
2419     indicate an error in the postprocessing task.
2420     """
2421
2422     def __init__(self, msg):
2423         super(PostProcessingError, self).__init__(msg)
2424         self.msg = msg
2425
2426
2427 class MaxDownloadsReached(YoutubeDLError):
2428     """ --max-downloads limit has been reached. """
2429     pass
2430
2431
2432 class UnavailableVideoError(YoutubeDLError):
2433     """Unavailable Format exception.
2434
2435     This exception will be thrown when a video is requested
2436     in a format that is not available for that video.
2437     """
2438     pass
2439
2440
2441 class ContentTooShortError(YoutubeDLError):
2442     """Content Too Short exception.
2443
2444     This exception may be raised by FileDownloader objects when a file they
2445     download is too small for what the server announced first, indicating
2446     the connection was probably interrupted.
2447     """
2448
2449     def __init__(self, downloaded, expected):
2450         super(ContentTooShortError, self).__init__(
2451             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2452         )
2453         # Both in bytes
2454         self.downloaded = downloaded
2455         self.expected = expected
2456
2457
2458 class XAttrMetadataError(YoutubeDLError):
2459     def __init__(self, code=None, msg='Unknown error'):
2460         super(XAttrMetadataError, self).__init__(msg)
2461         self.code = code
2462         self.msg = msg
2463
2464         # Parsing code and msg
2465         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2466                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
2467             self.reason = 'NO_SPACE'
2468         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2469             self.reason = 'VALUE_TOO_LONG'
2470         else:
2471             self.reason = 'NOT_SUPPORTED'
2472
2473
2474 class XAttrUnavailableError(YoutubeDLError):
2475     pass
2476
2477
2478 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2479     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2480     # expected HTTP responses to meet HTTP/1.0 or later (see also
2481     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2482     if sys.version_info < (3, 0):
2483         kwargs['strict'] = True
2484     hc = http_class(*args, **compat_kwargs(kwargs))
2485     source_address = ydl_handler._params.get('source_address')
2486
2487     if source_address is not None:
2488         # This is to workaround _create_connection() from socket where it will try all
2489         # address data from getaddrinfo() including IPv6. This filters the result from
2490         # getaddrinfo() based on the source_address value.
2491         # This is based on the cpython socket.create_connection() function.
2492         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2493         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2494             host, port = address
2495             err = None
2496             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2497             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2498             ip_addrs = [addr for addr in addrs if addr[0] == af]
2499             if addrs and not ip_addrs:
2500                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2501                 raise socket.error(
2502                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2503                     % (ip_version, source_address[0]))
2504             for res in ip_addrs:
2505                 af, socktype, proto, canonname, sa = res
2506                 sock = None
2507                 try:
2508                     sock = socket.socket(af, socktype, proto)
2509                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2510                         sock.settimeout(timeout)
2511                     sock.bind(source_address)
2512                     sock.connect(sa)
2513                     err = None  # Explicitly break reference cycle
2514                     return sock
2515                 except socket.error as _:
2516                     err = _
2517                     if sock is not None:
2518                         sock.close()
2519             if err is not None:
2520                 raise err
2521             else:
2522                 raise socket.error('getaddrinfo returns an empty list')
2523         if hasattr(hc, '_create_connection'):
2524             hc._create_connection = _create_connection
2525         sa = (source_address, 0)
2526         if hasattr(hc, 'source_address'):  # Python 2.7+
2527             hc.source_address = sa
2528         else:  # Python 2.6
2529             def _hc_connect(self, *args, **kwargs):
2530                 sock = _create_connection(
2531                     (self.host, self.port), self.timeout, sa)
2532                 if is_https:
2533                     self.sock = ssl.wrap_socket(
2534                         sock, self.key_file, self.cert_file,
2535                         ssl_version=ssl.PROTOCOL_TLSv1)
2536                 else:
2537                     self.sock = sock
2538             hc.connect = functools.partial(_hc_connect, hc)
2539
2540     return hc
2541
2542
2543 def handle_youtubedl_headers(headers):
2544     filtered_headers = headers
2545
2546     if 'Youtubedl-no-compression' in filtered_headers:
2547         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2548         del filtered_headers['Youtubedl-no-compression']
2549
2550     return filtered_headers
2551
2552
2553 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2554     """Handler for HTTP requests and responses.
2555
2556     This class, when installed with an OpenerDirector, automatically adds
2557     the standard headers to every HTTP request and handles gzipped and
2558     deflated responses from web servers. If compression is to be avoided in
2559     a particular request, the original request in the program code only has
2560     to include the HTTP header "Youtubedl-no-compression", which will be
2561     removed before making the real request.
2562
2563     Part of this code was copied from:
2564
2565     http://techknack.net/python-urllib2-handlers/
2566
2567     Andrew Rowls, the author of that code, agreed to release it to the
2568     public domain.
2569     """
2570
2571     def __init__(self, params, *args, **kwargs):
2572         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2573         self._params = params
2574
2575     def http_open(self, req):
2576         conn_class = compat_http_client.HTTPConnection
2577
2578         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2579         if socks_proxy:
2580             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2581             del req.headers['Ytdl-socks-proxy']
2582
2583         return self.do_open(functools.partial(
2584             _create_http_connection, self, conn_class, False),
2585             req)
2586
2587     @staticmethod
2588     def deflate(data):
2589         try:
2590             return zlib.decompress(data, -zlib.MAX_WBITS)
2591         except zlib.error:
2592             return zlib.decompress(data)
2593
2594     def http_request(self, req):
2595         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2596         # always respected by websites, some tend to give out URLs with non percent-encoded
2597         # non-ASCII characters (see telemb.py, ard.py [#3412])
2598         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2599         # To work around aforementioned issue we will replace request's original URL with
2600         # percent-encoded one
2601         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2602         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2603         url = req.get_full_url()
2604         url_escaped = escape_url(url)
2605
2606         # Substitute URL if any change after escaping
2607         if url != url_escaped:
2608             req = update_Request(req, url=url_escaped)
2609
2610         for h, v in std_headers.items():
2611             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2612             # The dict keys are capitalized because of this bug by urllib
2613             if h.capitalize() not in req.headers:
2614                 req.add_header(h, v)
2615
2616         req.headers = handle_youtubedl_headers(req.headers)
2617
2618         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2619             # Python 2.6 is brain-dead when it comes to fragments
2620             req._Request__original = req._Request__original.partition('#')[0]
2621             req._Request__r_type = req._Request__r_type.partition('#')[0]
2622
2623         return req
2624
2625     def http_response(self, req, resp):
2626         old_resp = resp
2627         # gzip
2628         if resp.headers.get('Content-encoding', '') == 'gzip':
2629             content = resp.read()
2630             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2631             try:
2632                 uncompressed = io.BytesIO(gz.read())
2633             except IOError as original_ioerror:
2634                 # There may be junk add the end of the file
2635                 # See http://stackoverflow.com/q/4928560/35070 for details
2636                 for i in range(1, 1024):
2637                     try:
2638                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2639                         uncompressed = io.BytesIO(gz.read())
2640                     except IOError:
2641                         continue
2642                     break
2643                 else:
2644                     raise original_ioerror
2645             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2646             resp.msg = old_resp.msg
2647             del resp.headers['Content-encoding']
2648         # deflate
2649         if resp.headers.get('Content-encoding', '') == 'deflate':
2650             gz = io.BytesIO(self.deflate(resp.read()))
2651             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2652             resp.msg = old_resp.msg
2653             del resp.headers['Content-encoding']
2654         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2655         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2656         if 300 <= resp.code < 400:
2657             location = resp.headers.get('Location')
2658             if location:
2659                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2660                 if sys.version_info >= (3, 0):
2661                     location = location.encode('iso-8859-1').decode('utf-8')
2662                 else:
2663                     location = location.decode('utf-8')
2664                 location_escaped = escape_url(location)
2665                 if location != location_escaped:
2666                     del resp.headers['Location']
2667                     if sys.version_info < (3, 0):
2668                         location_escaped = location_escaped.encode('utf-8')
2669                     resp.headers['Location'] = location_escaped
2670         return resp
2671
2672     https_request = http_request
2673     https_response = http_response
2674
2675
2676 def make_socks_conn_class(base_class, socks_proxy):
2677     assert issubclass(base_class, (
2678         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2679
2680     url_components = compat_urlparse.urlparse(socks_proxy)
2681     if url_components.scheme.lower() == 'socks5':
2682         socks_type = ProxyType.SOCKS5
2683     elif url_components.scheme.lower() in ('socks', 'socks4'):
2684         socks_type = ProxyType.SOCKS4
2685     elif url_components.scheme.lower() == 'socks4a':
2686         socks_type = ProxyType.SOCKS4A
2687
2688     def unquote_if_non_empty(s):
2689         if not s:
2690             return s
2691         return compat_urllib_parse_unquote_plus(s)
2692
2693     proxy_args = (
2694         socks_type,
2695         url_components.hostname, url_components.port or 1080,
2696         True,  # Remote DNS
2697         unquote_if_non_empty(url_components.username),
2698         unquote_if_non_empty(url_components.password),
2699     )
2700
2701     class SocksConnection(base_class):
2702         def connect(self):
2703             self.sock = sockssocket()
2704             self.sock.setproxy(*proxy_args)
2705             if type(self.timeout) in (int, float):
2706                 self.sock.settimeout(self.timeout)
2707             self.sock.connect((self.host, self.port))
2708
2709             if isinstance(self, compat_http_client.HTTPSConnection):
2710                 if hasattr(self, '_context'):  # Python > 2.6
2711                     self.sock = self._context.wrap_socket(
2712                         self.sock, server_hostname=self.host)
2713                 else:
2714                     self.sock = ssl.wrap_socket(self.sock)
2715
2716     return SocksConnection
2717
2718
2719 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2720     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2721         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2722         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2723         self._params = params
2724
2725     def https_open(self, req):
2726         kwargs = {}
2727         conn_class = self._https_conn_class
2728
2729         if hasattr(self, '_context'):  # python > 2.6
2730             kwargs['context'] = self._context
2731         if hasattr(self, '_check_hostname'):  # python 3.x
2732             kwargs['check_hostname'] = self._check_hostname
2733
2734         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2735         if socks_proxy:
2736             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2737             del req.headers['Ytdl-socks-proxy']
2738
2739         return self.do_open(functools.partial(
2740             _create_http_connection, self, conn_class, True),
2741             req, **kwargs)
2742
2743
2744 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2745     """
2746     See [1] for cookie file format.
2747
2748     1. https://curl.haxx.se/docs/http-cookies.html
2749     """
2750     _HTTPONLY_PREFIX = '#HttpOnly_'
2751     _ENTRY_LEN = 7
2752     _HEADER = '''# Netscape HTTP Cookie File
2753 # This file is generated by youtube-dlc.  Do not edit.
2754
2755 '''
2756     _CookieFileEntry = collections.namedtuple(
2757         'CookieFileEntry',
2758         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2759
2760     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2761         """
2762         Save cookies to a file.
2763
2764         Most of the code is taken from CPython 3.8 and slightly adapted
2765         to support cookie files with UTF-8 in both python 2 and 3.
2766         """
2767         if filename is None:
2768             if self.filename is not None:
2769                 filename = self.filename
2770             else:
2771                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2772
2773         # Store session cookies with `expires` set to 0 instead of an empty
2774         # string
2775         for cookie in self:
2776             if cookie.expires is None:
2777                 cookie.expires = 0
2778
2779         with io.open(filename, 'w', encoding='utf-8') as f:
2780             f.write(self._HEADER)
2781             now = time.time()
2782             for cookie in self:
2783                 if not ignore_discard and cookie.discard:
2784                     continue
2785                 if not ignore_expires and cookie.is_expired(now):
2786                     continue
2787                 if cookie.secure:
2788                     secure = 'TRUE'
2789                 else:
2790                     secure = 'FALSE'
2791                 if cookie.domain.startswith('.'):
2792                     initial_dot = 'TRUE'
2793                 else:
2794                     initial_dot = 'FALSE'
2795                 if cookie.expires is not None:
2796                     expires = compat_str(cookie.expires)
2797                 else:
2798                     expires = ''
2799                 if cookie.value is None:
2800                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
2801                     # with no name, whereas http.cookiejar regards it as a
2802                     # cookie with no value.
2803                     name = ''
2804                     value = cookie.name
2805                 else:
2806                     name = cookie.name
2807                     value = cookie.value
2808                 f.write(
2809                     '\t'.join([cookie.domain, initial_dot, cookie.path,
2810                                secure, expires, name, value]) + '\n')
2811
2812     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2813         """Load cookies from a file."""
2814         if filename is None:
2815             if self.filename is not None:
2816                 filename = self.filename
2817             else:
2818                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2819
2820         def prepare_line(line):
2821             if line.startswith(self._HTTPONLY_PREFIX):
2822                 line = line[len(self._HTTPONLY_PREFIX):]
2823             # comments and empty lines are fine
2824             if line.startswith('#') or not line.strip():
2825                 return line
2826             cookie_list = line.split('\t')
2827             if len(cookie_list) != self._ENTRY_LEN:
2828                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2829             cookie = self._CookieFileEntry(*cookie_list)
2830             if cookie.expires_at and not cookie.expires_at.isdigit():
2831                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2832             return line
2833
2834         cf = io.StringIO()
2835         with io.open(filename, encoding='utf-8') as f:
2836             for line in f:
2837                 try:
2838                     cf.write(prepare_line(line))
2839                 except compat_cookiejar.LoadError as e:
2840                     write_string(
2841                         'WARNING: skipping cookie file entry due to %s: %r\n'
2842                         % (e, line), sys.stderr)
2843                     continue
2844         cf.seek(0)
2845         self._really_load(cf, filename, ignore_discard, ignore_expires)
2846         # Session cookies are denoted by either `expires` field set to
2847         # an empty string or 0. MozillaCookieJar only recognizes the former
2848         # (see [1]). So we need force the latter to be recognized as session
2849         # cookies on our own.
2850         # Session cookies may be important for cookies-based authentication,
2851         # e.g. usually, when user does not check 'Remember me' check box while
2852         # logging in on a site, some important cookies are stored as session
2853         # cookies so that not recognizing them will result in failed login.
2854         # 1. https://bugs.python.org/issue17164
2855         for cookie in self:
2856             # Treat `expires=0` cookies as session cookies
2857             if cookie.expires == 0:
2858                 cookie.expires = None
2859                 cookie.discard = True
2860
2861
2862 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2863     def __init__(self, cookiejar=None):
2864         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2865
2866     def http_response(self, request, response):
2867         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2868         # characters in Set-Cookie HTTP header of last response (see
2869         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2870         # In order to at least prevent crashing we will percent encode Set-Cookie
2871         # header before HTTPCookieProcessor starts processing it.
2872         # if sys.version_info < (3, 0) and response.headers:
2873         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2874         #         set_cookie = response.headers.get(set_cookie_header)
2875         #         if set_cookie:
2876         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2877         #             if set_cookie != set_cookie_escaped:
2878         #                 del response.headers[set_cookie_header]
2879         #                 response.headers[set_cookie_header] = set_cookie_escaped
2880         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2881
2882     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2883     https_response = http_response
2884
2885
2886 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
2887     if sys.version_info[0] < 3:
2888         def redirect_request(self, req, fp, code, msg, headers, newurl):
2889             # On python 2 urlh.geturl() may sometimes return redirect URL
2890             # as byte string instead of unicode. This workaround allows
2891             # to force it always return unicode.
2892             return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2893
2894
2895 def extract_timezone(date_str):
2896     m = re.search(
2897         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2898         date_str)
2899     if not m:
2900         timezone = datetime.timedelta()
2901     else:
2902         date_str = date_str[:-len(m.group('tz'))]
2903         if not m.group('sign'):
2904             timezone = datetime.timedelta()
2905         else:
2906             sign = 1 if m.group('sign') == '+' else -1
2907             timezone = datetime.timedelta(
2908                 hours=sign * int(m.group('hours')),
2909                 minutes=sign * int(m.group('minutes')))
2910     return timezone, date_str
2911
2912
2913 def parse_iso8601(date_str, delimiter='T', timezone=None):
2914     """ Return a UNIX timestamp from the given date """
2915
2916     if date_str is None:
2917         return None
2918
2919     date_str = re.sub(r'\.[0-9]+', '', date_str)
2920
2921     if timezone is None:
2922         timezone, date_str = extract_timezone(date_str)
2923
2924     try:
2925         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
2926         dt = datetime.datetime.strptime(date_str, date_format) - timezone
2927         return calendar.timegm(dt.timetuple())
2928     except ValueError:
2929         pass
2930
2931
2932 def date_formats(day_first=True):
2933     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
2934
2935
2936 def unified_strdate(date_str, day_first=True):
2937     """Return a string with the date in the format YYYYMMDD"""
2938
2939     if date_str is None:
2940         return None
2941     upload_date = None
2942     # Replace commas
2943     date_str = date_str.replace(',', ' ')
2944     # Remove AM/PM + timezone
2945     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2946     _, date_str = extract_timezone(date_str)
2947
2948     for expression in date_formats(day_first):
2949         try:
2950             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
2951         except ValueError:
2952             pass
2953     if upload_date is None:
2954         timetuple = email.utils.parsedate_tz(date_str)
2955         if timetuple:
2956             try:
2957                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
2958             except ValueError:
2959                 pass
2960     if upload_date is not None:
2961         return compat_str(upload_date)
2962
2963
2964 def unified_timestamp(date_str, day_first=True):
2965     if date_str is None:
2966         return None
2967
2968     date_str = re.sub(r'[,|]', '', date_str)
2969
2970     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
2971     timezone, date_str = extract_timezone(date_str)
2972
2973     # Remove AM/PM + timezone
2974     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2975
2976     # Remove unrecognized timezones from ISO 8601 alike timestamps
2977     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
2978     if m:
2979         date_str = date_str[:-len(m.group('tz'))]
2980
2981     # Python only supports microseconds, so remove nanoseconds
2982     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
2983     if m:
2984         date_str = m.group(1)
2985
2986     for expression in date_formats(day_first):
2987         try:
2988             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
2989             return calendar.timegm(dt.timetuple())
2990         except ValueError:
2991             pass
2992     timetuple = email.utils.parsedate_tz(date_str)
2993     if timetuple:
2994         return calendar.timegm(timetuple) + pm_delta * 3600
2995
2996
2997 def determine_ext(url, default_ext='unknown_video'):
2998     if url is None or '.' not in url:
2999         return default_ext
3000     guess = url.partition('?')[0].rpartition('.')[2]
3001     if re.match(r'^[A-Za-z0-9]+$', guess):
3002         return guess
3003     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3004     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3005         return guess.rstrip('/')
3006     else:
3007         return default_ext
3008
3009
3010 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3011     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3012
3013
3014 def date_from_str(date_str):
3015     """
3016     Return a datetime object from a string in the format YYYYMMDD or
3017     (now|today)[+-][0-9](day|week|month|year)(s)?"""
3018     today = datetime.date.today()
3019     if date_str in ('now', 'today'):
3020         return today
3021     if date_str == 'yesterday':
3022         return today - datetime.timedelta(days=1)
3023     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
3024     if match is not None:
3025         sign = match.group('sign')
3026         time = int(match.group('time'))
3027         if sign == '-':
3028             time = -time
3029         unit = match.group('unit')
3030         # A bad approximation?
3031         if unit == 'month':
3032             unit = 'day'
3033             time *= 30
3034         elif unit == 'year':
3035             unit = 'day'
3036             time *= 365
3037         unit += 's'
3038         delta = datetime.timedelta(**{unit: time})
3039         return today + delta
3040     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
3041
3042
3043 def hyphenate_date(date_str):
3044     """
3045     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3046     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3047     if match is not None:
3048         return '-'.join(match.groups())
3049     else:
3050         return date_str
3051
3052
3053 class DateRange(object):
3054     """Represents a time interval between two dates"""
3055
3056     def __init__(self, start=None, end=None):
3057         """start and end must be strings in the format accepted by date"""
3058         if start is not None:
3059             self.start = date_from_str(start)
3060         else:
3061             self.start = datetime.datetime.min.date()
3062         if end is not None:
3063             self.end = date_from_str(end)
3064         else:
3065             self.end = datetime.datetime.max.date()
3066         if self.start > self.end:
3067             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
3068
3069     @classmethod
3070     def day(cls, day):
3071         """Returns a range that only contains the given day"""
3072         return cls(day, day)
3073
3074     def __contains__(self, date):
3075         """Check if the date is in the range"""
3076         if not isinstance(date, datetime.date):
3077             date = date_from_str(date)
3078         return self.start <= date <= self.end
3079
3080     def __str__(self):
3081         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3082
3083
3084 def platform_name():
3085     """ Returns the platform name as a compat_str """
3086     res = platform.platform()
3087     if isinstance(res, bytes):
3088         res = res.decode(preferredencoding())
3089
3090     assert isinstance(res, compat_str)
3091     return res
3092
3093
3094 def _windows_write_string(s, out):
3095     """ Returns True if the string was written using special methods,
3096     False if it has yet to be written out."""
3097     # Adapted from http://stackoverflow.com/a/3259271/35070
3098
3099     import ctypes
3100     import ctypes.wintypes
3101
3102     WIN_OUTPUT_IDS = {
3103         1: -11,
3104         2: -12,
3105     }
3106
3107     try:
3108         fileno = out.fileno()
3109     except AttributeError:
3110         # If the output stream doesn't have a fileno, it's virtual
3111         return False
3112     except io.UnsupportedOperation:
3113         # Some strange Windows pseudo files?
3114         return False
3115     if fileno not in WIN_OUTPUT_IDS:
3116         return False
3117
3118     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3119         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3120         ('GetStdHandle', ctypes.windll.kernel32))
3121     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3122
3123     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3124         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3125         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3126         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3127     written = ctypes.wintypes.DWORD(0)
3128
3129     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3130     FILE_TYPE_CHAR = 0x0002
3131     FILE_TYPE_REMOTE = 0x8000
3132     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3133         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3134         ctypes.POINTER(ctypes.wintypes.DWORD))(
3135         ('GetConsoleMode', ctypes.windll.kernel32))
3136     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3137
3138     def not_a_console(handle):
3139         if handle == INVALID_HANDLE_VALUE or handle is None:
3140             return True
3141         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3142                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3143
3144     if not_a_console(h):
3145         return False
3146
3147     def next_nonbmp_pos(s):
3148         try:
3149             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3150         except StopIteration:
3151             return len(s)
3152
3153     while s:
3154         count = min(next_nonbmp_pos(s), 1024)
3155
3156         ret = WriteConsoleW(
3157             h, s, count if count else 2, ctypes.byref(written), None)
3158         if ret == 0:
3159             raise OSError('Failed to write string')
3160         if not count:  # We just wrote a non-BMP character
3161             assert written.value == 2
3162             s = s[1:]
3163         else:
3164             assert written.value > 0
3165             s = s[written.value:]
3166     return True
3167
3168
3169 def write_string(s, out=None, encoding=None):
3170     if out is None:
3171         out = sys.stderr
3172     assert type(s) == compat_str
3173
3174     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3175         if _windows_write_string(s, out):
3176             return
3177
3178     if ('b' in getattr(out, 'mode', '')
3179             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3180         byt = s.encode(encoding or preferredencoding(), 'ignore')
3181         out.write(byt)
3182     elif hasattr(out, 'buffer'):
3183         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3184         byt = s.encode(enc, 'ignore')
3185         out.buffer.write(byt)
3186     else:
3187         out.write(s)
3188     out.flush()
3189
3190
3191 def bytes_to_intlist(bs):
3192     if not bs:
3193         return []
3194     if isinstance(bs[0], int):  # Python 3
3195         return list(bs)
3196     else:
3197         return [ord(c) for c in bs]
3198
3199
3200 def intlist_to_bytes(xs):
3201     if not xs:
3202         return b''
3203     return compat_struct_pack('%dB' % len(xs), *xs)
3204
3205
3206 # Cross-platform file locking
3207 if sys.platform == 'win32':
3208     import ctypes.wintypes
3209     import msvcrt
3210
3211     class OVERLAPPED(ctypes.Structure):
3212         _fields_ = [
3213             ('Internal', ctypes.wintypes.LPVOID),
3214             ('InternalHigh', ctypes.wintypes.LPVOID),
3215             ('Offset', ctypes.wintypes.DWORD),
3216             ('OffsetHigh', ctypes.wintypes.DWORD),
3217             ('hEvent', ctypes.wintypes.HANDLE),
3218         ]
3219
3220     kernel32 = ctypes.windll.kernel32
3221     LockFileEx = kernel32.LockFileEx
3222     LockFileEx.argtypes = [
3223         ctypes.wintypes.HANDLE,     # hFile
3224         ctypes.wintypes.DWORD,      # dwFlags
3225         ctypes.wintypes.DWORD,      # dwReserved
3226         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3227         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3228         ctypes.POINTER(OVERLAPPED)  # Overlapped
3229     ]
3230     LockFileEx.restype = ctypes.wintypes.BOOL
3231     UnlockFileEx = kernel32.UnlockFileEx
3232     UnlockFileEx.argtypes = [
3233         ctypes.wintypes.HANDLE,     # hFile
3234         ctypes.wintypes.DWORD,      # dwReserved
3235         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3236         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3237         ctypes.POINTER(OVERLAPPED)  # Overlapped
3238     ]
3239     UnlockFileEx.restype = ctypes.wintypes.BOOL
3240     whole_low = 0xffffffff
3241     whole_high = 0x7fffffff
3242
3243     def _lock_file(f, exclusive):
3244         overlapped = OVERLAPPED()
3245         overlapped.Offset = 0
3246         overlapped.OffsetHigh = 0
3247         overlapped.hEvent = 0
3248         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3249         handle = msvcrt.get_osfhandle(f.fileno())
3250         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3251                           whole_low, whole_high, f._lock_file_overlapped_p):
3252             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3253
3254     def _unlock_file(f):
3255         assert f._lock_file_overlapped_p
3256         handle = msvcrt.get_osfhandle(f.fileno())
3257         if not UnlockFileEx(handle, 0,
3258                             whole_low, whole_high, f._lock_file_overlapped_p):
3259             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3260
3261 else:
3262     # Some platforms, such as Jython, is missing fcntl
3263     try:
3264         import fcntl
3265
3266         def _lock_file(f, exclusive):
3267             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3268
3269         def _unlock_file(f):
3270             fcntl.flock(f, fcntl.LOCK_UN)
3271     except ImportError:
3272         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3273
3274         def _lock_file(f, exclusive):
3275             raise IOError(UNSUPPORTED_MSG)
3276
3277         def _unlock_file(f):
3278             raise IOError(UNSUPPORTED_MSG)
3279
3280
3281 class locked_file(object):
3282     def __init__(self, filename, mode, encoding=None):
3283         assert mode in ['r', 'a', 'w']
3284         self.f = io.open(filename, mode, encoding=encoding)
3285         self.mode = mode
3286
3287     def __enter__(self):
3288         exclusive = self.mode != 'r'
3289         try:
3290             _lock_file(self.f, exclusive)
3291         except IOError:
3292             self.f.close()
3293             raise
3294         return self
3295
3296     def __exit__(self, etype, value, traceback):
3297         try:
3298             _unlock_file(self.f)
3299         finally:
3300             self.f.close()
3301
3302     def __iter__(self):
3303         return iter(self.f)
3304
3305     def write(self, *args):
3306         return self.f.write(*args)
3307
3308     def read(self, *args):
3309         return self.f.read(*args)
3310
3311
3312 def get_filesystem_encoding():
3313     encoding = sys.getfilesystemencoding()
3314     return encoding if encoding is not None else 'utf-8'
3315
3316
3317 def shell_quote(args):
3318     quoted_args = []
3319     encoding = get_filesystem_encoding()
3320     for a in args:
3321         if isinstance(a, bytes):
3322             # We may get a filename encoded with 'encodeFilename'
3323             a = a.decode(encoding)
3324         quoted_args.append(compat_shlex_quote(a))
3325     return ' '.join(quoted_args)
3326
3327
3328 def smuggle_url(url, data):
3329     """ Pass additional data in a URL for internal use. """
3330
3331     url, idata = unsmuggle_url(url, {})
3332     data.update(idata)
3333     sdata = compat_urllib_parse_urlencode(
3334         {'__youtubedl_smuggle': json.dumps(data)})
3335     return url + '#' + sdata
3336
3337
3338 def unsmuggle_url(smug_url, default=None):
3339     if '#__youtubedl_smuggle' not in smug_url:
3340         return smug_url, default
3341     url, _, sdata = smug_url.rpartition('#')
3342     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3343     data = json.loads(jsond)
3344     return url, data
3345
3346
3347 def format_bytes(bytes):
3348     if bytes is None:
3349         return 'N/A'
3350     if type(bytes) is str:
3351         bytes = float(bytes)
3352     if bytes == 0.0:
3353         exponent = 0
3354     else:
3355         exponent = int(math.log(bytes, 1024.0))
3356     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3357     converted = float(bytes) / float(1024 ** exponent)
3358     return '%.2f%s' % (converted, suffix)
3359
3360
3361 def lookup_unit_table(unit_table, s):
3362     units_re = '|'.join(re.escape(u) for u in unit_table)
3363     m = re.match(
3364         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3365     if not m:
3366         return None
3367     num_str = m.group('num').replace(',', '.')
3368     mult = unit_table[m.group('unit')]
3369     return int(float(num_str) * mult)
3370
3371
3372 def parse_filesize(s):
3373     if s is None:
3374         return None
3375
3376     # The lower-case forms are of course incorrect and unofficial,
3377     # but we support those too
3378     _UNIT_TABLE = {
3379         'B': 1,
3380         'b': 1,
3381         'bytes': 1,
3382         'KiB': 1024,
3383         'KB': 1000,
3384         'kB': 1024,
3385         'Kb': 1000,
3386         'kb': 1000,
3387         'kilobytes': 1000,
3388         'kibibytes': 1024,
3389         'MiB': 1024 ** 2,
3390         'MB': 1000 ** 2,
3391         'mB': 1024 ** 2,
3392         'Mb': 1000 ** 2,
3393         'mb': 1000 ** 2,
3394         'megabytes': 1000 ** 2,
3395         'mebibytes': 1024 ** 2,
3396         'GiB': 1024 ** 3,
3397         'GB': 1000 ** 3,
3398         'gB': 1024 ** 3,
3399         'Gb': 1000 ** 3,
3400         'gb': 1000 ** 3,
3401         'gigabytes': 1000 ** 3,
3402         'gibibytes': 1024 ** 3,
3403         'TiB': 1024 ** 4,
3404         'TB': 1000 ** 4,
3405         'tB': 1024 ** 4,
3406         'Tb': 1000 ** 4,
3407         'tb': 1000 ** 4,
3408         'terabytes': 1000 ** 4,
3409         'tebibytes': 1024 ** 4,
3410         'PiB': 1024 ** 5,
3411         'PB': 1000 ** 5,
3412         'pB': 1024 ** 5,
3413         'Pb': 1000 ** 5,
3414         'pb': 1000 ** 5,
3415         'petabytes': 1000 ** 5,
3416         'pebibytes': 1024 ** 5,
3417         'EiB': 1024 ** 6,
3418         'EB': 1000 ** 6,
3419         'eB': 1024 ** 6,
3420         'Eb': 1000 ** 6,
3421         'eb': 1000 ** 6,
3422         'exabytes': 1000 ** 6,
3423         'exbibytes': 1024 ** 6,
3424         'ZiB': 1024 ** 7,
3425         'ZB': 1000 ** 7,
3426         'zB': 1024 ** 7,
3427         'Zb': 1000 ** 7,
3428         'zb': 1000 ** 7,
3429         'zettabytes': 1000 ** 7,
3430         'zebibytes': 1024 ** 7,
3431         'YiB': 1024 ** 8,
3432         'YB': 1000 ** 8,
3433         'yB': 1024 ** 8,
3434         'Yb': 1000 ** 8,
3435         'yb': 1000 ** 8,
3436         'yottabytes': 1000 ** 8,
3437         'yobibytes': 1024 ** 8,
3438     }
3439
3440     return lookup_unit_table(_UNIT_TABLE, s)
3441
3442
3443 def parse_count(s):
3444     if s is None:
3445         return None
3446
3447     s = s.strip()
3448
3449     if re.match(r'^[\d,.]+$', s):
3450         return str_to_int(s)
3451
3452     _UNIT_TABLE = {
3453         'k': 1000,
3454         'K': 1000,
3455         'm': 1000 ** 2,
3456         'M': 1000 ** 2,
3457         'kk': 1000 ** 2,
3458         'KK': 1000 ** 2,
3459     }
3460
3461     return lookup_unit_table(_UNIT_TABLE, s)
3462
3463
3464 def parse_resolution(s):
3465     if s is None:
3466         return {}
3467
3468     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3469     if mobj:
3470         return {
3471             'width': int(mobj.group('w')),
3472             'height': int(mobj.group('h')),
3473         }
3474
3475     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3476     if mobj:
3477         return {'height': int(mobj.group(1))}
3478
3479     mobj = re.search(r'\b([48])[kK]\b', s)
3480     if mobj:
3481         return {'height': int(mobj.group(1)) * 540}
3482
3483     return {}
3484
3485
3486 def parse_bitrate(s):
3487     if not isinstance(s, compat_str):
3488         return
3489     mobj = re.search(r'\b(\d+)\s*kbps', s)
3490     if mobj:
3491         return int(mobj.group(1))
3492
3493
3494 def month_by_name(name, lang='en'):
3495     """ Return the number of a month by (locale-independently) English name """
3496
3497     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3498
3499     try:
3500         return month_names.index(name) + 1
3501     except ValueError:
3502         return None
3503
3504
3505 def month_by_abbreviation(abbrev):
3506     """ Return the number of a month by (locale-independently) English
3507         abbreviations """
3508
3509     try:
3510         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3511     except ValueError:
3512         return None
3513
3514
3515 def fix_xml_ampersands(xml_str):
3516     """Replace all the '&' by '&amp;' in XML"""
3517     return re.sub(
3518         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3519         '&amp;',
3520         xml_str)
3521
3522
3523 def setproctitle(title):
3524     assert isinstance(title, compat_str)
3525
3526     # ctypes in Jython is not complete
3527     # http://bugs.jython.org/issue2148
3528     if sys.platform.startswith('java'):
3529         return
3530
3531     try:
3532         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3533     except OSError:
3534         return
3535     except TypeError:
3536         # LoadLibrary in Windows Python 2.7.13 only expects
3537         # a bytestring, but since unicode_literals turns
3538         # every string into a unicode string, it fails.
3539         return
3540     title_bytes = title.encode('utf-8')
3541     buf = ctypes.create_string_buffer(len(title_bytes))
3542     buf.value = title_bytes
3543     try:
3544         libc.prctl(15, buf, 0, 0, 0)
3545     except AttributeError:
3546         return  # Strange libc, just skip this
3547
3548
3549 def remove_start(s, start):
3550     return s[len(start):] if s is not None and s.startswith(start) else s
3551
3552
3553 def remove_end(s, end):
3554     return s[:-len(end)] if s is not None and s.endswith(end) else s
3555
3556
3557 def remove_quotes(s):
3558     if s is None or len(s) < 2:
3559         return s
3560     for quote in ('"', "'", ):
3561         if s[0] == quote and s[-1] == quote:
3562             return s[1:-1]
3563     return s
3564
3565
3566 def get_domain(url):
3567     domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3568     return domain.group('domain') if domain else None
3569
3570
3571 def url_basename(url):
3572     path = compat_urlparse.urlparse(url).path
3573     return path.strip('/').split('/')[-1]
3574
3575
3576 def base_url(url):
3577     return re.match(r'https?://[^?#&]+/', url).group()
3578
3579
3580 def urljoin(base, path):
3581     if isinstance(path, bytes):
3582         path = path.decode('utf-8')
3583     if not isinstance(path, compat_str) or not path:
3584         return None
3585     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3586         return path
3587     if isinstance(base, bytes):
3588         base = base.decode('utf-8')
3589     if not isinstance(base, compat_str) or not re.match(
3590             r'^(?:https?:)?//', base):
3591         return None
3592     return compat_urlparse.urljoin(base, path)
3593
3594
3595 class HEADRequest(compat_urllib_request.Request):
3596     def get_method(self):
3597         return 'HEAD'
3598
3599
3600 class PUTRequest(compat_urllib_request.Request):
3601     def get_method(self):
3602         return 'PUT'
3603
3604
3605 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3606     if get_attr:
3607         if v is not None:
3608             v = getattr(v, get_attr, None)
3609     if v == '':
3610         v = None
3611     if v is None:
3612         return default
3613     try:
3614         return int(v) * invscale // scale
3615     except (ValueError, TypeError):
3616         return default
3617
3618
3619 def str_or_none(v, default=None):
3620     return default if v is None else compat_str(v)
3621
3622
3623 def str_to_int(int_str):
3624     """ A more relaxed version of int_or_none """
3625     if isinstance(int_str, compat_integer_types):
3626         return int_str
3627     elif isinstance(int_str, compat_str):
3628         int_str = re.sub(r'[,\.\+]', '', int_str)
3629         return int_or_none(int_str)
3630
3631
3632 def float_or_none(v, scale=1, invscale=1, default=None):
3633     if v is None:
3634         return default
3635     try:
3636         return float(v) * invscale / scale
3637     except (ValueError, TypeError):
3638         return default
3639
3640
3641 def bool_or_none(v, default=None):
3642     return v if isinstance(v, bool) else default
3643
3644
3645 def strip_or_none(v, default=None):
3646     return v.strip() if isinstance(v, compat_str) else default
3647
3648
3649 def url_or_none(url):
3650     if not url or not isinstance(url, compat_str):
3651         return None
3652     url = url.strip()
3653     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3654
3655
3656 def parse_duration(s):
3657     if not isinstance(s, compat_basestring):
3658         return None
3659
3660     s = s.strip()
3661
3662     days, hours, mins, secs, ms = [None] * 5
3663     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3664     if m:
3665         days, hours, mins, secs, ms = m.groups()
3666     else:
3667         m = re.match(
3668             r'''(?ix)(?:P?
3669                 (?:
3670                     [0-9]+\s*y(?:ears?)?\s*
3671                 )?
3672                 (?:
3673                     [0-9]+\s*m(?:onths?)?\s*
3674                 )?
3675                 (?:
3676                     [0-9]+\s*w(?:eeks?)?\s*
3677                 )?
3678                 (?:
3679                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3680                 )?
3681                 T)?
3682                 (?:
3683                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3684                 )?
3685                 (?:
3686                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3687                 )?
3688                 (?:
3689                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3690                 )?Z?$''', s)
3691         if m:
3692             days, hours, mins, secs, ms = m.groups()
3693         else:
3694             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3695             if m:
3696                 hours, mins = m.groups()
3697             else:
3698                 return None
3699
3700     duration = 0
3701     if secs:
3702         duration += float(secs)
3703     if mins:
3704         duration += float(mins) * 60
3705     if hours:
3706         duration += float(hours) * 60 * 60
3707     if days:
3708         duration += float(days) * 24 * 60 * 60
3709     if ms:
3710         duration += float(ms)
3711     return duration
3712
3713
3714 def prepend_extension(filename, ext, expected_real_ext=None):
3715     name, real_ext = os.path.splitext(filename)
3716     return (
3717         '{0}.{1}{2}'.format(name, ext, real_ext)
3718         if not expected_real_ext or real_ext[1:] == expected_real_ext
3719         else '{0}.{1}'.format(filename, ext))
3720
3721
3722 def replace_extension(filename, ext, expected_real_ext=None):
3723     name, real_ext = os.path.splitext(filename)
3724     return '{0}.{1}'.format(
3725         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3726         ext)
3727
3728
3729 def check_executable(exe, args=[]):
3730     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3731     args can be a list of arguments for a short output (like -version) """
3732     try:
3733         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
3734     except OSError:
3735         return False
3736     return exe
3737
3738
3739 def get_exe_version(exe, args=['--version'],
3740                     version_re=None, unrecognized='present'):
3741     """ Returns the version of the specified executable,
3742     or False if the executable is not present """
3743     try:
3744         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3745         # SIGTTOU if youtube-dlc is run in the background.
3746         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3747         out, _ = subprocess.Popen(
3748             [encodeArgument(exe)] + args,
3749             stdin=subprocess.PIPE,
3750             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
3751     except OSError:
3752         return False
3753     if isinstance(out, bytes):  # Python 2.x
3754         out = out.decode('ascii', 'ignore')
3755     return detect_exe_version(out, version_re, unrecognized)
3756
3757
3758 def detect_exe_version(output, version_re=None, unrecognized='present'):
3759     assert isinstance(output, compat_str)
3760     if version_re is None:
3761         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3762     m = re.search(version_re, output)
3763     if m:
3764         return m.group(1)
3765     else:
3766         return unrecognized
3767
3768
3769 class PagedList(object):
3770     def __len__(self):
3771         # This is only useful for tests
3772         return len(self.getslice())
3773
3774
3775 class OnDemandPagedList(PagedList):
3776     def __init__(self, pagefunc, pagesize, use_cache=True):
3777         self._pagefunc = pagefunc
3778         self._pagesize = pagesize
3779         self._use_cache = use_cache
3780         if use_cache:
3781             self._cache = {}
3782
3783     def getslice(self, start=0, end=None):
3784         res = []
3785         for pagenum in itertools.count(start // self._pagesize):
3786             firstid = pagenum * self._pagesize
3787             nextfirstid = pagenum * self._pagesize + self._pagesize
3788             if start >= nextfirstid:
3789                 continue
3790
3791             page_results = None
3792             if self._use_cache:
3793                 page_results = self._cache.get(pagenum)
3794             if page_results is None:
3795                 page_results = list(self._pagefunc(pagenum))
3796             if self._use_cache:
3797                 self._cache[pagenum] = page_results
3798
3799             startv = (
3800                 start % self._pagesize
3801                 if firstid <= start < nextfirstid
3802                 else 0)
3803
3804             endv = (
3805                 ((end - 1) % self._pagesize) + 1
3806                 if (end is not None and firstid <= end <= nextfirstid)
3807                 else None)
3808
3809             if startv != 0 or endv is not None:
3810                 page_results = page_results[startv:endv]
3811             res.extend(page_results)
3812
3813             # A little optimization - if current page is not "full", ie. does
3814             # not contain page_size videos then we can assume that this page
3815             # is the last one - there are no more ids on further pages -
3816             # i.e. no need to query again.
3817             if len(page_results) + startv < self._pagesize:
3818                 break
3819
3820             # If we got the whole page, but the next page is not interesting,
3821             # break out early as well
3822             if end == nextfirstid:
3823                 break
3824         return res
3825
3826
3827 class InAdvancePagedList(PagedList):
3828     def __init__(self, pagefunc, pagecount, pagesize):
3829         self._pagefunc = pagefunc
3830         self._pagecount = pagecount
3831         self._pagesize = pagesize
3832
3833     def getslice(self, start=0, end=None):
3834         res = []
3835         start_page = start // self._pagesize
3836         end_page = (
3837             self._pagecount if end is None else (end // self._pagesize + 1))
3838         skip_elems = start - start_page * self._pagesize
3839         only_more = None if end is None else end - start
3840         for pagenum in range(start_page, end_page):
3841             page = list(self._pagefunc(pagenum))
3842             if skip_elems:
3843                 page = page[skip_elems:]
3844                 skip_elems = None
3845             if only_more is not None:
3846                 if len(page) < only_more:
3847                     only_more -= len(page)
3848                 else:
3849                     page = page[:only_more]
3850                     res.extend(page)
3851                     break
3852             res.extend(page)
3853         return res
3854
3855
3856 def uppercase_escape(s):
3857     unicode_escape = codecs.getdecoder('unicode_escape')
3858     return re.sub(
3859         r'\\U[0-9a-fA-F]{8}',
3860         lambda m: unicode_escape(m.group(0))[0],
3861         s)
3862
3863
3864 def lowercase_escape(s):
3865     unicode_escape = codecs.getdecoder('unicode_escape')
3866     return re.sub(
3867         r'\\u[0-9a-fA-F]{4}',
3868         lambda m: unicode_escape(m.group(0))[0],
3869         s)
3870
3871
3872 def escape_rfc3986(s):
3873     """Escape non-ASCII characters as suggested by RFC 3986"""
3874     if sys.version_info < (3, 0) and isinstance(s, compat_str):
3875         s = s.encode('utf-8')
3876     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3877
3878
3879 def escape_url(url):
3880     """Escape URL as suggested by RFC 3986"""
3881     url_parsed = compat_urllib_parse_urlparse(url)
3882     return url_parsed._replace(
3883         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3884         path=escape_rfc3986(url_parsed.path),
3885         params=escape_rfc3986(url_parsed.params),
3886         query=escape_rfc3986(url_parsed.query),
3887         fragment=escape_rfc3986(url_parsed.fragment)
3888     ).geturl()
3889
3890
3891 def read_batch_urls(batch_fd):
3892     def fixup(url):
3893         if not isinstance(url, compat_str):
3894             url = url.decode('utf-8', 'replace')
3895         BOM_UTF8 = '\xef\xbb\xbf'
3896         if url.startswith(BOM_UTF8):
3897             url = url[len(BOM_UTF8):]
3898         url = url.strip()
3899         if url.startswith(('#', ';', ']')):
3900             return False
3901         return url
3902
3903     with contextlib.closing(batch_fd) as fd:
3904         return [url for url in map(fixup, fd) if url]
3905
3906
3907 def urlencode_postdata(*args, **kargs):
3908     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3909
3910
3911 def update_url_query(url, query):
3912     if not query:
3913         return url
3914     parsed_url = compat_urlparse.urlparse(url)
3915     qs = compat_parse_qs(parsed_url.query)
3916     qs.update(query)
3917     return compat_urlparse.urlunparse(parsed_url._replace(
3918         query=compat_urllib_parse_urlencode(qs, True)))
3919
3920
3921 def update_Request(req, url=None, data=None, headers={}, query={}):
3922     req_headers = req.headers.copy()
3923     req_headers.update(headers)
3924     req_data = data or req.data
3925     req_url = update_url_query(url or req.get_full_url(), query)
3926     req_get_method = req.get_method()
3927     if req_get_method == 'HEAD':
3928         req_type = HEADRequest
3929     elif req_get_method == 'PUT':
3930         req_type = PUTRequest
3931     else:
3932         req_type = compat_urllib_request.Request
3933     new_req = req_type(
3934         req_url, data=req_data, headers=req_headers,
3935         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3936     if hasattr(req, 'timeout'):
3937         new_req.timeout = req.timeout
3938     return new_req
3939
3940
3941 def _multipart_encode_impl(data, boundary):
3942     content_type = 'multipart/form-data; boundary=%s' % boundary
3943
3944     out = b''
3945     for k, v in data.items():
3946         out += b'--' + boundary.encode('ascii') + b'\r\n'
3947         if isinstance(k, compat_str):
3948             k = k.encode('utf-8')
3949         if isinstance(v, compat_str):
3950             v = v.encode('utf-8')
3951         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3952         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3953         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3954         if boundary.encode('ascii') in content:
3955             raise ValueError('Boundary overlaps with data')
3956         out += content
3957
3958     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3959
3960     return out, content_type
3961
3962
3963 def multipart_encode(data, boundary=None):
3964     '''
3965     Encode a dict to RFC 7578-compliant form-data
3966
3967     data:
3968         A dict where keys and values can be either Unicode or bytes-like
3969         objects.
3970     boundary:
3971         If specified a Unicode object, it's used as the boundary. Otherwise
3972         a random boundary is generated.
3973
3974     Reference: https://tools.ietf.org/html/rfc7578
3975     '''
3976     has_specified_boundary = boundary is not None
3977
3978     while True:
3979         if boundary is None:
3980             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3981
3982         try:
3983             out, content_type = _multipart_encode_impl(data, boundary)
3984             break
3985         except ValueError:
3986             if has_specified_boundary:
3987                 raise
3988             boundary = None
3989
3990     return out, content_type
3991
3992
3993 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3994     if isinstance(key_or_keys, (list, tuple)):
3995         for key in key_or_keys:
3996             if key not in d or d[key] is None or skip_false_values and not d[key]:
3997                 continue
3998             return d[key]
3999         return default
4000     return d.get(key_or_keys, default)
4001
4002
4003 def try_get(src, getter, expected_type=None):
4004     if not isinstance(getter, (list, tuple)):
4005         getter = [getter]
4006     for get in getter:
4007         try:
4008             v = get(src)
4009         except (AttributeError, KeyError, TypeError, IndexError):
4010             pass
4011         else:
4012             if expected_type is None or isinstance(v, expected_type):
4013                 return v
4014
4015
4016 def merge_dicts(*dicts):
4017     merged = {}
4018     for a_dict in dicts:
4019         for k, v in a_dict.items():
4020             if v is None:
4021                 continue
4022             if (k not in merged
4023                     or (isinstance(v, compat_str) and v
4024                         and isinstance(merged[k], compat_str)
4025                         and not merged[k])):
4026                 merged[k] = v
4027     return merged
4028
4029
4030 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4031     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4032
4033
4034 US_RATINGS = {
4035     'G': 0,
4036     'PG': 10,
4037     'PG-13': 13,
4038     'R': 16,
4039     'NC': 18,
4040 }
4041
4042
4043 TV_PARENTAL_GUIDELINES = {
4044     'TV-Y': 0,
4045     'TV-Y7': 7,
4046     'TV-G': 0,
4047     'TV-PG': 0,
4048     'TV-14': 14,
4049     'TV-MA': 17,
4050 }
4051
4052
4053 def parse_age_limit(s):
4054     if type(s) == int:
4055         return s if 0 <= s <= 21 else None
4056     if not isinstance(s, compat_basestring):
4057         return None
4058     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4059     if m:
4060         return int(m.group('age'))
4061     if s in US_RATINGS:
4062         return US_RATINGS[s]
4063     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4064     if m:
4065         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4066     return None
4067
4068
4069 def strip_jsonp(code):
4070     return re.sub(
4071         r'''(?sx)^
4072             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4073             (?:\s*&&\s*(?P=func_name))?
4074             \s*\(\s*(?P<callback_data>.*)\);?
4075             \s*?(?://[^\n]*)*$''',
4076         r'\g<callback_data>', code)
4077
4078
4079 def js_to_json(code):
4080     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4081     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4082     INTEGER_TABLE = (
4083         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4084         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4085     )
4086
4087     def fix_kv(m):
4088         v = m.group(0)
4089         if v in ('true', 'false', 'null'):
4090             return v
4091         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4092             return ""
4093
4094         if v[0] in ("'", '"'):
4095             v = re.sub(r'(?s)\\.|"', lambda m: {
4096                 '"': '\\"',
4097                 "\\'": "'",
4098                 '\\\n': '',
4099                 '\\x': '\\u00',
4100             }.get(m.group(0), m.group(0)), v[1:-1])
4101         else:
4102             for regex, base in INTEGER_TABLE:
4103                 im = re.match(regex, v)
4104                 if im:
4105                     i = int(im.group(1), base)
4106                     return '"%d":' % i if v.endswith(':') else '%d' % i
4107
4108         return '"%s"' % v
4109
4110     return re.sub(r'''(?sx)
4111         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4112         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4113         {comment}|,(?={skip}[\]}}])|
4114         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4115         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4116         [0-9]+(?={skip}:)|
4117         !+
4118         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4119
4120
4121 def qualities(quality_ids):
4122     """ Get a numeric quality value out of a list of possible values """
4123     def q(qid):
4124         try:
4125             return quality_ids.index(qid)
4126         except ValueError:
4127             return -1
4128     return q
4129
4130
4131 DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
4132
4133
4134 def limit_length(s, length):
4135     """ Add ellipses to overly long strings """
4136     if s is None:
4137         return None
4138     ELLIPSES = '...'
4139     if len(s) > length:
4140         return s[:length - len(ELLIPSES)] + ELLIPSES
4141     return s
4142
4143
4144 def version_tuple(v):
4145     return tuple(int(e) for e in re.split(r'[-.]', v))
4146
4147
4148 def is_outdated_version(version, limit, assume_new=True):
4149     if not version:
4150         return not assume_new
4151     try:
4152         return version_tuple(version) < version_tuple(limit)
4153     except ValueError:
4154         return not assume_new
4155
4156
4157 def ytdl_is_updateable():
4158     """ Returns if youtube-dlc can be updated with -U """
4159     return False
4160
4161     from zipimport import zipimporter
4162
4163     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4164
4165
4166 def args_to_str(args):
4167     # Get a short string representation for a subprocess command
4168     return ' '.join(compat_shlex_quote(a) for a in args)
4169
4170
4171 def error_to_compat_str(err):
4172     err_str = str(err)
4173     # On python 2 error byte string must be decoded with proper
4174     # encoding rather than ascii
4175     if sys.version_info[0] < 3:
4176         err_str = err_str.decode(preferredencoding())
4177     return err_str
4178
4179
4180 def mimetype2ext(mt):
4181     if mt is None:
4182         return None
4183
4184     ext = {
4185         'audio/mp4': 'm4a',
4186         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4187         # it's the most popular one
4188         'audio/mpeg': 'mp3',
4189         'audio/x-wav': 'wav',
4190     }.get(mt)
4191     if ext is not None:
4192         return ext
4193
4194     _, _, res = mt.rpartition('/')
4195     res = res.split(';')[0].strip().lower()
4196
4197     return {
4198         '3gpp': '3gp',
4199         'smptett+xml': 'tt',
4200         'ttaf+xml': 'dfxp',
4201         'ttml+xml': 'ttml',
4202         'x-flv': 'flv',
4203         'x-mp4-fragmented': 'mp4',
4204         'x-ms-sami': 'sami',
4205         'x-ms-wmv': 'wmv',
4206         'mpegurl': 'm3u8',
4207         'x-mpegurl': 'm3u8',
4208         'vnd.apple.mpegurl': 'm3u8',
4209         'dash+xml': 'mpd',
4210         'f4m+xml': 'f4m',
4211         'hds+xml': 'f4m',
4212         'vnd.ms-sstr+xml': 'ism',
4213         'quicktime': 'mov',
4214         'mp2t': 'ts',
4215         'x-wav': 'wav',
4216     }.get(res, res)
4217
4218
4219 def parse_codecs(codecs_str):
4220     # http://tools.ietf.org/html/rfc6381
4221     if not codecs_str:
4222         return {}
4223     split_codecs = list(filter(None, map(
4224         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4225     vcodec, acodec = None, None
4226     for full_codec in split_codecs:
4227         codec = full_codec.split('.')[0]
4228         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4229             if not vcodec:
4230                 vcodec = full_codec
4231         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4232             if not acodec:
4233                 acodec = full_codec
4234         else:
4235             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4236     if not vcodec and not acodec:
4237         if len(split_codecs) == 2:
4238             return {
4239                 'vcodec': split_codecs[0],
4240                 'acodec': split_codecs[1],
4241             }
4242     else:
4243         return {
4244             'vcodec': vcodec or 'none',
4245             'acodec': acodec or 'none',
4246         }
4247     return {}
4248
4249
4250 def urlhandle_detect_ext(url_handle):
4251     getheader = url_handle.headers.get
4252
4253     cd = getheader('Content-Disposition')
4254     if cd:
4255         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4256         if m:
4257             e = determine_ext(m.group('filename'), default_ext=None)
4258             if e:
4259                 return e
4260
4261     return mimetype2ext(getheader('Content-Type'))
4262
4263
4264 def encode_data_uri(data, mime_type):
4265     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4266
4267
4268 def age_restricted(content_limit, age_limit):
4269     """ Returns True iff the content should be blocked """
4270
4271     if age_limit is None:  # No limit set
4272         return False
4273     if content_limit is None:
4274         return False  # Content available for everyone
4275     return age_limit < content_limit
4276
4277
4278 def is_html(first_bytes):
4279     """ Detect whether a file contains HTML by examining its first bytes. """
4280
4281     BOMS = [
4282         (b'\xef\xbb\xbf', 'utf-8'),
4283         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4284         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4285         (b'\xff\xfe', 'utf-16-le'),
4286         (b'\xfe\xff', 'utf-16-be'),
4287     ]
4288     for bom, enc in BOMS:
4289         if first_bytes.startswith(bom):
4290             s = first_bytes[len(bom):].decode(enc, 'replace')
4291             break
4292     else:
4293         s = first_bytes.decode('utf-8', 'replace')
4294
4295     return re.match(r'^\s*<', s)
4296
4297
4298 def determine_protocol(info_dict):
4299     protocol = info_dict.get('protocol')
4300     if protocol is not None:
4301         return protocol
4302
4303     url = info_dict['url']
4304     if url.startswith('rtmp'):
4305         return 'rtmp'
4306     elif url.startswith('mms'):
4307         return 'mms'
4308     elif url.startswith('rtsp'):
4309         return 'rtsp'
4310
4311     ext = determine_ext(url)
4312     if ext == 'm3u8':
4313         return 'm3u8'
4314     elif ext == 'f4m':
4315         return 'f4m'
4316
4317     return compat_urllib_parse_urlparse(url).scheme
4318
4319
4320 def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
4321     """ Render a list of rows, each as a list of values """
4322
4323     def get_max_lens(table):
4324         return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4325
4326     def filter_using_list(row, filterArray):
4327         return [col for (take, col) in zip(filterArray, row) if take]
4328
4329     if hideEmpty:
4330         max_lens = get_max_lens(data)
4331         header_row = filter_using_list(header_row, max_lens)
4332         data = [filter_using_list(row, max_lens) for row in data]
4333
4334     table = [header_row] + data
4335     max_lens = get_max_lens(table)
4336     if delim:
4337         table = [header_row] + [['-' * ml for ml in max_lens]] + data
4338     format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
4339     return '\n'.join(format_str % tuple(row) for row in table)
4340
4341
4342 def _match_one(filter_part, dct):
4343     COMPARISON_OPERATORS = {
4344         '<': operator.lt,
4345         '<=': operator.le,
4346         '>': operator.gt,
4347         '>=': operator.ge,
4348         '=': operator.eq,
4349         '!=': operator.ne,
4350     }
4351     operator_rex = re.compile(r'''(?x)\s*
4352         (?P<key>[a-z_]+)
4353         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4354         (?:
4355             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4356             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4357             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4358         )
4359         \s*$
4360         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4361     m = operator_rex.search(filter_part)
4362     if m:
4363         op = COMPARISON_OPERATORS[m.group('op')]
4364         actual_value = dct.get(m.group('key'))
4365         if (m.group('quotedstrval') is not None
4366             or m.group('strval') is not None
4367             # If the original field is a string and matching comparisonvalue is
4368             # a number we should respect the origin of the original field
4369             # and process comparison value as a string (see
4370             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4371             or actual_value is not None and m.group('intval') is not None
4372                 and isinstance(actual_value, compat_str)):
4373             if m.group('op') not in ('=', '!='):
4374                 raise ValueError(
4375                     'Operator %s does not support string values!' % m.group('op'))
4376             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4377             quote = m.group('quote')
4378             if quote is not None:
4379                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4380         else:
4381             try:
4382                 comparison_value = int(m.group('intval'))
4383             except ValueError:
4384                 comparison_value = parse_filesize(m.group('intval'))
4385                 if comparison_value is None:
4386                     comparison_value = parse_filesize(m.group('intval') + 'B')
4387                 if comparison_value is None:
4388                     raise ValueError(
4389                         'Invalid integer value %r in filter part %r' % (
4390                             m.group('intval'), filter_part))
4391         if actual_value is None:
4392             return m.group('none_inclusive')
4393         return op(actual_value, comparison_value)
4394
4395     UNARY_OPERATORS = {
4396         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4397         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4398     }
4399     operator_rex = re.compile(r'''(?x)\s*
4400         (?P<op>%s)\s*(?P<key>[a-z_]+)
4401         \s*$
4402         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4403     m = operator_rex.search(filter_part)
4404     if m:
4405         op = UNARY_OPERATORS[m.group('op')]
4406         actual_value = dct.get(m.group('key'))
4407         return op(actual_value)
4408
4409     raise ValueError('Invalid filter part %r' % filter_part)
4410
4411
4412 def match_str(filter_str, dct):
4413     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4414
4415     return all(
4416         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4417
4418
4419 def match_filter_func(filter_str):
4420     def _match_func(info_dict):
4421         if match_str(filter_str, info_dict):
4422             return None
4423         else:
4424             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4425             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4426     return _match_func
4427
4428
4429 def parse_dfxp_time_expr(time_expr):
4430     if not time_expr:
4431         return
4432
4433     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4434     if mobj:
4435         return float(mobj.group('time_offset'))
4436
4437     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4438     if mobj:
4439         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4440
4441
4442 def srt_subtitles_timecode(seconds):
4443     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4444
4445
4446 def dfxp2srt(dfxp_data):
4447     '''
4448     @param dfxp_data A bytes-like object containing DFXP data
4449     @returns A unicode object containing converted SRT data
4450     '''
4451     LEGACY_NAMESPACES = (
4452         (b'http://www.w3.org/ns/ttml', [
4453             b'http://www.w3.org/2004/11/ttaf1',
4454             b'http://www.w3.org/2006/04/ttaf1',
4455             b'http://www.w3.org/2006/10/ttaf1',
4456         ]),
4457         (b'http://www.w3.org/ns/ttml#styling', [
4458             b'http://www.w3.org/ns/ttml#style',
4459         ]),
4460     )
4461
4462     SUPPORTED_STYLING = [
4463         'color',
4464         'fontFamily',
4465         'fontSize',
4466         'fontStyle',
4467         'fontWeight',
4468         'textDecoration'
4469     ]
4470
4471     _x = functools.partial(xpath_with_ns, ns_map={
4472         'xml': 'http://www.w3.org/XML/1998/namespace',
4473         'ttml': 'http://www.w3.org/ns/ttml',
4474         'tts': 'http://www.w3.org/ns/ttml#styling',
4475     })
4476
4477     styles = {}
4478     default_style = {}
4479
4480     class TTMLPElementParser(object):
4481         _out = ''
4482         _unclosed_elements = []
4483         _applied_styles = []
4484
4485         def start(self, tag, attrib):
4486             if tag in (_x('ttml:br'), 'br'):
4487                 self._out += '\n'
4488             else:
4489                 unclosed_elements = []
4490                 style = {}
4491                 element_style_id = attrib.get('style')
4492                 if default_style:
4493                     style.update(default_style)
4494                 if element_style_id:
4495                     style.update(styles.get(element_style_id, {}))
4496                 for prop in SUPPORTED_STYLING:
4497                     prop_val = attrib.get(_x('tts:' + prop))
4498                     if prop_val:
4499                         style[prop] = prop_val
4500                 if style:
4501                     font = ''
4502                     for k, v in sorted(style.items()):
4503                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4504                             continue
4505                         if k == 'color':
4506                             font += ' color="%s"' % v
4507                         elif k == 'fontSize':
4508                             font += ' size="%s"' % v
4509                         elif k == 'fontFamily':
4510                             font += ' face="%s"' % v
4511                         elif k == 'fontWeight' and v == 'bold':
4512                             self._out += '<b>'
4513                             unclosed_elements.append('b')
4514                         elif k == 'fontStyle' and v == 'italic':
4515                             self._out += '<i>'
4516                             unclosed_elements.append('i')
4517                         elif k == 'textDecoration' and v == 'underline':
4518                             self._out += '<u>'
4519                             unclosed_elements.append('u')
4520                     if font:
4521                         self._out += '<font' + font + '>'
4522                         unclosed_elements.append('font')
4523                     applied_style = {}
4524                     if self._applied_styles:
4525                         applied_style.update(self._applied_styles[-1])
4526                     applied_style.update(style)
4527                     self._applied_styles.append(applied_style)
4528                 self._unclosed_elements.append(unclosed_elements)
4529
4530         def end(self, tag):
4531             if tag not in (_x('ttml:br'), 'br'):
4532                 unclosed_elements = self._unclosed_elements.pop()
4533                 for element in reversed(unclosed_elements):
4534                     self._out += '</%s>' % element
4535                 if unclosed_elements and self._applied_styles:
4536                     self._applied_styles.pop()
4537
4538         def data(self, data):
4539             self._out += data
4540
4541         def close(self):
4542             return self._out.strip()
4543
4544     def parse_node(node):
4545         target = TTMLPElementParser()
4546         parser = xml.etree.ElementTree.XMLParser(target=target)
4547         parser.feed(xml.etree.ElementTree.tostring(node))
4548         return parser.close()
4549
4550     for k, v in LEGACY_NAMESPACES:
4551         for ns in v:
4552             dfxp_data = dfxp_data.replace(ns, k)
4553
4554     dfxp = compat_etree_fromstring(dfxp_data)
4555     out = []
4556     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4557
4558     if not paras:
4559         raise ValueError('Invalid dfxp/TTML subtitle')
4560
4561     repeat = False
4562     while True:
4563         for style in dfxp.findall(_x('.//ttml:style')):
4564             style_id = style.get('id') or style.get(_x('xml:id'))
4565             if not style_id:
4566                 continue
4567             parent_style_id = style.get('style')
4568             if parent_style_id:
4569                 if parent_style_id not in styles:
4570                     repeat = True
4571                     continue
4572                 styles[style_id] = styles[parent_style_id].copy()
4573             for prop in SUPPORTED_STYLING:
4574                 prop_val = style.get(_x('tts:' + prop))
4575                 if prop_val:
4576                     styles.setdefault(style_id, {})[prop] = prop_val
4577         if repeat:
4578             repeat = False
4579         else:
4580             break
4581
4582     for p in ('body', 'div'):
4583         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4584         if ele is None:
4585             continue
4586         style = styles.get(ele.get('style'))
4587         if not style:
4588             continue
4589         default_style.update(style)
4590
4591     for para, index in zip(paras, itertools.count(1)):
4592         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4593         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4594         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4595         if begin_time is None:
4596             continue
4597         if not end_time:
4598             if not dur:
4599                 continue
4600             end_time = begin_time + dur
4601         out.append('%d\n%s --> %s\n%s\n\n' % (
4602             index,
4603             srt_subtitles_timecode(begin_time),
4604             srt_subtitles_timecode(end_time),
4605             parse_node(para)))
4606
4607     return ''.join(out)
4608
4609
4610 def cli_option(params, command_option, param):
4611     param = params.get(param)
4612     if param:
4613         param = compat_str(param)
4614     return [command_option, param] if param is not None else []
4615
4616
4617 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4618     param = params.get(param)
4619     if param is None:
4620         return []
4621     assert isinstance(param, bool)
4622     if separator:
4623         return [command_option + separator + (true_value if param else false_value)]
4624     return [command_option, true_value if param else false_value]
4625
4626
4627 def cli_valueless_option(params, command_option, param, expected_value=True):
4628     param = params.get(param)
4629     return [command_option] if param == expected_value else []
4630
4631
4632 def cli_configuration_args(params, param, default=[]):
4633     ex_args = params.get(param)
4634     if ex_args is None:
4635         return default
4636     assert isinstance(ex_args, list)
4637     return ex_args
4638
4639
4640 class ISO639Utils(object):
4641     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4642     _lang_map = {
4643         'aa': 'aar',
4644         'ab': 'abk',
4645         'ae': 'ave',
4646         'af': 'afr',
4647         'ak': 'aka',
4648         'am': 'amh',
4649         'an': 'arg',
4650         'ar': 'ara',
4651         'as': 'asm',
4652         'av': 'ava',
4653         'ay': 'aym',
4654         'az': 'aze',
4655         'ba': 'bak',
4656         'be': 'bel',
4657         'bg': 'bul',
4658         'bh': 'bih',
4659         'bi': 'bis',
4660         'bm': 'bam',
4661         'bn': 'ben',
4662         'bo': 'bod',
4663         'br': 'bre',
4664         'bs': 'bos',
4665         'ca': 'cat',
4666         'ce': 'che',
4667         'ch': 'cha',
4668         'co': 'cos',
4669         'cr': 'cre',
4670         'cs': 'ces',
4671         'cu': 'chu',
4672         'cv': 'chv',
4673         'cy': 'cym',
4674         'da': 'dan',
4675         'de': 'deu',
4676         'dv': 'div',
4677         'dz': 'dzo',
4678         'ee': 'ewe',
4679         'el': 'ell',
4680         'en': 'eng',
4681         'eo': 'epo',
4682         'es': 'spa',
4683         'et': 'est',
4684         'eu': 'eus',
4685         'fa': 'fas',
4686         'ff': 'ful',
4687         'fi': 'fin',
4688         'fj': 'fij',
4689         'fo': 'fao',
4690         'fr': 'fra',
4691         'fy': 'fry',
4692         'ga': 'gle',
4693         'gd': 'gla',
4694         'gl': 'glg',
4695         'gn': 'grn',
4696         'gu': 'guj',
4697         'gv': 'glv',
4698         'ha': 'hau',
4699         'he': 'heb',
4700         'iw': 'heb',  # Replaced by he in 1989 revision
4701         'hi': 'hin',
4702         'ho': 'hmo',
4703         'hr': 'hrv',
4704         'ht': 'hat',
4705         'hu': 'hun',
4706         'hy': 'hye',
4707         'hz': 'her',
4708         'ia': 'ina',
4709         'id': 'ind',
4710         'in': 'ind',  # Replaced by id in 1989 revision
4711         'ie': 'ile',
4712         'ig': 'ibo',
4713         'ii': 'iii',
4714         'ik': 'ipk',
4715         'io': 'ido',
4716         'is': 'isl',
4717         'it': 'ita',
4718         'iu': 'iku',
4719         'ja': 'jpn',
4720         'jv': 'jav',
4721         'ka': 'kat',
4722         'kg': 'kon',
4723         'ki': 'kik',
4724         'kj': 'kua',
4725         'kk': 'kaz',
4726         'kl': 'kal',
4727         'km': 'khm',
4728         'kn': 'kan',
4729         'ko': 'kor',
4730         'kr': 'kau',
4731         'ks': 'kas',
4732         'ku': 'kur',
4733         'kv': 'kom',
4734         'kw': 'cor',
4735         'ky': 'kir',
4736         'la': 'lat',
4737         'lb': 'ltz',
4738         'lg': 'lug',
4739         'li': 'lim',
4740         'ln': 'lin',
4741         'lo': 'lao',
4742         'lt': 'lit',
4743         'lu': 'lub',
4744         'lv': 'lav',
4745         'mg': 'mlg',
4746         'mh': 'mah',
4747         'mi': 'mri',
4748         'mk': 'mkd',
4749         'ml': 'mal',
4750         'mn': 'mon',
4751         'mr': 'mar',
4752         'ms': 'msa',
4753         'mt': 'mlt',
4754         'my': 'mya',
4755         'na': 'nau',
4756         'nb': 'nob',
4757         'nd': 'nde',
4758         'ne': 'nep',
4759         'ng': 'ndo',
4760         'nl': 'nld',
4761         'nn': 'nno',
4762         'no': 'nor',
4763         'nr': 'nbl',
4764         'nv': 'nav',
4765         'ny': 'nya',
4766         'oc': 'oci',
4767         'oj': 'oji',
4768         'om': 'orm',
4769         'or': 'ori',
4770         'os': 'oss',
4771         'pa': 'pan',
4772         'pi': 'pli',
4773         'pl': 'pol',
4774         'ps': 'pus',
4775         'pt': 'por',
4776         'qu': 'que',
4777         'rm': 'roh',
4778         'rn': 'run',
4779         'ro': 'ron',
4780         'ru': 'rus',
4781         'rw': 'kin',
4782         'sa': 'san',
4783         'sc': 'srd',
4784         'sd': 'snd',
4785         'se': 'sme',
4786         'sg': 'sag',
4787         'si': 'sin',
4788         'sk': 'slk',
4789         'sl': 'slv',
4790         'sm': 'smo',
4791         'sn': 'sna',
4792         'so': 'som',
4793         'sq': 'sqi',
4794         'sr': 'srp',
4795         'ss': 'ssw',
4796         'st': 'sot',
4797         'su': 'sun',
4798         'sv': 'swe',
4799         'sw': 'swa',
4800         'ta': 'tam',
4801         'te': 'tel',
4802         'tg': 'tgk',
4803         'th': 'tha',
4804         'ti': 'tir',
4805         'tk': 'tuk',
4806         'tl': 'tgl',
4807         'tn': 'tsn',
4808         'to': 'ton',
4809         'tr': 'tur',
4810         'ts': 'tso',
4811         'tt': 'tat',
4812         'tw': 'twi',
4813         'ty': 'tah',
4814         'ug': 'uig',
4815         'uk': 'ukr',
4816         'ur': 'urd',
4817         'uz': 'uzb',
4818         've': 'ven',
4819         'vi': 'vie',
4820         'vo': 'vol',
4821         'wa': 'wln',
4822         'wo': 'wol',
4823         'xh': 'xho',
4824         'yi': 'yid',
4825         'ji': 'yid',  # Replaced by yi in 1989 revision
4826         'yo': 'yor',
4827         'za': 'zha',
4828         'zh': 'zho',
4829         'zu': 'zul',
4830     }
4831
4832     @classmethod
4833     def short2long(cls, code):
4834         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4835         return cls._lang_map.get(code[:2])
4836
4837     @classmethod
4838     def long2short(cls, code):
4839         """Convert language code from ISO 639-2/T to ISO 639-1"""
4840         for short_name, long_name in cls._lang_map.items():
4841             if long_name == code:
4842                 return short_name
4843
4844
4845 class ISO3166Utils(object):
4846     # From http://data.okfn.org/data/core/country-list
4847     _country_map = {
4848         'AF': 'Afghanistan',
4849         'AX': 'Åland Islands',
4850         'AL': 'Albania',
4851         'DZ': 'Algeria',
4852         'AS': 'American Samoa',
4853         'AD': 'Andorra',
4854         'AO': 'Angola',
4855         'AI': 'Anguilla',
4856         'AQ': 'Antarctica',
4857         'AG': 'Antigua and Barbuda',
4858         'AR': 'Argentina',
4859         'AM': 'Armenia',
4860         'AW': 'Aruba',
4861         'AU': 'Australia',
4862         'AT': 'Austria',
4863         'AZ': 'Azerbaijan',
4864         'BS': 'Bahamas',
4865         'BH': 'Bahrain',
4866         'BD': 'Bangladesh',
4867         'BB': 'Barbados',
4868         'BY': 'Belarus',
4869         'BE': 'Belgium',
4870         'BZ': 'Belize',
4871         'BJ': 'Benin',
4872         'BM': 'Bermuda',
4873         'BT': 'Bhutan',
4874         'BO': 'Bolivia, Plurinational State of',
4875         'BQ': 'Bonaire, Sint Eustatius and Saba',
4876         'BA': 'Bosnia and Herzegovina',
4877         'BW': 'Botswana',
4878         'BV': 'Bouvet Island',
4879         'BR': 'Brazil',
4880         'IO': 'British Indian Ocean Territory',
4881         'BN': 'Brunei Darussalam',
4882         'BG': 'Bulgaria',
4883         'BF': 'Burkina Faso',
4884         'BI': 'Burundi',
4885         'KH': 'Cambodia',
4886         'CM': 'Cameroon',
4887         'CA': 'Canada',
4888         'CV': 'Cape Verde',
4889         'KY': 'Cayman Islands',
4890         'CF': 'Central African Republic',
4891         'TD': 'Chad',
4892         'CL': 'Chile',
4893         'CN': 'China',
4894         'CX': 'Christmas Island',
4895         'CC': 'Cocos (Keeling) Islands',
4896         'CO': 'Colombia',
4897         'KM': 'Comoros',
4898         'CG': 'Congo',
4899         'CD': 'Congo, the Democratic Republic of the',
4900         'CK': 'Cook Islands',
4901         'CR': 'Costa Rica',
4902         'CI': 'Côte d\'Ivoire',
4903         'HR': 'Croatia',
4904         'CU': 'Cuba',
4905         'CW': 'Curaçao',
4906         'CY': 'Cyprus',
4907         'CZ': 'Czech Republic',
4908         'DK': 'Denmark',
4909         'DJ': 'Djibouti',
4910         'DM': 'Dominica',
4911         'DO': 'Dominican Republic',
4912         'EC': 'Ecuador',
4913         'EG': 'Egypt',
4914         'SV': 'El Salvador',
4915         'GQ': 'Equatorial Guinea',
4916         'ER': 'Eritrea',
4917         'EE': 'Estonia',
4918         'ET': 'Ethiopia',
4919         'FK': 'Falkland Islands (Malvinas)',
4920         'FO': 'Faroe Islands',
4921         'FJ': 'Fiji',
4922         'FI': 'Finland',
4923         'FR': 'France',
4924         'GF': 'French Guiana',
4925         'PF': 'French Polynesia',
4926         'TF': 'French Southern Territories',
4927         'GA': 'Gabon',
4928         'GM': 'Gambia',
4929         'GE': 'Georgia',
4930         'DE': 'Germany',
4931         'GH': 'Ghana',
4932         'GI': 'Gibraltar',
4933         'GR': 'Greece',
4934         'GL': 'Greenland',
4935         'GD': 'Grenada',
4936         'GP': 'Guadeloupe',
4937         'GU': 'Guam',
4938         'GT': 'Guatemala',
4939         'GG': 'Guernsey',
4940         'GN': 'Guinea',
4941         'GW': 'Guinea-Bissau',
4942         'GY': 'Guyana',
4943         'HT': 'Haiti',
4944         'HM': 'Heard Island and McDonald Islands',
4945         'VA': 'Holy See (Vatican City State)',
4946         'HN': 'Honduras',
4947         'HK': 'Hong Kong',
4948         'HU': 'Hungary',
4949         'IS': 'Iceland',
4950         'IN': 'India',
4951         'ID': 'Indonesia',
4952         'IR': 'Iran, Islamic Republic of',
4953         'IQ': 'Iraq',
4954         'IE': 'Ireland',
4955         'IM': 'Isle of Man',
4956         'IL': 'Israel',
4957         'IT': 'Italy',
4958         'JM': 'Jamaica',
4959         'JP': 'Japan',
4960         'JE': 'Jersey',
4961         'JO': 'Jordan',
4962         'KZ': 'Kazakhstan',
4963         'KE': 'Kenya',
4964         'KI': 'Kiribati',
4965         'KP': 'Korea, Democratic People\'s Republic of',
4966         'KR': 'Korea, Republic of',
4967         'KW': 'Kuwait',
4968         'KG': 'Kyrgyzstan',
4969         'LA': 'Lao People\'s Democratic Republic',
4970         'LV': 'Latvia',
4971         'LB': 'Lebanon',
4972         'LS': 'Lesotho',
4973         'LR': 'Liberia',
4974         'LY': 'Libya',
4975         'LI': 'Liechtenstein',
4976         'LT': 'Lithuania',
4977         'LU': 'Luxembourg',
4978         'MO': 'Macao',
4979         'MK': 'Macedonia, the Former Yugoslav Republic of',
4980         'MG': 'Madagascar',
4981         'MW': 'Malawi',
4982         'MY': 'Malaysia',
4983         'MV': 'Maldives',
4984         'ML': 'Mali',
4985         'MT': 'Malta',
4986         'MH': 'Marshall Islands',
4987         'MQ': 'Martinique',
4988         'MR': 'Mauritania',
4989         'MU': 'Mauritius',
4990         'YT': 'Mayotte',
4991         'MX': 'Mexico',
4992         'FM': 'Micronesia, Federated States of',
4993         'MD': 'Moldova, Republic of',
4994         'MC': 'Monaco',
4995         'MN': 'Mongolia',
4996         'ME': 'Montenegro',
4997         'MS': 'Montserrat',
4998         'MA': 'Morocco',
4999         'MZ': 'Mozambique',
5000         'MM': 'Myanmar',
5001         'NA': 'Namibia',
5002         'NR': 'Nauru',
5003         'NP': 'Nepal',
5004         'NL': 'Netherlands',
5005         'NC': 'New Caledonia',
5006         'NZ': 'New Zealand',
5007         'NI': 'Nicaragua',
5008         'NE': 'Niger',
5009         'NG': 'Nigeria',
5010         'NU': 'Niue',
5011         'NF': 'Norfolk Island',
5012         'MP': 'Northern Mariana Islands',
5013         'NO': 'Norway',
5014         'OM': 'Oman',
5015         'PK': 'Pakistan',
5016         'PW': 'Palau',
5017         'PS': 'Palestine, State of',
5018         'PA': 'Panama',
5019         'PG': 'Papua New Guinea',
5020         'PY': 'Paraguay',
5021         'PE': 'Peru',
5022         'PH': 'Philippines',
5023         'PN': 'Pitcairn',
5024         'PL': 'Poland',
5025         'PT': 'Portugal',
5026         'PR': 'Puerto Rico',
5027         'QA': 'Qatar',
5028         'RE': 'Réunion',
5029         'RO': 'Romania',
5030         'RU': 'Russian Federation',
5031         'RW': 'Rwanda',
5032         'BL': 'Saint Barthélemy',
5033         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5034         'KN': 'Saint Kitts and Nevis',
5035         'LC': 'Saint Lucia',
5036         'MF': 'Saint Martin (French part)',
5037         'PM': 'Saint Pierre and Miquelon',
5038         'VC': 'Saint Vincent and the Grenadines',
5039         'WS': 'Samoa',
5040         'SM': 'San Marino',
5041         'ST': 'Sao Tome and Principe',
5042         'SA': 'Saudi Arabia',
5043         'SN': 'Senegal',
5044         'RS': 'Serbia',
5045         'SC': 'Seychelles',
5046         'SL': 'Sierra Leone',
5047         'SG': 'Singapore',
5048         'SX': 'Sint Maarten (Dutch part)',
5049         'SK': 'Slovakia',
5050         'SI': 'Slovenia',
5051         'SB': 'Solomon Islands',
5052         'SO': 'Somalia',
5053         'ZA': 'South Africa',
5054         'GS': 'South Georgia and the South Sandwich Islands',
5055         'SS': 'South Sudan',
5056         'ES': 'Spain',
5057         'LK': 'Sri Lanka',
5058         'SD': 'Sudan',
5059         'SR': 'Suriname',
5060         'SJ': 'Svalbard and Jan Mayen',
5061         'SZ': 'Swaziland',
5062         'SE': 'Sweden',
5063         'CH': 'Switzerland',
5064         'SY': 'Syrian Arab Republic',
5065         'TW': 'Taiwan, Province of China',
5066         'TJ': 'Tajikistan',
5067         'TZ': 'Tanzania, United Republic of',
5068         'TH': 'Thailand',
5069         'TL': 'Timor-Leste',
5070         'TG': 'Togo',
5071         'TK': 'Tokelau',
5072         'TO': 'Tonga',
5073         'TT': 'Trinidad and Tobago',
5074         'TN': 'Tunisia',
5075         'TR': 'Turkey',
5076         'TM': 'Turkmenistan',
5077         'TC': 'Turks and Caicos Islands',
5078         'TV': 'Tuvalu',
5079         'UG': 'Uganda',
5080         'UA': 'Ukraine',
5081         'AE': 'United Arab Emirates',
5082         'GB': 'United Kingdom',
5083         'US': 'United States',
5084         'UM': 'United States Minor Outlying Islands',
5085         'UY': 'Uruguay',
5086         'UZ': 'Uzbekistan',
5087         'VU': 'Vanuatu',
5088         'VE': 'Venezuela, Bolivarian Republic of',
5089         'VN': 'Viet Nam',
5090         'VG': 'Virgin Islands, British',
5091         'VI': 'Virgin Islands, U.S.',
5092         'WF': 'Wallis and Futuna',
5093         'EH': 'Western Sahara',
5094         'YE': 'Yemen',
5095         'ZM': 'Zambia',
5096         'ZW': 'Zimbabwe',
5097     }
5098
5099     @classmethod
5100     def short2full(cls, code):
5101         """Convert an ISO 3166-2 country code to the corresponding full name"""
5102         return cls._country_map.get(code.upper())
5103
5104
5105 class GeoUtils(object):
5106     # Major IPv4 address blocks per country
5107     _country_ip_map = {
5108         'AD': '46.172.224.0/19',
5109         'AE': '94.200.0.0/13',
5110         'AF': '149.54.0.0/17',
5111         'AG': '209.59.64.0/18',
5112         'AI': '204.14.248.0/21',
5113         'AL': '46.99.0.0/16',
5114         'AM': '46.70.0.0/15',
5115         'AO': '105.168.0.0/13',
5116         'AP': '182.50.184.0/21',
5117         'AQ': '23.154.160.0/24',
5118         'AR': '181.0.0.0/12',
5119         'AS': '202.70.112.0/20',
5120         'AT': '77.116.0.0/14',
5121         'AU': '1.128.0.0/11',
5122         'AW': '181.41.0.0/18',
5123         'AX': '185.217.4.0/22',
5124         'AZ': '5.197.0.0/16',
5125         'BA': '31.176.128.0/17',
5126         'BB': '65.48.128.0/17',
5127         'BD': '114.130.0.0/16',
5128         'BE': '57.0.0.0/8',
5129         'BF': '102.178.0.0/15',
5130         'BG': '95.42.0.0/15',
5131         'BH': '37.131.0.0/17',
5132         'BI': '154.117.192.0/18',
5133         'BJ': '137.255.0.0/16',
5134         'BL': '185.212.72.0/23',
5135         'BM': '196.12.64.0/18',
5136         'BN': '156.31.0.0/16',
5137         'BO': '161.56.0.0/16',
5138         'BQ': '161.0.80.0/20',
5139         'BR': '191.128.0.0/12',
5140         'BS': '24.51.64.0/18',
5141         'BT': '119.2.96.0/19',
5142         'BW': '168.167.0.0/16',
5143         'BY': '178.120.0.0/13',
5144         'BZ': '179.42.192.0/18',
5145         'CA': '99.224.0.0/11',
5146         'CD': '41.243.0.0/16',
5147         'CF': '197.242.176.0/21',
5148         'CG': '160.113.0.0/16',
5149         'CH': '85.0.0.0/13',
5150         'CI': '102.136.0.0/14',
5151         'CK': '202.65.32.0/19',
5152         'CL': '152.172.0.0/14',
5153         'CM': '102.244.0.0/14',
5154         'CN': '36.128.0.0/10',
5155         'CO': '181.240.0.0/12',
5156         'CR': '201.192.0.0/12',
5157         'CU': '152.206.0.0/15',
5158         'CV': '165.90.96.0/19',
5159         'CW': '190.88.128.0/17',
5160         'CY': '31.153.0.0/16',
5161         'CZ': '88.100.0.0/14',
5162         'DE': '53.0.0.0/8',
5163         'DJ': '197.241.0.0/17',
5164         'DK': '87.48.0.0/12',
5165         'DM': '192.243.48.0/20',
5166         'DO': '152.166.0.0/15',
5167         'DZ': '41.96.0.0/12',
5168         'EC': '186.68.0.0/15',
5169         'EE': '90.190.0.0/15',
5170         'EG': '156.160.0.0/11',
5171         'ER': '196.200.96.0/20',
5172         'ES': '88.0.0.0/11',
5173         'ET': '196.188.0.0/14',
5174         'EU': '2.16.0.0/13',
5175         'FI': '91.152.0.0/13',
5176         'FJ': '144.120.0.0/16',
5177         'FK': '80.73.208.0/21',
5178         'FM': '119.252.112.0/20',
5179         'FO': '88.85.32.0/19',
5180         'FR': '90.0.0.0/9',
5181         'GA': '41.158.0.0/15',
5182         'GB': '25.0.0.0/8',
5183         'GD': '74.122.88.0/21',
5184         'GE': '31.146.0.0/16',
5185         'GF': '161.22.64.0/18',
5186         'GG': '62.68.160.0/19',
5187         'GH': '154.160.0.0/12',
5188         'GI': '95.164.0.0/16',
5189         'GL': '88.83.0.0/19',
5190         'GM': '160.182.0.0/15',
5191         'GN': '197.149.192.0/18',
5192         'GP': '104.250.0.0/19',
5193         'GQ': '105.235.224.0/20',
5194         'GR': '94.64.0.0/13',
5195         'GT': '168.234.0.0/16',
5196         'GU': '168.123.0.0/16',
5197         'GW': '197.214.80.0/20',
5198         'GY': '181.41.64.0/18',
5199         'HK': '113.252.0.0/14',
5200         'HN': '181.210.0.0/16',
5201         'HR': '93.136.0.0/13',
5202         'HT': '148.102.128.0/17',
5203         'HU': '84.0.0.0/14',
5204         'ID': '39.192.0.0/10',
5205         'IE': '87.32.0.0/12',
5206         'IL': '79.176.0.0/13',
5207         'IM': '5.62.80.0/20',
5208         'IN': '117.192.0.0/10',
5209         'IO': '203.83.48.0/21',
5210         'IQ': '37.236.0.0/14',
5211         'IR': '2.176.0.0/12',
5212         'IS': '82.221.0.0/16',
5213         'IT': '79.0.0.0/10',
5214         'JE': '87.244.64.0/18',
5215         'JM': '72.27.0.0/17',
5216         'JO': '176.29.0.0/16',
5217         'JP': '133.0.0.0/8',
5218         'KE': '105.48.0.0/12',
5219         'KG': '158.181.128.0/17',
5220         'KH': '36.37.128.0/17',
5221         'KI': '103.25.140.0/22',
5222         'KM': '197.255.224.0/20',
5223         'KN': '198.167.192.0/19',
5224         'KP': '175.45.176.0/22',
5225         'KR': '175.192.0.0/10',
5226         'KW': '37.36.0.0/14',
5227         'KY': '64.96.0.0/15',
5228         'KZ': '2.72.0.0/13',
5229         'LA': '115.84.64.0/18',
5230         'LB': '178.135.0.0/16',
5231         'LC': '24.92.144.0/20',
5232         'LI': '82.117.0.0/19',
5233         'LK': '112.134.0.0/15',
5234         'LR': '102.183.0.0/16',
5235         'LS': '129.232.0.0/17',
5236         'LT': '78.56.0.0/13',
5237         'LU': '188.42.0.0/16',
5238         'LV': '46.109.0.0/16',
5239         'LY': '41.252.0.0/14',
5240         'MA': '105.128.0.0/11',
5241         'MC': '88.209.64.0/18',
5242         'MD': '37.246.0.0/16',
5243         'ME': '178.175.0.0/17',
5244         'MF': '74.112.232.0/21',
5245         'MG': '154.126.0.0/17',
5246         'MH': '117.103.88.0/21',
5247         'MK': '77.28.0.0/15',
5248         'ML': '154.118.128.0/18',
5249         'MM': '37.111.0.0/17',
5250         'MN': '49.0.128.0/17',
5251         'MO': '60.246.0.0/16',
5252         'MP': '202.88.64.0/20',
5253         'MQ': '109.203.224.0/19',
5254         'MR': '41.188.64.0/18',
5255         'MS': '208.90.112.0/22',
5256         'MT': '46.11.0.0/16',
5257         'MU': '105.16.0.0/12',
5258         'MV': '27.114.128.0/18',
5259         'MW': '102.70.0.0/15',
5260         'MX': '187.192.0.0/11',
5261         'MY': '175.136.0.0/13',
5262         'MZ': '197.218.0.0/15',
5263         'NA': '41.182.0.0/16',
5264         'NC': '101.101.0.0/18',
5265         'NE': '197.214.0.0/18',
5266         'NF': '203.17.240.0/22',
5267         'NG': '105.112.0.0/12',
5268         'NI': '186.76.0.0/15',
5269         'NL': '145.96.0.0/11',
5270         'NO': '84.208.0.0/13',
5271         'NP': '36.252.0.0/15',
5272         'NR': '203.98.224.0/19',
5273         'NU': '49.156.48.0/22',
5274         'NZ': '49.224.0.0/14',
5275         'OM': '5.36.0.0/15',
5276         'PA': '186.72.0.0/15',
5277         'PE': '186.160.0.0/14',
5278         'PF': '123.50.64.0/18',
5279         'PG': '124.240.192.0/19',
5280         'PH': '49.144.0.0/13',
5281         'PK': '39.32.0.0/11',
5282         'PL': '83.0.0.0/11',
5283         'PM': '70.36.0.0/20',
5284         'PR': '66.50.0.0/16',
5285         'PS': '188.161.0.0/16',
5286         'PT': '85.240.0.0/13',
5287         'PW': '202.124.224.0/20',
5288         'PY': '181.120.0.0/14',
5289         'QA': '37.210.0.0/15',
5290         'RE': '102.35.0.0/16',
5291         'RO': '79.112.0.0/13',
5292         'RS': '93.86.0.0/15',
5293         'RU': '5.136.0.0/13',
5294         'RW': '41.186.0.0/16',
5295         'SA': '188.48.0.0/13',
5296         'SB': '202.1.160.0/19',
5297         'SC': '154.192.0.0/11',
5298         'SD': '102.120.0.0/13',
5299         'SE': '78.64.0.0/12',
5300         'SG': '8.128.0.0/10',
5301         'SI': '188.196.0.0/14',
5302         'SK': '78.98.0.0/15',
5303         'SL': '102.143.0.0/17',
5304         'SM': '89.186.32.0/19',
5305         'SN': '41.82.0.0/15',
5306         'SO': '154.115.192.0/18',
5307         'SR': '186.179.128.0/17',
5308         'SS': '105.235.208.0/21',
5309         'ST': '197.159.160.0/19',
5310         'SV': '168.243.0.0/16',
5311         'SX': '190.102.0.0/20',
5312         'SY': '5.0.0.0/16',
5313         'SZ': '41.84.224.0/19',
5314         'TC': '65.255.48.0/20',
5315         'TD': '154.68.128.0/19',
5316         'TG': '196.168.0.0/14',
5317         'TH': '171.96.0.0/13',
5318         'TJ': '85.9.128.0/18',
5319         'TK': '27.96.24.0/21',
5320         'TL': '180.189.160.0/20',
5321         'TM': '95.85.96.0/19',
5322         'TN': '197.0.0.0/11',
5323         'TO': '175.176.144.0/21',
5324         'TR': '78.160.0.0/11',
5325         'TT': '186.44.0.0/15',
5326         'TV': '202.2.96.0/19',
5327         'TW': '120.96.0.0/11',
5328         'TZ': '156.156.0.0/14',
5329         'UA': '37.52.0.0/14',
5330         'UG': '102.80.0.0/13',
5331         'US': '6.0.0.0/8',
5332         'UY': '167.56.0.0/13',
5333         'UZ': '84.54.64.0/18',
5334         'VA': '212.77.0.0/19',
5335         'VC': '207.191.240.0/21',
5336         'VE': '186.88.0.0/13',
5337         'VG': '66.81.192.0/20',
5338         'VI': '146.226.0.0/16',
5339         'VN': '14.160.0.0/11',
5340         'VU': '202.80.32.0/20',
5341         'WF': '117.20.32.0/21',
5342         'WS': '202.4.32.0/19',
5343         'YE': '134.35.0.0/16',
5344         'YT': '41.242.116.0/22',
5345         'ZA': '41.0.0.0/11',
5346         'ZM': '102.144.0.0/13',
5347         'ZW': '102.177.192.0/18',
5348     }
5349
5350     @classmethod
5351     def random_ipv4(cls, code_or_block):
5352         if len(code_or_block) == 2:
5353             block = cls._country_ip_map.get(code_or_block.upper())
5354             if not block:
5355                 return None
5356         else:
5357             block = code_or_block
5358         addr, preflen = block.split('/')
5359         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5360         addr_max = addr_min | (0xffffffff >> int(preflen))
5361         return compat_str(socket.inet_ntoa(
5362             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5363
5364
5365 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5366     def __init__(self, proxies=None):
5367         # Set default handlers
5368         for type in ('http', 'https'):
5369             setattr(self, '%s_open' % type,
5370                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5371                         meth(r, proxy, type))
5372         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5373
5374     def proxy_open(self, req, proxy, type):
5375         req_proxy = req.headers.get('Ytdl-request-proxy')
5376         if req_proxy is not None:
5377             proxy = req_proxy
5378             del req.headers['Ytdl-request-proxy']
5379
5380         if proxy == '__noproxy__':
5381             return None  # No Proxy
5382         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5383             req.add_header('Ytdl-socks-proxy', proxy)
5384             # youtube-dlc's http/https handlers do wrapping the socket with socks
5385             return None
5386         return compat_urllib_request.ProxyHandler.proxy_open(
5387             self, req, proxy, type)
5388
5389
5390 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5391 # released into Public Domain
5392 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5393
5394 def long_to_bytes(n, blocksize=0):
5395     """long_to_bytes(n:long, blocksize:int) : string
5396     Convert a long integer to a byte string.
5397
5398     If optional blocksize is given and greater than zero, pad the front of the
5399     byte string with binary zeros so that the length is a multiple of
5400     blocksize.
5401     """
5402     # after much testing, this algorithm was deemed to be the fastest
5403     s = b''
5404     n = int(n)
5405     while n > 0:
5406         s = compat_struct_pack('>I', n & 0xffffffff) + s
5407         n = n >> 32
5408     # strip off leading zeros
5409     for i in range(len(s)):
5410         if s[i] != b'\000'[0]:
5411             break
5412     else:
5413         # only happens when n == 0
5414         s = b'\000'
5415         i = 0
5416     s = s[i:]
5417     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5418     # de-padding being done above, but sigh...
5419     if blocksize > 0 and len(s) % blocksize:
5420         s = (blocksize - len(s) % blocksize) * b'\000' + s
5421     return s
5422
5423
5424 def bytes_to_long(s):
5425     """bytes_to_long(string) : long
5426     Convert a byte string to a long integer.
5427
5428     This is (essentially) the inverse of long_to_bytes().
5429     """
5430     acc = 0
5431     length = len(s)
5432     if length % 4:
5433         extra = (4 - length % 4)
5434         s = b'\000' * extra + s
5435         length = length + extra
5436     for i in range(0, length, 4):
5437         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5438     return acc
5439
5440
5441 def ohdave_rsa_encrypt(data, exponent, modulus):
5442     '''
5443     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5444
5445     Input:
5446         data: data to encrypt, bytes-like object
5447         exponent, modulus: parameter e and N of RSA algorithm, both integer
5448     Output: hex string of encrypted data
5449
5450     Limitation: supports one block encryption only
5451     '''
5452
5453     payload = int(binascii.hexlify(data[::-1]), 16)
5454     encrypted = pow(payload, exponent, modulus)
5455     return '%x' % encrypted
5456
5457
5458 def pkcs1pad(data, length):
5459     """
5460     Padding input data with PKCS#1 scheme
5461
5462     @param {int[]} data        input data
5463     @param {int}   length      target length
5464     @returns {int[]}           padded data
5465     """
5466     if len(data) > length - 11:
5467         raise ValueError('Input data too long for PKCS#1 padding')
5468
5469     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5470     return [0, 2] + pseudo_random + [0] + data
5471
5472
5473 def encode_base_n(num, n, table=None):
5474     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5475     if not table:
5476         table = FULL_TABLE[:n]
5477
5478     if n > len(table):
5479         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5480
5481     if num == 0:
5482         return table[0]
5483
5484     ret = ''
5485     while num:
5486         ret = table[num % n] + ret
5487         num = num // n
5488     return ret
5489
5490
5491 def decode_packed_codes(code):
5492     mobj = re.search(PACKED_CODES_RE, code)
5493     obfuscated_code, base, count, symbols = mobj.groups()
5494     base = int(base)
5495     count = int(count)
5496     symbols = symbols.split('|')
5497     symbol_table = {}
5498
5499     while count:
5500         count -= 1
5501         base_n_count = encode_base_n(count, base)
5502         symbol_table[base_n_count] = symbols[count] or base_n_count
5503
5504     return re.sub(
5505         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5506         obfuscated_code)
5507
5508
5509 def caesar(s, alphabet, shift):
5510     if shift == 0:
5511         return s
5512     l = len(alphabet)
5513     return ''.join(
5514         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5515         for c in s)
5516
5517
5518 def rot47(s):
5519     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5520
5521
5522 def parse_m3u8_attributes(attrib):
5523     info = {}
5524     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5525         if val.startswith('"'):
5526             val = val[1:-1]
5527         info[key] = val
5528     return info
5529
5530
5531 def urshift(val, n):
5532     return val >> n if val >= 0 else (val + 0x100000000) >> n
5533
5534
5535 # Based on png2str() written by @gdkchan and improved by @yokrysty
5536 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5537 def decode_png(png_data):
5538     # Reference: https://www.w3.org/TR/PNG/
5539     header = png_data[8:]
5540
5541     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5542         raise IOError('Not a valid PNG file.')
5543
5544     int_map = {1: '>B', 2: '>H', 4: '>I'}
5545     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5546
5547     chunks = []
5548
5549     while header:
5550         length = unpack_integer(header[:4])
5551         header = header[4:]
5552
5553         chunk_type = header[:4]
5554         header = header[4:]
5555
5556         chunk_data = header[:length]
5557         header = header[length:]
5558
5559         header = header[4:]  # Skip CRC
5560
5561         chunks.append({
5562             'type': chunk_type,
5563             'length': length,
5564             'data': chunk_data
5565         })
5566
5567     ihdr = chunks[0]['data']
5568
5569     width = unpack_integer(ihdr[:4])
5570     height = unpack_integer(ihdr[4:8])
5571
5572     idat = b''
5573
5574     for chunk in chunks:
5575         if chunk['type'] == b'IDAT':
5576             idat += chunk['data']
5577
5578     if not idat:
5579         raise IOError('Unable to read PNG data.')
5580
5581     decompressed_data = bytearray(zlib.decompress(idat))
5582
5583     stride = width * 3
5584     pixels = []
5585
5586     def _get_pixel(idx):
5587         x = idx % stride
5588         y = idx // stride
5589         return pixels[y][x]
5590
5591     for y in range(height):
5592         basePos = y * (1 + stride)
5593         filter_type = decompressed_data[basePos]
5594
5595         current_row = []
5596
5597         pixels.append(current_row)
5598
5599         for x in range(stride):
5600             color = decompressed_data[1 + basePos + x]
5601             basex = y * stride + x
5602             left = 0
5603             up = 0
5604
5605             if x > 2:
5606                 left = _get_pixel(basex - 3)
5607             if y > 0:
5608                 up = _get_pixel(basex - stride)
5609
5610             if filter_type == 1:  # Sub
5611                 color = (color + left) & 0xff
5612             elif filter_type == 2:  # Up
5613                 color = (color + up) & 0xff
5614             elif filter_type == 3:  # Average
5615                 color = (color + ((left + up) >> 1)) & 0xff
5616             elif filter_type == 4:  # Paeth
5617                 a = left
5618                 b = up
5619                 c = 0
5620
5621                 if x > 2 and y > 0:
5622                     c = _get_pixel(basex - stride - 3)
5623
5624                 p = a + b - c
5625
5626                 pa = abs(p - a)
5627                 pb = abs(p - b)
5628                 pc = abs(p - c)
5629
5630                 if pa <= pb and pa <= pc:
5631                     color = (color + a) & 0xff
5632                 elif pb <= pc:
5633                     color = (color + b) & 0xff
5634                 else:
5635                     color = (color + c) & 0xff
5636
5637             current_row.append(color)
5638
5639     return width, height, pixels
5640
5641
5642 def write_xattr(path, key, value):
5643     # This mess below finds the best xattr tool for the job
5644     try:
5645         # try the pyxattr module...
5646         import xattr
5647
5648         if hasattr(xattr, 'set'):  # pyxattr
5649             # Unicode arguments are not supported in python-pyxattr until
5650             # version 0.5.0
5651             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5652             pyxattr_required_version = '0.5.0'
5653             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5654                 # TODO: fallback to CLI tools
5655                 raise XAttrUnavailableError(
5656                     'python-pyxattr is detected but is too old. '
5657                     'youtube-dlc requires %s or above while your version is %s. '
5658                     'Falling back to other xattr implementations' % (
5659                         pyxattr_required_version, xattr.__version__))
5660
5661             setxattr = xattr.set
5662         else:  # xattr
5663             setxattr = xattr.setxattr
5664
5665         try:
5666             setxattr(path, key, value)
5667         except EnvironmentError as e:
5668             raise XAttrMetadataError(e.errno, e.strerror)
5669
5670     except ImportError:
5671         if compat_os_name == 'nt':
5672             # Write xattrs to NTFS Alternate Data Streams:
5673             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5674             assert ':' not in key
5675             assert os.path.exists(path)
5676
5677             ads_fn = path + ':' + key
5678             try:
5679                 with open(ads_fn, 'wb') as f:
5680                     f.write(value)
5681             except EnvironmentError as e:
5682                 raise XAttrMetadataError(e.errno, e.strerror)
5683         else:
5684             user_has_setfattr = check_executable('setfattr', ['--version'])
5685             user_has_xattr = check_executable('xattr', ['-h'])
5686
5687             if user_has_setfattr or user_has_xattr:
5688
5689                 value = value.decode('utf-8')
5690                 if user_has_setfattr:
5691                     executable = 'setfattr'
5692                     opts = ['-n', key, '-v', value]
5693                 elif user_has_xattr:
5694                     executable = 'xattr'
5695                     opts = ['-w', key, value]
5696
5697                 cmd = ([encodeFilename(executable, True)]
5698                        + [encodeArgument(o) for o in opts]
5699                        + [encodeFilename(path, True)])
5700
5701                 try:
5702                     p = subprocess.Popen(
5703                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5704                 except EnvironmentError as e:
5705                     raise XAttrMetadataError(e.errno, e.strerror)
5706                 stdout, stderr = p.communicate()
5707                 stderr = stderr.decode('utf-8', 'replace')
5708                 if p.returncode != 0:
5709                     raise XAttrMetadataError(p.returncode, stderr)
5710
5711             else:
5712                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5713                 if sys.platform.startswith('linux'):
5714                     raise XAttrUnavailableError(
5715                         "Couldn't find a tool to set the xattrs. "
5716                         "Install either the python 'pyxattr' or 'xattr' "
5717                         "modules, or the GNU 'attr' package "
5718                         "(which contains the 'setfattr' tool).")
5719                 else:
5720                     raise XAttrUnavailableError(
5721                         "Couldn't find a tool to set the xattrs. "
5722                         "Install either the python 'xattr' module, "
5723                         "or the 'xattr' binary.")
5724
5725
5726 def random_birthday(year_field, month_field, day_field):
5727     start_date = datetime.date(1950, 1, 1)
5728     end_date = datetime.date(1995, 12, 31)
5729     offset = random.randint(0, (end_date - start_date).days)
5730     random_date = start_date + datetime.timedelta(offset)
5731     return {
5732         year_field: str(random_date.year),
5733         month_field: str(random_date.month),
5734         day_field: str(random_date.day),
5735     }
5736
5737
5738 # Templates for internet shortcut files, which are plain text files.
5739 DOT_URL_LINK_TEMPLATE = '''
5740 [InternetShortcut]
5741 URL=%(url)s
5742 '''.lstrip()
5743
5744 DOT_WEBLOC_LINK_TEMPLATE = '''
5745 <?xml version="1.0" encoding="UTF-8"?>
5746 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5747 <plist version="1.0">
5748 <dict>
5749 \t<key>URL</key>
5750 \t<string>%(url)s</string>
5751 </dict>
5752 </plist>
5753 '''.lstrip()
5754
5755 DOT_DESKTOP_LINK_TEMPLATE = '''
5756 [Desktop Entry]
5757 Encoding=UTF-8
5758 Name=%(filename)s
5759 Type=Link
5760 URL=%(url)s
5761 Icon=text-html
5762 '''.lstrip()
5763
5764
5765 def iri_to_uri(iri):
5766     """
5767     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5768
5769     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5770     """
5771
5772     iri_parts = compat_urllib_parse_urlparse(iri)
5773
5774     if '[' in iri_parts.netloc:
5775         raise ValueError('IPv6 URIs are not, yet, supported.')
5776         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5777
5778     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5779
5780     net_location = ''
5781     if iri_parts.username:
5782         net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
5783         if iri_parts.password is not None:
5784             net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
5785         net_location += '@'
5786
5787     net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
5788     # The 'idna' encoding produces ASCII text.
5789     if iri_parts.port is not None and iri_parts.port != 80:
5790         net_location += ':' + str(iri_parts.port)
5791
5792     return compat_urllib_parse_urlunparse(
5793         (iri_parts.scheme,
5794             net_location,
5795
5796             compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5797
5798             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5799             compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5800
5801             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5802             compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5803
5804             compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5805
5806     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5807
5808
5809 def to_high_limit_path(path):
5810     if sys.platform in ['win32', 'cygwin']:
5811         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5812         return r'\\?\ '.rstrip() + os.path.abspath(path)
5813
5814     return path
5815
5816
5817 def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
5818     val = obj.get(field, default)
5819     if func and val not in ignore:
5820         val = func(val)
5821     return template % val if val not in ignore else default