yt_dlp/extractor/common.py

   1 import base64
   2 import collections
   3 import getpass
   4 import hashlib
   5 import http.client
   6 import http.cookiejar
   7 import http.cookies
   8 import inspect
   9 import itertools
  10 import json
  11 import math
  12 import netrc
  13 import os
  14 import random
  15 import re
  16 import sys
  17 import time
  18 import types
  19 import urllib.parse
  20 import urllib.request
  21 import xml.etree.ElementTree
  22
  23 from ..compat import functools  # isort: split
  24 from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
  25 from ..cookies import LenientSimpleCookie
  26 from ..downloader.f4m import get_base_url, remove_encrypted_media
  27 from ..utils import (
  28     IDENTITY,
  29     JSON_LD_RE,
  30     NO_DEFAULT,
  31     ExtractorError,
  32     FormatSorter,
  33     GeoRestrictedError,
  34     GeoUtils,
  35     HEADRequest,
  36     LenientJSONDecoder,
  37     RegexNotFoundError,
  38     RetryManager,
  39     UnsupportedError,
  40     age_restricted,
  41     base_url,
  42     bug_reports_message,
  43     classproperty,
  44     clean_html,
  45     deprecation_warning,
  46     determine_ext,
  47     dict_get,
  48     encode_data_uri,
  49     error_to_compat_str,
  50     extract_attributes,
  51     filter_dict,
  52     fix_xml_ampersands,
  53     float_or_none,
  54     format_field,
  55     int_or_none,
  56     join_nonempty,
  57     js_to_json,
  58     mimetype2ext,
  59     network_exceptions,
  60     orderedSet,
  61     parse_bitrate,
  62     parse_codecs,
  63     parse_duration,
  64     parse_iso8601,
  65     parse_m3u8_attributes,
  66     parse_resolution,
  67     sanitize_filename,
  68     sanitize_url,
  69     sanitized_Request,
  70     smuggle_url,
  71     str_or_none,
  72     str_to_int,
  73     strip_or_none,
  74     traverse_obj,
  75     truncate_string,
  76     try_call,
  77     try_get,
  78     unescapeHTML,
  79     unified_strdate,
  80     unified_timestamp,
  81     update_Request,
  82     update_url_query,
  83     url_basename,
  84     url_or_none,
  85     urlhandle_detect_ext,
  86     urljoin,
  87     variadic,
  88     xpath_element,
  89     xpath_text,
  90     xpath_with_ns,
  91 )
  92
  93
  94 class InfoExtractor:
  95     """Information Extractor class.
  96
  97     Information extractors are the classes that, given a URL, extract
  98     information about the video (or videos) the URL refers to. This
  99     information includes the real video URL, the video title, author and
 100     others. The information is stored in a dictionary which is then
 101     passed to the YoutubeDL. The YoutubeDL processes this
 102     information possibly downloading the video to the file system, among
 103     other possible outcomes.
 104
 105     The type field determines the type of the result.
 106     By far the most common value (and the default if _type is missing) is
 107     "video", which indicates a single video.
 108
 109     For a video, the dictionaries must include the following fields:
 110
 111     id:             Video identifier.
 112     title:          Video title, unescaped. Set to an empty string if video has
 113                     no title as opposed to "None" which signifies that the
 114                     extractor failed to obtain a title
 115
 116     Additionally, it must contain either a formats entry or a url one:
 117
 118     formats:        A list of dictionaries for each format available, ordered
 119                     from worst to best quality.
 120
 121                     Potential fields:
 122                     * url        The mandatory URL representing the media:
 123                                    for plain file media - HTTP URL of this file,
 124                                    for RTMP - RTMP URL,
 125                                    for HLS - URL of the M3U8 media playlist,
 126                                    for HDS - URL of the F4M manifest,
 127                                    for DASH
 128                                      - HTTP URL to plain file media (in case of
 129                                        unfragmented media)
 130                                      - URL of the MPD manifest or base URL
 131                                        representing the media if MPD manifest
 132                                        is parsed from a string (in case of
 133                                        fragmented media)
 134                                    for MSS - URL of the ISM manifest.
 135                     * request_data  Data to send in POST request to the URL
 136                     * manifest_url
 137                                  The URL of the manifest file in case of
 138                                  fragmented media:
 139                                    for HLS - URL of the M3U8 master playlist,
 140                                    for HDS - URL of the F4M manifest,
 141                                    for DASH - URL of the MPD manifest,
 142                                    for MSS - URL of the ISM manifest.
 143                     * manifest_stream_number  (For internal use only)
 144                                  The index of the stream in the manifest file
 145                     * ext        Will be calculated from URL if missing
 146                     * format     A human-readable description of the format
 147                                  ("mp4 container with h264/opus").
 148                                  Calculated from the format_id, width, height,
 149                                  and format_note fields if missing.
 150                     * format_id  A short description of the format
 151                                  ("mp4_h264_opus" or "19").
 152                                 Technically optional, but strongly recommended.
 153                     * format_note Additional info about the format
 154                                  ("3D" or "DASH video")
 155                     * width      Width of the video, if known
 156                     * height     Height of the video, if known
 157                     * aspect_ratio  Aspect ratio of the video, if known
 158                                  Automatically calculated from width and height
 159                     * resolution Textual description of width and height
 160                                  Automatically calculated from width and height
 161                     * dynamic_range The dynamic range of the video. One of:
 162                                  "SDR" (None), "HDR10", "HDR10+", "HDR12", "HLG", "DV"
 163                     * tbr        Average bitrate of audio and video in KBit/s
 164                     * abr        Average audio bitrate in KBit/s
 165                     * acodec     Name of the audio codec in use
 166                     * asr        Audio sampling rate in Hertz
 167                     * audio_channels  Number of audio channels
 168                     * vbr        Average video bitrate in KBit/s
 169                     * fps        Frame rate
 170                     * vcodec     Name of the video codec in use
 171                     * container  Name of the container format
 172                     * filesize   The number of bytes, if known in advance
 173                     * filesize_approx  An estimate for the number of bytes
 174                     * player_url SWF Player URL (used for rtmpdump).
 175                     * protocol   The protocol that will be used for the actual
 176                                  download, lower-case. One of "http", "https" or
 177                                  one of the protocols defined in downloader.PROTOCOL_MAP
 178                     * fragment_base_url
 179                                  Base URL for fragments. Each fragment's path
 180                                  value (if present) will be relative to
 181                                  this URL.
 182                     * fragments  A list of fragments of a fragmented media.
 183                                  Each fragment entry must contain either an url
 184                                  or a path. If an url is present it should be
 185                                  considered by a client. Otherwise both path and
 186                                  fragment_base_url must be present. Here is
 187                                  the list of all potential fields:
 188                                  * "url" - fragment's URL
 189                                  * "path" - fragment's path relative to
 190                                             fragment_base_url
 191                                  * "duration" (optional, int or float)
 192                                  * "filesize" (optional, int)
 193                     * is_from_start  Is a live format that can be downloaded
 194                                 from the start. Boolean
 195                     * preference Order number of this format. If this field is
 196                                  present and not None, the formats get sorted
 197                                  by this field, regardless of all other values.
 198                                  -1 for default (order by other properties),
 199                                  -2 or smaller for less than default.
 200                                  < -1000 to hide the format (if there is
 201                                     another one which is strictly better)
 202                     * language   Language code, e.g. "de" or "en-US".
 203                     * language_preference  Is this in the language mentioned in
 204                                  the URL?
 205                                  10 if it's what the URL is about,
 206                                  -1 for default (don't know),
 207                                  -10 otherwise, other values reserved for now.
 208                     * quality    Order number of the video quality of this
 209                                  format, irrespective of the file format.
 210                                  -1 for default (order by other properties),
 211                                  -2 or smaller for less than default.
 212                     * source_preference  Order number for this video source
 213                                   (quality takes higher priority)
 214                                  -1 for default (order by other properties),
 215                                  -2 or smaller for less than default.
 216                     * http_headers  A dictionary of additional HTTP headers
 217                                  to add to the request.
 218                     * stretched_ratio  If given and not 1, indicates that the
 219                                  video's pixels are not square.
 220                                  width : height ratio as float.
 221                     * no_resume  The server does not support resuming the
 222                                  (HTTP or RTMP) download. Boolean.
 223                     * has_drm    The format has DRM and cannot be downloaded. Boolean
 224                     * extra_param_to_segment_url  A query string to append to each
 225                                  fragment's URL, or to update each existing query string
 226                                  with. Only applied by the native HLS/DASH downloaders.
 227                     * hls_aes    A dictionary of HLS AES-128 decryption information
 228                                  used by the native HLS downloader to override the
 229                                  values in the media playlist when an '#EXT-X-KEY' tag
 230                                  is present in the playlist:
 231                                  * uri  The URI from which the key will be downloaded
 232                                  * key  The key (as hex) used to decrypt fragments.
 233                                         If `key` is given, any key URI will be ignored
 234                                  * iv   The IV (as hex) used to decrypt fragments
 235                     * downloader_options  A dictionary of downloader options
 236                                  (For internal use only)
 237                                  * http_chunk_size Chunk size for HTTP downloads
 238                                  * ffmpeg_args     Extra arguments for ffmpeg downloader
 239                     RTMP formats can also have the additional fields: page_url,
 240                     app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
 241                     rtmp_protocol, rtmp_real_time
 242
 243     url:            Final video URL.
 244     ext:            Video filename extension.
 245     format:         The video format, defaults to ext (used for --get-format)
 246     player_url:     SWF Player URL (used for rtmpdump).
 247
 248     The following fields are optional:
 249
 250     direct:         True if a direct video file was given (must only be set by GenericIE)
 251     alt_title:      A secondary title of the video.
 252     display_id      An alternative identifier for the video, not necessarily
 253                     unique, but available before title. Typically, id is
 254                     something like "4234987", title "Dancing naked mole rats",
 255                     and display_id "dancing-naked-mole-rats"
 256     thumbnails:     A list of dictionaries, with the following entries:
 257                         * "id" (optional, string) - Thumbnail format ID
 258                         * "url"
 259                         * "preference" (optional, int) - quality of the image
 260                         * "width" (optional, int)
 261                         * "height" (optional, int)
 262                         * "resolution" (optional, string "{width}x{height}",
 263                                         deprecated)
 264                         * "filesize" (optional, int)
 265                         * "http_headers" (dict) - HTTP headers for the request
 266     thumbnail:      Full URL to a video thumbnail image.
 267     description:    Full video description.
 268     uploader:       Full name of the video uploader.
 269     license:        License name the video is licensed under.
 270     creator:        The creator of the video.
 271     timestamp:      UNIX timestamp of the moment the video was uploaded
 272     upload_date:    Video upload date in UTC (YYYYMMDD).
 273                     If not explicitly set, calculated from timestamp
 274     release_timestamp: UNIX timestamp of the moment the video was released.
 275                     If it is not clear whether to use timestamp or this, use the former
 276     release_date:   The date (YYYYMMDD) when the video was released in UTC.
 277                     If not explicitly set, calculated from release_timestamp
 278     modified_timestamp: UNIX timestamp of the moment the video was last modified.
 279     modified_date:   The date (YYYYMMDD) when the video was last modified in UTC.
 280                     If not explicitly set, calculated from modified_timestamp
 281     uploader_id:    Nickname or id of the video uploader.
 282     uploader_url:   Full URL to a personal webpage of the video uploader.
 283     channel:        Full name of the channel the video is uploaded on.
 284                     Note that channel fields may or may not repeat uploader
 285                     fields. This depends on a particular extractor.
 286     channel_id:     Id of the channel.
 287     channel_url:    Full URL to a channel webpage.
 288     channel_follower_count: Number of followers of the channel.
 289     location:       Physical location where the video was filmed.
 290     subtitles:      The available subtitles as a dictionary in the format
 291                     {tag: subformats}. "tag" is usually a language code, and
 292                     "subformats" is a list sorted from lower to higher
 293                     preference, each element is a dictionary with the "ext"
 294                     entry and one of:
 295                         * "data": The subtitles file contents
 296                         * "url": A URL pointing to the subtitles file
 297                     It can optionally also have:
 298                         * "name": Name or description of the subtitles
 299                         * "http_headers": A dictionary of additional HTTP headers
 300                                   to add to the request.
 301                     "ext" will be calculated from URL if missing
 302     automatic_captions: Like 'subtitles'; contains automatically generated
 303                     captions instead of normal subtitles
 304     duration:       Length of the video in seconds, as an integer or float.
 305     view_count:     How many users have watched the video on the platform.
 306     concurrent_view_count: How many users are currently watching the video on the platform.
 307     like_count:     Number of positive ratings of the video
 308     dislike_count:  Number of negative ratings of the video
 309     repost_count:   Number of reposts of the video
 310     average_rating: Average rating given by users, the scale used depends on the webpage
 311     comment_count:  Number of comments on the video
 312     comments:       A list of comments, each with one or more of the following
 313                     properties (all but one of text or html optional):
 314                         * "author" - human-readable name of the comment author
 315                         * "author_id" - user ID of the comment author
 316                         * "author_thumbnail" - The thumbnail of the comment author
 317                         * "id" - Comment ID
 318                         * "html" - Comment as HTML
 319                         * "text" - Plain text of the comment
 320                         * "timestamp" - UNIX timestamp of comment
 321                         * "parent" - ID of the comment this one is replying to.
 322                                      Set to "root" to indicate that this is a
 323                                      comment to the original video.
 324                         * "like_count" - Number of positive ratings of the comment
 325                         * "dislike_count" - Number of negative ratings of the comment
 326                         * "is_favorited" - Whether the comment is marked as
 327                                            favorite by the video uploader
 328                         * "author_is_uploader" - Whether the comment is made by
 329                                                  the video uploader
 330     age_limit:      Age restriction for the video, as an integer (years)
 331     webpage_url:    The URL to the video webpage, if given to yt-dlp it
 332                     should allow to get the same result again. (It will be set
 333                     by YoutubeDL if it's missing)
 334     categories:     A list of categories that the video falls in, for example
 335                     ["Sports", "Berlin"]
 336     tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
 337     cast:           A list of the video cast
 338     is_live:        True, False, or None (=unknown). Whether this video is a
 339                     live stream that goes on instead of a fixed-length video.
 340     was_live:       True, False, or None (=unknown). Whether this video was
 341                     originally a live stream.
 342     live_status:    None (=unknown), 'is_live', 'is_upcoming', 'was_live', 'not_live',
 343                     or 'post_live' (was live, but VOD is not yet processed)
 344                     If absent, automatically set from is_live, was_live
 345     start_time:     Time in seconds where the reproduction should start, as
 346                     specified in the URL.
 347     end_time:       Time in seconds where the reproduction should end, as
 348                     specified in the URL.
 349     chapters:       A list of dictionaries, with the following entries:
 350                         * "start_time" - The start time of the chapter in seconds
 351                         * "end_time" - The end time of the chapter in seconds
 352                         * "title" (optional, string)
 353     playable_in_embed: Whether this video is allowed to play in embedded
 354                     players on other sites. Can be True (=always allowed),
 355                     False (=never allowed), None (=unknown), or a string
 356                     specifying the criteria for embedability; e.g. 'whitelist'
 357     availability:   Under what condition the video is available. One of
 358                     'private', 'premium_only', 'subscriber_only', 'needs_auth',
 359                     'unlisted' or 'public'. Use 'InfoExtractor._availability'
 360                     to set it
 361     _old_archive_ids: A list of old archive ids needed for backward compatibility
 362     _format_sort_fields: A list of fields to use for sorting formats
 363     __post_extractor: A function to be called just before the metadata is
 364                     written to either disk, logger or console. The function
 365                     must return a dict which will be added to the info_dict.
 366                     This is useful for additional information that is
 367                     time-consuming to extract. Note that the fields thus
 368                     extracted will not be available to output template and
 369                     match_filter. So, only "comments" and "comment_count" are
 370                     currently allowed to be extracted via this method.
 371
 372     The following fields should only be used when the video belongs to some logical
 373     chapter or section:
 374
 375     chapter:        Name or title of the chapter the video belongs to.
 376     chapter_number: Number of the chapter the video belongs to, as an integer.
 377     chapter_id:     Id of the chapter the video belongs to, as a unicode string.
 378
 379     The following fields should only be used when the video is an episode of some
 380     series, programme or podcast:
 381
 382     series:         Title of the series or programme the video episode belongs to.
 383     series_id:      Id of the series or programme the video episode belongs to, as a unicode string.
 384     season:         Title of the season the video episode belongs to.
 385     season_number:  Number of the season the video episode belongs to, as an integer.
 386     season_id:      Id of the season the video episode belongs to, as a unicode string.
 387     episode:        Title of the video episode. Unlike mandatory video title field,
 388                     this field should denote the exact title of the video episode
 389                     without any kind of decoration.
 390     episode_number: Number of the video episode within a season, as an integer.
 391     episode_id:     Id of the video episode, as a unicode string.
 392
 393     The following fields should only be used when the media is a track or a part of
 394     a music album:
 395
 396     track:          Title of the track.
 397     track_number:   Number of the track within an album or a disc, as an integer.
 398     track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
 399                     as a unicode string.
 400     artist:         Artist(s) of the track.
 401     genre:          Genre(s) of the track.
 402     album:          Title of the album the track belongs to.
 403     album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
 404     album_artist:   List of all artists that appeared on the album (e.g.
 405                     "Ash Borer / Fell Voices" or "Various Artists", useful for splits
 406                     and compilations).
 407     disc_number:    Number of the disc or other physical medium the track belongs to,
 408                     as an integer.
 409     release_year:   Year (YYYY) when the album was released.
 410     composer:       Composer of the piece
 411
 412     The following fields should only be set for clips that should be cut from the original video:
 413
 414     section_start:  Start time of the section in seconds
 415     section_end:    End time of the section in seconds
 416
 417     The following fields should only be set for storyboards:
 418     rows:           Number of rows in each storyboard fragment, as an integer
 419     columns:        Number of columns in each storyboard fragment, as an integer
 420
 421     Unless mentioned otherwise, the fields should be Unicode strings.
 422
 423     Unless mentioned otherwise, None is equivalent to absence of information.
 424
 425
 426     _type "playlist" indicates multiple videos.
 427     There must be a key "entries", which is a list, an iterable, or a PagedList
 428     object, each element of which is a valid dictionary by this specification.
 429
 430     Additionally, playlists can have "id", "title", and any other relevant
 431     attributes with the same semantics as videos (see above).
 432
 433     It can also have the following optional fields:
 434
 435     playlist_count: The total number of videos in a playlist. If not given,
 436                     YoutubeDL tries to calculate it from "entries"
 437
 438
 439     _type "multi_video" indicates that there are multiple videos that
 440     form a single show, for example multiple acts of an opera or TV episode.
 441     It must have an entries key like a playlist and contain all the keys
 442     required for a video at the same time.
 443
 444
 445     _type "url" indicates that the video must be extracted from another
 446     location, possibly by a different extractor. Its only required key is:
 447     "url" - the next URL to extract.
 448     The key "ie_key" can be set to the class name (minus the trailing "IE",
 449     e.g. "Youtube") if the extractor class is known in advance.
 450     Additionally, the dictionary may have any properties of the resolved entity
 451     known in advance, for example "title" if the title of the referred video is
 452     known ahead of time.
 453
 454
 455     _type "url_transparent" entities have the same specification as "url", but
 456     indicate that the given additional information is more precise than the one
 457     associated with the resolved URL.
 458     This is useful when a site employs a video service that hosts the video and
 459     its technical metadata, but that video service does not embed a useful
 460     title, description etc.
 461
 462
 463     Subclasses of this should also be added to the list of extractors and
 464     should define a _VALID_URL regexp and re-define the _real_extract() and
 465     (optionally) _real_initialize() methods.
 466
 467     Subclasses may also override suitable() if necessary, but ensure the function
 468     signature is preserved and that this function imports everything it needs
 469     (except other extractors), so that lazy_extractors works correctly.
 470
 471     Subclasses can define a list of _EMBED_REGEX, which will be searched for in
 472     the HTML of Generic webpages. It may also override _extract_embed_urls
 473     or _extract_from_webpage as necessary. While these are normally classmethods,
 474     _extract_from_webpage is allowed to be an instance method.
 475
 476     _extract_from_webpage may raise self.StopExtraction() to stop further
 477     processing of the webpage and obtain exclusive rights to it. This is useful
 478     when the extractor cannot reliably be matched using just the URL,
 479     e.g. invidious/peertube instances
 480
 481     Embed-only extractors can be defined by setting _VALID_URL = False.
 482
 483     To support username + password (or netrc) login, the extractor must define a
 484     _NETRC_MACHINE and re-define _perform_login(username, password) and
 485     (optionally) _initialize_pre_login() methods. The _perform_login method will
 486     be called between _initialize_pre_login and _real_initialize if credentials
 487     are passed by the user. In cases where it is necessary to have the login
 488     process as part of the extraction rather than initialization, _perform_login
 489     can be left undefined.
 490
 491     _GEO_BYPASS attribute may be set to False in order to disable
 492     geo restriction bypass mechanisms for a particular extractor.
 493     Though it won't disable explicit geo restriction bypass based on
 494     country code provided with geo_bypass_country.
 495
 496     _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
 497     countries for this extractor. One of these countries will be used by
 498     geo restriction bypass mechanism right away in order to bypass
 499     geo restriction, of course, if the mechanism is not disabled.
 500
 501     _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
 502     IP blocks in CIDR notation for this extractor. One of these IP blocks
 503     will be used by geo restriction bypass mechanism similarly
 504     to _GEO_COUNTRIES.
 505
 506     The _ENABLED attribute should be set to False for IEs that
 507     are disabled by default and must be explicitly enabled.
 508
 509     The _WORKING attribute should be set to False for broken IEs
 510     in order to warn the users and skip the tests.
 511     """
 512
 513     _ready = False
 514     _downloader = None
 515     _x_forwarded_for_ip = None
 516     _GEO_BYPASS = True
 517     _GEO_COUNTRIES = None
 518     _GEO_IP_BLOCKS = None
 519     _WORKING = True
 520     _ENABLED = True
 521     _NETRC_MACHINE = None
 522     IE_DESC = None
 523     SEARCH_KEY = None
 524     _VALID_URL = None
 525     _EMBED_REGEX = []
 526
 527     def _login_hint(self, method=NO_DEFAULT, netrc=None):
 528         password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
 529         return {
 530             None: '',
 531             'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
 532             'password': f'Use {password_hint}',
 533             'cookies': (
 534                 'Use --cookies-from-browser or --cookies for the authentication. '
 535                 'See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies'),
 536         }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
 537
 538     def __init__(self, downloader=None):
 539         """Constructor. Receives an optional downloader (a YoutubeDL instance).
 540         If a downloader is not passed during initialization,
 541         it must be set using "set_downloader()" before "extract()" is called"""
 542         self._ready = False
 543         self._x_forwarded_for_ip = None
 544         self._printed_messages = set()
 545         self.set_downloader(downloader)
 546
 547     @classmethod
 548     def _match_valid_url(cls, url):
 549         if cls._VALID_URL is False:
 550             return None
 551         # This does not use has/getattr intentionally - we want to know whether
 552         # we have cached the regexp for *this* class, whereas getattr would also
 553         # match the superclass
 554         if '_VALID_URL_RE' not in cls.__dict__:
 555             cls._VALID_URL_RE = re.compile(cls._VALID_URL)
 556         return cls._VALID_URL_RE.match(url)
 557
 558     @classmethod
 559     def suitable(cls, url):
 560         """Receives a URL and returns True if suitable for this IE."""
 561         # This function must import everything it needs (except other extractors),
 562         # so that lazy_extractors works correctly
 563         return cls._match_valid_url(url) is not None
 564
 565     @classmethod
 566     def _match_id(cls, url):
 567         return cls._match_valid_url(url).group('id')
 568
 569     @classmethod
 570     def get_temp_id(cls, url):
 571         try:
 572             return cls._match_id(url)
 573         except (IndexError, AttributeError):
 574             return None
 575
 576     @classmethod
 577     def working(cls):
 578         """Getter method for _WORKING."""
 579         return cls._WORKING
 580
 581     @classmethod
 582     def supports_login(cls):
 583         return bool(cls._NETRC_MACHINE)
 584
 585     def initialize(self):
 586         """Initializes an instance (authentication, etc)."""
 587         self._printed_messages = set()
 588         self._initialize_geo_bypass({
 589             'countries': self._GEO_COUNTRIES,
 590             'ip_blocks': self._GEO_IP_BLOCKS,
 591         })
 592         if not self._ready:
 593             self._initialize_pre_login()
 594             if self.supports_login():
 595                 username, password = self._get_login_info()
 596                 if username:
 597                     self._perform_login(username, password)
 598             elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
 599                 self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}')
 600             self._real_initialize()
 601             self._ready = True
 602
 603     def _initialize_geo_bypass(self, geo_bypass_context):
 604         """
 605         Initialize geo restriction bypass mechanism.
 606
 607         This method is used to initialize geo bypass mechanism based on faking
 608         X-Forwarded-For HTTP header. A random country from provided country list
 609         is selected and a random IP belonging to this country is generated. This
 610         IP will be passed as X-Forwarded-For HTTP header in all subsequent
 611         HTTP requests.
 612
 613         This method will be used for initial geo bypass mechanism initialization
 614         during the instance initialization with _GEO_COUNTRIES and
 615         _GEO_IP_BLOCKS.
 616
 617         You may also manually call it from extractor's code if geo bypass
 618         information is not available beforehand (e.g. obtained during
 619         extraction) or due to some other reason. In this case you should pass
 620         this information in geo bypass context passed as first argument. It may
 621         contain following fields:
 622
 623         countries:  List of geo unrestricted countries (similar
 624                     to _GEO_COUNTRIES)
 625         ip_blocks:  List of geo unrestricted IP blocks in CIDR notation
 626                     (similar to _GEO_IP_BLOCKS)
 627
 628         """
 629         if not self._x_forwarded_for_ip:
 630
 631             # Geo bypass mechanism is explicitly disabled by user
 632             if not self.get_param('geo_bypass', True):
 633                 return
 634
 635             if not geo_bypass_context:
 636                 geo_bypass_context = {}
 637
 638             # Backward compatibility: previously _initialize_geo_bypass
 639             # expected a list of countries, some 3rd party code may still use
 640             # it this way
 641             if isinstance(geo_bypass_context, (list, tuple)):
 642                 geo_bypass_context = {
 643                     'countries': geo_bypass_context,
 644                 }
 645
 646             # The whole point of geo bypass mechanism is to fake IP
 647             # as X-Forwarded-For HTTP header based on some IP block or
 648             # country code.
 649
 650             # Path 1: bypassing based on IP block in CIDR notation
 651
 652             # Explicit IP block specified by user, use it right away
 653             # regardless of whether extractor is geo bypassable or not
 654             ip_block = self.get_param('geo_bypass_ip_block', None)
 655
 656             # Otherwise use random IP block from geo bypass context but only
 657             # if extractor is known as geo bypassable
 658             if not ip_block:
 659                 ip_blocks = geo_bypass_context.get('ip_blocks')
 660                 if self._GEO_BYPASS and ip_blocks:
 661                     ip_block = random.choice(ip_blocks)
 662
 663             if ip_block:
 664                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
 665                 self.write_debug(f'Using fake IP {self._x_forwarded_for_ip} as X-Forwarded-For')
 666                 return
 667
 668             # Path 2: bypassing based on country code
 669
 670             # Explicit country code specified by user, use it right away
 671             # regardless of whether extractor is geo bypassable or not
 672             country = self.get_param('geo_bypass_country', None)
 673
 674             # Otherwise use random country code from geo bypass context but
 675             # only if extractor is known as geo bypassable
 676             if not country:
 677                 countries = geo_bypass_context.get('countries')
 678                 if self._GEO_BYPASS and countries:
 679                     country = random.choice(countries)
 680
 681             if country:
 682                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
 683                 self._downloader.write_debug(
 684                     f'Using fake IP {self._x_forwarded_for_ip} ({country.upper()}) as X-Forwarded-For')
 685
 686     def extract(self, url):
 687         """Extracts URL information and returns it in list of dicts."""
 688         try:
 689             for _ in range(2):
 690                 try:
 691                     self.initialize()
 692                     self.to_screen('Extracting URL: %s' % (
 693                         url if self.get_param('verbose') else truncate_string(url, 100, 20)))
 694                     ie_result = self._real_extract(url)
 695                     if ie_result is None:
 696                         return None
 697                     if self._x_forwarded_for_ip:
 698                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
 699                     subtitles = ie_result.get('subtitles') or {}
 700                     if 'no-live-chat' in self.get_param('compat_opts'):
 701                         for lang in ('live_chat', 'comments', 'danmaku'):
 702                             subtitles.pop(lang, None)
 703                     return ie_result
 704                 except GeoRestrictedError as e:
 705                     if self.__maybe_fake_ip_and_retry(e.countries):
 706                         continue
 707                     raise
 708         except UnsupportedError:
 709             raise
 710         except ExtractorError as e:
 711             e.video_id = e.video_id or self.get_temp_id(url),
 712             e.ie = e.ie or self.IE_NAME,
 713             e.traceback = e.traceback or sys.exc_info()[2]
 714             raise
 715         except http.client.IncompleteRead as e:
 716             raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
 717         except (KeyError, StopIteration) as e:
 718             raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
 719
 720     def __maybe_fake_ip_and_retry(self, countries):
 721         if (not self.get_param('geo_bypass_country', None)
 722                 and self._GEO_BYPASS
 723                 and self.get_param('geo_bypass', True)
 724                 and not self._x_forwarded_for_ip
 725                 and countries):
 726             country_code = random.choice(countries)
 727             self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
 728             if self._x_forwarded_for_ip:
 729                 self.report_warning(
 730                     'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
 731                     % (self._x_forwarded_for_ip, country_code.upper()))
 732                 return True
 733         return False
 734
 735     def set_downloader(self, downloader):
 736         """Sets a YoutubeDL instance as the downloader for this IE."""
 737         self._downloader = downloader
 738
 739     @property
 740     def cache(self):
 741         return self._downloader.cache
 742
 743     @property
 744     def cookiejar(self):
 745         return self._downloader.cookiejar
 746
 747     def _initialize_pre_login(self):
 748         """ Initialization before login. Redefine in subclasses."""
 749         pass
 750
 751     def _perform_login(self, username, password):
 752         """ Login with username and password. Redefine in subclasses."""
 753         pass
 754
 755     def _real_initialize(self):
 756         """Real initialization process. Redefine in subclasses."""
 757         pass
 758
 759     def _real_extract(self, url):
 760         """Real extraction process. Redefine in subclasses."""
 761         raise NotImplementedError('This method must be implemented by subclasses')
 762
 763     @classmethod
 764     def ie_key(cls):
 765         """A string for getting the InfoExtractor with get_info_extractor"""
 766         return cls.__name__[:-2]
 767
 768     @classproperty
 769     def IE_NAME(cls):
 770         return cls.__name__[:-2]
 771
 772     @staticmethod
 773     def __can_accept_status_code(err, expected_status):
 774         assert isinstance(err, urllib.error.HTTPError)
 775         if expected_status is None:
 776             return False
 777         elif callable(expected_status):
 778             return expected_status(err.code) is True
 779         else:
 780             return err.code in variadic(expected_status)
 781
 782     def _create_request(self, url_or_request, data=None, headers=None, query=None):
 783         if isinstance(url_or_request, urllib.request.Request):
 784             return update_Request(url_or_request, data=data, headers=headers, query=query)
 785         if query:
 786             url_or_request = update_url_query(url_or_request, query)
 787         return sanitized_Request(url_or_request, data, headers or {})
 788
 789     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
 790         """
 791         Return the response handle.
 792
 793         See _download_webpage docstring for arguments specification.
 794         """
 795         if not self._downloader._first_webpage_request:
 796             sleep_interval = self.get_param('sleep_interval_requests') or 0
 797             if sleep_interval > 0:
 798                 self.to_screen('Sleeping %s seconds ...' % sleep_interval)
 799                 time.sleep(sleep_interval)
 800         else:
 801             self._downloader._first_webpage_request = False
 802
 803         if note is None:
 804             self.report_download_webpage(video_id)
 805         elif note is not False:
 806             if video_id is None:
 807                 self.to_screen(str(note))
 808             else:
 809                 self.to_screen(f'{video_id}: {note}')
 810
 811         # Some sites check X-Forwarded-For HTTP header in order to figure out
 812         # the origin of the client behind proxy. This allows bypassing geo
 813         # restriction by faking this header's value to IP that belongs to some
 814         # geo unrestricted country. We will do so once we encounter any
 815         # geo restriction error.
 816         if self._x_forwarded_for_ip:
 817             headers = (headers or {}).copy()
 818             headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
 819
 820         try:
 821             return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
 822         except network_exceptions as err:
 823             if isinstance(err, urllib.error.HTTPError):
 824                 if self.__can_accept_status_code(err, expected_status):
 825                     # Retain reference to error to prevent file object from
 826                     # being closed before it can be read. Works around the
 827                     # effects of <https://bugs.python.org/issue15002>
 828                     # introduced in Python 3.4.1.
 829                     err.fp._error = err
 830                     return err.fp
 831
 832             if errnote is False:
 833                 return False
 834             if errnote is None:
 835                 errnote = 'Unable to download webpage'
 836
 837             errmsg = f'{errnote}: {error_to_compat_str(err)}'
 838             if fatal:
 839                 raise ExtractorError(errmsg, cause=err)
 840             else:
 841                 self.report_warning(errmsg)
 842                 return False
 843
 844     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
 845                                  encoding=None, data=None, headers={}, query={}, expected_status=None):
 846         """
 847         Return a tuple (page content as string, URL handle).
 848
 849         Arguments:
 850         url_or_request -- plain text URL as a string or
 851             a urllib.request.Request object
 852         video_id -- Video/playlist/item identifier (string)
 853
 854         Keyword arguments:
 855         note -- note printed before downloading (string)
 856         errnote -- note printed in case of an error (string)
 857         fatal -- flag denoting whether error should be considered fatal,
 858             i.e. whether it should cause ExtractionError to be raised,
 859             otherwise a warning will be reported and extraction continued
 860         encoding -- encoding for a page content decoding, guessed automatically
 861             when not explicitly specified
 862         data -- POST data (bytes)
 863         headers -- HTTP headers (dict)
 864         query -- URL query (dict)
 865         expected_status -- allows to accept failed HTTP requests (non 2xx
 866             status code) by explicitly specifying a set of accepted status
 867             codes. Can be any of the following entities:
 868                 - an integer type specifying an exact failed status code to
 869                   accept
 870                 - a list or a tuple of integer types specifying a list of
 871                   failed status codes to accept
 872                 - a callable accepting an actual failed status code and
 873                   returning True if it should be accepted
 874             Note that this argument does not affect success status codes (2xx)
 875             which are always accepted.
 876         """
 877
 878         # Strip hashes from the URL (#1038)
 879         if isinstance(url_or_request, str):
 880             url_or_request = url_or_request.partition('#')[0]
 881
 882         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
 883         if urlh is False:
 884             assert not fatal
 885             return False
 886         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
 887         return (content, urlh)
 888
 889     @staticmethod
 890     def _guess_encoding_from_content(content_type, webpage_bytes):
 891         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
 892         if m:
 893             encoding = m.group(1)
 894         else:
 895             m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
 896                           webpage_bytes[:1024])
 897             if m:
 898                 encoding = m.group(1).decode('ascii')
 899             elif webpage_bytes.startswith(b'\xff\xfe'):
 900                 encoding = 'utf-16'
 901             else:
 902                 encoding = 'utf-8'
 903
 904         return encoding
 905
 906     def __check_blocked(self, content):
 907         first_block = content[:512]
 908         if ('<title>Access to this site is blocked</title>' in content
 909                 and 'Websense' in first_block):
 910             msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
 911             blocked_iframe = self._html_search_regex(
 912                 r'<iframe src="([^"]+)"', content,
 913                 'Websense information URL', default=None)
 914             if blocked_iframe:
 915                 msg += ' Visit %s for more details' % blocked_iframe
 916             raise ExtractorError(msg, expected=True)
 917         if '<title>The URL you requested has been blocked</title>' in first_block:
 918             msg = (
 919                 'Access to this webpage has been blocked by Indian censorship. '
 920                 'Use a VPN or proxy server (with --proxy) to route around it.')
 921             block_msg = self._html_search_regex(
 922                 r'</h1><p>(.*?)</p>',
 923                 content, 'block message', default=None)
 924             if block_msg:
 925                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
 926             raise ExtractorError(msg, expected=True)
 927         if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
 928                 and 'blocklist.rkn.gov.ru' in content):
 929             raise ExtractorError(
 930                 'Access to this webpage has been blocked by decision of the Russian government. '
 931                 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
 932                 expected=True)
 933
 934     def _request_dump_filename(self, url, video_id):
 935         basen = f'{video_id}_{url}'
 936         trim_length = self.get_param('trim_file_name') or 240
 937         if len(basen) > trim_length:
 938             h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
 939             basen = basen[:trim_length - len(h)] + h
 940         filename = sanitize_filename(f'{basen}.dump', restricted=True)
 941         # Working around MAX_PATH limitation on Windows (see
 942         # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
 943         if compat_os_name == 'nt':
 944             absfilepath = os.path.abspath(filename)
 945             if len(absfilepath) > 259:
 946                 filename = fR'\\?\{absfilepath}'
 947         return filename
 948
 949     def __decode_webpage(self, webpage_bytes, encoding, headers):
 950         if not encoding:
 951             encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
 952         try:
 953             return webpage_bytes.decode(encoding, 'replace')
 954         except LookupError:
 955             return webpage_bytes.decode('utf-8', 'replace')
 956
 957     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
 958         webpage_bytes = urlh.read()
 959         if prefix is not None:
 960             webpage_bytes = prefix + webpage_bytes
 961         if self.get_param('dump_intermediate_pages', False):
 962             self.to_screen('Dumping request to ' + urlh.geturl())
 963             dump = base64.b64encode(webpage_bytes).decode('ascii')
 964             self._downloader.to_screen(dump)
 965         if self.get_param('write_pages'):
 966             filename = self._request_dump_filename(urlh.geturl(), video_id)
 967             self.to_screen(f'Saving request to {filename}')
 968             with open(filename, 'wb') as outf:
 969                 outf.write(webpage_bytes)
 970
 971         content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
 972         self.__check_blocked(content)
 973
 974         return content
 975
 976     def __print_error(self, errnote, fatal, video_id, err):
 977         if fatal:
 978             raise ExtractorError(f'{video_id}: {errnote}', cause=err)
 979         elif errnote:
 980             self.report_warning(f'{video_id}: {errnote}: {err}')
 981
 982     def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, errnote=None):
 983         if transform_source:
 984             xml_string = transform_source(xml_string)
 985         try:
 986             return compat_etree_fromstring(xml_string.encode('utf-8'))
 987         except xml.etree.ElementTree.ParseError as ve:
 988             self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
 989
 990     def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, errnote=None, **parser_kwargs):
 991         try:
 992             return json.loads(
 993                 json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
 994         except ValueError as ve:
 995             self.__print_error('Failed to parse JSON' if errnote is None else errnote, fatal, video_id, ve)
 996
 997     def _parse_socket_response_as_json(self, data, *args, **kwargs):
 998         return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs)
 999
    def __create_download_methods(name, parser, note, errnote, return_value):
        """
        Build the pair of methods (_download_{name}_handle, _download_{name}).

        NB: This takes no self; it is called directly in the class body below.

        @param name             Name inserted into the generated method names
        @param parser           Name of the method used for parsing the content
                                (looked up on the extractor); None = no parsing
        @param note             Default note of the generated methods
        @param errnote          Default errnote of the generated methods
        @param return_value     Description of the return value, inserted into
                                the generated docstrings
        @returns                (download_handle, download_content)
        """

        def parse(ie, content, *args, errnote=errnote, **kwargs):
            # Without a parser, the content is returned as is
            if parser is None:
                return content
            # errnote is only forwarded to the parser when it is False
            if errnote is False:
                kwargs['errnote'] = errnote
            # parser is fetched by name so subclasses can override it
            return getattr(ie, parser)(content, *args, **kwargs)

        # Returns (parsed content, URL handle), or False if the download returned False
        def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
                            fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
            res = self._download_webpage_handle(
                url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
                data=data, headers=headers, query=query, expected_status=expected_status)
            if res is False:
                return res
            content, urlh = res
            return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh

        # Returns the parsed content only, or False if the download returned False
        def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
                             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
            # With the 'load_pages' param, first try to read the page from its dump file
            if self.get_param('load_pages'):
                url_or_request = self._create_request(url_or_request, data, headers, query)
                filename = self._request_dump_filename(url_or_request.full_url, video_id)
                self.to_screen(f'Loading request from {filename}')
                try:
                    with open(filename, 'rb') as dumpf:
                        webpage_bytes = dumpf.read()
                except OSError as e:
                    # Only warn; the page is then downloaded as usual below
                    self.report_warning(f'Unable to load request from disk: {e}')
                else:
                    content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
                    return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote)
            kwargs = {
                'note': note,
                'errnote': errnote,
                'transform_source': transform_source,
                'fatal': fatal,
                'encoding': encoding,
                'data': data,
                'headers': headers,
                'query': query,
                'expected_status': expected_status,
            }
            # transform_source is not passed on when there is no parser
            if parser is None:
                kwargs.pop('transform_source')
            # The method is fetched by name so subclasses can override _download_..._handle
            res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
            return res if res is False else res[0]

        # Rename func and give it a docstring, so that it presents itself
        # as the InfoExtractor method with the given name
        def impersonate(func, name, return_value):
            func.__name__, func.__qualname__ = name, f'InfoExtractor.{name}'
            func.__doc__ = f'''
                @param transform_source     Apply this transformation before parsing
                @returns                    {return_value}

                See _download_webpage_handle docstring for other arguments specification
            '''

        impersonate(download_handle, f'_download_{name}_handle', f'({return_value}, URL handle)')
        impersonate(download_content, f'_download_{name}', f'{return_value}')
        return download_handle, download_content
1063
    # Generated download methods; each call returns the (..._handle, content only) pair.
    # Arguments: name, parser method name, default note, default errnote, description of the return value
    _download_xml_handle, _download_xml = __create_download_methods(
        'xml', '_parse_xml', 'Downloading XML', 'Unable to download XML', 'xml as an xml.etree.ElementTree.Element')
    _download_json_handle, _download_json = __create_download_methods(
        'json', '_parse_json', 'Downloading JSON metadata', 'Unable to download JSON metadata', 'JSON object as a dict')
    _download_socket_json_handle, _download_socket_json = __create_download_methods(
        'socket_json', '_parse_socket_response_as_json', 'Polling socket', 'Unable to poll socket', 'JSON object as a dict')
    # No parser: only the content variant is kept; it returns the page content unparsed
    __download_webpage = __create_download_methods('webpage', None, None, None, 'data of the page as a string')[1]
1071
1072     def _download_webpage(
1073             self, url_or_request, video_id, note=None, errnote=None,
1074             fatal=True, tries=1, timeout=NO_DEFAULT, *args, **kwargs):
1075         """
1076         Return the data of the page as a string.
1077
1078         Keyword arguments:
1079         tries -- number of tries
1080         timeout -- sleep interval between tries
1081
1082         See _download_webpage_handle docstring for other arguments specification.
1083         """
1084
1085         R''' # NB: These are unused; should they be deprecated?
1086         if tries != 1:
1087             self._downloader.deprecation_warning('tries argument is deprecated in InfoExtractor._download_webpage')
1088         if timeout is NO_DEFAULT:
1089             timeout = 5
1090         else:
1091             self._downloader.deprecation_warning('timeout argument is deprecated in InfoExtractor._download_webpage')
1092         '''
1093
1094         try_count = 0
1095         while True:
1096             try:
1097                 return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
1098             except http.client.IncompleteRead as e:
1099                 try_count += 1
1100                 if try_count >= tries:
1101                     raise e
1102                 self._sleep(timeout, video_id)
1103
1104     def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
1105         idstr = format_field(video_id, None, '%s: ')
1106         msg = f'[{self.IE_NAME}] {idstr}{msg}'
1107         if only_once:
1108             if f'WARNING: {msg}' in self._printed_messages:
1109                 return
1110             self._printed_messages.add(f'WARNING: {msg}')
1111         self._downloader.report_warning(msg, *args, **kwargs)
1112
1113     def to_screen(self, msg, *args, **kwargs):
1114         """Print msg to screen, prefixing it with '[ie_name]'"""
1115         self._downloader.to_screen(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
1116
1117     def write_debug(self, msg, *args, **kwargs):
1118         self._downloader.write_debug(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
1119
1120     def get_param(self, name, default=None, *args, **kwargs):
1121         if self._downloader:
1122             return self._downloader.params.get(name, default, *args, **kwargs)
1123         return default
1124
1125     def report_drm(self, video_id, partial=NO_DEFAULT):
1126         if partial is not NO_DEFAULT:
1127             self._downloader.deprecation_warning('InfoExtractor.report_drm no longer accepts the argument partial')
1128         self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
1129
1130     def report_extraction(self, id_or_name):
1131         """Report information extraction."""
1132         self.to_screen('%s: Extracting information' % id_or_name)
1133
1134     def report_download_webpage(self, video_id):
1135         """Report webpage download."""
1136         self.to_screen('%s: Downloading webpage' % video_id)
1137
1138     def report_age_confirmation(self):
1139         """Report attempt to confirm age."""
1140         self.to_screen('Confirming age')
1141
1142     def report_login(self):
1143         """Report attempt to log in."""
1144         self.to_screen('Logging in')
1145
1146     def raise_login_required(
1147             self, msg='This video is only available for registered users',
1148             metadata_available=False, method=NO_DEFAULT):
1149         if metadata_available and (
1150                 self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
1151             self.report_warning(msg)
1152             return
1153         msg += format_field(self._login_hint(method), None, '. %s')
1154         raise ExtractorError(msg, expected=True)
1155
1156     def raise_geo_restricted(
1157             self, msg='This video is not available from your location due to geo restriction',
1158             countries=None, metadata_available=False):
1159         if metadata_available and (
1160                 self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
1161             self.report_warning(msg)
1162         else:
1163             raise GeoRestrictedError(msg, countries=countries)
1164
1165     def raise_no_formats(self, msg, expected=False, video_id=None):
1166         if expected and (
1167                 self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
1168             self.report_warning(msg, video_id)
1169         elif isinstance(msg, ExtractorError):
1170             raise msg
1171         else:
1172             raise ExtractorError(msg, expected=expected, video_id=video_id)
1173
1174     # Methods for following #608
1175     @staticmethod
1176     def url_result(url, ie=None, video_id=None, video_title=None, *, url_transparent=False, **kwargs):
1177         """Returns a URL that points to a page that should be processed"""
1178         if ie is not None:
1179             kwargs['ie_key'] = ie if isinstance(ie, str) else ie.ie_key()
1180         if video_id is not None:
1181             kwargs['id'] = video_id
1182         if video_title is not None:
1183             kwargs['title'] = video_title
1184         return {
1185             **kwargs,
1186             '_type': 'url_transparent' if url_transparent else 'url',
1187             'url': url,
1188         }
1189
1190     @classmethod
1191     def playlist_from_matches(cls, matches, playlist_id=None, playlist_title=None,
1192                               getter=IDENTITY, ie=None, video_kwargs=None, **kwargs):
1193         return cls.playlist_result(
1194             (cls.url_result(m, ie, **(video_kwargs or {})) for m in orderedSet(map(getter, matches), lazy=True)),
1195             playlist_id, playlist_title, **kwargs)
1196
1197     @staticmethod
1198     def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, *, multi_video=False, **kwargs):
1199         """Returns a playlist"""
1200         if playlist_id:
1201             kwargs['id'] = playlist_id
1202         if playlist_title:
1203             kwargs['title'] = playlist_title
1204         if playlist_description is not None:
1205             kwargs['description'] = playlist_description
1206         return {
1207             **kwargs,
1208             '_type': 'multi_video' if multi_video else 'playlist',
1209             'entries': entries,
1210         }
1211
1212     def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
1213         """
1214         Perform a regex search on the given string, using a single or a list of
1215         patterns returning the first matching group.
1216         In case of failure return a default value or raise a WARNING or a
1217         RegexNotFoundError, depending on fatal, specifying the field name.
1218         """
1219         if string is None:
1220             mobj = None
1221         elif isinstance(pattern, (str, re.Pattern)):
1222             mobj = re.search(pattern, string, flags)
1223         else:
1224             for p in pattern:
1225                 mobj = re.search(p, string, flags)
1226                 if mobj:
1227                     break
1228
1229         _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
1230
1231         if mobj:
1232             if group is None:
1233                 # return the first matching group
1234                 return next(g for g in mobj.groups() if g is not None)
1235             elif isinstance(group, (list, tuple)):
1236                 return tuple(mobj.group(g) for g in group)
1237             else:
1238                 return mobj.group(group)
1239         elif default is not NO_DEFAULT:
1240             return default
1241         elif fatal:
1242             raise RegexNotFoundError('Unable to extract %s' % _name)
1243         else:
1244             self.report_warning('unable to extract %s' % _name + bug_reports_message())
1245             return None
1246
1247     def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
1248                      contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
1249         """Searches string for the JSON object specified by start_pattern"""
1250         # NB: end_pattern is only used to reduce the size of the initial match
1251         if default is NO_DEFAULT:
1252             default, has_default = {}, False
1253         else:
1254             fatal, has_default = False, True
1255
1256         json_string = self._search_regex(
1257             rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
1258             string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
1259         if not json_string:
1260             return default
1261
1262         _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
1263         try:
1264             return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
1265         except ExtractorError as e:
1266             if fatal:
1267                 raise ExtractorError(
1268                     f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
1269             elif not has_default:
1270                 self.report_warning(
1271                     f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
1272         return default
1273
1274     def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
1275         """
1276         Like _search_regex, but strips HTML tags and unescapes entities.
1277         """
1278         res = self._search_regex(pattern, string, name, default, fatal, flags, group)
1279         if isinstance(res, tuple):
1280             return tuple(map(clean_html, res))
1281         return clean_html(res)
1282
1283     def _get_netrc_login_info(self, netrc_machine=None):
1284         username = None
1285         password = None
1286         netrc_machine = netrc_machine or self._NETRC_MACHINE
1287
1288         if self.get_param('usenetrc', False):
1289             try:
1290                 netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
1291                 if os.path.isdir(netrc_file):
1292                     netrc_file = os.path.join(netrc_file, '.netrc')
1293                 info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
1294                 if info is not None:
1295                     username = info[0]
1296                     password = info[2]
1297                 else:
1298                     raise netrc.NetrcParseError(
1299                         'No authenticators for %s' % netrc_machine)
1300             except (OSError, netrc.NetrcParseError) as err:
1301                 self.report_warning(
1302                     'parsing .netrc: %s' % error_to_compat_str(err))
1303
1304         return username, password
1305
1306     def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
1307         """
1308         Get the login info as (username, password)
1309         First look for the manually specified credentials using username_option
1310         and password_option as keys in params dictionary. If no such credentials
1311         available look in the netrc file using the netrc_machine or _NETRC_MACHINE
1312         value.
1313         If there's no info available, return (None, None)
1314         """
1315
1316         # Attempt to use provided username and password or .netrc data
1317         username = self.get_param(username_option)
1318         if username is not None:
1319             password = self.get_param(password_option)
1320         else:
1321             username, password = self._get_netrc_login_info(netrc_machine)
1322
1323         return username, password
1324
1325     def _get_tfa_info(self, note='two-factor verification code'):
1326         """
1327         Get the two-factor authentication info
1328         TODO - asking the user will be required for sms/phone verify
1329         currently just uses the command line option
1330         If there's no info available, return None
1331         """
1332
1333         tfa = self.get_param('twofactor')
1334         if tfa is not None:
1335             return tfa
1336
1337         return getpass.getpass('Type %s and press [Return]: ' % note)
1338
1339     # Helper functions for extracting OpenGraph info
1340     @staticmethod
1341     def _og_regexes(prop):
1342         content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
1343         property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
1344                        % {'prop': re.escape(prop), 'sep': '(?:&#x3A;|[:-])'})
1345         template = r'<meta[^>]+?%s[^>]+?%s'
1346         return [
1347             template % (property_re, content_re),
1348             template % (content_re, property_re),
1349         ]
1350
1351     @staticmethod
1352     def _meta_regex(prop):
1353         return r'''(?isx)<meta
1354                     (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
1355                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
1356
1357     def _og_search_property(self, prop, html, name=None, **kargs):
1358         prop = variadic(prop)
1359         if name is None:
1360             name = 'OpenGraph %s' % prop[0]
1361         og_regexes = []
1362         for p in prop:
1363             og_regexes.extend(self._og_regexes(p))
1364         escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
1365         if escaped is None:
1366             return None
1367         return unescapeHTML(escaped)
1368
1369     def _og_search_thumbnail(self, html, **kargs):
1370         return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
1371
1372     def _og_search_description(self, html, **kargs):
1373         return self._og_search_property('description', html, fatal=False, **kargs)
1374
1375     def _og_search_title(self, html, *, fatal=False, **kargs):
1376         return self._og_search_property('title', html, fatal=fatal, **kargs)
1377
1378     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
1379         regexes = self._og_regexes('video') + self._og_regexes('video:url')
1380         if secure:
1381             regexes = self._og_regexes('video:secure_url') + regexes
1382         return self._html_search_regex(regexes, html, name, **kargs)
1383
1384     def _og_search_url(self, html, **kargs):
1385         return self._og_search_property('url', html, **kargs)
1386
1387     def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
1388         return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
1389
1390     def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
1391         name = variadic(name)
1392         if display_name is None:
1393             display_name = name[0]
1394         return self._html_search_regex(
1395             [self._meta_regex(n) for n in name],
1396             html, display_name, fatal=fatal, group='content', **kwargs)
1397
1398     def _dc_search_uploader(self, html):
1399         return self._html_search_meta('dc.creator', html, 'uploader')
1400
1401     @staticmethod
1402     def _rta_search(html):
1403         # See http://www.rtalabel.org/index.php?content=howtofaq#single
1404         if re.search(r'(?ix)<meta\s+name="rating"\s+'
1405                      r'     content="RTA-5042-1996-1400-1577-RTA"',
1406                      html):
1407             return 18
1408
1409         # And then there are the jokers who advertise that they use RTA, but actually don't.
1410         AGE_LIMIT_MARKERS = [
1411             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
1412             r'>[^<]*you acknowledge you are at least (\d+) years old',
1413             r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
1414         ]
1415
1416         age_limit = 0
1417         for marker in AGE_LIMIT_MARKERS:
1418             mobj = re.search(marker, html)
1419             if mobj:
1420                 age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18)))
1421         return age_limit
1422
1423     def _media_rating_search(self, html):
1424         # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
1425         rating = self._html_search_meta('rating', html)
1426
1427         if not rating:
1428             return None
1429
1430         RATING_TABLE = {
1431             'safe for kids': 0,
1432             'general': 8,
1433             '14 years': 14,
1434             'mature': 17,
1435             'restricted': 19,
1436         }
1437         return RATING_TABLE.get(rating.lower())
1438
1439     def _family_friendly_search(self, html):
1440         # See http://schema.org/VideoObject
1441         family_friendly = self._html_search_meta(
1442             'isFamilyFriendly', html, default=None)
1443
1444         if not family_friendly:
1445             return None
1446
1447         RATING_TABLE = {
1448             '1': 0,
1449             'true': 0,
1450             '0': 18,
1451             'false': 18,
1452         }
1453         return RATING_TABLE.get(family_friendly.lower())
1454
1455     def _twitter_search_player(self, html):
1456         return self._html_search_meta('twitter:player', html,
1457                                       'twitter card player')
1458
1459     def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
1460         """Yield all json ld objects in the html"""
1461         if default is not NO_DEFAULT:
1462             fatal = False
1463         for mobj in re.finditer(JSON_LD_RE, html):
1464             json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
1465             for json_ld in variadic(json_ld_item):
1466                 if isinstance(json_ld, dict):
1467                     yield json_ld
1468
1469     def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT):
1470         """Search for a video in any json ld in the html"""
1471         if default is not NO_DEFAULT:
1472             fatal = False
1473         info = self._json_ld(
1474             list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)),
1475             video_id, fatal=fatal, expected_type=expected_type)
1476         if info:
1477             return info
1478         if default is not NO_DEFAULT:
1479             return default
1480         elif fatal:
1481             raise RegexNotFoundError('Unable to extract JSON-LD')
1482         else:
1483             self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
1484             return {}
1485
    def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
        """
        Extract info dict fields from JSON-LD data.

        @param json_ld        JSON-LD data, either as a JSON string (which is
                              parsed with _parse_json) or already parsed
        @param video_id       Passed to _parse_json
        @param fatal          Passed to _parse_json
        @param expected_type  When given, entries whose '@type' does not match
                              are skipped and the traversal stops after the
                              first entry that was processed
        @returns              The collected info passed through filter_dict;
                              {} when json_ld is empty
        """
        if isinstance(json_ld, str):
            json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
        if not json_ld:
            return {}
        # Shared result that all the nested helpers below write into
        info = {}

        # Maps the last path component of an interaction type to the
        # prefix of the info dict '<prefix>_count' field it is stored in
        INTERACTION_TYPE_MAP = {
            'CommentAction': 'comment',
            'AgreeAction': 'like',
            'DisagreeAction': 'dislike',
            'LikeAction': 'like',
            'DislikeAction': 'dislike',
            'ListenAction': 'view',
            'WatchAction': 'view',
            'ViewAction': 'view',
        }

        def is_type(e, *expected_types):
            """Whether the '@type' of e is, or contains, any of expected_types"""
            # NOTE(review): relies on variadic() wrapping a single value so that
            # both a single '@type' and a list of types work - confirm
            type = variadic(traverse_obj(e, '@type'))
            return any(x in type for x in expected_types)

        def extract_interaction_type(e):
            """Return the 'interactionType' of e as a string (its '@type' when it is a dict)"""
            interaction_type = e.get('interactionType')
            if isinstance(interaction_type, dict):
                interaction_type = interaction_type.get('@type')
            return str_or_none(interaction_type)

        def extract_interaction_statistic(e):
            """Fill the '*_count' fields of info from the 'interactionStatistic' of e"""
            interaction_statistic = e.get('interactionStatistic')
            # A single statistic is handled like a list with one element
            if isinstance(interaction_statistic, dict):
                interaction_statistic = [interaction_statistic]
            if not isinstance(interaction_statistic, list):
                return
            for is_e in interaction_statistic:
                if not is_type(is_e, 'InteractionCounter'):
                    continue
                interaction_type = extract_interaction_type(is_e)
                if not interaction_type:
                    continue
                # For interaction count some sites provide string instead of
                # an integer (as per spec) with non digit characters (e.g. ",")
                # so extracting count with more relaxed str_to_int
                interaction_count = str_to_int(is_e.get('userInteractionCount'))
                if interaction_count is None:
                    continue
                # Only the part after the last '/' is looked up, so a type
                # given with a path/URL prefix is matched by its final component
                count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
                if not count_kind:
                    continue
                count_key = '%s_count' % count_kind
                # The first value found for a counter wins
                if info.get(count_key) is not None:
                    continue
                info[count_key] = interaction_count

        def extract_chapter_information(e):
            """Set info['chapters'] from the 'Clip' entries in the 'hasPart' of e"""
            chapters = [{
                'title': part.get('name'),
                'start_time': part.get('startOffset'),
                'end_time': part.get('endOffset'),
            } for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
            # Walk over (previous, current, next) triples to fill in missing times from
            # the neighbours. zip() stops at the shortest input (chapters[1:]), so the
            # last chapter is never "current" here; its end_time is handled below
            for idx, (last_c, current_c, next_c) in enumerate(zip(
                    [{'end_time': 0}] + chapters, chapters, chapters[1:])):
                current_c['end_time'] = current_c['end_time'] or next_c['start_time']
                current_c['start_time'] = current_c['start_time'] or last_c['end_time']
                if None in current_c.values():
                    self.report_warning(f'Chapter {idx} contains broken data. Not extracting chapters')
                    return
            if chapters:
                # info['duration'] was set by extract_video_object, the only caller
                chapters[-1]['end_time'] = chapters[-1]['end_time'] or info['duration']
                info['chapters'] = chapters

        def extract_video_object(e):
            """Update info with the fields of the video/audio object e"""
            author = e.get('author')
            info.update({
                'url': url_or_none(e.get('contentUrl')),
                'ext': mimetype2ext(e.get('encodingFormat')),
                'title': unescapeHTML(e.get('name')),
                'description': unescapeHTML(e.get('description')),
                'thumbnails': [{'url': unescapeHTML(url)}
                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
                               if url_or_none(url)],
                'duration': parse_duration(e.get('duration')),
                'timestamp': unified_timestamp(e.get('uploadDate')),
                # author can be an instance of 'Organization' or 'Person' types.
                # both types can have 'name' property(inherited from 'Thing' type). [1]
                # however some websites are using 'Text' type instead.
                # 1. https://schema.org/VideoObject
                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
                'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
                'filesize': int_or_none(float_or_none(e.get('contentSize'))),
                'tbr': int_or_none(e.get('bitrate')),
                'width': int_or_none(e.get('width')),
                'height': int_or_none(e.get('height')),
                'view_count': int_or_none(e.get('interactionCount')),
                'tags': try_call(lambda: e.get('keywords').split(',')),
            })
            # Audio-only objects have no video codec and their bitrate is also the audio bitrate
            if is_type(e, 'AudioObject'):
                info.update({
                    'vcodec': 'none',
                    'abr': int_or_none(e.get('bitrate')),
                })
            # These may overwrite/extend the fields set above ('view_count', 'chapters', ...)
            extract_interaction_statistic(e)
            extract_chapter_information(e)

        def traverse_json_ld(json_ld, at_top_level=True):
            """Process every entry of json_ld, recursing into '@graph' containers"""
            for e in variadic(json_ld):
                if not isinstance(e, dict):
                    continue
                # Top level entries must carry an '@context'
                if at_top_level and '@context' not in e:
                    continue
                # An entry consisting only of '@context' and '@graph' is a
                # container; its members are processed as non top level entries
                if at_top_level and set(e.keys()) == {'@context', '@graph'}:
                    traverse_json_ld(e['@graph'], at_top_level=False)
                    continue
                if expected_type is not None and not is_type(e, expected_type):
                    continue
                rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
                if rating is not None:
                    info['average_rating'] = rating
                if is_type(e, 'TVEpisode', 'Episode'):
                    episode_name = unescapeHTML(e.get('name'))
                    info.update({
                        'episode': episode_name,
                        'episode_number': int_or_none(e.get('episodeNumber')),
                        'description': unescapeHTML(e.get('description')),
                    })
                    # The episode name only serves as title when there is none yet
                    if not info.get('title') and episode_name:
                        info['title'] = episode_name
                    part_of_season = e.get('partOfSeason')
                    if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
                        info.update({
                            'season': unescapeHTML(part_of_season.get('name')),
                            'season_number': int_or_none(part_of_season.get('seasonNumber')),
                        })
                    part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                    if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
                        info['series'] = unescapeHTML(part_of_series.get('name'))
                elif is_type(e, 'Movie'):
                    info.update({
                        'title': unescapeHTML(e.get('name')),
                        'description': unescapeHTML(e.get('description')),
                        'duration': parse_duration(e.get('duration')),
                        'timestamp': unified_timestamp(e.get('dateCreated')),
                    })
                elif is_type(e, 'Article', 'NewsArticle'):
                    info.update({
                        'timestamp': parse_iso8601(e.get('datePublished')),
                        'title': unescapeHTML(e.get('headline')),
                        'description': unescapeHTML(e.get('articleBody') or e.get('description')),
                    })
                    # Only the first element of 'video', or else of 'subjectOf', is considered
                    if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
                        extract_video_object(e['video'][0])
                    elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
                        extract_video_object(e['subjectOf'][0])
                elif is_type(e, 'VideoObject', 'AudioObject'):
                    extract_video_object(e)
                    # Without an expected type all entries are processed;
                    # otherwise the first processed entry ends the traversal
                    if expected_type is None:
                        continue
                    else:
                        break
                # Entries of the other types may embed a video object
                video = e.get('video')
                if is_type(video, 'VideoObject'):
                    extract_video_object(video)
                if expected_type is None:
                    continue
                else:
                    break

        traverse_json_ld(json_ld)
        # NOTE(review): filter_dict presumably drops the fields that are None - confirm
        return filter_dict(info)
1655
1656     def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
1657         return self._parse_json(
1658             self._search_regex(
1659                 r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
1660                 webpage, 'next.js data', fatal=fatal, **kw),
1661             video_id, transform_source=transform_source, fatal=fatal)
1662
1663     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
1664         """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
1665         rectx = re.escape(context_name)
1666         FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
1667         js, arg_keys, arg_vals = self._search_regex(
1668             (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
1669             webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
1670             default=NO_DEFAULT if fatal else (None, None, None))
1671         if js is None:
1672             return {}
1673
1674         args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
1675             f'[{arg_vals}]', video_id, transform_source=js_to_json, fatal=fatal) or ())))
1676
1677         ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
1678         return traverse_obj(ret, traverse) or {}
1679
1680     @staticmethod
1681     def _hidden_inputs(html):
1682         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
1683         hidden_inputs = {}
1684         for input in re.findall(r'(?i)(<input[^>]+>)', html):
1685             attrs = extract_attributes(input)
1686             if not input:
1687                 continue
1688             if attrs.get('type') not in ('hidden', 'submit'):
1689                 continue
1690             name = attrs.get('name') or attrs.get('id')
1691             value = attrs.get('value')
1692             if name and value is not None:
1693                 hidden_inputs[name] = value
1694         return hidden_inputs
1695
1696     def _form_hidden_inputs(self, form_id, html):
1697         form = self._search_regex(
1698             r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
1699             html, '%s form' % form_id, group='form')
1700         return self._hidden_inputs(form)
1701
    @classproperty(cache=True)
    def FormatSort(cls):
        """
        Deprecated accessor that returns a FormatSorter subclass.

        A deprecation warning is issued when this body runs.
        NOTE(review): with cache=True the body presumably runs only once per
        class, so the warning would be shown once - confirm against classproperty
        """
        class FormatSort(FormatSorter):
            # `ie` is the first positional parameter and therefore the new FormatSort
            # instance itself; its `_downloader` attribute is passed as the first
            # argument to FormatSorter.__init__
            # NOTE(review): nothing visible here sets `_downloader` on that instance - confirm
            def __init__(ie, *args, **kwargs):
                super().__init__(ie._downloader, *args, **kwargs)

        deprecation_warning(
            'yt_dlp.InfoExtractor.FormatSort is deprecated and may be removed in the future. '
            'Use yt_dlp.utils.FormatSorter instead')
        return FormatSort
1712
1713     def _sort_formats(self, formats, field_preference=[]):
1714         if not field_preference:
1715             self._downloader.deprecation_warning(
1716                 'yt_dlp.InfoExtractor._sort_formats is deprecated and is no longer required')
1717             return
1718         self._downloader.deprecation_warning(
1719             'yt_dlp.InfoExtractor._sort_formats is deprecated and no longer works as expected. '
1720             'Return _format_sort_fields in the info_dict instead')
1721         if formats:
1722             formats[0]['__sort_fields'] = field_preference
1723
1724     def _check_formats(self, formats, video_id):
1725         if formats:
1726             formats[:] = filter(
1727                 lambda f: self._is_valid_url(
1728                     f['url'], video_id,
1729                     item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
1730                 formats)
1731
1732     @staticmethod
1733     def _remove_duplicate_formats(formats):
1734         format_urls = set()
1735         unique_formats = []
1736         for f in formats:
1737             if f['url'] not in format_urls:
1738                 format_urls.add(f['url'])
1739                 unique_formats.append(f)
1740         formats[:] = unique_formats
1741
1742     def _is_valid_url(self, url, video_id, item='video', headers={}):
1743         url = self._proto_relative_url(url, scheme='http:')
1744         # For now assume non HTTP(S) URLs always valid
1745         if not (url.startswith('http://') or url.startswith('https://')):
1746             return True
1747         try:
1748             self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
1749             return True
1750         except ExtractorError as e:
1751             self.to_screen(
1752                 '%s: %s URL is invalid, skipping: %s'
1753                 % (video_id, item, error_to_compat_str(e.cause)))
1754             return False
1755
1756     def http_scheme(self):
1757         """ Either "http:" or "https:", depending on the user's preferences """
1758         return (
1759             'http:'
1760             if self.get_param('prefer_insecure', False)
1761             else 'https:')
1762
1763     def _proto_relative_url(self, url, scheme=None):
1764         scheme = scheme or self.http_scheme()
1765         assert scheme.endswith(':')
1766         return sanitize_url(url, scheme=scheme[:-1])
1767
1768     def _sleep(self, timeout, video_id, msg_template=None):
1769         if msg_template is None:
1770             msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
1771         msg = msg_template % {'video_id': video_id, 'timeout': timeout}
1772         self.to_screen(msg)
1773         time.sleep(timeout)
1774
1775     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
1776                              transform_source=lambda s: fix_xml_ampersands(s).strip(),
1777                              fatal=True, m3u8_id=None, data=None, headers={}, query={}):
1778         if self.get_param('ignore_no_formats_error'):
1779             fatal = False
1780
1781         res = self._download_xml_handle(
1782             manifest_url, video_id, 'Downloading f4m manifest',
1783             'Unable to download f4m manifest',
1784             # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
1785             # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
1786             transform_source=transform_source,
1787             fatal=fatal, data=data, headers=headers, query=query)
1788         if res is False:
1789             return []
1790
1791         manifest, urlh = res
1792         manifest_url = urlh.geturl()
1793
1794         return self._parse_f4m_formats(
1795             manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
1796             transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
1797
1798     def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
1799                            transform_source=lambda s: fix_xml_ampersands(s).strip(),
1800                            fatal=True, m3u8_id=None):
1801         if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal:
1802             return []
1803
1804         # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
1805         akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
1806         if akamai_pv is not None and ';' in akamai_pv.text:
1807             playerVerificationChallenge = akamai_pv.text.split(';')[0]
1808             if playerVerificationChallenge.strip() != '':
1809                 return []
1810
1811         formats = []
1812         manifest_version = '1.0'
1813         media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
1814         if not media_nodes:
1815             manifest_version = '2.0'
1816             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
1817         # Remove unsupported DRM protected media from final formats
1818         # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
1819         media_nodes = remove_encrypted_media(media_nodes)
1820         if not media_nodes:
1821             return formats
1822
1823         manifest_base_url = get_base_url(manifest)
1824
1825         bootstrap_info = xpath_element(
1826             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
1827             'bootstrap info', default=None)
1828
1829         vcodec = None
1830         mime_type = xpath_text(
1831             manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
1832             'base URL', default=None)
1833         if mime_type and mime_type.startswith('audio/'):
1834             vcodec = 'none'
1835
1836         for i, media_el in enumerate(media_nodes):
1837             tbr = int_or_none(media_el.attrib.get('bitrate'))
1838             width = int_or_none(media_el.attrib.get('width'))
1839             height = int_or_none(media_el.attrib.get('height'))
1840             format_id = join_nonempty(f4m_id, tbr or i)
1841             # If <bootstrapInfo> is present, the specified f4m is a
1842             # stream-level manifest, and only set-level manifests may refer to
1843             # external resources.  See section 11.4 and section 4 of F4M spec
1844             if bootstrap_info is None:
1845                 media_url = None
1846                 # @href is introduced in 2.0, see section 11.6 of F4M spec
1847                 if manifest_version == '2.0':
1848                     media_url = media_el.attrib.get('href')
1849                 if media_url is None:
1850                     media_url = media_el.attrib.get('url')
1851                 if not media_url:
1852                     continue
1853                 manifest_url = (
1854                     media_url if media_url.startswith('http://') or media_url.startswith('https://')
1855                     else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
1856                 # If media_url is itself a f4m manifest do the recursive extraction
1857                 # since bitrates in parent manifest (this one) and media_url manifest
1858                 # may differ leading to inability to resolve the format by requested
1859                 # bitrate in f4m downloader
1860                 ext = determine_ext(manifest_url)
1861                 if ext == 'f4m':
1862                     f4m_formats = self._extract_f4m_formats(
1863                         manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
1864                         transform_source=transform_source, fatal=fatal)
1865                     # Sometimes stream-level manifest contains single media entry that
1866                     # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
1867                     # At the same time parent's media entry in set-level manifest may
1868                     # contain it. We will copy it from parent in such cases.
1869                     if len(f4m_formats) == 1:
1870                         f = f4m_formats[0]
1871                         f.update({
1872                             'tbr': f.get('tbr') or tbr,
1873                             'width': f.get('width') or width,
1874                             'height': f.get('height') or height,
1875                             'format_id': f.get('format_id') if not tbr else format_id,
1876                             'vcodec': vcodec,
1877                         })
1878                     formats.extend(f4m_formats)
1879                     continue
1880                 elif ext == 'm3u8':
1881                     formats.extend(self._extract_m3u8_formats(
1882                         manifest_url, video_id, 'mp4', preference=preference,
1883                         quality=quality, m3u8_id=m3u8_id, fatal=fatal))
1884                     continue
1885             formats.append({
1886                 'format_id': format_id,
1887                 'url': manifest_url,
1888                 'manifest_url': manifest_url,
1889                 'ext': 'flv' if bootstrap_info is not None else None,
1890                 'protocol': 'f4m',
1891                 'tbr': tbr,
1892                 'width': width,
1893                 'height': height,
1894                 'vcodec': vcodec,
1895                 'preference': preference,
1896                 'quality': quality,
1897             })
1898         return formats
1899
1900     def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m3u8_id=None):
1901         return {
1902             'format_id': join_nonempty(m3u8_id, 'meta'),
1903             'url': m3u8_url,
1904             'ext': ext,
1905             'protocol': 'm3u8',
1906             'preference': preference - 100 if preference else -100,
1907             'quality': quality,
1908             'resolution': 'multiple',
1909             'format_note': 'Quality selection URL',
1910         }
1911
1912     def _report_ignoring_subs(self, name):
1913         self.report_warning(bug_reports_message(
1914             f'Ignoring subtitle tracks found in the {name} manifest; '
1915             'if any subtitle tracks are missing,'
1916         ), only_once=True)
1917
1918     def _extract_m3u8_formats(self, *args, **kwargs):
1919         fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
1920         if subs:
1921             self._report_ignoring_subs('HLS')
1922         return fmts
1923
1924     def _extract_m3u8_formats_and_subtitles(
1925             self, m3u8_url, video_id, ext=None, entry_protocol='m3u8_native',
1926             preference=None, quality=None, m3u8_id=None, note=None,
1927             errnote=None, fatal=True, live=False, data=None, headers={},
1928             query={}):
1929
1930         if self.get_param('ignore_no_formats_error'):
1931             fatal = False
1932
1933         if not m3u8_url:
1934             if errnote is not False:
1935                 errnote = errnote or 'Failed to obtain m3u8 URL'
1936                 if fatal:
1937                     raise ExtractorError(errnote, video_id=video_id)
1938                 self.report_warning(f'{errnote}{bug_reports_message()}')
1939             return [], {}
1940
1941         res = self._download_webpage_handle(
1942             m3u8_url, video_id,
1943             note='Downloading m3u8 information' if note is None else note,
1944             errnote='Failed to download m3u8 information' if errnote is None else errnote,
1945             fatal=fatal, data=data, headers=headers, query=query)
1946
1947         if res is False:
1948             return [], {}
1949
1950         m3u8_doc, urlh = res
1951         m3u8_url = urlh.geturl()
1952
1953         return self._parse_m3u8_formats_and_subtitles(
1954             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
1955             preference=preference, quality=quality, m3u8_id=m3u8_id,
1956             note=note, errnote=errnote, fatal=fatal, live=live, data=data,
1957             headers=headers, query=query, video_id=video_id)
1958
1959     def _parse_m3u8_formats_and_subtitles(
1960             self, m3u8_doc, m3u8_url=None, ext=None, entry_protocol='m3u8_native',
1961             preference=None, quality=None, m3u8_id=None, live=False, note=None,
1962             errnote=None, fatal=True, data=None, headers={}, query={},
1963             video_id=None):
1964         formats, subtitles = [], {}
1965
1966         has_drm = re.search('|'.join([
1967             r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
1968             r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
1969         ]), m3u8_doc)
1970
1971         def format_url(url):
1972             return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
1973
1974         if self.get_param('hls_split_discontinuity', False):
1975             def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
1976                 if not m3u8_doc:
1977                     if not manifest_url:
1978                         return []
1979                     m3u8_doc = self._download_webpage(
1980                         manifest_url, video_id, fatal=fatal, data=data, headers=headers,
1981                         note=False, errnote='Failed to download m3u8 playlist information')
1982                     if m3u8_doc is False:
1983                         return []
1984                 return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
1985
1986         else:
1987             def _extract_m3u8_playlist_indices(*args, **kwargs):
1988                 return [None]
1989
1990         # References:
1991         # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
1992         # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
1993         # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
1994
1995         # We should try extracting formats only from master playlists [1, 4.3.4],
1996         # i.e. playlists that describe available qualities. On the other hand
1997         # media playlists [1, 4.3.3] should be returned as is since they contain
1998         # just the media without qualities renditions.
1999         # Fortunately, master playlist can be easily distinguished from media
2000         # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
2001         # master playlist tags MUST NOT appear in a media playlist and vice versa.
2002         # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
2003         # media playlist and MUST NOT appear in master playlist thus we can
2004         # clearly detect media playlist with this criterion.
2005
2006         if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
2007             formats = [{
2008                 'format_id': join_nonempty(m3u8_id, idx),
2009                 'format_index': idx,
2010                 'url': m3u8_url or encode_data_uri(m3u8_doc.encode('utf-8'), 'application/x-mpegurl'),
2011                 'ext': ext,
2012                 'protocol': entry_protocol,
2013                 'preference': preference,
2014                 'quality': quality,
2015                 'has_drm': has_drm,
2016             } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
2017
2018             return formats, subtitles
2019
2020         groups = {}
2021         last_stream_inf = {}
2022
2023         def extract_media(x_media_line):
2024             media = parse_m3u8_attributes(x_media_line)
2025             # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
2026             media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
2027             if not (media_type and group_id and name):
2028                 return
2029             groups.setdefault(group_id, []).append(media)
2030             # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
2031             if media_type == 'SUBTITLES':
2032                 # According to RFC 8216 §4.3.4.2.1, URI is REQUIRED in the
2033                 # EXT-X-MEDIA tag if the media type is SUBTITLES.
2034                 # However, lack of URI has been spotted in the wild.
2035                 # e.g. NebulaIE; see https://github.com/yt-dlp/yt-dlp/issues/339
2036                 if not media.get('URI'):
2037                     return
2038                 url = format_url(media['URI'])
2039                 sub_info = {
2040                     'url': url,
2041                     'ext': determine_ext(url),
2042                 }
2043                 if sub_info['ext'] == 'm3u8':
2044                     # Per RFC 8216 §3.1, the only possible subtitle format m3u8
2045                     # files may contain is WebVTT:
2046                     # <https://tools.ietf.org/html/rfc8216#section-3.1>
2047                     sub_info['ext'] = 'vtt'
2048                     sub_info['protocol'] = 'm3u8_native'
2049                 lang = media.get('LANGUAGE') or 'und'
2050                 subtitles.setdefault(lang, []).append(sub_info)
2051             if media_type not in ('VIDEO', 'AUDIO'):
2052                 return
2053             media_url = media.get('URI')
2054             if media_url:
2055                 manifest_url = format_url(media_url)
2056                 formats.extend({
2057                     'format_id': join_nonempty(m3u8_id, group_id, name, idx),
2058                     'format_note': name,
2059                     'format_index': idx,
2060                     'url': manifest_url,
2061                     'manifest_url': m3u8_url,
2062                     'language': media.get('LANGUAGE'),
2063                     'ext': ext,
2064                     'protocol': entry_protocol,
2065                     'preference': preference,
2066                     'quality': quality,
2067                     'has_drm': has_drm,
2068                     'vcodec': 'none' if media_type == 'AUDIO' else None,
2069                 } for idx in _extract_m3u8_playlist_indices(manifest_url))
2070
2071         def build_stream_name():
2072             # Despite specification does not mention NAME attribute for
2073             # EXT-X-STREAM-INF tag it still sometimes may be present (see [1]
2074             # or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
2075             # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
2076             stream_name = last_stream_inf.get('NAME')
2077             if stream_name:
2078                 return stream_name
2079             # If there is no NAME in EXT-X-STREAM-INF it will be obtained
2080             # from corresponding rendition group
2081             stream_group_id = last_stream_inf.get('VIDEO')
2082             if not stream_group_id:
2083                 return
2084             stream_group = groups.get(stream_group_id)
2085             if not stream_group:
2086                 return stream_group_id
2087             rendition = stream_group[0]
2088             return rendition.get('NAME') or stream_group_id
2089
2090         # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
2091         # chance to detect video only formats when EXT-X-STREAM-INF tags
2092         # precede EXT-X-MEDIA tags in HLS manifest such as [3].
2093         for line in m3u8_doc.splitlines():
2094             if line.startswith('#EXT-X-MEDIA:'):
2095                 extract_media(line)
2096
2097         for line in m3u8_doc.splitlines():
2098             if line.startswith('#EXT-X-STREAM-INF:'):
2099                 last_stream_inf = parse_m3u8_attributes(line)
2100             elif line.startswith('#') or not line.strip():
2101                 continue
2102             else:
2103                 tbr = float_or_none(
2104                     last_stream_inf.get('AVERAGE-BANDWIDTH')
2105                     or last_stream_inf.get('BANDWIDTH'), scale=1000)
2106                 manifest_url = format_url(line.strip())
2107
2108                 for idx in _extract_m3u8_playlist_indices(manifest_url):
2109                     format_id = [m3u8_id, None, idx]
2110                     # Bandwidth of live streams may differ over time thus making
2111                     # format_id unpredictable. So it's better to keep provided
2112                     # format_id intact.
2113                     if not live:
2114                         stream_name = build_stream_name()
2115                         format_id[1] = stream_name or '%d' % (tbr or len(formats))
2116                     f = {
2117                         'format_id': join_nonempty(*format_id),
2118                         'format_index': idx,
2119                         'url': manifest_url,
2120                         'manifest_url': m3u8_url,
2121                         'tbr': tbr,
2122                         'ext': ext,
2123                         'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
2124                         'protocol': entry_protocol,
2125                         'preference': preference,
2126                         'quality': quality,
2127                         'has_drm': has_drm,
2128                     }
2129                     resolution = last_stream_inf.get('RESOLUTION')
2130                     if resolution:
2131                         mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
2132                         if mobj:
2133                             f['width'] = int(mobj.group('width'))
2134                             f['height'] = int(mobj.group('height'))
2135                     # Unified Streaming Platform
2136                     mobj = re.search(
2137                         r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
2138                     if mobj:
2139                         abr, vbr = mobj.groups()
2140                         abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
2141                         f.update({
2142                             'vbr': vbr,
2143                             'abr': abr,
2144                         })
2145                     codecs = parse_codecs(last_stream_inf.get('CODECS'))
2146                     f.update(codecs)
2147                     audio_group_id = last_stream_inf.get('AUDIO')
2148                     # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
2149                     # references a rendition group MUST have a CODECS attribute.
2150                     # However, this is not always respected. E.g. [2]
2151                     # contains EXT-X-STREAM-INF tag which references AUDIO
2152                     # rendition group but does not have CODECS and despite
2153                     # referencing an audio group it represents a complete
2154                     # (with audio and video) format. So, for such cases we will
2155                     # ignore references to rendition groups and treat them
2156                     # as complete formats.
2157                     if audio_group_id and codecs and f.get('vcodec') != 'none':
2158                         audio_group = groups.get(audio_group_id)
2159                         if audio_group and audio_group[0].get('URI'):
2160                             # TODO: update acodec for audio only formats with
2161                             # the same GROUP-ID
2162                             f['acodec'] = 'none'
2163                     if not f.get('ext'):
2164                         f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4'
2165                     formats.append(f)
2166
2167                     # for DailyMotion
2168                     progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
2169                     if progressive_uri:
2170                         http_f = f.copy()
2171                         del http_f['manifest_url']
2172                         http_f.update({
2173                             'format_id': f['format_id'].replace('hls-', 'http-'),
2174                             'protocol': 'http',
2175                             'url': progressive_uri,
2176                         })
2177                         formats.append(http_f)
2178
2179                 last_stream_inf = {}
2180         return formats, subtitles
2181
2182     def _extract_m3u8_vod_duration(
2183             self, m3u8_vod_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
2184
2185         m3u8_vod = self._download_webpage(
2186             m3u8_vod_url, video_id,
2187             note='Downloading m3u8 VOD manifest' if note is None else note,
2188             errnote='Failed to download VOD manifest' if errnote is None else errnote,
2189             fatal=False, data=data, headers=headers, query=query)
2190
2191         return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id)
2192
2193     def _parse_m3u8_vod_duration(self, m3u8_vod, video_id):
2194         if '#EXT-X-ENDLIST' not in m3u8_vod:
2195             return None
2196
2197         return int(sum(
2198             float(line[len('#EXTINF:'):].split(',')[0])
2199             for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None
2200
2201     def _extract_mpd_vod_duration(
2202             self, mpd_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
2203
2204         mpd_doc = self._download_xml(
2205             mpd_url, video_id,
2206             note='Downloading MPD VOD manifest' if note is None else note,
2207             errnote='Failed to download VOD manifest' if errnote is None else errnote,
2208             fatal=False, data=data, headers=headers, query=query) or {}
2209         return int_or_none(parse_duration(mpd_doc.get('mediaPresentationDuration')))
2210
2211     @staticmethod
2212     def _xpath_ns(path, namespace=None):
2213         if not namespace:
2214             return path
2215         out = []
2216         for c in path.split('/'):
2217             if not c or c == '.':
2218                 out.append(c)
2219             else:
2220                 out.append('{%s}%s' % (namespace, c))
2221         return '/'.join(out)
2222
2223     def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
2224         if self.get_param('ignore_no_formats_error'):
2225             fatal = False
2226
2227         res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
2228         if res is False:
2229             assert not fatal
2230             return [], {}
2231
2232         smil, urlh = res
2233         smil_url = urlh.geturl()
2234
2235         namespace = self._parse_smil_namespace(smil)
2236
2237         fmts = self._parse_smil_formats(
2238             smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
2239         subs = self._parse_smil_subtitles(
2240             smil, namespace=namespace)
2241
2242         return fmts, subs
2243
2244     def _extract_smil_formats(self, *args, **kwargs):
2245         fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
2246         if subs:
2247             self._report_ignoring_subs('SMIL')
2248         return fmts
2249
2250     def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
2251         res = self._download_smil(smil_url, video_id, fatal=fatal)
2252         if res is False:
2253             return {}
2254
2255         smil, urlh = res
2256         smil_url = urlh.geturl()
2257
2258         return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
2259
2260     def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
2261         return self._download_xml_handle(
2262             smil_url, video_id, 'Downloading SMIL file',
2263             'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
2264
2265     def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
2266         namespace = self._parse_smil_namespace(smil)
2267
2268         formats = self._parse_smil_formats(
2269             smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
2270         subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
2271
2272         video_id = os.path.splitext(url_basename(smil_url))[0]
2273         title = None
2274         description = None
2275         upload_date = None
2276         for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
2277             name = meta.attrib.get('name')
2278             content = meta.attrib.get('content')
2279             if not name or not content:
2280                 continue
2281             if not title and name == 'title':
2282                 title = content
2283             elif not description and name in ('description', 'abstract'):
2284                 description = content
2285             elif not upload_date and name == 'date':
2286                 upload_date = unified_strdate(content)
2287
2288         thumbnails = [{
2289             'id': image.get('type'),
2290             'url': image.get('src'),
2291             'width': int_or_none(image.get('width')),
2292             'height': int_or_none(image.get('height')),
2293         } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
2294
2295         return {
2296             'id': video_id,
2297             'title': title or video_id,
2298             'description': description,
2299             'upload_date': upload_date,
2300             'thumbnails': thumbnails,
2301             'formats': formats,
2302             'subtitles': subtitles,
2303         }
2304
2305     def _parse_smil_namespace(self, smil):
2306         return self._search_regex(
2307             r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
2308
    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Collect format dicts from the media elements of a SMIL document.

        smil                Parsed SMIL document (root element)
        smil_url            URL of the SMIL file; the base for relative sources
                            unless a head/meta element overrides it
        video_id            Passed on to the download/extraction helpers
        namespace           XML namespace used to qualify the element lookups
        f4m_params          Query parameters appended to f4m manifest URLs; a
                            default is filled in when none are given
        transform_rtmp_url  Optional callable taking `(streamer, src)` and
                            returning the pair to use for RTMP formats

        Returns a list of format dicts built from the video/audio elements,
        followed by the imagestream elements.
        """
        # The first 'base'/'httpBase' attribute found on a head/meta element
        # replaces the SMIL URL as the base for sources
        base = smil_url
        for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
            b = meta.get('base') or meta.get('httpBase')
            if b:
                base = b
                break

        formats = []
        # Per-kind counters, used to number the format ids
        rtmp_count = 0
        http_count = 0
        m3u8_count = 0
        imgs_count = 0

        # Sources already handled; duplicates are skipped
        srcs = set()
        media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
        for medium in media:
            src = medium.get('src')
            if not src or src in srcs:
                continue
            srcs.add(src)

            bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
            filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
            width = int_or_none(medium.get('width'))
            height = int_or_none(medium.get('height'))
            proto = medium.get('proto')
            ext = medium.get('ext')
            # The HEAD request is only issued when neither the source URL nor
            # the 'ext' attribute yields an extension
            src_ext = determine_ext(src, default_ext=None) or ext or urlhandle_detect_ext(
                self._request_webpage(HEADRequest(src), video_id, note='Requesting extension info', fatal=False))
            streamer = medium.get('streamer') or base

            if proto == 'rtmp' or streamer.startswith('rtmp'):
                rtmp_count += 1
                formats.append({
                    'url': streamer,
                    'play_path': src,
                    'ext': 'flv',
                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                    'tbr': bitrate,
                    'filesize': filesize,
                    'width': width,
                    'height': height,
                })
                if transform_rtmp_url:
                    # Let the caller rewrite the URL/play path of the format just added
                    streamer, src = transform_rtmp_url(streamer, src)
                    formats[-1].update({
                        'url': streamer,
                        'play_path': src,
                    })
                continue

            src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
            src_url = src_url.strip()

            if proto == 'm3u8' or src_ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
                # A lone format takes its id, bitrate and dimensions from the
                # SMIL element
                if len(m3u8_formats) == 1:
                    m3u8_count += 1
                    m3u8_formats[0].update({
                        'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
                        'tbr': bitrate,
                        'width': width,
                        'height': height,
                    })
                formats.extend(m3u8_formats)
            elif src_ext == 'f4m':
                f4m_url = src_url
                # The default assigned here stays in effect for the remaining media
                if not f4m_params:
                    f4m_params = {
                        'hdcore': '3.2.0',
                        'plugin': 'flowplayer-3.2.0.1',
                    }
                f4m_url += '&' if '?' in f4m_url else '?'
                f4m_url += urllib.parse.urlencode(f4m_params)
                formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
            elif src_ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src_url, video_id, mpd_id='dash', fatal=False))
            elif re.search(r'\.ism/[Mm]anifest', src_url):
                formats.extend(self._extract_ism_formats(
                    src_url, video_id, ism_id='mss', fatal=False))
            # NOTE(review): validity is checked against the raw `src`, while the
            # stored URL is the resolved `src_url` - confirm this is intended
            elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                http_count += 1
                formats.append({
                    'url': src_url,
                    'ext': ext or src_ext or 'flv',
                    'format_id': 'http-%d' % (bitrate or http_count),
                    'tbr': bitrate,
                    'filesize': filesize,
                    'width': width,
                    'height': height,
                })

        # Image streams are added as formats without audio or video codecs
        for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)):
            src = medium.get('src')
            if not src or src in srcs:
                continue
            srcs.add(src)

            imgs_count += 1
            formats.append({
                'format_id': 'imagestream-%d' % (imgs_count),
                'url': src,
                'ext': mimetype2ext(medium.get('type')),
                'acodec': 'none',
                'vcodec': 'none',
                'width': int_or_none(medium.get('width')),
                'height': int_or_none(medium.get('height')),
                'format_note': 'SMIL storyboards',
            })

        return formats
2423
2424     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
2425         urls = []
2426         subtitles = {}
2427         for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
2428             src = textstream.get('src')
2429             if not src or src in urls:
2430                 continue
2431             urls.append(src)
2432             ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
2433             lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
2434             subtitles.setdefault(lang, []).append({
2435                 'url': src,
2436                 'ext': ext,
2437             })
2438         return subtitles
2439
2440     def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
2441         res = self._download_xml_handle(
2442             xspf_url, playlist_id, 'Downloading xpsf playlist',
2443             'Unable to download xspf manifest', fatal=fatal)
2444         if res is False:
2445             return []
2446
2447         xspf, urlh = res
2448         xspf_url = urlh.geturl()
2449
2450         return self._parse_xspf(
2451             xspf, playlist_id, xspf_url=xspf_url,
2452             xspf_base_url=base_url(xspf_url))
2453
2454     def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
2455         NS_MAP = {
2456             'xspf': 'http://xspf.org/ns/0/',
2457             's1': 'http://static.streamone.nl/player/ns/0',
2458         }
2459
2460         entries = []
2461         for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
2462             title = xpath_text(
2463                 track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
2464             description = xpath_text(
2465                 track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
2466             thumbnail = xpath_text(
2467                 track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
2468             duration = float_or_none(
2469                 xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
2470
2471             formats = []
2472             for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
2473                 format_url = urljoin(xspf_base_url, location.text)
2474                 if not format_url:
2475                     continue
2476                 formats.append({
2477                     'url': format_url,
2478                     'manifest_url': xspf_url,
2479                     'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
2480                     'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
2481                     'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
2482                 })
2483
2484             entries.append({
2485                 'id': playlist_id,
2486                 'title': title,
2487                 'description': description,
2488                 'thumbnail': thumbnail,
2489                 'duration': duration,
2490                 'formats': formats,
2491             })
2492         return entries
2493
2494     def _extract_mpd_formats(self, *args, **kwargs):
2495         fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
2496         if subs:
2497             self._report_ignoring_subs('DASH')
2498         return fmts
2499
2500     def _extract_mpd_formats_and_subtitles(
2501             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
2502             fatal=True, data=None, headers={}, query={}):
2503
2504         if self.get_param('ignore_no_formats_error'):
2505             fatal = False
2506
2507         res = self._download_xml_handle(
2508             mpd_url, video_id,
2509             note='Downloading MPD manifest' if note is None else note,
2510             errnote='Failed to download MPD manifest' if errnote is None else errnote,
2511             fatal=fatal, data=data, headers=headers, query=query)
2512         if res is False:
2513             return [], {}
2514         mpd_doc, urlh = res
2515         if mpd_doc is None:
2516             return [], {}
2517
2518         # We could have been redirected to a new url when we retrieved our mpd file.
2519         mpd_url = urlh.geturl()
2520         mpd_base_url = base_url(mpd_url)
2521
2522         return self._parse_mpd_formats_and_subtitles(
2523             mpd_doc, mpd_id, mpd_base_url, mpd_url)
2524
2525     def _parse_mpd_formats(self, *args, **kwargs):
2526         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
2527         if subs:
2528             self._report_ignoring_subs('DASH')
2529         return fmts
2530
2531     def _parse_mpd_formats_and_subtitles(
2532             self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
2533         """
2534         Parse formats from MPD manifest.
2535         References:
2536          1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
2537             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
2538          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
2539         """
2540         if not self.get_param('dynamic_mpd', True):
2541             if mpd_doc.get('type') == 'dynamic':
2542                 return [], {}
2543
2544         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
2545
2546         def _add_ns(path):
2547             return self._xpath_ns(path, namespace)
2548
2549         def is_drm_protected(element):
2550             return element.find(_add_ns('ContentProtection')) is not None
2551
2552         def extract_multisegment_info(element, ms_parent_info):
2553             ms_info = ms_parent_info.copy()
2554
2555             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
2556             # common attributes and elements.  We will only extract relevant
2557             # for us.
2558             def extract_common(source):
2559                 segment_timeline = source.find(_add_ns('SegmentTimeline'))
2560                 if segment_timeline is not None:
2561                     s_e = segment_timeline.findall(_add_ns('S'))
2562                     if s_e:
2563                         ms_info['total_number'] = 0
2564                         ms_info['s'] = []
2565                         for s in s_e:
2566                             r = int(s.get('r', 0))
2567                             ms_info['total_number'] += 1 + r
2568                             ms_info['s'].append({
2569                                 't': int(s.get('t', 0)),
2570                                 # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
2571                                 'd': int(s.attrib['d']),
2572                                 'r': r,
2573                             })
2574                 start_number = source.get('startNumber')
2575                 if start_number:
2576                     ms_info['start_number'] = int(start_number)
2577                 timescale = source.get('timescale')
2578                 if timescale:
2579                     ms_info['timescale'] = int(timescale)
2580                 segment_duration = source.get('duration')
2581                 if segment_duration:
2582                     ms_info['segment_duration'] = float(segment_duration)
2583
2584             def extract_Initialization(source):
2585                 initialization = source.find(_add_ns('Initialization'))
2586                 if initialization is not None:
2587                     ms_info['initialization_url'] = initialization.attrib['sourceURL']
2588
2589             segment_list = element.find(_add_ns('SegmentList'))
2590             if segment_list is not None:
2591                 extract_common(segment_list)
2592                 extract_Initialization(segment_list)
2593                 segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
2594                 if segment_urls_e:
2595                     ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
2596             else:
2597                 segment_template = element.find(_add_ns('SegmentTemplate'))
2598                 if segment_template is not None:
2599                     extract_common(segment_template)
2600                     media = segment_template.get('media')
2601                     if media:
2602                         ms_info['media'] = media
2603                     initialization = segment_template.get('initialization')
2604                     if initialization:
2605                         ms_info['initialization'] = initialization
2606                     else:
2607                         extract_Initialization(segment_template)
2608             return ms_info
2609
2610         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
2611         formats, subtitles = [], {}
2612         stream_numbers = collections.defaultdict(int)
2613         for period in mpd_doc.findall(_add_ns('Period')):
2614             period_duration = parse_duration(period.get('duration')) or mpd_duration
2615             period_ms_info = extract_multisegment_info(period, {
2616                 'start_number': 1,
2617                 'timescale': 1,
2618             })
2619             for adaptation_set in period.findall(_add_ns('AdaptationSet')):
2620                 adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
2621                 for representation in adaptation_set.findall(_add_ns('Representation')):
2622                     representation_attrib = adaptation_set.attrib.copy()
2623                     representation_attrib.update(representation.attrib)
2624                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
2625                     mime_type = representation_attrib['mimeType']
2626                     content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
2627
2628                     codec_str = representation_attrib.get('codecs', '')
2629                     # Some kind of binary subtitle found in some youtube livestreams
2630                     if mime_type == 'application/x-rawcc':
2631                         codecs = {'scodec': codec_str}
2632                     else:
2633                         codecs = parse_codecs(codec_str)
2634                     if content_type not in ('video', 'audio', 'text'):
2635                         if mime_type == 'image/jpeg':
2636                             content_type = mime_type
2637                         elif codecs.get('vcodec', 'none') != 'none':
2638                             content_type = 'video'
2639                         elif codecs.get('acodec', 'none') != 'none':
2640                             content_type = 'audio'
2641                         elif codecs.get('scodec', 'none') != 'none':
2642                             content_type = 'text'
2643                         elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
2644                             content_type = 'text'
2645                         else:
2646                             self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
2647                             continue
2648
2649                     base_url = ''
2650                     for element in (representation, adaptation_set, period, mpd_doc):
2651                         base_url_e = element.find(_add_ns('BaseURL'))
2652                         if try_call(lambda: base_url_e.text) is not None:
2653                             base_url = base_url_e.text + base_url
2654                             if re.match(r'^https?://', base_url):
2655                                 break
2656                     if mpd_base_url and base_url.startswith('/'):
2657                         base_url = urllib.parse.urljoin(mpd_base_url, base_url)
2658                     elif mpd_base_url and not re.match(r'^https?://', base_url):
2659                         if not mpd_base_url.endswith('/'):
2660                             mpd_base_url += '/'
2661                         base_url = mpd_base_url + base_url
2662                     representation_id = representation_attrib.get('id')
2663                     lang = representation_attrib.get('lang')
2664                     url_el = representation.find(_add_ns('BaseURL'))
2665                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
2666                     bandwidth = int_or_none(representation_attrib.get('bandwidth'))
2667                     if representation_id is not None:
2668                         format_id = representation_id
2669                     else:
2670                         format_id = content_type
2671                     if mpd_id:
2672                         format_id = mpd_id + '-' + format_id
2673                     if content_type in ('video', 'audio'):
2674                         f = {
2675                             'format_id': format_id,
2676                             'manifest_url': mpd_url,
2677                             'ext': mimetype2ext(mime_type),
2678                             'width': int_or_none(representation_attrib.get('width')),
2679                             'height': int_or_none(representation_attrib.get('height')),
2680                             'tbr': float_or_none(bandwidth, 1000),
2681                             'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
2682                             'fps': int_or_none(representation_attrib.get('frameRate')),
2683                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
2684                             'format_note': 'DASH %s' % content_type,
2685                             'filesize': filesize,
2686                             'container': mimetype2ext(mime_type) + '_dash',
2687                             **codecs
2688                         }
2689                     elif content_type == 'text':
2690                         f = {
2691                             'ext': mimetype2ext(mime_type),
2692                             'manifest_url': mpd_url,
2693                             'filesize': filesize,
2694                         }
2695                     elif content_type == 'image/jpeg':
2696                         # See test case in VikiIE
2697                         # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
2698                         f = {
2699                             'format_id': format_id,
2700                             'ext': 'mhtml',
2701                             'manifest_url': mpd_url,
2702                             'format_note': 'DASH storyboards (jpeg)',
2703                             'acodec': 'none',
2704                             'vcodec': 'none',
2705                         }
2706                     if is_drm_protected(adaptation_set) or is_drm_protected(representation):
2707                         f['has_drm'] = True
2708                     representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
2709
2710                     def prepare_template(template_name, identifiers):
2711                         tmpl = representation_ms_info[template_name]
2712                         if representation_id is not None:
2713                             tmpl = tmpl.replace('$RepresentationID$', representation_id)
2714                         # First of, % characters outside $...$ templates
2715                         # must be escaped by doubling for proper processing
2716                         # by % operator string formatting used further (see
2717                         # https://github.com/ytdl-org/youtube-dl/issues/16867).
2718                         t = ''
2719                         in_template = False
2720                         for c in tmpl:
2721                             t += c
2722                             if c == '$':
2723                                 in_template = not in_template
2724                             elif c == '%' and not in_template:
2725                                 t += c
2726                         # Next, $...$ templates are translated to their
2727                         # %(...) counterparts to be used with % operator
2728                         t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
2729                         t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
2730                         t.replace('$$', '$')
2731                         return t
2732
2733                     # @initialization is a regular template like @media one
2734                     # so it should be handled just the same way (see
2735                     # https://github.com/ytdl-org/youtube-dl/issues/11605)
2736                     if 'initialization' in representation_ms_info:
2737                         initialization_template = prepare_template(
2738                             'initialization',
2739                             # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
2740                             # $Time$ shall not be included for @initialization thus
2741                             # only $Bandwidth$ remains
2742                             ('Bandwidth', ))
2743                         representation_ms_info['initialization_url'] = initialization_template % {
2744                             'Bandwidth': bandwidth,
2745                         }
2746
2747                     def location_key(location):
2748                         return 'url' if re.match(r'^https?://', location) else 'path'
2749
2750                     if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
2751
2752                         media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
2753                         media_location_key = location_key(media_template)
2754
2755                         # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
2756                         # can't be used at the same time
2757                         if '%(Number' in media_template and 's' not in representation_ms_info:
2758                             segment_duration = None
2759                             if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
2760                                 segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
2761                                 representation_ms_info['total_number'] = int(math.ceil(
2762                                     float_or_none(period_duration, segment_duration, default=0)))
2763                             representation_ms_info['fragments'] = [{
2764                                 media_location_key: media_template % {
2765                                     'Number': segment_number,
2766                                     'Bandwidth': bandwidth,
2767                                 },
2768                                 'duration': segment_duration,
2769                             } for segment_number in range(
2770                                 representation_ms_info['start_number'],
2771                                 representation_ms_info['total_number'] + representation_ms_info['start_number'])]
2772                         else:
2773                             # $Number*$ or $Time$ in media template with S list available
2774                             # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
2775                             # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
2776                             representation_ms_info['fragments'] = []
2777                             segment_time = 0
2778                             segment_d = None
2779                             segment_number = representation_ms_info['start_number']
2780
2781                             def add_segment_url():
2782                                 segment_url = media_template % {
2783                                     'Time': segment_time,
2784                                     'Bandwidth': bandwidth,
2785                                     'Number': segment_number,
2786                                 }
2787                                 representation_ms_info['fragments'].append({
2788                                     media_location_key: segment_url,
2789                                     'duration': float_or_none(segment_d, representation_ms_info['timescale']),
2790                                 })
2791
2792                             for num, s in enumerate(representation_ms_info['s']):
2793                                 segment_time = s.get('t') or segment_time
2794                                 segment_d = s['d']
2795                                 add_segment_url()
2796                                 segment_number += 1
2797                                 for r in range(s.get('r', 0)):
2798                                     segment_time += segment_d
2799                                     add_segment_url()
2800                                     segment_number += 1
2801                                 segment_time += segment_d
2802                     elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
2803                         # No media template,
2804                         # e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI
2805                         # or any YouTube dashsegments video
2806                         fragments = []
2807                         segment_index = 0
2808                         timescale = representation_ms_info['timescale']
2809                         for s in representation_ms_info['s']:
2810                             duration = float_or_none(s['d'], timescale)
2811                             for r in range(s.get('r', 0) + 1):
2812                                 segment_uri = representation_ms_info['segment_urls'][segment_index]
2813                                 fragments.append({
2814                                     location_key(segment_uri): segment_uri,
2815                                     'duration': duration,
2816                                 })
2817                                 segment_index += 1
2818                         representation_ms_info['fragments'] = fragments
2819                     elif 'segment_urls' in representation_ms_info:
2820                         # Segment URLs with no SegmentTimeline
2821                         # E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
2822                         # https://github.com/ytdl-org/youtube-dl/pull/14844
2823                         fragments = []
2824                         segment_duration = float_or_none(
2825                             representation_ms_info['segment_duration'],
2826                             representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
2827                         for segment_url in representation_ms_info['segment_urls']:
2828                             fragment = {
2829                                 location_key(segment_url): segment_url,
2830                             }
2831                             if segment_duration:
2832                                 fragment['duration'] = segment_duration
2833                             fragments.append(fragment)
2834                         representation_ms_info['fragments'] = fragments
2835                     # If there is a fragments key available then we correctly recognized fragmented media.
2836                     # Otherwise we will assume unfragmented media with direct access. Technically, such
2837                     # assumption is not necessarily correct since we may simply have no support for
2838                     # some forms of fragmented media renditions yet, but for now we'll use this fallback.
2839                     if 'fragments' in representation_ms_info:
2840                         f.update({
2841                             # NB: mpd_url may be empty when MPD manifest is parsed from a string
2842                             'url': mpd_url or base_url,
2843                             'fragment_base_url': base_url,
2844                             'fragments': [],
2845                             'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
2846                         })
2847                         if 'initialization_url' in representation_ms_info:
2848                             initialization_url = representation_ms_info['initialization_url']
2849                             if not f.get('url'):
2850                                 f['url'] = initialization_url
2851                             f['fragments'].append({location_key(initialization_url): initialization_url})
2852                         f['fragments'].extend(representation_ms_info['fragments'])
2853                         if not period_duration:
2854                             period_duration = try_get(
2855                                 representation_ms_info,
2856                                 lambda r: sum(frag['duration'] for frag in r['fragments']), float)
2857                     else:
2858                         # Assuming direct URL to unfragmented media.
2859                         f['url'] = base_url
2860                     if content_type in ('video', 'audio', 'image/jpeg'):
2861                         f['manifest_stream_number'] = stream_numbers[f['url']]
2862                         stream_numbers[f['url']] += 1
2863                         formats.append(f)
2864                     elif content_type == 'text':
2865                         subtitles.setdefault(lang or 'und', []).append(f)
2866
2867         return formats, subtitles
2868
2869     def _extract_ism_formats(self, *args, **kwargs):
2870         fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
2871         if subs:
2872             self._report_ignoring_subs('ISM')
2873         return fmts
2874
2875     def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
2876         if self.get_param('ignore_no_formats_error'):
2877             fatal = False
2878
2879         res = self._download_xml_handle(
2880             ism_url, video_id,
2881             note='Downloading ISM manifest' if note is None else note,
2882             errnote='Failed to download ISM manifest' if errnote is None else errnote,
2883             fatal=fatal, data=data, headers=headers, query=query)
2884         if res is False:
2885             return [], {}
2886         ism_doc, urlh = res
2887         if ism_doc is None:
2888             return [], {}
2889
2890         return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
2891
2892     def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
2893         """
2894         Parse formats from ISM manifest.
2895         References:
2896          1. [MS-SSTR]: Smooth Streaming Protocol,
2897             https://msdn.microsoft.com/en-us/library/ff469518.aspx
2898         """
2899         if ism_doc.get('IsLive') == 'TRUE':
2900             return [], {}
2901
2902         duration = int(ism_doc.attrib['Duration'])
2903         timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
2904
2905         formats = []
2906         subtitles = {}
2907         for stream in ism_doc.findall('StreamIndex'):
2908             stream_type = stream.get('Type')
2909             if stream_type not in ('video', 'audio', 'text'):
2910                 continue
2911             url_pattern = stream.attrib['Url']
2912             stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
2913             stream_name = stream.get('Name')
2914             stream_language = stream.get('Language', 'und')
2915             for track in stream.findall('QualityLevel'):
2916                 KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
2917                 fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
2918                 # TODO: add support for WVC1 and WMAP
2919                 if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
2920                     self.report_warning('%s is not a supported codec' % fourcc)
2921                     continue
2922                 tbr = int(track.attrib['Bitrate']) // 1000
2923                 # [1] does not mention Width and Height attributes. However,
2924                 # they're often present while MaxWidth and MaxHeight are
2925                 # missing, so should be used as fallbacks
2926                 width = int_or_none(track.get('MaxWidth') or track.get('Width'))
2927                 height = int_or_none(track.get('MaxHeight') or track.get('Height'))
2928                 sampling_rate = int_or_none(track.get('SamplingRate'))
2929
2930                 track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
2931                 track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
2932
2933                 fragments = []
2934                 fragment_ctx = {
2935                     'time': 0,
2936                 }
2937                 stream_fragments = stream.findall('c')
2938                 for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
2939                     fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
2940                     fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
2941                     fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
2942                     if not fragment_ctx['duration']:
2943                         try:
2944                             next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
2945                         except IndexError:
2946                             next_fragment_time = duration
2947                         fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
2948                     for _ in range(fragment_repeat):
2949                         fragments.append({
2950                             'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
2951                             'duration': fragment_ctx['duration'] / stream_timescale,
2952                         })
2953                         fragment_ctx['time'] += fragment_ctx['duration']
2954
2955                 if stream_type == 'text':
2956                     subtitles.setdefault(stream_language, []).append({
2957                         'ext': 'ismt',
2958                         'protocol': 'ism',
2959                         'url': ism_url,
2960                         'manifest_url': ism_url,
2961                         'fragments': fragments,
2962                         '_download_params': {
2963                             'stream_type': stream_type,
2964                             'duration': duration,
2965                             'timescale': stream_timescale,
2966                             'fourcc': fourcc,
2967                             'language': stream_language,
2968                             'codec_private_data': track.get('CodecPrivateData'),
2969                         }
2970                     })
2971                 elif stream_type in ('video', 'audio'):
2972                     formats.append({
2973                         'format_id': join_nonempty(ism_id, stream_name, tbr),
2974                         'url': ism_url,
2975                         'manifest_url': ism_url,
2976                         'ext': 'ismv' if stream_type == 'video' else 'isma',
2977                         'width': width,
2978                         'height': height,
2979                         'tbr': tbr,
2980                         'asr': sampling_rate,
2981                         'vcodec': 'none' if stream_type == 'audio' else fourcc,
2982                         'acodec': 'none' if stream_type == 'video' else fourcc,
2983                         'protocol': 'ism',
2984                         'fragments': fragments,
2985                         'has_drm': ism_doc.find('Protection') is not None,
2986                         'language': stream_language,
2987                         'audio_channels': int_or_none(track.get('Channels')),
2988                         '_download_params': {
2989                             'stream_type': stream_type,
2990                             'duration': duration,
2991                             'timescale': stream_timescale,
2992                             'width': width or 0,
2993                             'height': height or 0,
2994                             'fourcc': fourcc,
2995                             'language': stream_language,
2996                             'codec_private_data': track.get('CodecPrivateData'),
2997                             'sampling_rate': sampling_rate,
2998                             'channels': int_or_none(track.get('Channels', 2)),
2999                             'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
3000                             'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
3001                         },
3002                     })
3003         return formats, subtitles
3004
    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
        """
        Collect media entries from HTML5-style <video>/<audio> tags of a webpage.

        Both self-closing and paired tags are scanned, including their amp-*,
        dl8-* and dl8-live-* variants. For every tag, formats are gathered from
        the source attribute of the tag itself and from nested <source> tags,
        and subtitles are gathered from nested <track> tags.

        @param base_url             URL that relative URLs are resolved against.
                                    It is also set as the Referer header of
                                    every format found
        @param webpage              HTML text to scan
        @param video_id             Passed on to the m3u8/mpd extraction helpers
        @param m3u8_id              Passed on to _extract_m3u8_formats
        @param m3u8_entry_protocol  Passed on to _extract_m3u8_formats as entry_protocol
        @param mpd_id               Passed on to _extract_mpd_formats
        @param preference           Passed on to _extract_m3u8_formats
        @param quality              Passed on to _extract_m3u8_formats
        @returns                    List of dicts with the keys 'formats',
                                    'subtitles' and 'thumbnail'. Tags yielding
                                    neither formats nor subtitles are left out
        """
        def absolute_url(item_url):
            # Resolve a (possibly relative) URL against the base URL
            return urljoin(base_url, item_url)

        def parse_content_type(content_type):
            # Convert the value of a "type" attribute (mimetype with an optional
            # codecs parameter) into a partial format dict.
            # An empty dict is returned if the value is missing or unparsable
            if not content_type:
                return {}
            ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
            if ctr:
                mimetype, codecs = ctr.groups()
                f = parse_codecs(codecs)
                f['ext'] = mimetype2ext(mimetype)
                return f
            return {}

        def _media_formats(src, cur_media_type, type_info=None):
            # Returns a tuple (is_plain_url, formats). m3u8 and mpd URLs are
            # expanded by the respective manifest helper (non-fatally);
            # any other URL results in a single direct format
            type_info = type_info or {}
            full_url = absolute_url(src)
            # The extension derived from the type attribute takes precedence over the URL
            ext = type_info.get('ext') or determine_ext(full_url)
            if ext == 'm3u8':
                is_plain_url = False
                formats = self._extract_m3u8_formats(
                    full_url, video_id, ext='mp4',
                    entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
                    preference=preference, quality=quality, fatal=False)
            elif ext == 'mpd':
                is_plain_url = False
                formats = self._extract_mpd_formats(
                    full_url, video_id, mpd_id=mpd_id, fatal=False)
            else:
                is_plain_url = True
                formats = [{
                    'url': full_url,
                    'vcodec': 'none' if cur_media_type == 'audio' else None,
                    'ext': ext,
                }]
            return is_plain_url, formats

        entries = []
        # amp-video and amp-audio are very similar to their HTML5 counterparts
        # so we will include them right here (see
        # https://www.ampproject.org/docs/reference/components/amp-video)
        # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
        # Self-closing tags have no content; an empty string is used in its place
        # so that both kinds of matches share the same 4-tuple layout
        media_tags = [(media_tag, media_tag_name, media_type, '')
                      for media_tag, media_tag_name, media_type
                      in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
        media_tags.extend(re.findall(
            # We only allow video|audio followed by a whitespace or '>'.
            # Allowing more characters may end up in significant slow down (see
            # https://github.com/ytdl-org/youtube-dl/issues/11979,
            # e.g. http://www.porntrex.com/maps/videositemap.xml).
            r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
        for media_tag, _, media_type, media_content in media_tags:
            media_info = {
                'formats': [],
                'subtitles': {},
            }
            media_attributes = extract_attributes(media_tag)
            # The first of these attributes that is present is used as source URL
            src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
            if src:
                f = parse_content_type(media_attributes.get('type'))
                _, formats = _media_formats(src, media_type, f)
                media_info['formats'].extend(formats)
            media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
            if media_content:
                for source_tag in re.findall(r'<source[^>]+>', media_content):
                    s_attr = extract_attributes(source_tag)
                    # data-video-src and data-src are non standard but seen
                    # several times in the wild
                    src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
                    if not src:
                        continue
                    f = parse_content_type(s_attr.get('type'))
                    is_plain_url, formats = _media_formats(src, media_type, f)
                    if is_plain_url:
                        # width, height, res, label and title attributes are
                        # all not standard but seen several times in the wild
                        labels = [
                            s_attr.get(lbl)
                            for lbl in ('label', 'title')
                            if str_or_none(s_attr.get(lbl))
                        ]
                        width = int_or_none(s_attr.get('width'))
                        height = (int_or_none(s_attr.get('height'))
                                  or int_or_none(s_attr.get('res')))
                        if not width or not height:
                            # Fill in whichever dimension is still missing from the labels
                            for lbl in labels:
                                resolution = parse_resolution(lbl)
                                if not resolution:
                                    continue
                                width = width or resolution.get('width')
                                height = height or resolution.get('height')
                        # for/else: tbr is the first bitrate parsed from a label,
                        # or None if no label yields one
                        for lbl in labels:
                            tbr = parse_bitrate(lbl)
                            if tbr:
                                break
                        else:
                            tbr = None
                        f.update({
                            'width': width,
                            'height': height,
                            'tbr': tbr,
                            'format_id': s_attr.get('label') or s_attr.get('title'),
                        })
                        # Values of the plain format (url, vcodec, ext) override
                        # those parsed from the type attribute
                        f.update(formats[0])
                        media_info['formats'].append(f)
                    else:
                        media_info['formats'].extend(formats)
                for track_tag in re.findall(r'<track[^>]+>', media_content):
                    track_attributes = extract_attributes(track_tag)
                    kind = track_attributes.get('kind')
                    # Tracks without a kind are treated like subtitles
                    if not kind or kind in ('subtitles', 'captions'):
                        src = strip_or_none(track_attributes.get('src'))
                        if not src:
                            continue
                        # The language key may end up being None if none of these attributes is set
                        lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
                        media_info['subtitles'].setdefault(lang, []).append({
                            'url': absolute_url(src),
                        })
            for f in media_info['formats']:
                f.setdefault('http_headers', {})['Referer'] = base_url
            if media_info['formats'] or media_info['subtitles']:
                entries.append(media_info)
        return entries
3130
3131     def _extract_akamai_formats(self, *args, **kwargs):
3132         fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
3133         if subs:
3134             self._report_ignoring_subs('akamai')
3135         return fmts
3136
3137     def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
3138         signed = 'hdnea=' in manifest_url
3139         if not signed:
3140             # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
3141             manifest_url = re.sub(
3142                 r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
3143                 '', manifest_url).strip('?')
3144
3145         formats = []
3146         subtitles = {}
3147
3148         hdcore_sign = 'hdcore=3.7.0'
3149         f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
3150         hds_host = hosts.get('hds')
3151         if hds_host:
3152             f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
3153         if 'hdcore=' not in f4m_url:
3154             f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
3155         f4m_formats = self._extract_f4m_formats(
3156             f4m_url, video_id, f4m_id='hds', fatal=False)
3157         for entry in f4m_formats:
3158             entry.update({'extra_param_to_segment_url': hdcore_sign})
3159         formats.extend(f4m_formats)
3160
3161         m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
3162         hls_host = hosts.get('hls')
3163         if hls_host:
3164             m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
3165         m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
3166             m3u8_url, video_id, 'mp4', 'm3u8_native',
3167             m3u8_id='hls', fatal=False)
3168         formats.extend(m3u8_formats)
3169         subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
3170
3171         http_host = hosts.get('http')
3172         if http_host and m3u8_formats and not signed:
3173             REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
3174             qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
3175             qualities_length = len(qualities)
3176             if len(m3u8_formats) in (qualities_length, qualities_length + 1):
3177                 i = 0
3178                 for f in m3u8_formats:
3179                     if f['vcodec'] != 'none':
3180                         for protocol in ('http', 'https'):
3181                             http_f = f.copy()
3182                             del http_f['manifest_url']
3183                             http_url = re.sub(
3184                                 REPL_REGEX, protocol + fr'://{http_host}/\g<1>{qualities[i]}\3', f['url'])
3185                             http_f.update({
3186                                 'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
3187                                 'url': http_url,
3188                                 'protocol': protocol,
3189                             })
3190                             formats.append(http_f)
3191                         i += 1
3192
3193         return formats, subtitles
3194
3195     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
3196         query = urllib.parse.urlparse(url).query
3197         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
3198         mobj = re.search(
3199             r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
3200         url_base = mobj.group('url')
3201         http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
3202         formats = []
3203
3204         def manifest_url(manifest):
3205             m_url = f'{http_base_url}/{manifest}'
3206             if query:
3207                 m_url += '?%s' % query
3208             return m_url
3209
3210         if 'm3u8' not in skip_protocols:
3211             formats.extend(self._extract_m3u8_formats(
3212                 manifest_url('playlist.m3u8'), video_id, 'mp4',
3213                 m3u8_entry_protocol, m3u8_id='hls', fatal=False))
3214         if 'f4m' not in skip_protocols:
3215             formats.extend(self._extract_f4m_formats(
3216                 manifest_url('manifest.f4m'),
3217                 video_id, f4m_id='hds', fatal=False))
3218         if 'dash' not in skip_protocols:
3219             formats.extend(self._extract_mpd_formats(
3220                 manifest_url('manifest.mpd'),
3221                 video_id, mpd_id='dash', fatal=False))
3222         if re.search(r'(?:/smil:|\.smil)', url_base):
3223             if 'smil' not in skip_protocols:
3224                 rtmp_formats = self._extract_smil_formats(
3225                     manifest_url('jwplayer.smil'),
3226                     video_id, fatal=False)
3227                 for rtmp_format in rtmp_formats:
3228                     rtsp_format = rtmp_format.copy()
3229                     rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
3230                     del rtsp_format['play_path']
3231                     del rtsp_format['ext']
3232                     rtsp_format.update({
3233                         'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
3234                         'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
3235                         'protocol': 'rtsp',
3236                     })
3237                     formats.extend([rtmp_format, rtsp_format])
3238         else:
3239             for protocol in ('rtmp', 'rtsp'):
3240                 if protocol not in skip_protocols:
3241                     formats.append({
3242                         'url': f'{protocol}:{url_base}',
3243                         'format_id': protocol,
3244                         'protocol': protocol,
3245                     })
3246         return formats
3247
3248     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
3249         mobj = re.search(
3250             r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
3251             webpage)
3252         if mobj:
3253             try:
3254                 jwplayer_data = self._parse_json(mobj.group('options'),
3255                                                  video_id=video_id,
3256                                                  transform_source=transform_source)
3257             except ExtractorError:
3258                 pass
3259             else:
3260                 if isinstance(jwplayer_data, dict):
3261                     return jwplayer_data
3262
3263     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
3264         jwplayer_data = self._find_jwplayer_data(
3265             webpage, video_id, transform_source=js_to_json)
3266         return self._parse_jwplayer_data(
3267             jwplayer_data, video_id, *args, **kwargs)
3268
3269     def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
3270                              m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
3271         entries = []
3272         if not isinstance(jwplayer_data, dict):
3273             return entries
3274
3275         playlist_items = jwplayer_data.get('playlist')
3276         # JWPlayer backward compatibility: single playlist item/flattened playlists
3277         # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
3278         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
3279         if not isinstance(playlist_items, list):
3280             playlist_items = (playlist_items or jwplayer_data, )
3281
3282         for video_data in playlist_items:
3283             if not isinstance(video_data, dict):
3284                 continue
3285             # JWPlayer backward compatibility: flattened sources
3286             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
3287             if 'sources' not in video_data:
3288                 video_data['sources'] = [video_data]
3289
3290             this_video_id = video_id or video_data['mediaid']
3291
3292             formats = self._parse_jwplayer_formats(
3293                 video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
3294                 mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
3295
3296             subtitles = {}
3297             tracks = video_data.get('tracks')
3298             if tracks and isinstance(tracks, list):
3299                 for track in tracks:
3300                     if not isinstance(track, dict):
3301                         continue
3302                     track_kind = track.get('kind')
3303                     if not track_kind or not isinstance(track_kind, str):
3304                         continue
3305                     if track_kind.lower() not in ('captions', 'subtitles'):
3306                         continue
3307                     track_url = urljoin(base_url, track.get('file'))
3308                     if not track_url:
3309                         continue
3310                     subtitles.setdefault(track.get('label') or 'en', []).append({
3311                         'url': self._proto_relative_url(track_url)
3312                     })
3313
3314             entry = {
3315                 'id': this_video_id,
3316                 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
3317                 'description': clean_html(video_data.get('description')),
3318                 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
3319                 'timestamp': int_or_none(video_data.get('pubdate')),
3320                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
3321                 'subtitles': subtitles,
3322                 'alt_title': clean_html(video_data.get('subtitle')),  # attributes used e.g. by Tele5 ...
3323                 'genre': clean_html(video_data.get('genre')),
3324                 'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
3325                 'season_number': int_or_none(video_data.get('season')),
3326                 'episode_number': int_or_none(video_data.get('episode')),
3327                 'release_year': int_or_none(video_data.get('releasedate')),
3328                 'age_limit': int_or_none(video_data.get('age_restriction')),
3329             }
3330             # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
3331             if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
3332                 entry.update({
3333                     '_type': 'url_transparent',
3334                     'url': formats[0]['url'],
3335                 })
3336             else:
3337                 entry['formats'] = formats
3338             entries.append(entry)
3339         if len(entries) == 1:
3340             return entries[0]
3341         else:
3342             return self.playlist_result(entries)
3343
3344     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
3345                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
3346         urls = set()
3347         formats = []
3348         for source in jwplayer_sources_data:
3349             if not isinstance(source, dict):
3350                 continue
3351             source_url = urljoin(
3352                 base_url, self._proto_relative_url(source.get('file')))
3353             if not source_url or source_url in urls:
3354                 continue
3355             urls.add(source_url)
3356             source_type = source.get('type') or ''
3357             ext = mimetype2ext(source_type) or determine_ext(source_url)
3358             if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
3359                 formats.extend(self._extract_m3u8_formats(
3360                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
3361                     m3u8_id=m3u8_id, fatal=False))
3362             elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
3363                 formats.extend(self._extract_mpd_formats(
3364                     source_url, video_id, mpd_id=mpd_id, fatal=False))
3365             elif ext == 'smil':
3366                 formats.extend(self._extract_smil_formats(
3367                     source_url, video_id, fatal=False))
3368             # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
3369             elif source_type.startswith('audio') or ext in (
3370                     'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
3371                 formats.append({
3372                     'url': source_url,
3373                     'vcodec': 'none',
3374                     'ext': ext,
3375                 })
3376             else:
3377                 format_id = str_or_none(source.get('label'))
3378                 height = int_or_none(source.get('height'))
3379                 if height is None and format_id:
3380                     # Often no height is provided but there is a label in
3381                     # format like "1080p", "720p SD", or 1080.
3382                     height = parse_resolution(format_id).get('height')
3383                 a_format = {
3384                     'url': source_url,
3385                     'width': int_or_none(source.get('width')),
3386                     'height': height,
3387                     'tbr': int_or_none(source.get('bitrate'), scale=1000),
3388                     'filesize': int_or_none(source.get('filesize')),
3389                     'ext': ext,
3390                     'format_id': format_id
3391                 }
3392                 if source_url.startswith('rtmp'):
3393                     a_format['ext'] = 'flv'
3394                     # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
3395                     # of jwplayer.flash.swf
3396                     rtmp_url_parts = re.split(
3397                         r'((?:mp4|mp3|flv):)', source_url, 1)
3398                     if len(rtmp_url_parts) == 3:
3399                         rtmp_url, prefix, play_path = rtmp_url_parts
3400                         a_format.update({
3401                             'url': rtmp_url,
3402                             'play_path': prefix + play_path,
3403                         })
3404                     if rtmp_params:
3405                         a_format.update(rtmp_params)
3406                 formats.append(a_format)
3407         return formats
3408
3409     def _live_title(self, name):
3410         self._downloader.deprecation_warning('yt_dlp.InfoExtractor._live_title is deprecated and does not work as expected')
3411         return name
3412
3413     def _int(self, v, name, fatal=False, **kwargs):
3414         res = int_or_none(v, **kwargs)
3415         if res is None:
3416             msg = f'Failed to extract {name}: Could not parse value {v!r}'
3417             if fatal:
3418                 raise ExtractorError(msg)
3419             else:
3420                 self.report_warning(msg)
3421         return res
3422
3423     def _float(self, v, name, fatal=False, **kwargs):
3424         res = float_or_none(v, **kwargs)
3425         if res is None:
3426             msg = f'Failed to extract {name}: Could not parse value {v!r}'
3427             if fatal:
3428                 raise ExtractorError(msg)
3429             else:
3430                 self.report_warning(msg)
3431         return res
3432
3433     def _set_cookie(self, domain, name, value, expire_time=None, port=None,
3434                     path='/', secure=False, discard=False, rest={}, **kwargs):
3435         cookie = http.cookiejar.Cookie(
3436             0, name, value, port, port is not None, domain, True,
3437             domain.startswith('.'), path, True, secure, expire_time,
3438             discard, None, None, rest)
3439         self.cookiejar.set_cookie(cookie)
3440
3441     def _get_cookies(self, url):
3442         """ Return a http.cookies.SimpleCookie with the cookies for the url """
3443         return LenientSimpleCookie(self._downloader._calc_cookies(url))
3444
3445     def _apply_first_set_cookie_header(self, url_handle, cookie):
3446         """
3447         Apply first Set-Cookie header instead of the last. Experimental.
3448
3449         Some sites (e.g. [1-3]) may serve two cookies under the same name
3450         in Set-Cookie header and expect the first (old) one to be set rather
3451         than second (new). However, as of RFC6265 the newer one cookie
3452         should be set into cookie store what actually happens.
3453         We will workaround this issue by resetting the cookie to
3454         the first one manually.
3455         1. https://new.vk.com/
3456         2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
3457         3. https://learning.oreilly.com/
3458         """
3459         for header, cookies in url_handle.headers.items():
3460             if header.lower() != 'set-cookie':
3461                 continue
3462             cookies = cookies.encode('iso-8859-1').decode('utf-8')
3463             cookie_value = re.search(
3464                 r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
3465             if cookie_value:
3466                 value, domain = cookie_value.groups()
3467                 self._set_cookie(domain, cookie, value)
3468                 break
3469
3470     @classmethod
3471     def get_testcases(cls, include_onlymatching=False):
3472         # Do not look in super classes
3473         t = vars(cls).get('_TEST')
3474         if t:
3475             assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
3476             tests = [t]
3477         else:
3478             tests = vars(cls).get('_TESTS', [])
3479         for t in tests:
3480             if not include_onlymatching and t.get('only_matching', False):
3481                 continue
3482             t['name'] = cls.ie_key()
3483             yield t
3484         if getattr(cls, '__wrapped__', None):
3485             yield from cls.__wrapped__.get_testcases(include_onlymatching)
3486
3487     @classmethod
3488     def get_webpage_testcases(cls):
3489         tests = vars(cls).get('_WEBPAGE_TESTS', [])
3490         for t in tests:
3491             t['name'] = cls.ie_key()
3492             yield t
3493         if getattr(cls, '__wrapped__', None):
3494             yield from cls.__wrapped__.get_webpage_testcases()
3495
3496     @classproperty(cache=True)
3497     def age_limit(cls):
3498         """Get age limit from the testcases"""
3499         return max(traverse_obj(
3500             (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
3501             (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
3502
3503     @classproperty(cache=True)
3504     def _RETURN_TYPE(cls):
3505         """What the extractor returns: "video", "playlist", "any", or None (Unknown)"""
3506         tests = tuple(cls.get_testcases(include_onlymatching=False))
3507         if not tests:
3508             return None
3509         elif not any(k.startswith('playlist') for test in tests for k in test):
3510             return 'video'
3511         elif all(any(k.startswith('playlist') for k in test) for test in tests):
3512             return 'playlist'
3513         return 'any'
3514
3515     @classmethod
3516     def is_single_video(cls, url):
3517         """Returns whether the URL is of a single video, None if unknown"""
3518         if cls.suitable(url):
3519             return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
3520
3521     @classmethod
3522     def is_suitable(cls, age_limit):
3523         """Test whether the extractor is generally suitable for the given age limit"""
3524         return not age_restricted(cls.age_limit, age_limit)
3525
3526     @classmethod
3527     def description(cls, *, markdown=True, search_examples=None):
3528         """Description of the extractor"""
3529         desc = ''
3530         if cls._NETRC_MACHINE:
3531             if markdown:
3532                 desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")'
3533             else:
3534                 desc += f' [{cls._NETRC_MACHINE}]'
3535         if cls.IE_DESC is False:
3536             desc += ' [HIDDEN]'
3537         elif cls.IE_DESC:
3538             desc += f' {cls.IE_DESC}'
3539         if cls.SEARCH_KEY:
3540             desc += f'{";" if cls.IE_DESC else ""} "{cls.SEARCH_KEY}:" prefix'
3541             if search_examples:
3542                 _COUNTS = ('', '5', '10', 'all')
3543                 desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
3544         if not cls.working():
3545             desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
3546
3547         # Escape emojis. Ref: https://github.com/github/markup/issues/1153
3548         name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
3549         return f'{name}:{desc}' if desc else name
3550
3551     def extract_subtitles(self, *args, **kwargs):
3552         if (self.get_param('writesubtitles', False)
3553                 or self.get_param('listsubtitles')):
3554             return self._get_subtitles(*args, **kwargs)
3555         return {}
3556
3557     def _get_subtitles(self, *args, **kwargs):
3558         raise NotImplementedError('This method must be implemented by subclasses')
3559
    class CommentsDisabled(Exception):
        """
        Raise in _get_comments if comments are disabled for the video

        extract_comments handles this exception by returning None for
        both "comments" and "comment_count".
        """
3562
3563     def extract_comments(self, *args, **kwargs):
3564         if not self.get_param('getcomments'):
3565             return None
3566         generator = self._get_comments(*args, **kwargs)
3567
3568         def extractor():
3569             comments = []
3570             interrupted = True
3571             try:
3572                 while True:
3573                     comments.append(next(generator))
3574             except StopIteration:
3575                 interrupted = False
3576             except KeyboardInterrupt:
3577                 self.to_screen('Interrupted by user')
3578             except self.CommentsDisabled:
3579                 return {'comments': None, 'comment_count': None}
3580             except Exception as e:
3581                 if self.get_param('ignoreerrors') is not True:
3582                     raise
3583                 self._downloader.report_error(e)
3584             comment_count = len(comments)
3585             self.to_screen(f'Extracted {comment_count} comments')
3586             return {
3587                 'comments': comments,
3588                 'comment_count': None if interrupted else comment_count
3589             }
3590         return extractor
3591
3592     def _get_comments(self, *args, **kwargs):
3593         raise NotImplementedError('This method must be implemented by subclasses')
3594
3595     @staticmethod
3596     def _merge_subtitle_items(subtitle_list1, subtitle_list2):
3597         """ Merge subtitle items for one language. Items with duplicated URLs/data
3598         will be dropped. """
3599         list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1}
3600         ret = list(subtitle_list1)
3601         ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
3602         return ret
3603
3604     @classmethod
3605     def _merge_subtitles(cls, *dicts, target=None):
3606         """ Merge subtitle dictionaries, language by language. """
3607         if target is None:
3608             target = {}
3609         for d in dicts:
3610             for lang, subs in d.items():
3611                 target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
3612         return target
3613
3614     def extract_automatic_captions(self, *args, **kwargs):
3615         if (self.get_param('writeautomaticsub', False)
3616                 or self.get_param('listsubtitles')):
3617             return self._get_automatic_captions(*args, **kwargs)
3618         return {}
3619
3620     def _get_automatic_captions(self, *args, **kwargs):
3621         raise NotImplementedError('This method must be implemented by subclasses')
3622
3623     @functools.cached_property
3624     def _cookies_passed(self):
3625         """Whether cookies have been passed to YoutubeDL"""
3626         return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None
3627
3628     def mark_watched(self, *args, **kwargs):
3629         if not self.get_param('mark_watched', False):
3630             return
3631         if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
3632             self._mark_watched(*args, **kwargs)
3633
3634     def _mark_watched(self, *args, **kwargs):
3635         raise NotImplementedError('This method must be implemented by subclasses')
3636
3637     def geo_verification_headers(self):
3638         headers = {}
3639         geo_verification_proxy = self.get_param('geo_verification_proxy')
3640         if geo_verification_proxy:
3641             headers['Ytdl-request-proxy'] = geo_verification_proxy
3642         return headers
3643
3644     @staticmethod
3645     def _generic_id(url):
3646         return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
3647
3648     def _generic_title(self, url='', webpage='', *, default=None):
3649         return (self._og_search_title(webpage, default=None)
3650                 or self._html_extract_title(webpage, default=None)
3651                 or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
3652                 or default)
3653
3654     def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
3655         if not duration:
3656             return
3657         chapter_list = [{
3658             'start_time': start_function(chapter),
3659             'title': title_function(chapter),
3660         } for chapter in chapter_list or []]
3661         if not strict:
3662             chapter_list.sort(key=lambda c: c['start_time'] or 0)
3663
3664         chapters = [{'start_time': 0}]
3665         for idx, chapter in enumerate(chapter_list):
3666             if chapter['start_time'] is None:
3667                 self.report_warning(f'Incomplete chapter {idx}')
3668             elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
3669                 chapters.append(chapter)
3670             elif chapter not in chapters:
3671                 self.report_warning(
3672                     f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
3673         return chapters[1:]
3674
3675     def _extract_chapters_from_description(self, description, duration):
3676         duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
3677         sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
3678         return self._extract_chapters_helper(
3679             re.findall(sep_re % (duration_re, r'.+?'), description or ''),
3680             start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1],
3681             duration=duration, strict=False) or self._extract_chapters_helper(
3682             re.findall(sep_re % (r'.+?', duration_re), description or ''),
3683             start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0],
3684             duration=duration, strict=False)
3685
3686     @staticmethod
3687     def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
3688         all_known = all(map(
3689             lambda x: x is not None,
3690             (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
3691         return (
3692             'private' if is_private
3693             else 'premium_only' if needs_premium
3694             else 'subscriber_only' if needs_subscription
3695             else 'needs_auth' if needs_auth
3696             else 'unlisted' if is_unlisted
3697             else 'public' if all_known
3698             else None)
3699
3700     def _configuration_arg(self, key, default=NO_DEFAULT, *, ie_key=None, casesense=False):
3701         '''
3702         @returns            A list of values for the extractor argument given by "key"
3703                             or "default" if no such key is present
3704         @param default      The default value to return when the key is not present (default: [])
3705         @param casesense    When false, the values are converted to lower case
3706         '''
3707         ie_key = ie_key if isinstance(ie_key, str) else (ie_key or self).ie_key()
3708         val = traverse_obj(self._downloader.params, ('extractor_args', ie_key.lower(), key))
3709         if val is None:
3710             return [] if default is NO_DEFAULT else default
3711         return list(val) if casesense else [x.lower() for x in val]
3712
3713     def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_label='playlist', video_label='video'):
3714         if not playlist_id or not video_id:
3715             return not video_id
3716
3717         no_playlist = (smuggled_data or {}).get('force_noplaylist')
3718         if no_playlist is not None:
3719             return not no_playlist
3720
3721         video_id = '' if video_id is True else f' {video_id}'
3722         playlist_id = '' if playlist_id is True else f' {playlist_id}'
3723         if self.get_param('noplaylist'):
3724             self.to_screen(f'Downloading just the {video_label}{video_id} because of --no-playlist')
3725             return False
3726         self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
3727         return True
3728
3729     def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
3730         RetryManager.report_retry(
3731             err, _count or int(fatal), _retries,
3732             info=self.to_screen, warn=self.report_warning, error=None if fatal else self.report_warning,
3733             sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
3734
3735     def RetryManager(self, **kwargs):
3736         return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
3737
3738     def _extract_generic_embeds(self, url, *args, info_dict={}, note='Extracting generic embeds', **kwargs):
3739         display_id = traverse_obj(info_dict, 'display_id', 'id')
3740         self.to_screen(f'{format_field(display_id, None, "%s: ")}{note}')
3741         return self._downloader.get_info_extractor('Generic')._extract_embeds(
3742             smuggle_url(url, {'block_ies': [self.ie_key()]}), *args, **kwargs)
3743
3744     @classmethod
3745     def extract_from_webpage(cls, ydl, url, webpage):
3746         ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
3747               else ydl.get_info_extractor(cls.ie_key()))
3748         for info in ie._extract_from_webpage(url, webpage) or []:
3749             # url = None since we do not want to set (webpage/original)_url
3750             ydl.add_default_extra_info(info, ie, None)
3751             yield info
3752
3753     @classmethod
3754     def _extract_from_webpage(cls, url, webpage):
3755         for embed_url in orderedSet(
3756                 cls._extract_embed_urls(url, webpage) or [], lazy=True):
3757             yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
3758
3759     @classmethod
3760     def _extract_embed_urls(cls, url, webpage):
3761         """@returns all the embed urls on the webpage"""
3762         if '_EMBED_URL_RE' not in cls.__dict__:
3763             assert isinstance(cls._EMBED_REGEX, (list, tuple))
3764             for idx, regex in enumerate(cls._EMBED_REGEX):
3765                 assert regex.count('(?P<url>') == 1, \
3766                     f'{cls.__name__}._EMBED_REGEX[{idx}] must have exactly 1 url group\n\t{regex}'
3767             cls._EMBED_URL_RE = tuple(map(re.compile, cls._EMBED_REGEX))
3768
3769         for regex in cls._EMBED_URL_RE:
3770             for mobj in regex.finditer(webpage):
3771                 embed_url = urllib.parse.urljoin(url, unescapeHTML(mobj.group('url')))
3772                 if cls._VALID_URL is False or cls.suitable(embed_url):
3773                     yield embed_url
3774
3775     class StopExtraction(Exception):
3776         pass
3777
3778     @classmethod
3779     def _extract_url(cls, webpage):  # TODO: Remove
3780         """Only for compatibility with some older extractors"""
3781         return next(iter(cls._extract_embed_urls(None, webpage) or []), None)
3782
    @classmethod
    def __init_subclass__(cls, *, plugin_name=None, **kwargs):
        """Hook run when a subclass is defined; registers plugin overrides.

        When the subclass is declared with a truthy `plugin_name`, it is treated as
        an override of the class that follows it in its MRO.

        @param plugin_name  Name used to tag the overriding class (default: None)
        @param kwargs       Passed on to the parent `__init_subclass__`
        """
        if plugin_name:
            mro = inspect.getmro(cls)
            # The class right after `cls` in the MRO is the one being overridden
            super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
            cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
            cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
            # Follow the `__wrapped__` chain down to the class at its end
            while getattr(super_class, '__wrapped__', None):
                super_class = super_class.__wrapped__
            # Rebind that class's name in its defining module to the new class
            setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
            _PLUGIN_OVERRIDES[super_class].append(cls)

        return super().__init_subclass__(**kwargs)
3796
3797
class SearchInfoExtractor(InfoExtractor):
    """
    Common base for extractors that handle paged search queries.
    The "URLs" they match look like <_SEARCH_KEY><prefix>:<query>, where the prefix
    is empty (one result), a positive number (that many results) or "all".
    Subclasses must set _SEARCH_KEY and may set _MAX_RESULTS
    """

    _MAX_RESULTS = float('inf')
    _RETURN_TYPE = 'playlist'

    @classproperty
    def _VALID_URL(cls):
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    def _real_extract(self, query):
        """Parse the prefix of the search "URL" and fetch the matching number of results."""
        prefix, query = self._match_valid_url(query).group('prefix', 'query')
        if prefix == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)
        if prefix == '':
            return self._get_n_results(query, 1)

        n = int(prefix)
        if n <= 0:
            raise ExtractorError(f'invalid download number {n} for query "{query}"')
        if n > self._MAX_RESULTS:
            # Clamp the request to what the site can return, and tell the user
            self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
            n = self._MAX_RESULTS
        return self._get_n_results(query, n)

    def _get_n_results(self, query, n):
        """Return a playlist result holding at most n search results for the query.
        Subclasses must override either this method or _search_results """
        results = self._search_results(query)
        # An infinite n means "no limit", which islice expresses as None
        limit = None if n == float('inf') else n
        return self.playlist_result(itertools.islice(results, limit), query, query)

    def _search_results(self, query):
        """Return an iterator over the search results (to be provided by subclasses)"""
        message = 'This method must be implemented by subclasses'
        raise NotImplementedError(message)

    @classproperty
    def SEARCH_KEY(cls):
        """Public accessor for _SEARCH_KEY"""
        return cls._SEARCH_KEY
3841
3842
class UnsupportedURLIE(InfoExtractor):
    """Extractor whose pattern matches any URL and whose extraction always raises UnsupportedError."""

    IE_DESC = False
    _ENABLED = False
    _VALID_URL = '.*'

    def _real_extract(self, url):
        """Reject the URL by raising UnsupportedError for it."""
        raise UnsupportedError(url)
3850
3851
# Maps an overridden extractor class to the list of plugin classes registered for it;
# entries are appended by `__init_subclass__` when a subclass passes `plugin_name`
_PLUGIN_OVERRIDES = collections.defaultdict(list)