[ie/EuroParlWebstream] Support new URL format (#9647)

[yt-dlp.git] / yt_dlp / downloader / f4m.py
diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py

index 9da2776d92c60d2e9564005c7135539cb023a91b..28cbba0169d827ec9c100a0c30c77d0ee4b8c6c6 100644 (file)
--- a/yt_dlp/downloader/f4m.py
+++ b/yt_dlp/downloader/f4m.py
@@ -1,23 +1,14 @@
-from __future__ import division, unicode_literals
-
+import base64
  import io
  import itertools
+import struct
  import time
+import urllib.parse
  
  from .fragment import FragmentFD
-from ..compat import (
-    compat_b64decode,
-    compat_etree_fromstring,
-    compat_urlparse,
-    compat_urllib_error,
-    compat_urllib_parse_urlparse,
-    compat_struct_pack,
-    compat_struct_unpack,
-)
-from ..utils import (
-    fix_xml_ampersands,
-    xpath_text,
-)
+from ..compat import compat_etree_fromstring
+from ..networking.exceptions import HTTPError
+from ..utils import fix_xml_ampersands, xpath_text
  
  
  class DataTruncatedError(Exception):
@@ -40,13 +31,13 @@ def read_bytes(self, n):
  
      # Utility functions for reading numbers and strings
      def read_unsigned_long_long(self):
-        return compat_struct_unpack('!Q', self.read_bytes(8))[0]
+        return struct.unpack('!Q', self.read_bytes(8))[0]
  
      def read_unsigned_int(self):
-        return compat_struct_unpack('!I', self.read_bytes(4))[0]
+        return struct.unpack('!I', self.read_bytes(4))[0]
  
      def read_unsigned_char(self):
-        return compat_struct_unpack('!B', self.read_bytes(1))[0]
+        return struct.unpack('!B', self.read_bytes(1))[0]
  
      def read_string(self):
          res = b''
@@ -193,7 +184,7 @@ def build_fragments_list(boot_info):
      first_frag_number = fragment_run_entry_table[0]['first']
      fragments_counter = itertools.count(first_frag_number)
      for segment, fragments_count in segment_run_table['segment_run']:
-        # In some live HDS streams (for example Rai), `fragments_count` is
+        # In some live HDS streams (e.g. Rai), `fragments_count` is
          # abnormal and causing out-of-memory errors. It's OK to change the
          # number of fragments for live streams as they are updated periodically
          if fragments_count == 4294967295 and boot_info['live']:
@@ -208,11 +199,11 @@ def build_fragments_list(boot_info):
  
  
  def write_unsigned_int(stream, val):
-    stream.write(compat_struct_pack('!I', val))
+    stream.write(struct.pack('!I', val))
  
  
  def write_unsigned_int_24(stream, val):
-    stream.write(compat_struct_pack('!I', val)[1:])
+    stream.write(struct.pack('!I', val)[1:])
  
  
  def write_flv_header(stream):
@@ -261,8 +252,6 @@ class F4mFD(FragmentFD):
      A downloader for f4m manifests or AdobeHDS.
      """
  
-    FD_NAME = 'f4m'
-
      def _get_unencrypted_media(self, doc):
          media = doc.findall(_add_ns('media'))
          if not media:
@@ -308,12 +297,12 @@ def _parse_bootstrap_node(self, node, base_url):
          # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
          bootstrap_url = node.get('url')
          if bootstrap_url:
-            bootstrap_url = compat_urlparse.urljoin(
+            bootstrap_url = urllib.parse.urljoin(
                  base_url, bootstrap_url)
              boot_info = self._get_bootstrap_from_url(bootstrap_url)
          else:
              bootstrap_url = None
-            bootstrap = compat_b64decode(node.text)
+            bootstrap = base64.b64decode(node.text)
              boot_info = read_bootstrap_info(bootstrap)
          return boot_info, bootstrap_url
  
@@ -323,7 +312,7 @@ def real_download(self, filename, info_dict):
          self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
  
          urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
-        man_url = urlh.geturl()
+        man_url = urlh.url
          # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
          # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
          # and https://github.com/ytdl-org/youtube-dl/issues/7823)
@@ -343,14 +332,14 @@ def real_download(self, filename, info_dict):
          # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
          man_base_url = get_base_url(doc) or man_url
  
-        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
+        base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
          bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
          boot_info, bootstrap_url = self._parse_bootstrap_node(
              bootstrap_node, man_base_url)
          live = boot_info['live']
          metadata_node = media.find(_add_ns('metadata'))
          if metadata_node is not None:
-            metadata = compat_b64decode(metadata_node.text)
+            metadata = base64.b64decode(metadata_node.text)
          else:
              metadata = None
  
@@ -366,7 +355,7 @@ def real_download(self, filename, info_dict):
          ctx = {
              'filename': filename,
              'total_frags': total_frags,
-            'live': live,
+            'live': bool(live),
          }
  
          self._prepare_frag_download(ctx)
@@ -378,7 +367,7 @@ def real_download(self, filename, info_dict):
              if not live:
                  write_metadata_tag(dest_stream, metadata)
  
-        base_url_parsed = compat_urllib_parse_urlparse(base_url)
+        base_url_parsed = urllib.parse.urlparse(base_url)
  
          self._start_frag_download(ctx, info_dict)
  
@@ -398,9 +387,10 @@ def real_download(self, filename, info_dict):
                  query.append(info_dict['extra_param_to_segment_url'])
              url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
              try:
-                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+                success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                  if not success:
                      return False
+                down_data = self._read_fragment(ctx)
                  reader = FlvReader(down_data)
                  while True:
                      try:
@@ -417,8 +407,8 @@ def real_download(self, filename, info_dict):
                      if box_type == b'mdat':
                          self._append_fragment(ctx, box_data)
                          break
-            except (compat_urllib_error.HTTPError, ) as err:
-                if live and (err.code == 404 or err.code == 410):
+            except HTTPError as err:
+                if live and (err.status == 404 or err.status == 410):
                      # We didn't keep up with the live window. Continue
                      # with the next available fragment.
                      msg = 'Fragment %d unavailable' % frag_i
@@ -434,6 +424,4 @@ def real_download(self, filename, info_dict):
                      msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
                      self.report_warning(msg)
  
-        self._finish_frag_download(ctx, info_dict)
-
-        return True
+        return self._finish_frag_download(ctx, info_dict)