jfr.im git - yt-dlp.git/commitdiff
[downloader/fragment] Fix bugs around resuming with Range (#2901)
author: Lesmiscore (Naoya Ozaki) <redacted>
Mon, 28 Feb 2022 04:10:54 +0000 (13:10 +0900)
committer: GitHub <redacted>
Mon, 28 Feb 2022 04:10:54 +0000 (13:10 +0900)
Authored by: Lesmiscore

yt_dlp/downloader/fragment.py
yt_dlp/downloader/http.py
yt_dlp/utils.py

index 24f4ec959ee2d236bfe9ea83ce6e34e164ba34ad..83a9f81b6788279d967bfff55b67a96494682b38 100644 (file)
@@ -178,7 +178,7 @@ def _prepare_frag_download(self, ctx):
         dl = HttpQuietDownloader(
             self.ydl,
             {
-                'continuedl': True,
+                'continuedl': self.params.get('continuedl', True),
                 'quiet': self.params.get('quiet'),
                 'noprogress': True,
                 'ratelimit': self.params.get('ratelimit'),
index 34a1eb59b6a84e84b334c4ba5d5f71c415345aa0..10ba610241402c0ef46956ce45e359ddf6c91015 100644 (file)
@@ -5,7 +5,6 @@
 import socket
 import time
 import random
-import re
 
 from .common import FileDownloader
 from ..compat import (
@@ -16,6 +15,7 @@
     ContentTooShortError,
     encodeFilename,
     int_or_none,
+    parse_http_range,
     sanitized_Request,
     ThrottledDownload,
     write_xattr,
@@ -59,6 +59,9 @@ class DownloadContext(dict):
         ctx.chunk_size = None
         throttle_start = None
 
+        # parse given Range
+        req_start, req_end, _ = parse_http_range(headers.get('Range'))
+
         if self.params.get('continuedl', True):
             # Establish possible resume length
             if os.path.isfile(encodeFilename(ctx.tmpfilename)):
@@ -91,6 +94,9 @@ def establish_connection():
                               if not is_test and chunk_size else chunk_size)
             if ctx.resume_len > 0:
                 range_start = ctx.resume_len
+                if req_start is not None:
+                    # offset the beginning of Range to be within request
+                    range_start += req_start
                 if ctx.is_resume:
                     self.report_resuming_byte(ctx.resume_len)
                 ctx.open_mode = 'ab'
@@ -99,7 +105,17 @@ def establish_connection():
             else:
                 range_start = None
             ctx.is_resume = False
-            range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+
+            if ctx.chunk_size:
+                chunk_aware_end = range_start + ctx.chunk_size - 1
+                # we're not allowed to download outside Range
+                range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
+            elif req_end is not None:
+                # there's no need for chunked downloads, so download until the end of Range
+                range_end = req_end
+            else:
+                range_end = None
+
             if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
                 range_end = ctx.data_len - 1
             has_range = range_start is not None
@@ -124,23 +140,19 @@ def establish_connection():
                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                 if has_range:
                     content_range = ctx.data.headers.get('Content-Range')
-                    if content_range:
-                        content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
+                    content_range_start, content_range_end, content_len = parse_http_range(content_range)
+                    if content_range_start is not None and range_start == content_range_start:
                         # Content-Range is present and matches requested Range, resume is possible
-                        if content_range_m:
-                            if range_start == int(content_range_m.group(1)):
-                                content_range_end = int_or_none(content_range_m.group(2))
-                                content_len = int_or_none(content_range_m.group(3))
-                                accept_content_len = (
-                                    # Non-chunked download
-                                    not ctx.chunk_size
-                                    # Chunked download and requested piece or
-                                    # its part is promised to be served
-                                    or content_range_end == range_end
-                                    or content_len < range_end)
-                                if accept_content_len:
-                                    ctx.data_len = content_len
-                                    return
+                        accept_content_len = (
+                            # Non-chunked download
+                            not ctx.chunk_size
+                            # Chunked download and requested piece or
+                            # its part is promised to be served
+                            or content_range_end == range_end
+                            or content_len < range_end)
+                        if accept_content_len:
+                            ctx.data_len = content_len
+                            return
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
index 6ec8da11b7050c15730f480db5985e399944c0e4..cc08bd1301837fac12ad81329d247234af325ed6 100644 (file)
@@ -5252,6 +5252,16 @@ def join_nonempty(*values, delim='-', from_dict=None):
     return delim.join(map(str, filter(None, values)))
 
 
+def parse_http_range(range):
+    """
+    Extract byte positions from the value of a "Range" or "Content-Range" HTTP header.
+
+    Returns a 3-tuple (start, end, total). start is the integer that follows
+    "bytes=" or "bytes "; end and total are passed through int_or_none, so an
+    omitted end or "/total" part comes back as None. An empty value, or one
+    with no "bytes" range that has an explicit start, yields (None, None, None).
+    """
+    # Only run the regex when there is a header value to look at
+    match = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range) if range else None
+    if match is None:
+        return None, None, None
+    first, last, total = match.groups()
+    return int(first), int_or_none(last), int_or_none(total)
+
+
 class Config:
     own_args = None
     filename = None