Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and...

[yt-dlp.git] / youtube_dlc / YoutubeDL.py
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py

index fc351db0dc6b8245f3191d3946b8cd5de4417127..3c2970d9fcd2786d82d3da178af233e2c19209ac 100644 (file)
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -210,6 +210,8 @@ class YoutubeDL(object):
      download_archive:  File name of a file where all downloads are recorded.
                         Videos already present in the file are not downloaded
                         again.
+    break_on_existing: Stop the download process as soon as a video that is already
+                       recorded in the download archive is encountered.
      cookiefile:        File name where cookies should be read from and dumped to.
      nocheckcertificate:Do not verify SSL certificates
      prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
@@ -801,7 +803,7 @@ def add_extra_info(info_dict, extra_info):
          for key, value in extra_info.items():
              info_dict.setdefault(key, value)
  
-    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                       process=True, force_generic_extractor=False):
          '''
          Returns a list with a dictionary for each video we find.
@@ -821,12 +823,22 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
              if not ie.suitable(url):
                  continue
  
-            ie = self.get_info_extractor(ie.ie_key())
+            ie_key = ie.ie_key()
+            ie = self.get_info_extractor(ie_key)
              if not ie.working():
                  self.report_warning('The program functionality for this site has been marked as broken, '
                                      'and will probably not work.')
  
              try:
+                try:
+                    temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+                except (AssertionError, IndexError, AttributeError):
+                    temp_id = None
+                if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+                    self.to_screen("[%s] %s: has already been recorded in archive" % (
+                                   ie_key, temp_id))
+                    break
+
                  ie_result = ie.extract(url)
                  if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                      break
@@ -836,6 +848,11 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
                          '_type': 'compat_list',
                          'entries': ie_result,
                      }
+                if info_dict:
+                    if info_dict.get('id'):
+                        ie_result['id'] = info_dict['id']
+                    if info_dict.get('title'):
+                        ie_result['title'] = info_dict['title']
                  self.add_default_extra_info(ie_result, ie, url)
                  if process:
                      return self.process_ie_result(ie_result, download, extra_info)
@@ -898,7 +915,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
              # We have to add extra_info to the results because it may be
              # contained in a playlist
              return self.extract_info(ie_result['url'],
-                                     download,
+                                     download, info_dict=ie_result,
                                       ie_key=ie_result.get('ie_key'),
                                       extra_info=extra_info)
          elif result_type == 'url_transparent':
@@ -1033,8 +1050,12 @@ def report_download(num_entries):
  
                  reason = self._match_entry(entry, incomplete=True)
                  if reason is not None:
-                    self.to_screen('[download] ' + reason)
-                    continue
+                    if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
+                        print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
+                        break
+                    else:
+                        self.to_screen('[download] ' + reason)
+                        continue
  
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
@@ -1852,13 +1873,13 @@ def ensure_dir_exists(path):
                      self.report_error('Cannot write annotations file: ' + annofn)
                      return
  
-        def dl(name, info):
+        def dl(name, info, subtitle=False):
              fd = get_suitable_downloader(info, self.params)(self, self.params)
              for ph in self._progress_hooks:
                  fd.add_progress_hook(ph)
              if self.params.get('verbose'):
                  self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
-            return fd.download(name, info)
+            return fd.download(name, info, subtitle)
  
          subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                         self.params.get('writeautomaticsub')])
@@ -1867,7 +1888,7 @@ def dl(name, info):
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
              subtitles = info_dict['requested_subtitles']
-            ie = self.get_info_extractor(info_dict['extractor_key'])
+            # ie = self.get_info_extractor(info_dict['extractor_key'])
              for sub_lang, sub_info in subtitles.items():
                  sub_format = sub_info['ext']
                  sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1886,6 +1907,8 @@ def dl(name, info):
                              return
                      else:
                          try:
+                            dl(sub_filename, sub_info, subtitle=True)
+                            '''
                              if self.params.get('sleep_interval_subtitles', False):
                                  dl(sub_filename, sub_info)
                              else:
@@ -1893,6 +1916,7 @@ def dl(name, info):
                                      sub_info['url'], info_dict['id'], note=False).read()
                                  with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                      subfile.write(sub_data)
+                            '''
                          except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                              self.report_warning('Unable to download subtitle for "%s": %s' %
                                                  (sub_lang, error_to_compat_str(err)))