Fix efficiency bug regarding re-downloading targets
Skip unnecessary queries when a file in a target is already present locally and does not have to be re-downloaded. This significantly speeds up the download in cases where the majority of the media is already present locally.

Co-Authored-By: André Koch-Kramer <koch-kramer@web.de>
This commit is contained in:
parent
43c521afa9
commit
672b0b1840
@ -551,6 +551,27 @@ class Instaloader:
|
||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||
"""
|
||||
|
||||
def _already_downloaded(path: str) -> bool:
    """Tell whether ``path`` is an existing regular file.

    When the file exists, a "<path> exists" note is written to the context
    log (without a trailing newline) before returning.

    :param path: Full path, including extension, of the file to look for.
    :return: True if the file is present, False otherwise.
    """
    if os.path.isfile(path):
        self.context.log(path + ' exists', end=' ', flush=True)
        return True
    return False
|
||||
|
||||
def _all_already_downloaded(path_base, is_videos_enumerated) -> bool:
    """Check whether every wanted file of a sidecar post is already on disk.

    :param path_base: Path prefix; files are looked up as
       ``<path_base>_<idx>.jpg`` and ``<path_base>_<idx>.mp4``.
    :param is_videos_enumerated: Iterable of ``(idx, is_video)`` pairs, one
       per sidecar entry to check.
    :return: True only if no wanted file is missing. Always False when the
       filename pattern contains ``{filename}``.
    """
    if '{filename}' in self.filename_pattern:
        # The real filename can only be derived from the full URL, so it is
        # impossible to tell at this point whether all sidecar nodes were
        # already downloaded.
        return False

    for idx, is_video in is_videos_enumerated:
        # Pictures, and video thumbnails when those are enabled.
        wants_picture = self.download_pictures and (not is_video or self.download_video_thumbnails)
        if wants_picture and not _already_downloaded("{0}_{1}.jpg".format(path_base, idx)):
            return False

        # The video file itself.
        wants_video = is_video and self.download_videos
        if wants_video and not _already_downloaded("{0}_{1}.mp4".format(path_base, idx)):
            return False

    return True
|
||||
|
||||
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
|
||||
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
|
||||
filename = self.__prepare_filename(filename_template, lambda: post.url)
|
||||
@ -559,37 +580,45 @@ class Instaloader:
|
||||
downloaded = True
|
||||
if post.typename == 'GraphSidecar':
|
||||
if self.download_pictures or self.download_videos:
|
||||
for edge_number, sidecar_node in enumerate(
|
||||
post.get_sidecar_nodes(self.slide_start, self.slide_end),
|
||||
start=post.mediacount if self.slide_start < 0 else self.slide_start + 1
|
||||
if not _all_already_downloaded(
|
||||
filename_template, enumerate(
|
||||
(post.get_is_videos()[i]
|
||||
for i in range(self.slide_start % post.mediacount, self.slide_end % post.mediacount + 1)),
|
||||
start=self.slide_start % post.mediacount + 1
|
||||
)
|
||||
):
|
||||
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
|
||||
suffix = str(edge_number)
|
||||
for edge_number, sidecar_node in enumerate(
|
||||
post.get_sidecar_nodes(self.slide_start, self.slide_end),
|
||||
start=self.slide_start % post.mediacount + 1
|
||||
):
|
||||
suffix = str(edge_number) # type: Optional[str]
|
||||
if '{filename}' in self.filename_pattern:
|
||||
suffix = ''
|
||||
# pylint:disable=cell-var-from-loop
|
||||
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url)
|
||||
# Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails)
|
||||
downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url,
|
||||
mtime=post.date_local, filename_suffix=suffix)
|
||||
if sidecar_node.is_video and self.download_videos:
|
||||
suffix = str(edge_number)
|
||||
if '{filename}' in self.filename_pattern:
|
||||
suffix = ''
|
||||
# pylint:disable=cell-var-from-loop
|
||||
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url)
|
||||
# Download sidecar video if desired
|
||||
downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url,
|
||||
mtime=post.date_local, filename_suffix=suffix)
|
||||
suffix = None
|
||||
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
|
||||
# pylint:disable=cell-var-from-loop
|
||||
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url)
|
||||
# Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails)
|
||||
downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url,
|
||||
mtime=post.date_local, filename_suffix=suffix)
|
||||
if sidecar_node.is_video and self.download_videos:
|
||||
# pylint:disable=cell-var-from-loop
|
||||
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url)
|
||||
# Download sidecar video if desired
|
||||
downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url,
|
||||
mtime=post.date_local, filename_suffix=suffix)
|
||||
else:
|
||||
downloaded = False
|
||||
elif post.typename == 'GraphImage':
|
||||
# Download picture
|
||||
if self.download_pictures:
|
||||
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local)
|
||||
downloaded = (not _already_downloaded(filename + ".jpg") and
|
||||
self.download_pic(filename=filename, url=post.url, mtime=post.date_local))
|
||||
elif post.typename == 'GraphVideo':
|
||||
# Download video thumbnail (--no-pictures implies --no-video-thumbnails)
|
||||
if self.download_pictures and self.download_video_thumbnails:
|
||||
with self.context.error_catcher("Video thumbnail of {}".format(post)):
|
||||
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local)
|
||||
downloaded = (not _already_downloaded(filename + ".jpg") and
|
||||
self.download_pic(filename=filename, url=post.url, mtime=post.date_local))
|
||||
else:
|
||||
self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
|
||||
|
||||
@ -600,7 +629,8 @@ class Instaloader:
|
||||
|
||||
# Download video if desired
|
||||
if post.is_video and self.download_videos:
|
||||
downloaded &= self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local)
|
||||
downloaded &= (not _already_downloaded(filename + ".mp4") and
|
||||
self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local))
|
||||
|
||||
# Download geotags if desired
|
||||
if self.download_geotags and post.location:
|
||||
@ -693,17 +723,25 @@ class Instaloader:
|
||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||
"""
|
||||
|
||||
def _already_downloaded(path: str) -> bool:
    """Return whether ``path`` names an existing regular file.

    A "<path> exists" message is sent to the context log (kept on the
    current output line) whenever the file is found.

    :param path: Full path, including extension, of the file to look for.
    :return: True if the file is present, False otherwise.
    """
    present = os.path.isfile(path)
    if present:
        self.context.log(path + ' exists', end=' ', flush=True)
    return present
|
||||
|
||||
date_local = item.date_local
|
||||
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
|
||||
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
|
||||
filename = self.__prepare_filename(filename_template, lambda: item.url)
|
||||
downloaded = False
|
||||
if not item.is_video or self.download_video_thumbnails is True:
|
||||
url = item.url
|
||||
downloaded = self.download_pic(filename=filename, url=url, mtime=date_local)
|
||||
downloaded = (not _already_downloaded(filename + ".jpg") and
|
||||
self.download_pic(filename=filename, url=item.url, mtime=date_local))
|
||||
if item.is_video and self.download_videos is True:
|
||||
filename = self.__prepare_filename(filename_template, lambda: str(item.video_url))
|
||||
downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local)
|
||||
downloaded |= (not _already_downloaded(filename + ".mp4") and
|
||||
self.download_pic(filename=filename, url=item.video_url, mtime=date_local))
|
||||
# Save caption if desired
|
||||
metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip()
|
||||
if metadata_string:
|
||||
|
@ -272,6 +272,17 @@ class Post:
|
||||
return len(edges)
|
||||
return 1
|
||||
|
||||
def get_is_videos(self) -> List[bool]:
    """
    List the ``is_video`` flag of every media item in the post.

    For a ``GraphSidecar`` post there is one entry per sidecar child, in edge
    order; for any other post the list has a single entry.

    .. versionadded:: 4.7
    """
    if self.typename != 'GraphSidecar':
        return [self.is_video]
    children = self._field('edge_sidecar_to_children', 'edges')
    return [child['node']['is_video'] for child in children]
|
||||
|
||||
def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]:
|
||||
"""
|
||||
Sidecar nodes of a Post with typename==GraphSidecar.
|
||||
@ -281,13 +292,13 @@ class Post:
|
||||
"""
|
||||
if self.typename == 'GraphSidecar':
|
||||
edges = self._field('edge_sidecar_to_children', 'edges')
|
||||
if any(edge['node']['is_video'] for edge in edges):
|
||||
# video_url is only present in full metadata, issue #558.
|
||||
edges = self._full_metadata['edge_sidecar_to_children']['edges']
|
||||
if end < 0:
|
||||
end = len(edges)-1
|
||||
if start < 0:
|
||||
start = len(edges)-1
|
||||
if any(self.get_is_videos()[start:(end+1)]):
|
||||
# video_url is only present in full metadata, issue #558.
|
||||
edges = self._full_metadata['edge_sidecar_to_children']['edges']
|
||||
for idx, edge in enumerate(edges):
|
||||
if start <= idx <= end:
|
||||
node = edge['node']
|
||||
|
Loading…
Reference in New Issue
Block a user