Fix efficiency bug regarding re-downloading targets
Remove unnecessary queries when a file in a target does not have to be re-downloaded. This significantly speeds up downloads in cases where the majority of the media is already present locally.

Co-Authored-By: André Koch-Kramer <koch-kramer@web.de>
This commit is contained in:
parent
43c521afa9
commit
672b0b1840
@ -551,6 +551,27 @@ class Instaloader:
|
|||||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def _already_downloaded(path: str) -> bool:
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
self.context.log(path + ' exists', end=' ', flush=True)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _all_already_downloaded(path_base, is_videos_enumerated) -> bool:
|
||||||
|
if '{filename}' in self.filename_pattern:
|
||||||
|
# full URL needed to evaluate actual filename, cannot determine at
|
||||||
|
# this point if all sidecar nodes were already downloaded.
|
||||||
|
return False
|
||||||
|
for idx, is_video in is_videos_enumerated:
|
||||||
|
if self.download_pictures and (not is_video or self.download_video_thumbnails):
|
||||||
|
if not _already_downloaded("{0}_{1}.jpg".format(path_base, idx)):
|
||||||
|
return False
|
||||||
|
if is_video and self.download_videos:
|
||||||
|
if not _already_downloaded("{0}_{1}.mp4".format(path_base, idx)):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
|
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
|
||||||
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
|
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
|
||||||
filename = self.__prepare_filename(filename_template, lambda: post.url)
|
filename = self.__prepare_filename(filename_template, lambda: post.url)
|
||||||
@ -559,37 +580,45 @@ class Instaloader:
|
|||||||
downloaded = True
|
downloaded = True
|
||||||
if post.typename == 'GraphSidecar':
|
if post.typename == 'GraphSidecar':
|
||||||
if self.download_pictures or self.download_videos:
|
if self.download_pictures or self.download_videos:
|
||||||
for edge_number, sidecar_node in enumerate(
|
if not _all_already_downloaded(
|
||||||
post.get_sidecar_nodes(self.slide_start, self.slide_end),
|
filename_template, enumerate(
|
||||||
start=post.mediacount if self.slide_start < 0 else self.slide_start + 1
|
(post.get_is_videos()[i]
|
||||||
|
for i in range(self.slide_start % post.mediacount, self.slide_end % post.mediacount + 1)),
|
||||||
|
start=self.slide_start % post.mediacount + 1
|
||||||
|
)
|
||||||
):
|
):
|
||||||
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
|
for edge_number, sidecar_node in enumerate(
|
||||||
suffix = str(edge_number)
|
post.get_sidecar_nodes(self.slide_start, self.slide_end),
|
||||||
|
start=self.slide_start % post.mediacount + 1
|
||||||
|
):
|
||||||
|
suffix = str(edge_number) # type: Optional[str]
|
||||||
if '{filename}' in self.filename_pattern:
|
if '{filename}' in self.filename_pattern:
|
||||||
suffix = ''
|
suffix = None
|
||||||
# pylint:disable=cell-var-from-loop
|
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
|
||||||
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url)
|
# pylint:disable=cell-var-from-loop
|
||||||
# Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails)
|
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url)
|
||||||
downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url,
|
# Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails)
|
||||||
mtime=post.date_local, filename_suffix=suffix)
|
downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url,
|
||||||
if sidecar_node.is_video and self.download_videos:
|
mtime=post.date_local, filename_suffix=suffix)
|
||||||
suffix = str(edge_number)
|
if sidecar_node.is_video and self.download_videos:
|
||||||
if '{filename}' in self.filename_pattern:
|
# pylint:disable=cell-var-from-loop
|
||||||
suffix = ''
|
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url)
|
||||||
# pylint:disable=cell-var-from-loop
|
# Download sidecar video if desired
|
||||||
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url)
|
downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url,
|
||||||
# Download sidecar video if desired
|
mtime=post.date_local, filename_suffix=suffix)
|
||||||
downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url,
|
else:
|
||||||
mtime=post.date_local, filename_suffix=suffix)
|
downloaded = False
|
||||||
elif post.typename == 'GraphImage':
|
elif post.typename == 'GraphImage':
|
||||||
# Download picture
|
# Download picture
|
||||||
if self.download_pictures:
|
if self.download_pictures:
|
||||||
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local)
|
downloaded = (not _already_downloaded(filename + ".jpg") and
|
||||||
|
self.download_pic(filename=filename, url=post.url, mtime=post.date_local))
|
||||||
elif post.typename == 'GraphVideo':
|
elif post.typename == 'GraphVideo':
|
||||||
# Download video thumbnail (--no-pictures implies --no-video-thumbnails)
|
# Download video thumbnail (--no-pictures implies --no-video-thumbnails)
|
||||||
if self.download_pictures and self.download_video_thumbnails:
|
if self.download_pictures and self.download_video_thumbnails:
|
||||||
with self.context.error_catcher("Video thumbnail of {}".format(post)):
|
with self.context.error_catcher("Video thumbnail of {}".format(post)):
|
||||||
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local)
|
downloaded = (not _already_downloaded(filename + ".jpg") and
|
||||||
|
self.download_pic(filename=filename, url=post.url, mtime=post.date_local))
|
||||||
else:
|
else:
|
||||||
self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
|
self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
|
||||||
|
|
||||||
@ -600,7 +629,8 @@ class Instaloader:
|
|||||||
|
|
||||||
# Download video if desired
|
# Download video if desired
|
||||||
if post.is_video and self.download_videos:
|
if post.is_video and self.download_videos:
|
||||||
downloaded &= self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local)
|
downloaded &= (not _already_downloaded(filename + ".mp4") and
|
||||||
|
self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local))
|
||||||
|
|
||||||
# Download geotags if desired
|
# Download geotags if desired
|
||||||
if self.download_geotags and post.location:
|
if self.download_geotags and post.location:
|
||||||
@ -693,17 +723,25 @@ class Instaloader:
|
|||||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def _already_downloaded(path: str) -> bool:
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
self.context.log(path + ' exists', end=' ', flush=True)
|
||||||
|
return True
|
||||||
|
|
||||||
date_local = item.date_local
|
date_local = item.date_local
|
||||||
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
|
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
|
||||||
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
|
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
|
||||||
filename = self.__prepare_filename(filename_template, lambda: item.url)
|
filename = self.__prepare_filename(filename_template, lambda: item.url)
|
||||||
downloaded = False
|
downloaded = False
|
||||||
if not item.is_video or self.download_video_thumbnails is True:
|
if not item.is_video or self.download_video_thumbnails is True:
|
||||||
url = item.url
|
downloaded = (not _already_downloaded(filename + ".jpg") and
|
||||||
downloaded = self.download_pic(filename=filename, url=url, mtime=date_local)
|
self.download_pic(filename=filename, url=item.url, mtime=date_local))
|
||||||
if item.is_video and self.download_videos is True:
|
if item.is_video and self.download_videos is True:
|
||||||
filename = self.__prepare_filename(filename_template, lambda: str(item.video_url))
|
filename = self.__prepare_filename(filename_template, lambda: str(item.video_url))
|
||||||
downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local)
|
downloaded |= (not _already_downloaded(filename + ".mp4") and
|
||||||
|
self.download_pic(filename=filename, url=item.video_url, mtime=date_local))
|
||||||
# Save caption if desired
|
# Save caption if desired
|
||||||
metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip()
|
metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip()
|
||||||
if metadata_string:
|
if metadata_string:
|
||||||
|
@ -272,6 +272,17 @@ class Post:
|
|||||||
return len(edges)
|
return len(edges)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
def get_is_videos(self) -> List[bool]:
|
||||||
|
"""
|
||||||
|
Return a list containing the ``is_video`` property for each media in the post.
|
||||||
|
|
||||||
|
.. versionadded:: 4.7
|
||||||
|
"""
|
||||||
|
if self.typename == 'GraphSidecar':
|
||||||
|
edges = self._field('edge_sidecar_to_children', 'edges')
|
||||||
|
return [edge['node']['is_video'] for edge in edges]
|
||||||
|
return [self.is_video]
|
||||||
|
|
||||||
def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]:
|
def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]:
|
||||||
"""
|
"""
|
||||||
Sidecar nodes of a Post with typename==GraphSidecar.
|
Sidecar nodes of a Post with typename==GraphSidecar.
|
||||||
@ -281,13 +292,13 @@ class Post:
|
|||||||
"""
|
"""
|
||||||
if self.typename == 'GraphSidecar':
|
if self.typename == 'GraphSidecar':
|
||||||
edges = self._field('edge_sidecar_to_children', 'edges')
|
edges = self._field('edge_sidecar_to_children', 'edges')
|
||||||
if any(edge['node']['is_video'] for edge in edges):
|
|
||||||
# video_url is only present in full metadata, issue #558.
|
|
||||||
edges = self._full_metadata['edge_sidecar_to_children']['edges']
|
|
||||||
if end < 0:
|
if end < 0:
|
||||||
end = len(edges)-1
|
end = len(edges)-1
|
||||||
if start < 0:
|
if start < 0:
|
||||||
start = len(edges)-1
|
start = len(edges)-1
|
||||||
|
if any(self.get_is_videos()[start:(end+1)]):
|
||||||
|
# video_url is only present in full metadata, issue #558.
|
||||||
|
edges = self._full_metadata['edge_sidecar_to_children']['edges']
|
||||||
for idx, edge in enumerate(edges):
|
for idx, edge in enumerate(edges):
|
||||||
if start <= idx <= end:
|
if start <= idx <= end:
|
||||||
node = edge['node']
|
node = edge['node']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user