Deduplicate Post Download Loop
This commit is contained in:
parent
c76385bde2
commit
a794089c4f
@ -691,6 +691,59 @@ class Instaloader:
|
|||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
def posts_download_loop(self,
                        posts: Iterator[Post],
                        target: Union[str, Path],
                        fast_update: bool = False,
                        post_filter: Optional[Callable[[Post], bool]] = None,
                        max_count: Optional[int] = None,
                        total_count: Optional[int] = None) -> None:
    """
    Download the Posts returned by given Post Iterator.

    For every Post a progress prefix is logged first (``[n/limit]`` if a limit is known, ``[  n]``
    otherwise). Posts rejected by `post_filter` are logged as skipped but still advance the
    counter, i.e. they count towards `max_count`.

    :param posts: Post Iterator to loop through.
    :param target: Target name, passed on to :meth:`download_post` and used in error messages.
    :param fast_update: :option:`--fast-update`; stop at the first Post for which
       :meth:`download_post` returns a falsy value (unless that Post changed during download).
    :param post_filter: :option:`--post-filter`; Posts for which it returns False are skipped.
    :param max_count: Maximum count of Posts to download (:option:`--count`)
    :param total_count: Total number of posts returned by given iterator
    """
    # Denominator of the progress prefix. If both limits are known, show the smaller one:
    # the loop ends at max_count, so a larger total_count would never be reached.
    if total_count is None or (max_count is not None and max_count < total_count):
        displayed_count = max_count
    else:
        displayed_count = total_count

    for number, post in enumerate(posts, start=1):
        if max_count is not None and number > max_count:
            break
        if displayed_count is not None:
            self.context.log("[{0:{w}d}/{1:{w}d}] ".format(number, displayed_count,
                                                           w=len(str(displayed_count))),
                             end="", flush=True)
        else:
            self.context.log("[{:3d}] ".format(number), end="", flush=True)
        if post_filter is not None and not post_filter(post):
            self.context.log("<{} skipped>".format(post))
            continue
        with self.context.error_catcher("Download {} of {}".format(post, target)):
            # The PostChangedException gets raised if the Post's id/shortcode changed while obtaining
            # additional metadata. This is most likely the case if a HTTP redirect takes place while
            # resolving the shortcode URL.
            # The `post_changed` variable keeps the fast-update functionality alive: A Post which is
            # obtained after a redirect has probably already been downloaded as a previous Post of the
            # same Profile.
            # Observed in issue #225: https://github.com/instaloader/instaloader/issues/225
            post_changed = False
            while True:
                try:
                    downloaded = self.download_post(post, target=target)
                    break
                except PostChangedException:
                    # Retry the same Post object; remember that it changed so that
                    # fast-update does not abort because of it.
                    post_changed = True
                    continue
            if fast_update and not downloaded and not post_changed:
                break
|
||||||
|
|
||||||
@_requires_login
|
@_requires_login
|
||||||
def get_feed_posts(self) -> Iterator[Post]:
|
def get_feed_posts(self) -> Iterator[Post]:
|
||||||
"""Get Posts of the user's feed.
|
"""Get Posts of the user's feed.
|
||||||
@ -733,20 +786,7 @@ class Instaloader:
|
|||||||
:raises LoginRequiredException: If called without being logged in.
|
:raises LoginRequiredException: If called without being logged in.
|
||||||
"""
|
"""
|
||||||
self.context.log("Retrieving pictures from your feed...")
|
self.context.log("Retrieving pictures from your feed...")
|
||||||
count = 1
|
self.posts_download_loop(self.get_feed_posts(), ":feed", fast_update, post_filter, max_count=max_count)
|
||||||
for post in self.get_feed_posts():
|
|
||||||
if max_count is not None and count > max_count:
|
|
||||||
break
|
|
||||||
name = post.owner_username
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log("<pic by %s skipped>" % name, flush=True)
|
|
||||||
continue
|
|
||||||
self.context.log("[%3i] %s " % (count, name), end="", flush=True)
|
|
||||||
count += 1
|
|
||||||
with self.context.error_catcher('Download feed'):
|
|
||||||
downloaded = self.download_post(post, target=':feed')
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
@_requires_login
|
@_requires_login
|
||||||
def download_saved_posts(self, max_count: int = None, fast_update: bool = False,
|
def download_saved_posts(self, max_count: int = None, fast_update: bool = False,
|
||||||
@ -759,20 +799,9 @@ class Instaloader:
|
|||||||
:raises LoginRequiredException: If called without being logged in.
|
:raises LoginRequiredException: If called without being logged in.
|
||||||
"""
|
"""
|
||||||
self.context.log("Retrieving saved posts...")
|
self.context.log("Retrieving saved posts...")
|
||||||
count = 1
|
assert self.context.username is not None # safe due to @_requires_login; required by typechecker
|
||||||
assert self.context.username is not None
|
self.posts_download_loop(Profile.from_username(self.context, self.context.username).get_saved_posts(), ":saved",
|
||||||
for post in Profile.from_username(self.context, self.context.username).get_saved_posts():
|
fast_update, post_filter, max_count=max_count)
|
||||||
if max_count is not None and count > max_count:
|
|
||||||
break
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log("<{} skipped>".format(post), flush=True)
|
|
||||||
continue
|
|
||||||
self.context.log("[{:>3}] ".format(count), end=str(), flush=True)
|
|
||||||
count += 1
|
|
||||||
with self.context.error_catcher('Download saved posts'):
|
|
||||||
downloaded = self.download_post(post, target=':saved')
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
@_requires_login
|
@_requires_login
|
||||||
def get_location_posts(self, location: str) -> Iterator[Post]:
|
def get_location_posts(self, location: str) -> Iterator[Post]:
|
||||||
@ -823,19 +852,8 @@ class Instaloader:
|
|||||||
Require being logged in (as required by Instagram)
|
Require being logged in (as required by Instagram)
|
||||||
"""
|
"""
|
||||||
self.context.log("Retrieving pictures for location {}...".format(location))
|
self.context.log("Retrieving pictures for location {}...".format(location))
|
||||||
count = 1
|
self.posts_download_loop(self.get_location_posts(location), "%" + location, fast_update, post_filter,
|
||||||
for post in self.get_location_posts(location):
|
max_count=max_count)
|
||||||
if max_count is not None and count > max_count:
|
|
||||||
break
|
|
||||||
self.context.log('[{0:3d}] %{1} '.format(count, location), end='', flush=True)
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log('<skipped>')
|
|
||||||
continue
|
|
||||||
count += 1
|
|
||||||
with self.context.error_catcher('Download location {}'.format(location)):
|
|
||||||
downloaded = self.download_post(post, target='%' + location)
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
@_requires_login
|
@_requires_login
|
||||||
def get_explore_posts(self) -> Iterator[Post]:
|
def get_explore_posts(self) -> Iterator[Post]:
|
||||||
@ -884,19 +902,8 @@ class Instaloader:
|
|||||||
"""
|
"""
|
||||||
hashtag = hashtag.lower()
|
hashtag = hashtag.lower()
|
||||||
self.context.log("Retrieving pictures with hashtag {}...".format(hashtag))
|
self.context.log("Retrieving pictures with hashtag {}...".format(hashtag))
|
||||||
count = 1
|
self.posts_download_loop(self.get_hashtag_posts(hashtag), "#" + hashtag, fast_update, post_filter,
|
||||||
for post in self.get_hashtag_posts(hashtag):
|
max_count=max_count)
|
||||||
if max_count is not None and count > max_count:
|
|
||||||
break
|
|
||||||
self.context.log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log('<skipped>')
|
|
||||||
continue
|
|
||||||
count += 1
|
|
||||||
with self.context.error_catcher('Download hashtag #{}'.format(hashtag)):
|
|
||||||
downloaded = self.download_post(post, target='#' + hashtag)
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
def download_tagged(self, profile: Profile, fast_update: bool = False,
|
def download_tagged(self, profile: Profile, fast_update: bool = False,
|
||||||
target: Optional[str] = None,
|
target: Optional[str] = None,
|
||||||
@ -905,17 +912,9 @@ class Instaloader:
|
|||||||
|
|
||||||
.. versionadded:: 4.1"""
|
.. versionadded:: 4.1"""
|
||||||
self.context.log("Retrieving tagged posts for profile {}.".format(profile.username))
|
self.context.log("Retrieving tagged posts for profile {}.".format(profile.username))
|
||||||
count = 1
|
self.posts_download_loop(profile.get_tagged_posts(),
|
||||||
for post in profile.get_tagged_posts():
|
target if target else Path(profile.username) / Path(':tagged'), fast_update,
|
||||||
self.context.log("[%3i/???] " % (count), end="", flush=True)
|
post_filter)
|
||||||
count += 1
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log('<{} skipped>'.format(post))
|
|
||||||
continue
|
|
||||||
with self.context.error_catcher('Download tagged {}'.format(profile.username)):
|
|
||||||
downloaded = self.download_post(post, target if target else Path(profile.username) / Path(':tagged'))
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
def download_igtv(self, profile: Profile, fast_update: bool = False,
|
def download_igtv(self, profile: Profile, fast_update: bool = False,
|
||||||
post_filter: Optional[Callable[[Post], bool]] = None) -> None:
|
post_filter: Optional[Callable[[Post], bool]] = None) -> None:
|
||||||
@ -923,16 +922,8 @@ class Instaloader:
|
|||||||
|
|
||||||
.. versionadded:: 4.3"""
|
.. versionadded:: 4.3"""
|
||||||
self.context.log("Retrieving IGTV videos for profile {}.".format(profile.username))
|
self.context.log("Retrieving IGTV videos for profile {}.".format(profile.username))
|
||||||
for number, post in enumerate(profile.get_igtv_posts()):
|
self.posts_download_loop(profile.get_igtv_posts(), profile.username, fast_update, post_filter,
|
||||||
self.context.log("[{0:{w}d}/{1:{w}d}] ".format(number+1, profile.igtvcount, w=len(str(profile.igtvcount))),
|
total_count=profile.igtvcount)
|
||||||
end="", flush=True)
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log('<{} skipped>'.format(post))
|
|
||||||
continue
|
|
||||||
with self.context.error_catcher('Download IGTV {}'.format(post.shortcode)):
|
|
||||||
downloaded = self.download_post(post, target=profile.username)
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
def _get_id_filename(self, profile_name: str) -> str:
|
def _get_id_filename(self, profile_name: str) -> str:
|
||||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||||
@ -1084,32 +1075,8 @@ class Instaloader:
|
|||||||
# Iterate over pictures and download them
|
# Iterate over pictures and download them
|
||||||
if posts:
|
if posts:
|
||||||
self.context.log("Retrieving posts from profile {}.".format(profile_name))
|
self.context.log("Retrieving posts from profile {}.".format(profile_name))
|
||||||
totalcount = profile.mediacount
|
self.posts_download_loop(profile.get_posts(), profile_name, fast_update, post_filter,
|
||||||
count = 1
|
total_count=profile.mediacount)
|
||||||
for post in profile.get_posts():
|
|
||||||
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
|
||||||
count += 1
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log('<skipped>')
|
|
||||||
continue
|
|
||||||
with self.context.error_catcher("Download {} of {}".format(post, profile_name)):
|
|
||||||
# The PostChangedException gets raised if the Post's id/shortcode changed while obtaining
|
|
||||||
# additional metadata. This is most likely the case if a HTTP redirect takes place while
|
|
||||||
# resolving the shortcode URL.
|
|
||||||
# The `post_changed` variable keeps the fast-update functionality alive: A Post which is
|
|
||||||
# obtained after a redirect has probably already been downloaded as a previous Post of the
|
|
||||||
# same Profile.
|
|
||||||
# Observed in issue #225: https://github.com/instaloader/instaloader/issues/225
|
|
||||||
post_changed = False
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
downloaded = self.download_post(post, target=profile_name)
|
|
||||||
break
|
|
||||||
except PostChangedException:
|
|
||||||
post_changed = True
|
|
||||||
continue
|
|
||||||
if fast_update and not downloaded and not post_changed:
|
|
||||||
break
|
|
||||||
|
|
||||||
if stories and profiles:
|
if stories and profiles:
|
||||||
with self.context.error_catcher("Download stories"):
|
with self.context.error_catcher("Download stories"):
|
||||||
@ -1188,18 +1155,8 @@ class Instaloader:
|
|||||||
|
|
||||||
# Iterate over pictures and download them
|
# Iterate over pictures and download them
|
||||||
self.context.log("Retrieving posts from profile {}.".format(profile_name))
|
self.context.log("Retrieving posts from profile {}.".format(profile_name))
|
||||||
totalcount = profile.mediacount
|
self.posts_download_loop(profile.get_posts(), profile_name, fast_update, post_filter,
|
||||||
count = 1
|
total_count=profile.mediacount)
|
||||||
for post in profile.get_posts():
|
|
||||||
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
|
||||||
count += 1
|
|
||||||
if post_filter is not None and not post_filter(post):
|
|
||||||
self.context.log('<skipped>')
|
|
||||||
continue
|
|
||||||
with self.context.error_catcher('Download profile {}'.format(profile_name)):
|
|
||||||
downloaded = self.download_post(post, target=profile_name)
|
|
||||||
if fast_update and not downloaded:
|
|
||||||
break
|
|
||||||
|
|
||||||
def check_if_committed(self, filename: str) -> bool:
|
def check_if_committed(self, filename: str) -> bool:
|
||||||
"""Checks to see if the current post has been committed.
|
"""Checks to see if the current post has been committed.
|
||||||
|
Loading…
Reference in New Issue
Block a user