From 30555b576c8363e6cec171466df063015e296cda Mon Sep 17 00:00:00 2001 From: sushilicious <*> Date: Mon, 18 Jun 2018 03:45:11 -0400 Subject: [PATCH 1/3] Added commit mode The commit mode ensures pictures are not corrupted when Instaloader is unexpectedly interrupted. In the case that the last picture is corrupted because of an interruption, Instaloader will redownload the picture. Since the metadata is the last object saved to disk, we can consider a post as "committed" if its json metadata file exists and is not malformed. Instaloader should download any posts which are not committed. Downside is commit mode requires metadata to be saved. --- docs/cli-options.rst | 6 ++++++ instaloader/__main__.py | 10 +++++++++- instaloader/instaloader.py | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 1b8375b..1d4f310 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -230,6 +230,12 @@ How to Download to ``3``. If a connection fails, it can be manually skipped by hitting :kbd:`Control-c`. Set this to ``0`` to retry infinitely. +.. option:: --commit-mode + + Tries to ensure downloaded images avoid corruption in case of unexpected + interruption. If the last picture is corrupted, Instaloader will fix the + picture the next time it is run. + Miscellaneous Options ^^^^^^^^^^^^^^^^^^^^^ diff --git a/instaloader/__main__.py b/instaloader/__main__.py index ca54e88..f9029b8 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -320,6 +320,10 @@ def main(): help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a ' 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'infinitely.') + g_how.add_argument('--commit-mode', action='store_true', + help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. 
' + 'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. ' + 'Requires the JSON metadata to be saved.') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', @@ -361,6 +365,9 @@ def main(): download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) download_stories = args.stories or args.stories_only + if args.commit_mode and args.no_metadata_json: + raise SystemExit('--commit-mode requires JSON metadata to be saved.') + loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, download_pictures=not args.no_pictures, @@ -371,7 +378,8 @@ def main(): post_metadata_txt_pattern=post_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, graphql_rate_limit=args.graphql_rate_limit, - max_connection_attempts=args.max_connection_attempts) + max_connection_attempts=args.max_connection_attempts, + commit_mode=args.commit_mode) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 8c238dd..b9a8075 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -1,5 +1,6 @@ import getpass import json +import lzma import os import platform import re @@ -15,7 +16,7 @@ from typing import Any, Callable, Iterator, List, Optional, Set, Union from .exceptions import * from .instaloadercontext import InstaloaderContext -from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file +from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file, load_structure_from_file def get_default_session_filename(username: str) -> str: @@ -116,7 +117,8 @@ class Instaloader: 
post_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None, graphql_rate_limit: Optional[int] = None, - max_connection_attempts: int = 3): + max_connection_attempts: int = 3, + commit_mode: bool = False): self.context = InstaloaderContext(sleep, quiet, user_agent, graphql_rate_limit, max_connection_attempts) @@ -134,6 +136,9 @@ class Instaloader: else post_metadata_txt_pattern self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \ else storyitem_metadata_txt_pattern + self.commit_mode = commit_mode + if self.commit_mode and not self.save_metadata: + raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.") @contextmanager def anonymous_copy(self): @@ -173,9 +178,15 @@ class Instaloader: if filename_suffix is not None: filename += '_' + filename_suffix filename += '.' + file_extension - if os.path.isfile(filename): - self.context.log(filename + ' exists', end=' ', flush=True) - return False + # A post is considered "committed" if the json file exists and is not malformed. + if self.commit_mode: + if self._committed: + self.context.log(filename + ' exists', end=' ', flush=True) + return False + else: + if os.path.isfile(filename): + self.context.log(filename + ' exists', end=' ', flush=True) + return False self.context.get_and_write_raw(url, filename) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) return True @@ -359,6 +370,7 @@ class Instaloader: # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = True + self._committed = self.check_if_committed(filename) if self.download_pictures: if post.typename == 'GraphSidecar': edge_number = 1 @@ -945,6 +957,21 @@ class Instaloader: if fast_update and not downloaded: break + def check_if_committed(self, filename: str) -> bool: + """Checks to see if the current post has been committed.""" + # A post is considered committed if its json metadata file exists and is not malformed. 
+ if os.path.isfile(filename + '.json.xz'): + filename += '.json.xz' + elif os.path.isfile(filename + '.json'): + filename += '.json' + else: + return False + try: + load_structure_from_file(self.context, filename) + return True + except (FileNotFoundError, lzma.LZMAError, json.decoder.JSONDecodeError): + return False + def interactive_login(self, username: str) -> None: """Logs in and internally stores session, asking user for password interactively. From e753406a6c82620b827953a6a1bca8222cec446e Mon Sep 17 00:00:00 2001 From: sushilicious <*> Date: Mon, 18 Jun 2018 04:42:06 -0400 Subject: [PATCH 2/3] Just in case, check whether picture exists for commit This is in addition to the other commit logic. --- instaloader/instaloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index b9a8075..62ae227 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -180,7 +180,7 @@ class Instaloader: filename += '.' + file_extension # A post is considered "committed" if the json file exists and is not malformed. 
if self.commit_mode: - if self._committed: + if self._committed and os.path.isfile(filename): self.context.log(filename + ' exists', end=' ', flush=True) return False else: From ffda2265bdf00ebcd0024e9a8e73399263132923 Mon Sep 17 00:00:00 2001 From: sushilicious <*> Date: Fri, 22 Jun 2018 14:53:47 -0400 Subject: [PATCH 3/3] Added _committed to __init__ to make pylint shut up --- instaloader/instaloader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 62ae227..024d822 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -140,6 +140,9 @@ class Instaloader: if self.commit_mode and not self.save_metadata: raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.") + # Used to keep state in commit mode + self._committed = None + @contextmanager def anonymous_copy(self): """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""