From 30555b576c8363e6cec171466df063015e296cda Mon Sep 17 00:00:00 2001 From: sushilicious <*> Date: Mon, 18 Jun 2018 03:45:11 -0400 Subject: [PATCH] Added commit mode The commit mode ensures pictures are not corrupted when Instaloader is unexpectedly interrupted. In the case that the last picture is corrupted because of an interruption, Instaloader will redownload the picture. Since the metadata is the last object saved to disk, we can consider a post as "committed" if its json metadata file exists and is not malformed. Instaloader should download any posts which are not committed. Downside is commit mode requires metadata to be saved. --- docs/cli-options.rst | 6 ++++++ instaloader/__main__.py | 10 +++++++++- instaloader/instaloader.py | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 1b8375b..1d4f310 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -230,6 +230,12 @@ How to Download to ``3``. If a connection fails, it can be manually skipped by hitting :kbd:`Control-c`. Set this to ``0`` to retry infinitely. +.. option:: --commit-mode + + Tries to ensure downloaded images avoid corruption in case of unexpected + interruption. If the last picture is corrupted, Instaloader will fix the + picture the next time it is run. + Miscellaneous Options ^^^^^^^^^^^^^^^^^^^^^ diff --git a/instaloader/__main__.py b/instaloader/__main__.py index ca54e88..f9029b8 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -320,6 +320,10 @@ def main(): help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a ' 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'infinitely.') + g_how.add_argument('--commit-mode', action='store_true', + help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. 
' + 'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. ' + 'Requires the JSON metadata to be saved.') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', @@ -361,6 +365,9 @@ def main(): download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) download_stories = args.stories or args.stories_only + if args.commit_mode and args.no_metadata_json: + raise SystemExit('--commit-mode requires JSON metadata to be saved.') + loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, download_pictures=not args.no_pictures, @@ -371,7 +378,8 @@ def main(): post_metadata_txt_pattern=post_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, graphql_rate_limit=args.graphql_rate_limit, - max_connection_attempts=args.max_connection_attempts) + max_connection_attempts=args.max_connection_attempts, + commit_mode=args.commit_mode) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 8c238dd..b9a8075 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -1,5 +1,6 @@ import getpass import json +import lzma import os import platform import re @@ -15,7 +16,7 @@ from typing import Any, Callable, Iterator, List, Optional, Set, Union from .exceptions import * from .instaloadercontext import InstaloaderContext -from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file +from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file, load_structure_from_file def get_default_session_filename(username: str) -> str: @@ -116,7 +117,8 @@ class Instaloader: 
post_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None, graphql_rate_limit: Optional[int] = None, - max_connection_attempts: int = 3): + max_connection_attempts: int = 3, + commit_mode: bool = False): self.context = InstaloaderContext(sleep, quiet, user_agent, graphql_rate_limit, max_connection_attempts) @@ -134,6 +136,9 @@ class Instaloader: else post_metadata_txt_pattern self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \ else storyitem_metadata_txt_pattern + self.commit_mode = commit_mode + if self.commit_mode and not self.save_metadata: + raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.") @contextmanager def anonymous_copy(self): @@ -173,9 +178,15 @@ class Instaloader: if filename_suffix is not None: filename += '_' + filename_suffix filename += '.' + file_extension - if os.path.isfile(filename): - self.context.log(filename + ' exists', end=' ', flush=True) - return False + # A post is considered "committed" if the json file exists and is not malformed. + if self.commit_mode: + if self._committed: + self.context.log(filename + ' exists', end=' ', flush=True) + return False + else: + if os.path.isfile(filename): + self.context.log(filename + ' exists', end=' ', flush=True) + return False self.context.get_and_write_raw(url, filename) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) return True @@ -359,6 +370,7 @@ class Instaloader: # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = True + self._committed = self.check_if_committed(filename) if self.download_pictures: if post.typename == 'GraphSidecar': edge_number = 1 @@ -945,6 +957,21 @@ class Instaloader: if fast_update and not downloaded: break + def check_if_committed(self, filename: str) -> bool: + """Checks to see if the current post has been committed.""" + # A post is considered committed if its json metadata file exists and is not malformed. 
+ if os.path.isfile(filename + '.json.xz'): + filename += '.json.xz' + elif os.path.isfile(filename + '.json'): + filename += '.json' + else: + return False + try: + load_structure_from_file(self.context, filename) + return True + except (FileNotFoundError, lzma.LZMAError, json.decoder.JSONDecodeError): + return False + def interactive_login(self, username: str) -> None: """Logs in and internally stores session, asking user for password interactively.