diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 1b8375b..1d4f310 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -230,6 +230,12 @@ How to Download to ``3``. If a connection fails, it can be manually skipped by hitting :kbd:`Control-c`. Set this to ``0`` to retry infinitely. +.. option:: --commit-mode + + Tries to ensure downloaded images avoid corruption in case of unexpected + interruption. If the last picture is corrupted, Instaloader will fix the + picture the next time it is run. + Miscellaneous Options ^^^^^^^^^^^^^^^^^^^^^ diff --git a/instaloader/__main__.py b/instaloader/__main__.py index ca54e88..9caf8bd 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -320,6 +320,10 @@ def main(): help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a ' 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'infinitely.') + g_how.add_argument('--commit-mode', action='store_true', + help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. ' + 'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. 
' + 'Requires the JSON metadata to be saved.') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', @@ -361,6 +365,9 @@ def main(): download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) download_stories = args.stories or args.stories_only + if args.commit_mode and args.no_metadata_json: + raise SystemExit('--commit-mode requires JSON metadata to be saved.') + loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, download_pictures=not args.no_pictures, @@ -371,7 +378,8 @@ def main(): post_metadata_txt_pattern=post_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, graphql_rate_limit=args.graphql_rate_limit, - max_connection_attempts=args.max_connection_attempts) + max_connection_attempts=args.max_connection_attempts, + commit_mode=args.commit_mode) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 8c238dd..024d822 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -1,5 +1,6 @@ import getpass import json +import lzma import os import platform import re @@ -15,7 +16,7 @@ from typing import Any, Callable, Iterator, List, Optional, Set, Union from .exceptions import * from .instaloadercontext import InstaloaderContext -from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file +from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file, load_structure_from_file def get_default_session_filename(username: str) -> str: @@ -116,7 +117,8 @@ class Instaloader: post_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None, graphql_rate_limit: Optional[int] 
= None, - max_connection_attempts: int = 3): + max_connection_attempts: int = 3, + commit_mode: bool = False): self.context = InstaloaderContext(sleep, quiet, user_agent, graphql_rate_limit, max_connection_attempts) @@ -134,6 +136,12 @@ class Instaloader: else post_metadata_txt_pattern self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \ else storyitem_metadata_txt_pattern + self.commit_mode = commit_mode + if self.commit_mode and not self.save_metadata: + raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.") + + # Used to keep state in commit mode + self._committed = None @contextmanager def anonymous_copy(self): @@ -173,9 +181,15 @@ class Instaloader: if filename_suffix is not None: filename += '_' + filename_suffix filename += '.' + file_extension - if os.path.isfile(filename): - self.context.log(filename + ' exists', end=' ', flush=True) - return False + # A post is considered "committed" if the json file exists and is not malformed. + if self.commit_mode: + if self._committed and os.path.isfile(filename): + self.context.log(filename + ' exists', end=' ', flush=True) + return False + else: + if os.path.isfile(filename): + self.context.log(filename + ' exists', end=' ', flush=True) + return False self.context.get_and_write_raw(url, filename) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) return True @@ -359,6 +373,7 @@ class Instaloader: # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = True + self._committed = self.check_if_committed(filename) if self.download_pictures: if post.typename == 'GraphSidecar': edge_number = 1 @@ -945,6 +960,21 @@ class Instaloader: if fast_update and not downloaded: break + def check_if_committed(self, filename: str) -> bool: + """Checks to see if the current post has been committed.""" + # A post is considered committed if its json metadata file exists and is not malformed. 
+ if os.path.isfile(filename + '.json.xz'): + filename += '.json.xz' + elif os.path.isfile(filename + '.json'): + filename += '.json' + else: + return False + try: + load_structure_from_file(self.context, filename) + return True + except (FileNotFoundError, lzma.LZMAError, json.decoder.JSONDecodeError): + return False + def interactive_login(self, username: str) -> None: """Logs in and internally stores session, asking user for password interactively.