Merge branch 'master' of https://github.com/sushilicious/instaloader into v4.2-dev

This commit is contained in:
André Koch-Kramer 2018-12-13 15:40:08 +01:00
commit 6212d22e26
3 changed files with 50 additions and 6 deletions

View File

@ -230,6 +230,12 @@ How to Download
to ``3``. If a connection fails, it can be manually skipped by hitting to ``3``. If a connection fails, it can be manually skipped by hitting
:kbd:`Control-c`. Set this to ``0`` to retry infinitely. :kbd:`Control-c`. Set this to ``0`` to retry infinitely.
.. option:: --commit-mode
Tries to ensure that downloaded images are not left corrupted in case of an
unexpected interruption. If the last picture is corrupted, Instaloader will fix
the picture the next time it is run. Requires the JSON metadata to be saved.
Miscellaneous Options Miscellaneous Options
^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^

View File

@ -320,6 +320,10 @@ def main():
help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a ' help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
'infinitely.') 'infinitely.')
g_how.add_argument('--commit-mode', action='store_true',
help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. '
'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. '
'Requires the JSON metadata to be saved.')
g_misc = parser.add_argument_group('Miscellaneous Options') g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-q', '--quiet', action='store_true', g_misc.add_argument('-q', '--quiet', action='store_true',
@ -361,6 +365,9 @@ def main():
download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only)
download_stories = args.stories or args.stories_only download_stories = args.stories or args.stories_only
if args.commit_mode and args.no_metadata_json:
raise SystemExit('--commit-mode requires JSON metadata to be saved.')
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent,
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
download_pictures=not args.no_pictures, download_pictures=not args.no_pictures,
@ -371,7 +378,8 @@ def main():
post_metadata_txt_pattern=post_metadata_txt_pattern, post_metadata_txt_pattern=post_metadata_txt_pattern,
storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern,
graphql_rate_limit=args.graphql_rate_limit, graphql_rate_limit=args.graphql_rate_limit,
max_connection_attempts=args.max_connection_attempts) max_connection_attempts=args.max_connection_attempts,
commit_mode=args.commit_mode)
_main(loader, _main(loader,
args.profile, args.profile,
username=args.login.lower() if args.login is not None else None, username=args.login.lower() if args.login is not None else None,

View File

@ -1,5 +1,6 @@
import getpass import getpass
import json import json
import lzma
import os import os
import platform import platform
import re import re
@ -15,7 +16,7 @@ from typing import Any, Callable, Iterator, List, Optional, Set, Union
from .exceptions import * from .exceptions import *
from .instaloadercontext import InstaloaderContext from .instaloadercontext import InstaloaderContext
from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file, load_structure_from_file
def get_default_session_filename(username: str) -> str: def get_default_session_filename(username: str) -> str:
@ -116,7 +117,8 @@ class Instaloader:
post_metadata_txt_pattern: str = None, post_metadata_txt_pattern: str = None,
storyitem_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None,
graphql_rate_limit: Optional[int] = None, graphql_rate_limit: Optional[int] = None,
max_connection_attempts: int = 3): max_connection_attempts: int = 3,
commit_mode: bool = False):
self.context = InstaloaderContext(sleep, quiet, user_agent, graphql_rate_limit, max_connection_attempts) self.context = InstaloaderContext(sleep, quiet, user_agent, graphql_rate_limit, max_connection_attempts)
@ -134,6 +136,12 @@ class Instaloader:
else post_metadata_txt_pattern else post_metadata_txt_pattern
self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \ self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \
else storyitem_metadata_txt_pattern else storyitem_metadata_txt_pattern
self.commit_mode = commit_mode
if self.commit_mode and not self.save_metadata:
raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.")
# Used to keep state in commit mode
self._committed = None
@contextmanager @contextmanager
def anonymous_copy(self): def anonymous_copy(self):
@ -173,9 +181,15 @@ class Instaloader:
if filename_suffix is not None: if filename_suffix is not None:
filename += '_' + filename_suffix filename += '_' + filename_suffix
filename += '.' + file_extension filename += '.' + file_extension
if os.path.isfile(filename): # A post is considered "committed" if the json file exists and is not malformed.
self.context.log(filename + ' exists', end=' ', flush=True) if self.commit_mode:
return False if self._committed and os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True)
return False
else:
if os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True)
return False
self.context.get_and_write_raw(url, filename) self.context.get_and_write_raw(url, filename)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True return True
@ -359,6 +373,7 @@ class Instaloader:
# Download the image(s) / video thumbnail and videos within sidecars if desired # Download the image(s) / video thumbnail and videos within sidecars if desired
downloaded = True downloaded = True
self._committed = self.check_if_committed(filename)
if self.download_pictures: if self.download_pictures:
if post.typename == 'GraphSidecar': if post.typename == 'GraphSidecar':
edge_number = 1 edge_number = 1
@ -945,6 +960,21 @@ class Instaloader:
if fast_update and not downloaded: if fast_update and not downloaded:
break break
def check_if_committed(self, filename: str) -> bool:
    """Check whether the current post has been committed.

    A post is considered committed if its JSON metadata file exists and is not
    malformed. The compressed file ``<filename>.json.xz`` is looked up first,
    then the plain ``<filename>.json``.

    :param filename: Path of the post's files without the metadata extension.
    :return: True if a metadata file exists and could be loaded, False if no
       metadata file exists or the existing one cannot be read or parsed.
    """
    # Prefer the compressed metadata file; fall back to the uncompressed one.
    for suffix in ('.json.xz', '.json'):
        if os.path.isfile(filename + suffix):
            metadata_filename = filename + suffix
            break
    else:
        # Neither metadata file exists, so nothing has been committed yet.
        return False
    try:
        load_structure_from_file(self.context, metadata_filename)
    except (FileNotFoundError, EOFError, lzma.LZMAError, json.decoder.JSONDecodeError):
        # EOFError is what the lzma reader raises for a stream that was cut off
        # before its end-of-stream marker, i.e. a metadata file whose write was
        # interrupted. Such a file is malformed, hence the post is not committed.
        return False
    return True
def interactive_login(self, username: str) -> None: def interactive_login(self, username: str) -> None:
"""Logs in and internally stores session, asking user for password interactively. """Logs in and internally stores session, asking user for password interactively.