Flag --abort-on to abort on given status codes

Closes #920.
This commit is contained in:
Alexander Graf 2021-02-13 19:04:05 +01:00
parent e15d67c065
commit a2d756b177
7 changed files with 67 additions and 12 deletions

View File

@ -255,6 +255,17 @@ How to Download
.. versionchanged:: 4.6 .. versionchanged:: 4.6
Enabled this option by default with a timeout of 300 seconds. Enabled this option by default with a timeout of 300 seconds.
.. option:: --abort-on STATUS_CODE_LIST
Comma-separated list of HTTP status codes that cause Instaloader to abort,
bypassing all retry logic.
For example, with ``--abort-on=302,400,429``, Instaloader will stop if a
request is answered with a 302 redirect, a 400 Bad Request error, or a 429
Too Many Requests error.
.. versionadded:: 4.7
Miscellaneous Options Miscellaneous Options
^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^

View File

@ -47,3 +47,5 @@ Exceptions
.. autoexception:: TooManyRequestsException .. autoexception:: TooManyRequestsException
.. autoexception:: AbortDownloadException

View File

@ -5,12 +5,12 @@ import datetime
import os import os
import re import re
import sys import sys
from argparse import ArgumentParser, SUPPRESS from argparse import ArgumentParser, ArgumentTypeError, SUPPRESS
from typing import List, Optional from typing import List, Optional
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException, from . import (AbortDownloadException, BadCredentialsException, Instaloader, InstaloaderException,
StoryItem, __version__, load_structure_from_file, TwoFactorAuthRequiredException, InvalidArgumentException, Post, Profile, ProfileNotExistsException, StoryItem,
BadCredentialsException) TwoFactorAuthRequiredException, __version__, load_structure_from_file)
from .instaloader import get_default_session_filename from .instaloader import get_default_session_filename
from .instaloadercontext import default_user_agent from .instaloadercontext import default_user_agent
@ -27,6 +27,14 @@ def usage_string():
{0} --help""".format(argv0, len(argv0), '') {0} --help""".format(argv0, len(argv0), '')
def http_status_code_list(code_list_str: str) -> List[int]:
    """Parse a comma-separated list of HTTP status codes, for use as an argparse ``type=``.

    :param code_list_str: Status codes separated by commas, e.g. ``"302,400,429"``.
    :return: The parsed status codes, in the order given.
    :raises ArgumentTypeError: If an entry is not an integer or is outside the range 100..599.
    """
    codes = []
    for token in code_list_str.split(','):
        try:
            code = int(token)
        except ValueError:
            # Turn the bare ValueError into an ArgumentTypeError so that argparse shows the
            # offending entry instead of only a generic "invalid ... value" message.
            raise ArgumentTypeError("Invalid HTTP status code: {!r}".format(token.strip())) from None
        if not 100 <= code <= 599:
            raise ArgumentTypeError("Invalid HTTP status code: {}".format(code))
        codes.append(code)
    return codes
def filterstr_to_filterfunc(filter_str: str, item_type: type): def filterstr_to_filterfunc(filter_str: str, item_type: type):
"""Takes an --post-filter=... or --storyitem-filter=... filter """Takes an --post-filter=... or --storyitem-filter=... filter
specification and makes a filter_func Callable out of it.""" specification and makes a filter_func Callable out of it."""
@ -213,6 +221,8 @@ def _main(instaloader: Instaloader, targetlist: List[str],
fast_update=fast_update, post_filter=post_filter) fast_update=fast_update, post_filter=post_filter)
except KeyboardInterrupt: except KeyboardInterrupt:
print("\nInterrupted by user.", file=sys.stderr) print("\nInterrupted by user.", file=sys.stderr)
except AbortDownloadException as exc:
print("\nDownload aborted: {}.".format(exc), file=sys.stderr)
# Save session if it is useful # Save session if it is useful
if instaloader.context.is_logged_in: if instaloader.context.is_logged_in:
instaloader.save_session_to_file(sessionfile) instaloader.save_session_to_file(sessionfile)
@ -369,6 +379,9 @@ def main():
g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS) g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS)
g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0, g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0,
help='Seconds to wait before timing out a connection request. Defaults to 300.') help='Seconds to wait before timing out a connection request. Defaults to 300.')
g_how.add_argument('--abort-on', type=http_status_code_list, metavar="STATUS_CODES",
help='Comma-separated list of HTTP status codes that cause Instaloader to abort, bypassing all '
'retry logic.')
g_misc = parser.add_argument_group('Miscellaneous Options') g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-q', '--quiet', action='store_true', g_misc.add_argument('-q', '--quiet', action='store_true',
@ -427,7 +440,8 @@ def main():
request_timeout=args.request_timeout, request_timeout=args.request_timeout,
resume_prefix=resume_prefix, resume_prefix=resume_prefix,
check_resume_bbd=not args.use_aged_resume_files, check_resume_bbd=not args.use_aged_resume_files,
slide=args.slide) slide=args.slide,
fatal_status_codes=args.abort_on)
_main(loader, _main(loader,
args.profile, args.profile,
username=args.login.lower() if args.login is not None else None, username=args.login.lower() if args.login is not None else None,

View File

@ -64,3 +64,15 @@ class QueryReturnedNotFoundException(ConnectionException):
class TooManyRequestsException(ConnectionException): class TooManyRequestsException(ConnectionException):
pass pass
class AbortDownloadException(Exception):
    """
    Raised to abort the download loop: the error catchers inside the download loop do not
    catch this exception, so it propagates out of the loop.

    It derives directly from :class:`Exception` and is not a subclass of ``InstaloaderException``.

    .. versionadded:: 4.7
    """

View File

@ -161,6 +161,7 @@ class Instaloader:
:param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`. :param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`.
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired. :param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired.
:param slide: :option:`--slide` :param slide: :option:`--slide`
:param fatal_status_codes: :option:`--abort-on`
.. attribute:: context .. attribute:: context
@ -187,10 +188,11 @@ class Instaloader:
rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None,
resume_prefix: Optional[str] = "iterator", resume_prefix: Optional[str] = "iterator",
check_resume_bbd: bool = True, check_resume_bbd: bool = True,
slide: Optional[str] = None): slide: Optional[str] = None,
fatal_status_codes: Optional[List[int]] = None):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller) request_timeout, rate_controller, fatal_status_codes)
# configuration parameters # configuration parameters
self.dirname_pattern = dirname_pattern or "{target}" self.dirname_pattern = dirname_pattern or "{target}"
@ -256,7 +258,8 @@ class Instaloader:
request_timeout=self.context.request_timeout, request_timeout=self.context.request_timeout,
resume_prefix=self.resume_prefix, resume_prefix=self.resume_prefix,
check_resume_bbd=self.check_resume_bbd, check_resume_bbd=self.check_resume_bbd,
slide=self.slide) slide=self.slide,
fatal_status_codes=self.context.fatal_status_codes)
yield new_loader yield new_loader
self.context.error_log.extend(new_loader.context.error_log) self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors new_loader.context.error_log = [] # avoid double-printing of errors

View File

@ -53,7 +53,8 @@ class InstaloaderContext:
def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None,
max_connection_attempts: int = 3, request_timeout: float = 300.0, max_connection_attempts: int = 3, request_timeout: float = 300.0,
rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None): rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None,
fatal_status_codes: Optional[List[int]] = None):
self.user_agent = user_agent if user_agent is not None else default_user_agent() self.user_agent = user_agent if user_agent is not None else default_user_agent()
self.request_timeout = request_timeout self.request_timeout = request_timeout
@ -74,6 +75,9 @@ class InstaloaderContext:
# Can be set to True for testing, disables supression of InstaloaderContext._error_catcher # Can be set to True for testing, disables supression of InstaloaderContext._error_catcher
self.raise_all_errors = False self.raise_all_errors = False
# HTTP status codes that should cause an AbortDownloadException
self.fatal_status_codes = fatal_status_codes or []
# Cache profile from id (mapping from id to Profile) # Cache profile from id (mapping from id to Profile)
self.profile_id_cache = dict() # type: Dict[int, Any] self.profile_id_cache = dict() # type: Dict[int, Any]
@ -316,6 +320,11 @@ class InstaloaderContext:
if is_other_query: if is_other_query:
self._rate_controller.wait_before_query('other') self._rate_controller.wait_before_query('other')
resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False) resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False)
if resp.status_code in self.fatal_status_codes:
redirect = " redirect to {}".format(resp.headers['location']) if 'location' in resp.headers else ""
raise AbortDownloadException("Query to https://{}/{} responded with \"{} {}\"{}".format(
host, path, resp.status_code, resp.reason, redirect
))
while resp.is_redirect: while resp.is_redirect:
redirect_url = resp.headers['location'] redirect_url = resp.headers['location']
self.log('\nHTTP redirect from https://{0}/{1} to {2}'.format(host, path, redirect_url)) self.log('\nHTTP redirect from https://{0}/{1} to {2}'.format(host, path, redirect_url))

View File

@ -7,7 +7,7 @@ from datetime import datetime, timedelta
from lzma import LZMAError from lzma import LZMAError
from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar
from .exceptions import InvalidArgumentException, QueryReturnedBadRequestException from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException
from .instaloadercontext import InstaloaderContext from .instaloadercontext import InstaloaderContext
FrozenNodeIterator = NamedTuple('FrozenNodeIterator', FrozenNodeIterator = NamedTuple('FrozenNodeIterator',
@ -211,7 +211,8 @@ def resumable_iteration(context: InstaloaderContext,
check_bbd: bool = True, check_bbd: bool = True,
enabled: bool = True) -> Iterator[Tuple[bool, int]]: enabled: bool = True) -> Iterator[Tuple[bool, int]]:
""" """
High-level context manager to handle a resumable iteration that can be interrupted with a KeyboardInterrupt. High-level context manager to handle a resumable iteration that can be interrupted
with a :class:`KeyboardInterrupt` or an :class:`AbortDownloadException`.
It can be used as follows to automatically load a previously-saved state into the iterator, save the iterator's It can be used as follows to automatically load a previously-saved state into the iterator, save the iterator's
state when interrupted, and delete the resume file upon completion:: state when interrupted, and delete the resume file upon completion::
@ -239,6 +240,9 @@ def resumable_iteration(context: InstaloaderContext,
:param format_path: Returns the path to the resume file for the given magic. :param format_path: Returns the path to the resume file for the given magic.
:param check_bbd: Whether to check the best before date and reject an expired FrozenNodeIterator. :param check_bbd: Whether to check the best before date and reject an expired FrozenNodeIterator.
:param enabled: Set to False to disable all functionality and simply execute the inner body. :param enabled: Set to False to disable all functionality and simply execute the inner body.
.. versionchanged:: 4.7
Also interrupt on :class:`AbortDownloadException`.
""" """
if not enabled or not isinstance(iterator, NodeIterator): if not enabled or not isinstance(iterator, NodeIterator):
yield False, 0 yield False, 0
@ -262,7 +266,7 @@ def resumable_iteration(context: InstaloaderContext,
context.error("Warning: Not resuming from {}: {}".format(resume_file_path, exc)) context.error("Warning: Not resuming from {}: {}".format(resume_file_path, exc))
try: try:
yield is_resuming, start_index yield is_resuming, start_index
except KeyboardInterrupt: except (KeyboardInterrupt, AbortDownloadException):
if os.path.dirname(resume_file_path): if os.path.dirname(resume_file_path):
os.makedirs(os.path.dirname(resume_file_path), exist_ok=True) os.makedirs(os.path.dirname(resume_file_path), exist_ok=True)
save(iterator.freeze(), resume_file_path) save(iterator.freeze(), resume_file_path)