More control about connection retry mechanism
- If when retrying a failed connection ^C is hit, the retrying is aborted, - Option --max-connection-attempts to override default of how many connection attempts should be made. Closes #51.
This commit is contained in:
parent
e0ed4cf16c
commit
a1920b97a4
@ -142,6 +142,12 @@ How to Download
|
||||
User Agent to use for HTTP requests. Per default, Instaloader pretends being
|
||||
Chrome/51.
|
||||
|
||||
.. option:: --max-connection-attempts N
|
||||
|
||||
Maximum number of connection attempts until a request is aborted. Defaults
|
||||
to ``3``. If a connection fails, it can by manually skipped by hitting
|
||||
:kbd:`Control-c`. Set this to ``0`` to retry infinitely.
|
||||
|
||||
Miscellaneous Options
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -405,7 +405,8 @@ class Instaloader:
|
||||
download_geotags: Tristate = Tristate.no_extra_query,
|
||||
save_captions: Tristate = Tristate.no_extra_query,
|
||||
download_comments: Tristate = Tristate.no_extra_query,
|
||||
save_metadata: Tristate = Tristate.never):
|
||||
save_metadata: Tristate = Tristate.never,
|
||||
max_connection_attempts: int = 3):
|
||||
|
||||
# configuration parameters
|
||||
self.user_agent = user_agent if user_agent is not None else default_user_agent()
|
||||
@ -421,6 +422,7 @@ class Instaloader:
|
||||
self.save_captions = save_captions
|
||||
self.download_comments = download_comments
|
||||
self.save_metadata = save_metadata
|
||||
self.max_connection_attempts = max_connection_attempts
|
||||
|
||||
# error log, filled with error() and printed at the end of Instaloader.main()
|
||||
self.error_log = []
|
||||
@ -439,7 +441,8 @@ class Instaloader:
|
||||
new_loader = Instaloader(self.sleep, self.quiet, self.user_agent,
|
||||
self.dirname_pattern, self.filename_pattern,
|
||||
self.download_videos, self.download_geotags,
|
||||
self.save_captions, self.download_comments)
|
||||
self.save_captions, self.download_comments,
|
||||
self.save_metadata, self.max_connection_attempts)
|
||||
new_loader.previous_queries = self.previous_queries
|
||||
yield new_loader
|
||||
self.error_log.extend(new_loader.error_log)
|
||||
@ -477,7 +480,7 @@ class Instaloader:
|
||||
if self.sleep:
|
||||
time.sleep(random.uniform(0.5, 3))
|
||||
|
||||
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
|
||||
def _get_and_write_raw(self, url: str, filename: str, _attempt = 1) -> None:
|
||||
"""Downloads raw data.
|
||||
|
||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||
@ -496,21 +499,23 @@ class Instaloader:
|
||||
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
||||
error_string = "URL {}: {}".format(url, err)
|
||||
if tries <= 1:
|
||||
if _attempt == self.max_connection_attempts:
|
||||
raise ConnectionException(error_string)
|
||||
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
||||
try:
|
||||
self._sleep()
|
||||
self._get_and_write_raw(url, filename, _attempt + 1)
|
||||
except KeyboardInterrupt:
|
||||
self.error("[skipped by user]", repeat_at_end=False)
|
||||
raise ConnectionException(error_string)
|
||||
else:
|
||||
self.error(error_string + " [retrying]", repeat_at_end=False)
|
||||
self._sleep()
|
||||
self._get_and_write_raw(url, filename, tries - 1)
|
||||
|
||||
def get_json(self, url: str, params: Dict[str, Any],
|
||||
session: Optional[requests.Session] = None, tries: int = 3) -> Dict[str, Any]:
|
||||
session: Optional[requests.Session] = None, _attempt = 1) -> Dict[str, Any]:
|
||||
"""JSON request to Instagram.
|
||||
|
||||
:param url: URL, relative to www.instagram.com/
|
||||
:param params: GET parameters
|
||||
:param session: Session to use, or None to use self.session
|
||||
:param tries: Maximum number of attempts until an exception is raised
|
||||
:return: Decoded response dictionary
|
||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||
:raises ConnectionException: When query repeatedly failed.
|
||||
@ -558,21 +563,25 @@ class Instaloader:
|
||||
return resp_json
|
||||
except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
|
||||
error_string = "JSON Query to {}: {}".format(url, err)
|
||||
if tries <= 1:
|
||||
if _attempt == self.max_connection_attempts:
|
||||
raise ConnectionException(error_string)
|
||||
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
||||
text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
|
||||
"Please do not use Instagram in your browser or run multiple instances of Instaloader "
|
||||
"in parallel.")
|
||||
try:
|
||||
if isinstance(err, TooManyRequests):
|
||||
print(textwrap.fill(text_for_429), file=sys.stderr)
|
||||
if is_graphql_query:
|
||||
waittime = graphql_query_waittime(query_id=params['query_id'], untracked_queries=True)
|
||||
if waittime > 0:
|
||||
self._log('The request will be retried in {} seconds.'.format(waittime))
|
||||
time.sleep(waittime)
|
||||
self._sleep()
|
||||
return self.get_json(url, params, sess, _attempt + 1)
|
||||
except KeyboardInterrupt:
|
||||
self.error("[skipped by user]", repeat_at_end=False)
|
||||
raise ConnectionException(error_string)
|
||||
self.error(error_string + " [retrying]", repeat_at_end=False)
|
||||
if isinstance(err, TooManyRequests):
|
||||
text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
|
||||
"Please do not use Instagram in your browser or run multiple instances of Instaloader "
|
||||
"in parallel.")
|
||||
print(textwrap.fill(text_for_429), file=sys.stderr)
|
||||
if is_graphql_query:
|
||||
waittime = graphql_query_waittime(query_id=params['query_id'], untracked_queries=True)
|
||||
if waittime > 0:
|
||||
self._log('The request will be retried in {} seconds.'.format(waittime))
|
||||
time.sleep(waittime)
|
||||
self._sleep()
|
||||
return self.get_json(url, params, sess, tries - 1)
|
||||
|
||||
def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
||||
"""Returns default HTTP header we use for requests."""
|
||||
@ -1464,6 +1473,10 @@ def main():
|
||||
g_how.add_argument('--user-agent',
|
||||
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
|
||||
g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
|
||||
g_how.add_argument('--max-connection-attempts', metavar='N', type=int, default=3,
|
||||
help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
|
||||
'connection fails, it can by manually skipped by hitting CTRL+C. Set this to 0 to retry '
|
||||
'infinitely.')
|
||||
|
||||
g_misc = parser.add_argument_group('Miscellaneous Options')
|
||||
g_misc.add_argument('-q', '--quiet', action='store_true',
|
||||
@ -1505,7 +1518,7 @@ def main():
|
||||
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
|
||||
download_videos=download_videos, download_geotags=download_geotags,
|
||||
save_captions=save_captions, download_comments=download_comments,
|
||||
save_metadata=save_metadata)
|
||||
save_metadata=save_metadata, max_connection_attempts=args.max_connection_attempts)
|
||||
loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password,
|
||||
args.sessionfile,
|
||||
int(args.count) if args.count is not None else None,
|
||||
|
Loading…
Reference in New Issue
Block a user