Limit GraphQl queries to 20 per 11 minutes
- Set GRAPHQL_PAGE_LENGTH to 50 what appears to be the new working maximum. - Limit GQL queries to 20 per 666 seconds. - Remove logic for tracking queries per query identifier as Instagram only allows 20 overall GQL queries per sliding window. Related to #101
This commit is contained in:
parent
eecaca7c7b
commit
d90c05e0a4
@ -460,7 +460,7 @@ class _PathPattern(str):
|
|||||||
|
|
||||||
|
|
||||||
class Instaloader:
|
class Instaloader:
|
||||||
GRAPHQL_PAGE_LENGTH = 12
|
GRAPHQL_PAGE_LENGTH = 50
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
sleep: bool = True, quiet: bool = False,
|
sleep: bool = True, quiet: bool = False,
|
||||||
@ -506,7 +506,7 @@ class Instaloader:
|
|||||||
self.error_log = []
|
self.error_log = []
|
||||||
|
|
||||||
# For the adaption of sleep intervals (rate control)
|
# For the adaption of sleep intervals (rate control)
|
||||||
self.previous_queries = dict()
|
self.query_timestamps = list()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_logged_in(self) -> bool:
|
def is_logged_in(self) -> bool:
|
||||||
@ -523,10 +523,10 @@ class Instaloader:
|
|||||||
self.download_geotags,
|
self.download_geotags,
|
||||||
self.save_captions, self.download_comments,
|
self.save_captions, self.download_comments,
|
||||||
self.save_metadata, self.max_connection_attempts)
|
self.save_metadata, self.max_connection_attempts)
|
||||||
new_loader.previous_queries = self.previous_queries
|
new_loader.query_timestamps = self.query_timestamps
|
||||||
yield new_loader
|
yield new_loader
|
||||||
self.error_log.extend(new_loader.error_log)
|
self.error_log.extend(new_loader.error_log)
|
||||||
self.previous_queries = new_loader.previous_queries
|
self.query_timestamps = new_loader.query_timestamps
|
||||||
|
|
||||||
def _log(self, *msg, sep='', end='\n', flush=False):
|
def _log(self, *msg, sep='', end='\n', flush=False):
|
||||||
"""Log a message to stdout that can be suppressed with --quiet."""
|
"""Log a message to stdout that can be suppressed with --quiet."""
|
||||||
@ -604,29 +604,25 @@ class Instaloader:
|
|||||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||||
:raises ConnectionException: When query repeatedly failed.
|
:raises ConnectionException: When query repeatedly failed.
|
||||||
"""
|
"""
|
||||||
def graphql_query_waittime(query_id: Union[int, str], untracked_queries: bool = False) -> int:
|
def graphql_query_waittime(untracked_queries: bool = False) -> int:
|
||||||
sliding_window = 660
|
sliding_window = 660
|
||||||
timestamps = self.previous_queries.get(query_id)
|
if not self.query_timestamps:
|
||||||
if not timestamps:
|
|
||||||
return sliding_window if untracked_queries else 0
|
return sliding_window if untracked_queries else 0
|
||||||
current_time = time.monotonic()
|
current_time = time.monotonic()
|
||||||
timestamps = list(filter(lambda t: t > current_time - sliding_window, timestamps))
|
self.query_timestamps = list(filter(lambda t: t > current_time - sliding_window, self.query_timestamps))
|
||||||
self.previous_queries[query_id] = timestamps
|
if len(self.query_timestamps) < 20 and not untracked_queries:
|
||||||
if len(timestamps) < 100 and not untracked_queries:
|
|
||||||
return 0
|
return 0
|
||||||
return round(min(timestamps) + sliding_window - current_time) + 6
|
return round(min(self.query_timestamps) + sliding_window - current_time) + 6
|
||||||
is_graphql_query = 'graphql/query' in path
|
is_graphql_query = 'graphql/query' in path
|
||||||
if is_graphql_query:
|
if is_graphql_query:
|
||||||
query_id = params['query_id'] if 'query_id' in params else params['query_hash']
|
waittime = graphql_query_waittime()
|
||||||
waittime = graphql_query_waittime(query_id)
|
|
||||||
if waittime > 0:
|
if waittime > 0:
|
||||||
self._log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
|
self._log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
|
||||||
time.sleep(waittime)
|
time.sleep(waittime)
|
||||||
timestamp_list = self.previous_queries.get(query_id)
|
if self.query_timestamps is not None:
|
||||||
if timestamp_list is not None:
|
self.query_timestamps.append(time.monotonic())
|
||||||
timestamp_list.append(time.monotonic())
|
|
||||||
else:
|
else:
|
||||||
self.previous_queries[query_id] = [time.monotonic()]
|
self.query_timestamps = [time.monotonic()]
|
||||||
sess = session if session else self.session
|
sess = session if session else self.session
|
||||||
try:
|
try:
|
||||||
self._sleep()
|
self._sleep()
|
||||||
@ -663,8 +659,7 @@ class Instaloader:
|
|||||||
if isinstance(err, TooManyRequests):
|
if isinstance(err, TooManyRequests):
|
||||||
print(textwrap.fill(text_for_429), file=sys.stderr)
|
print(textwrap.fill(text_for_429), file=sys.stderr)
|
||||||
if is_graphql_query:
|
if is_graphql_query:
|
||||||
query_id = params['query_id'] if 'query_id' in params else params['query_hash']
|
waittime = graphql_query_waittime(untracked_queries=True)
|
||||||
waittime = graphql_query_waittime(query_id, untracked_queries=True)
|
|
||||||
if waittime > 0:
|
if waittime > 0:
|
||||||
self._log('The request will be retried in {} seconds.'.format(waittime))
|
self._log('The request will be retried in {} seconds.'.format(waittime))
|
||||||
time.sleep(waittime)
|
time.sleep(waittime)
|
||||||
|
Loading…
Reference in New Issue
Block a user