diff --git a/docs/cli-options.rst b/docs/cli-options.rst index d84f6f0..3f7a4bb 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -238,7 +238,7 @@ How to Download .. option:: --user-agent USER_AGENT User Agent to use for HTTP requests. Per default, Instaloader pretends being - Chrome/51. + Chrome/88 on Linux. .. option:: --max-connection-attempts N diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 7703197..15d438f 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.6' +__version__ = '4.6.1' try: diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 1407e80..68c9b53 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -33,7 +33,7 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N def default_user_agent() -> str: return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \ - '(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36' + '(KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36' class InstaloaderContext: @@ -288,7 +288,7 @@ class InstaloaderContext: def do_sleep(self): """Sleep a short time if self.sleep is set. Called before each request to instagram.com.""" if self.sleep: - time.sleep(min(random.expovariate(0.7), 5.0)) + time.sleep(min(random.expovariate(0.6), 15.0)) def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com', session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]: @@ -549,6 +549,7 @@ class RateController: self._context = context self._query_timestamps = dict() # type: Dict[str, List[float]] self._earliest_next_request_time = 0.0 + self._iphone_earliest_next_request_time = 0.0 def sleep(self, secs: float): """Wait given number of seconds.""" @@ -559,7 +560,7 @@ class RateController: def _dump_query_timestamps(self, current_time: float, failed_query_type: str): windows = [10, 11, 20, 22, 30, 60] - self._context.error("Requests within last {} minutes grouped by type:" + self._context.error("Number of requests within last {} minutes grouped by type:" .format('/'.join(str(w) for w in windows)), repeat_at_end=False) for query_type, times in self._query_timestamps.items(): @@ -571,11 +572,15 @@ class RateController: ), repeat_at_end=False) def count_per_sliding_window(self, query_type: str) -> int: - """Return how many requests can be done within the sliding window.""" + """Return how many requests of the given type can be done within a sliding window of 11 minutes. + + This is called by :meth:`RateController.query_waittime` and allows to simply customize wait times before queries + at query_type granularity. Consider overriding :meth:`RateController.query_waittime` directly if you need more + control.""" # Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as # whether we are logged in. # pylint:disable=no-self-use - return 75 if query_type in ['iphone', 'other'] else 200 + return 75 if query_type == 'other' else 200 def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]: if query_type is not None: @@ -591,6 +596,7 @@ class RateController: def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float: """Calculate time needed to wait before query can be executed.""" per_type_sliding_window = 660 + iphone_sliding_window = 1800 if query_type not in self._query_timestamps: self._query_timestamps[query_type] = [] self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60, @@ -616,25 +622,43 @@ class RateController: def untracked_next_request_time(): if untracked_queries: - reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, per_type_sliding_window) - self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6 - return self._earliest_next_request_time + if query_type == "iphone": + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, + iphone_sliding_window) + self._iphone_earliest_next_request_time = min(reqs_in_sliding_window) + iphone_sliding_window + 18 + else: + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, + per_type_sliding_window) + self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6 + return max(self._iphone_earliest_next_request_time, self._earliest_next_request_time) + + def iphone_next_request(): + if query_type == "iphone": + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, iphone_sliding_window) + if len(reqs_in_sliding_window) >= 199: + return min(reqs_in_sliding_window) + iphone_sliding_window + 18 + return 0.0 return max(0.0, max( per_type_next_request_time(), gql_accumulated_next_request_time(), untracked_next_request_time(), + iphone_next_request(), ) - current_time) def wait_before_query(self, query_type: str) -> None: - """This method is called before a query to Instagram. It calls :meth:`RateController.sleep` to wait - until the request can be made.""" + """This method is called before a query to Instagram. + + It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls + :meth:`RateController.sleep` to wait until the request can be made.""" waittime = self.query_waittime(query_type, time.monotonic(), False) assert waittime >= 0 if waittime > 15: - self._context.log("\nToo many queries in the last time. Need to wait {} seconds, until {:%H:%M}." - .format(round(waittime), datetime.now() + timedelta(seconds=waittime))) + formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else + "{} minutes".format(round(waittime / 60))) + self._context.log("\nToo many queries in the last time. Need to wait {}, until {:%H:%M}." + .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime))) if waittime > 0: self.sleep(waittime) if query_type not in self._query_timestamps: @@ -643,8 +667,10 @@ class RateController: self._query_timestamps[query_type].append(time.monotonic()) def handle_429(self, query_type: str) -> None: - """This method is called to handle a 429 Too Many Requests response. It calls :meth:`RateController.sleep` to - wait until we can repeat the same request.""" + """This method is called to handle a 429 Too Many Requests response. + + It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls + :meth:`RateController.sleep` to wait until we can repeat the same request.""" current_time = time.monotonic() waittime = self.query_waittime(query_type, current_time, True) assert waittime >= 0 @@ -654,8 +680,10 @@ class RateController: "App while Instaloader is running.") self._context.error(textwrap.fill(text_for_429), repeat_at_end=False) if waittime > 1.5: - self._context.error("The request will be retried in {} seconds, at {:%H:%M}." - .format(round(waittime), datetime.now() + timedelta(seconds=waittime)), + formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else + "{} minutes".format(round(waittime / 60))) + self._context.error("The request will be retried in {}, at {:%H:%M}." + .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)), repeat_at_end=False) if waittime > 0: self.sleep(waittime)