diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 2ed91fe..e83a64f 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -1203,14 +1203,14 @@ class Instaloader: if latest_stamps is not None: last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username) posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped - scraped_timestamp = datetime.now().astimezone() - self.posts_download_loop(profile.get_tagged_posts(), + tagged_posts = profile.get_tagged_posts() + self.posts_download_loop(tagged_posts, target if target else (Path(_PostPathFormatter.sanitize_path(profile.username)) / _PostPathFormatter.sanitize_path(':tagged')), fast_update, post_filter, takewhile=posts_takewhile) - if latest_stamps is not None: - latest_stamps.set_last_tagged_timestamp(profile.username, scraped_timestamp) + if latest_stamps is not None and tagged_posts.first_item is not None: + latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local.astimezone()) def download_igtv(self, profile: Profile, fast_update: bool = False, post_filter: Optional[Callable[[Post], bool]] = None, @@ -1226,11 +1226,11 @@ class Instaloader: if latest_stamps is not None: last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username) posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped - scraped_timestamp = datetime.now().astimezone() - self.posts_download_loop(profile.get_igtv_posts(), profile.username, fast_update, post_filter, + igtv_posts = profile.get_igtv_posts() + self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter, total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile) - if latest_stamps is not None: - latest_stamps.set_last_igtv_timestamp(profile.username, scraped_timestamp) + if latest_stamps is not None and igtv_posts.first_item is not None: + latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local.astimezone()) def _get_id_filename(self, profile_name: str) -> str: if ((format_string_contains_key(self.dirname_pattern, 'profile') or @@ -1424,12 +1424,13 @@ class Instaloader: # pylint:disable=cell-var-from-loop last_scraped = latest_stamps.get_last_post_timestamp(profile_name) posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped - scraped_timestamp = datetime.now().astimezone() - self.posts_download_loop(profile.get_posts(), profile_name, fast_update, post_filter, + posts_to_download = profile.get_posts() + self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter, total_count=profile.mediacount, owner_profile=profile, takewhile=posts_takewhile) - if latest_stamps is not None: - latest_stamps.set_last_post_timestamp(profile_name, scraped_timestamp) + if latest_stamps is not None and posts_to_download.first_item is not None: + latest_stamps.set_last_post_timestamp(profile_name, + posts_to_download.first_item.date_local.astimezone()) if stories and profiles: with self.context.error_catcher("Download stories"): diff --git a/instaloader/nodeiterator.py b/instaloader/nodeiterator.py index cb030b2..90d3937 100644 --- a/instaloader/nodeiterator.py +++ b/instaloader/nodeiterator.py @@ -17,7 +17,8 @@ FrozenNodeIterator = NamedTuple('FrozenNodeIterator', ('context_username', Optional[str]), ('total_index', int), ('best_before', Optional[float]), - ('remaining_data', Optional[Dict])]) + ('remaining_data', Optional[Dict]), + ('first_node', Optional[Dict])]) FrozenNodeIterator.query_hash.__doc__ = """The GraphQL ``query_hash`` parameter.""" FrozenNodeIterator.query_variables.__doc__ = """The GraphQL ``query_variables`` parameter.""" FrozenNodeIterator.query_referer.__doc__ = """The HTTP referer used for the GraphQL query.""" @@ -26,7 +27,7 @@ FrozenNodeIterator.total_index.__doc__ = """Number of items that have already be FrozenNodeIterator.best_before.__doc__ = """Date when parts of the stored nodes might have expired.""" FrozenNodeIterator.remaining_data.__doc__ = \ """The already-retrieved, yet-unprocessed ``edges`` and the ``page_info`` at time of freezing.""" - +FrozenNodeIterator.first_node.__doc__ = """Node data of the first item, if an item has been produced.""" T = TypeVar('T') @@ -89,6 +90,7 @@ class NodeIterator(Iterator[T]): self._best_before = datetime.now() + NodeIterator._shelf_life else: self._data = self._query() + self._first_node: Optional[Dict] = None def _query(self, after: Optional[str] = None) -> Dict: pagination_variables = {'first': NodeIterator._graphql_page_length} # type: Dict[str, Any] @@ -125,7 +127,10 @@ class NodeIterator(Iterator[T]): except KeyboardInterrupt: self._page_index, self._total_index = page_index, total_index raise - return self._node_wrapper(node) + item = self._node_wrapper(node) + if self._first_node is None: + self._first_node = node + return item if self._data['page_info']['has_next_page']: query_response = self._query(self._data['page_info']['end_cursor']) page_index, data = self._page_index, self._data @@ -157,6 +162,15 @@ class NodeIterator(Iterator[T]): ).encode()) return base64.urlsafe_b64encode(magic_hash.digest()).decode() + @property + def first_item(self) -> Optional[T]: + """ + If this iterator has produced any items, returns the first item produced. + + .. versionadded:: 4.8 + """ + return self._node_wrapper(self._first_node) if self._first_node is not None else None + def freeze(self) -> FrozenNodeIterator: """Freeze the iterator for later resuming.""" remaining_data = None @@ -171,6 +185,7 @@ class NodeIterator(Iterator[T]): total_index=max(self.total_index - 1, 0), best_before=self._best_before.timestamp() if self._best_before else None, remaining_data=remaining_data, + first_node=self._first_node, ) def thaw(self, frozen: FrozenNodeIterator) -> None: @@ -197,6 +212,8 @@ class NodeIterator(Iterator[T]): self._total_index = frozen.total_index self._best_before = datetime.fromtimestamp(frozen.best_before) self._data = frozen.remaining_data + if frozen.first_node is not None: + self._first_node = frozen.first_node @contextmanager diff --git a/instaloader/structures.py b/instaloader/structures.py index 7100c1f..de21e5c 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -1643,6 +1643,8 @@ def load_structure(context: InstaloaderContext, json_structure: dict) -> JsonExp elif node_type == "Hashtag": return Hashtag(context, json_structure['node']) elif node_type == "FrozenNodeIterator": + if not 'first_node' in json_structure['node']: + json_structure['node']['first_node'] = None return FrozenNodeIterator(**json_structure['node']) elif 'shortcode' in json_structure: # Post JSON created with Instaloader v3