From 815ba83d7775cc70db2dfed2f0a36676aab56c4b Mon Sep 17 00:00:00 2001 From: JeremyKj87 <94681296+JeremyKj87@users.noreply.github.com> Date: Sat, 27 Nov 2021 15:47:32 +0100 Subject: [PATCH 1/9] avoid comments endless loop - check if query_response is always the same (#1347) --- instaloader/nodeiterator.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/instaloader/nodeiterator.py b/instaloader/nodeiterator.py index 90d3937..df833a8 100644 --- a/instaloader/nodeiterator.py +++ b/instaloader/nodeiterator.py @@ -133,14 +133,15 @@ class NodeIterator(Iterator[T]): return item if self._data['page_info']['has_next_page']: query_response = self._query(self._data['page_info']['end_cursor']) - page_index, data = self._page_index, self._data - try: - self._page_index = 0 - self._data = query_response - except KeyboardInterrupt: - self._page_index, self._data = page_index, data - raise - return self.__next__() + if self._data['edges'] != query_response['edges']: + page_index, data = self._page_index, self._data + try: + self._page_index = 0 + self._data = query_response + except KeyboardInterrupt: + self._page_index, self._data = page_index, data + raise + return self.__next__() raise StopIteration() @property From 06574eb428880e02564ab7ea2c3149871e9e81b5 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 27 Nov 2021 16:21:26 +0100 Subject: [PATCH 2/9] Fix KeyError on attempt to get incomplete location Fixes #1349. --- instaloader/instaloader.py | 9 ++++++--- instaloader/structures.py | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 5e4c2d2..c425093 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -453,9 +453,12 @@ class Instaloader: def save_location(self, filename: str, location: PostLocation, mtime: datetime) -> None: """Save post location name and Google Maps link.""" filename += '_location.txt' - location_string = (location.name + "\n" + - "https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location.lat, - location.lng)) + if location.lat is not None and location.lng is not None: + location_string = (location.name + "\n" + + "https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location.lat, + location.lng)) + else: + location_string = location.name with open(filename, 'wb') as text_file: with BytesIO(location_string.encode()) as bio: shutil.copyfileobj(cast(IO, bio), text_file) diff --git a/instaloader/structures.py b/instaloader/structures.py index f4ac793..59f0b31 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -35,8 +35,8 @@ PostLocation.id.__doc__ = "ID number of location." PostLocation.name.__doc__ = "Location name." PostLocation.slug.__doc__ = "URL friendly variant of location name." PostLocation.has_public_page.__doc__ = "Whether location has a public page." -PostLocation.lat.__doc__ = "Latitude (:class:`float`)." -PostLocation.lng.__doc__ = "Longitude (:class:`float`)." +PostLocation.lat.__doc__ = "Latitude (:class:`float` or None)." +PostLocation.lng.__doc__ = "Longitude (:class:`float` or None)." class Post: @@ -581,7 +581,7 @@ class Post: loc.update(self._context.get_json("explore/locations/{0}/".format(location_id), params={'__a': 1})['native_location_data']['location_info']) self._location = PostLocation(location_id, loc['name'], loc['slug'], loc['has_public_page'], - loc['lat'], loc['lng']) + loc.get('lat'), loc.get('lng')) return self._location @@ -1559,7 +1559,7 @@ class TopSearchResults: place = location.get('place', {}) slug = place.get('slug') loc = place.get('location', {}) - yield PostLocation(int(loc['pk']), loc['name'], slug, None, loc['lat'], loc['lng']) + yield PostLocation(int(loc['pk']), loc['name'], slug, None, loc.get('lat'), loc.get('lng')) def get_hashtag_strings(self) -> Iterator[str]: """ From 6343e9a69d3e5214963fd2a1878595904d643782 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 27 Nov 2021 16:22:52 +0100 Subject: [PATCH 3/9] Release of Version 4.8.2 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index f3b81a8..215381a 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.8.1' +__version__ = '4.8.2' try: From 21365ec6688b55c91803040693c3b67cc8820cb8 Mon Sep 17 00:00:00 2001 From: Misael Date: Fri, 31 Dec 2021 08:50:16 -0700 Subject: [PATCH 4/9] Timezone option for datetime style formatting (#1316) Add timezone awareness for datetime objects returned by `date_local` and `date_utc`. Fixes #1305. --- instaloader/structures.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/instaloader/structures.py b/instaloader/structures.py index 59f0b31..5851a5d 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -3,7 +3,7 @@ import lzma import re from base64 import b64decode, b64encode from collections import namedtuple -from datetime import datetime +from datetime import datetime, timezone, timedelta from pathlib import Path from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -225,16 +225,18 @@ class Post: @property def date_local(self) -> datetime: """Timestamp when the post was created (local time zone).""" - return datetime.fromtimestamp(self._node["date"] - if "date" in self._node - else self._node["taken_at_timestamp"]) + def get_timedelta(timestamp) -> timedelta: + """Timedelta for a given date""" + return datetime.fromtimestamp(timestamp) - datetime.utcfromtimestamp(timestamp) + + timestamp_date = self.get_timestamp_date_created() + tzinfo = timezone(get_timedelta(timestamp_date)) + return datetime.fromtimestamp(timestamp_date, tzinfo) @property def date_utc(self) -> datetime: """Timestamp when the post was created (UTC).""" - return datetime.utcfromtimestamp(self._node["date"] - if "date" in self._node - else self._node["taken_at_timestamp"]) + return datetime.utcfromtimestamp(self.get_timestamp_date_created()) @property def date(self) -> datetime: @@ -275,6 +277,12 @@ class Post: return len(edges) return 1 + def get_timestamp_date_created(self) -> float: + """Timestamp when the post was created""" + return (self._node["date"] + if "date" in self._node + else self._node["taken_at_timestamp"]) + def get_is_videos(self) -> List[bool]: """ Return a list containing the ``is_video`` property for each media in the post. @@ -1079,7 +1087,12 @@ class StoryItem: @property def date_local(self) -> datetime: """Timestamp when the StoryItem was created (local time zone).""" - return datetime.fromtimestamp(self._node['taken_at_timestamp']) + def get_timedelta(timestamp) -> timedelta: + """Timedelta for a given date""" + return datetime.fromtimestamp(timestamp) - datetime.utcfromtimestamp(timestamp) + + tzinfo = timezone(get_timedelta(self._node['taken_at_timestamp'])) + return datetime.fromtimestamp(self._node['taken_at_timestamp'], tzinfo) @property def date_utc(self) -> datetime: From 555c86633c61b106c5c3201e1d5f030a04a4f801 Mon Sep 17 00:00:00 2001 From: Eduardo Kalinowski Date: Fri, 7 Jan 2022 10:31:41 -0300 Subject: [PATCH 5/9] Remove unnecessary conversion to timezone aware timestamp (#1372) Post.date_local now already returns a timestamp with the appropriate timezone. --- instaloader/instaloader.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index c425093..adfb156 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -811,7 +811,7 @@ class Instaloader: last_scraped = latest_stamps.get_last_story_timestamp(name) scraped_timestamp = datetime.now().astimezone() for item in user_story.get_items(): - if latest_stamps is not None and item.date_utc.replace(tzinfo=timezone.utc) <= last_scraped: + if latest_stamps is not None and item.date_local <= last_scraped: break if storyitem_filter is not None and not storyitem_filter(item): self.context.log("<{} skipped>".format(item), flush=True) @@ -1206,7 +1206,7 @@ class Instaloader: posts_takewhile: Optional[Callable[[Post], bool]] = None if latest_stamps is not None: last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username) - posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped + posts_takewhile = lambda p: p.date_local > last_scraped tagged_posts = profile.get_tagged_posts() self.posts_download_loop(tagged_posts, target if target @@ -1214,7 +1214,7 @@ class Instaloader: _PostPathFormatter.sanitize_path(':tagged')), fast_update, post_filter, takewhile=posts_takewhile) if latest_stamps is not None and tagged_posts.first_item is not None: - latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local.astimezone()) + latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local) def download_igtv(self, profile: Profile, fast_update: bool = False, post_filter: Optional[Callable[[Post], bool]] = None, @@ -1229,12 +1229,12 @@ class Instaloader: posts_takewhile: Optional[Callable[[Post], bool]] = None if latest_stamps is not None: last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username) - posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped + posts_takewhile = lambda p: p.date_local > last_scraped igtv_posts = profile.get_igtv_posts() self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter, total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile) if latest_stamps is not None and igtv_posts.first_item is not None: - latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local.astimezone()) + latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local) def _get_id_filename(self, profile_name: str) -> str: if ((format_string_contains_key(self.dirname_pattern, 'profile') or @@ -1427,14 +1427,14 @@ class Instaloader: if latest_stamps is not None: # pylint:disable=cell-var-from-loop last_scraped = latest_stamps.get_last_post_timestamp(profile_name) - posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped + posts_takewhile = lambda p: p.date_local > last_scraped posts_to_download = profile.get_posts() self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter, total_count=profile.mediacount, owner_profile=profile, takewhile=posts_takewhile) if latest_stamps is not None and posts_to_download.first_item is not None: latest_stamps.set_last_post_timestamp(profile_name, - posts_to_download.first_item.date_local.astimezone()) + posts_to_download.first_item.date_local) if stories and profiles: with self.context.error_catcher("Download stories"): From d864ce08ff43a2d86ac6123459bb9fa32b924657 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 7 Jan 2022 14:49:27 +0100 Subject: [PATCH 6/9] Make {Post,StoryItem}.date_utc timezone aware Also simplify {Post,StoryItem}.date_local. Discussed in #1316. --- instaloader/structures.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/instaloader/structures.py b/instaloader/structures.py index 5851a5d..d749370 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -3,7 +3,7 @@ import lzma import re from base64 import b64decode, b64encode from collections import namedtuple -from datetime import datetime, timezone, timedelta +from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -225,18 +225,12 @@ class Post: @property def date_local(self) -> datetime: """Timestamp when the post was created (local time zone).""" - def get_timedelta(timestamp) -> timedelta: - """Timedelta for a given date""" - return datetime.fromtimestamp(timestamp) - datetime.utcfromtimestamp(timestamp) - - timestamp_date = self.get_timestamp_date_created() - tzinfo = timezone(get_timedelta(timestamp_date)) - return datetime.fromtimestamp(timestamp_date, tzinfo) + return datetime.fromtimestamp(self.get_timestamp_date_created()).astimezone() @property def date_utc(self) -> datetime: """Timestamp when the post was created (UTC).""" - return datetime.utcfromtimestamp(self.get_timestamp_date_created()) + return datetime.utcfromtimestamp(self.get_timestamp_date_created()).replace(tzinfo=timezone.utc) @property def date(self) -> datetime: @@ -1087,17 +1081,12 @@ class StoryItem: @property def date_local(self) -> datetime: """Timestamp when the StoryItem was created (local time zone).""" - def get_timedelta(timestamp) -> timedelta: - """Timedelta for a given date""" - return datetime.fromtimestamp(timestamp) - datetime.utcfromtimestamp(timestamp) - - tzinfo = timezone(get_timedelta(self._node['taken_at_timestamp'])) - return datetime.fromtimestamp(self._node['taken_at_timestamp'], tzinfo) + return datetime.fromtimestamp(self._node['taken_at_timestamp']).astimezone() @property def date_utc(self) -> datetime: """Timestamp when the StoryItem was created (UTC).""" - return datetime.utcfromtimestamp(self._node['taken_at_timestamp']) + return datetime.utcfromtimestamp(self._node['taken_at_timestamp']).replace(tzinfo=timezone.utc) @property def date(self) -> datetime: From d6e5e310054aa42d3fd8d881389f04d1b4cdbe72 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 7 Jan 2022 14:56:40 +0100 Subject: [PATCH 7/9] Release of Version 4.8.3 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 215381a..9daf477 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.8.2' +__version__ = '4.8.3' try: From ca78fee307ed45651c598659947d44a281e7f352 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 15 Jan 2022 17:05:41 +0100 Subject: [PATCH 8/9] Revert date_utc and date_local being timezone aware Fixes #1379. Revert "Make {Post,StoryItem}.date_utc timezone aware" d864ce08ff43a2d86ac6123459bb9fa32b924657. Revert "Remove unnecessary conversion to timezone aware timestamp (#1372)" 555c86633c61b106c5c3201e1d5f030a04a4f801. Revert "Timezone option for datetime style formatting (#1316)" 21365ec6688b55c91803040693c3b67cc8820cb8. --- instaloader/instaloader.py | 14 +++++++------- instaloader/structures.py | 20 +++++++++----------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index adfb156..c425093 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -811,7 +811,7 @@ class Instaloader: last_scraped = latest_stamps.get_last_story_timestamp(name) scraped_timestamp = datetime.now().astimezone() for item in user_story.get_items(): - if latest_stamps is not None and item.date_local <= last_scraped: + if latest_stamps is not None and item.date_utc.replace(tzinfo=timezone.utc) <= last_scraped: break if storyitem_filter is not None and not storyitem_filter(item): self.context.log("<{} skipped>".format(item), flush=True) @@ -1206,7 +1206,7 @@ class Instaloader: posts_takewhile: Optional[Callable[[Post], bool]] = None if latest_stamps is not None: last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username) - posts_takewhile = lambda p: p.date_local > last_scraped + posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped tagged_posts = profile.get_tagged_posts() self.posts_download_loop(tagged_posts, target if target @@ -1214,7 +1214,7 @@ class Instaloader: _PostPathFormatter.sanitize_path(':tagged')), fast_update, post_filter, takewhile=posts_takewhile) if latest_stamps is not None and tagged_posts.first_item is not None: - latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local) + latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local.astimezone()) def download_igtv(self, profile: Profile, fast_update: bool = False, post_filter: Optional[Callable[[Post], bool]] = None, @@ -1229,12 +1229,12 @@ class Instaloader: posts_takewhile: Optional[Callable[[Post], bool]] = None if latest_stamps is not None: last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username) - posts_takewhile = lambda p: p.date_local > last_scraped + posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped igtv_posts = profile.get_igtv_posts() self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter, total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile) if latest_stamps is not None and igtv_posts.first_item is not None: - latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local) + latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local.astimezone()) def _get_id_filename(self, profile_name: str) -> str: if ((format_string_contains_key(self.dirname_pattern, 'profile') or @@ -1427,14 +1427,14 @@ class Instaloader: if latest_stamps is not None: # pylint:disable=cell-var-from-loop last_scraped = latest_stamps.get_last_post_timestamp(profile_name) - posts_takewhile = lambda p: p.date_local > last_scraped + posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped posts_to_download = profile.get_posts() self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter, total_count=profile.mediacount, owner_profile=profile, takewhile=posts_takewhile) if latest_stamps is not None and posts_to_download.first_item is not None: latest_stamps.set_last_post_timestamp(profile_name, - posts_to_download.first_item.date_local) + posts_to_download.first_item.date_local.astimezone()) if stories and profiles: with self.context.error_catcher("Download stories"): diff --git a/instaloader/structures.py b/instaloader/structures.py index d749370..59f0b31 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -3,7 +3,7 @@ import lzma import re from base64 import b64decode, b64encode from collections import namedtuple -from datetime import datetime, timezone +from datetime import datetime from pathlib import Path from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -225,12 +225,16 @@ class Post: @property def date_local(self) -> datetime: """Timestamp when the post was created (local time zone).""" - return datetime.fromtimestamp(self.get_timestamp_date_created()).astimezone() + return datetime.fromtimestamp(self._node["date"] + if "date" in self._node + else self._node["taken_at_timestamp"]) @property def date_utc(self) -> datetime: """Timestamp when the post was created (UTC).""" - return datetime.utcfromtimestamp(self.get_timestamp_date_created()).replace(tzinfo=timezone.utc) + return datetime.utcfromtimestamp(self._node["date"] + if "date" in self._node + else self._node["taken_at_timestamp"]) @property def date(self) -> datetime: @@ -271,12 +275,6 @@ class Post: return len(edges) return 1 - def get_timestamp_date_created(self) -> float: - """Timestamp when the post was created""" - return (self._node["date"] - if "date" in self._node - else self._node["taken_at_timestamp"]) - def get_is_videos(self) -> List[bool]: """ Return a list containing the ``is_video`` property for each media in the post. @@ -1081,12 +1079,12 @@ class StoryItem: @property def date_local(self) -> datetime: """Timestamp when the StoryItem was created (local time zone).""" - return datetime.fromtimestamp(self._node['taken_at_timestamp']).astimezone() + return datetime.fromtimestamp(self._node['taken_at_timestamp']) @property def date_utc(self) -> datetime: """Timestamp when the StoryItem was created (UTC).""" - return datetime.utcfromtimestamp(self._node['taken_at_timestamp']).replace(tzinfo=timezone.utc) + return datetime.utcfromtimestamp(self._node['taken_at_timestamp']) @property def date(self) -> datetime: From fdcf2e136767cfefab5abada9143c8882aea9d41 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 15 Jan 2022 17:11:45 +0100 Subject: [PATCH 9/9] Release of Version 4.8.4 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 9daf477..47f493d 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.8.3' +__version__ = '4.8.4' try: