Merge branch 'master' into upcoming/v4.5
commit c817d1901a

docs/_static/instaloader.css (vendored), 4 lines changed
@@ -17,7 +17,9 @@ code {
 .highlight pre {
   padding: 0.7em;
   color: #fff; }
-.highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
+.highlight .c1 {
+  color: #666; }
+.highlight .k, .highlight .kn, .highlight .ow {
   color: #008d06; }
 .highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
   color: #f48400; }

docs/_static/instaloader.scss (vendored), 6 lines changed
@@ -36,7 +36,11 @@ code {
   color: #fff;
 }
 
-.c1, .k, .kn, .ow {
+.c1 {
+  color: #666;
+}
+
+.k, .kn, .ow {
   color: $color_instaloader_main
 }
 

@@ -28,8 +28,9 @@ Download Posts in a Specific Period
 -----------------------------------
 
 To only download Instagram pictures (and metadata) that are within a specific
-period, you can play around with :func:`~itertools.dropwhile` and
-:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
+period, you can simply use :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
+Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.
 
 .. literalinclude:: codesnippets/121_since_until.py
 
@@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`.
 
 Discussed in :issue:`121`.
 
+The code example with :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` makes the assumption that the post iterator returns
+posts in exact chronological order. As discussed in :issue:`666`, the following
+approach fits for an **almost chronological order**, where up to *k* older posts
+are inserted into an otherwise chronological order, such as a Hashtag feed.
+
+.. literalinclude:: codesnippets/666_historical_hashtag_data.py
+
 Likes of a Profile / Ghost Followers
 ------------------------------------
 

@@ -5,13 +5,11 @@ import instaloader
 
 L = instaloader.Instaloader()
 
-posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
-# or
-# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
+posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()
 
 SINCE = datetime(2015, 5, 1)
 UNTIL = datetime(2015, 3, 1)
 
 for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
     print(post.date)
-    L.download_post(post, '#urbanphotography')
+    L.download_post(post, "instagram")
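
Note: judging from the literalinclude directive above, this hunk updates codesnippets/121_since_until.py so that it iterates a profile instead of a hashtag. Assembled into a runnable whole, the updated snippet reads roughly as follows; this is a sketch, and the itertools imports at the top of the file are assumed from the hunk context.

from datetime import datetime
from itertools import dropwhile, takewhile

import instaloader

L = instaloader.Instaloader()

posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()

SINCE = datetime(2015, 5, 1)  # the more recent bound of the period
UNTIL = datetime(2015, 3, 1)  # the older bound of the period

# get_posts() yields posts newest first, so dropwhile() skips everything
# newer than SINCE and takewhile() stops once a post is older than UNTIL.
for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
    print(post.date)
    L.download_post(post, "instagram")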

docs/codesnippets/666_historical_hashtag_data.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+from datetime import datetime
+import instaloader
+
+L = instaloader.Instaloader()
+
+posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
+
+SINCE = datetime(2020, 5, 10)  # further from today, inclusive
+UNTIL = datetime(2020, 5, 11)  # closer to today, not inclusive
+
+k = 0  # initiate k
+k_list = []  # uncomment this to tune k
+
+for post in posts:
+    postdate = post.date
+
+    if postdate > UNTIL:
+        continue
+    elif postdate <= SINCE:
+        k += 1
+        if k == 50:
+            break
+        else:
+            continue
+    else:
+        L.download_post(post, "#urbanphotography")
+        k = 0  # set k to 0
+        # if you want to tune k, uncomment below to get your k max
+        #k_list.append(k)
+        #max(k_list)
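
The snippet above gives up after k == 50 consecutive posts older than SINCE, on the assumption that a hashtag feed is almost chronological with at most that many out-of-order stragglers; the k_list lines are there to measure a suitable cutoff for a given feed. A possible way to package the same logic as a reusable helper is sketched below; posts_in_period is a hypothetical function, not part of this commit.

from datetime import datetime

import instaloader


def posts_in_period(posts, since, until, patience=50):
    """Yield posts with since < post.date <= until from an almost
    chronological post iterator, mirroring 666_historical_hashtag_data.py."""
    k = 0  # consecutive posts already older than `since`
    for post in posts:
        if post.date > until:
            continue      # still newer than the window, keep scanning
        elif post.date <= since:
            k += 1        # an older post inserted out of order
            if k == patience:
                break     # assume the feed has left the window for good
        else:
            k = 0         # back inside the window, reset the counter
            yield post


L = instaloader.Instaloader()
hashtag_posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
for post in posts_in_period(hashtag_posts, datetime(2020, 5, 10), datetime(2020, 5, 11)):
    L.download_post(post, "#urbanphotography")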

@@ -1,7 +1,7 @@
 """Download pictures (or videos) along with their captions and other metadata from Instagram."""
 
 
-__version__ = '4.4.4'
+__version__ = '4.4.5'
 
 
 try:

@@ -69,7 +69,6 @@ class Post:
         self._node = node
         self._owner_profile = owner_profile
         self._full_metadata_dict = None  # type: Optional[Dict[str, Any]]
-        self._rhx_gis_str = None  # type: Optional[str]
         self._location = None  # type: Optional[PostLocation]
         self._iphone_struct_ = None
         if 'iphone_struct' in node:

@@ -142,9 +141,11 @@ class Post:
 
     def _obtain_metadata(self):
         if not self._full_metadata_dict:
-            pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
-            self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
-            self._rhx_gis_str = pic_json.get('rhx_gis')
+            pic_json = self._context.graphql_query(
+                '2b0673e0dc4580674a88d426fe00ea90',
+                {'shortcode': self.shortcode}
+            )
+            self._full_metadata_dict = pic_json['data']['shortcode_media']
             if self._full_metadata_dict is None:
                 # issue #449
                 self._context.error("Fetching Post metadata failed (issue #449). "
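
Post metadata is now fetched through a GraphQL query keyed only by the post's shortcode, instead of scraping the post page JSON; the rhx_gis value that the old page JSON provided is dropped altogether (see the removals below). For calling code the behaviour stays the same, roughly as in this sketch, where SHORTCODE is a placeholder.

import instaloader

L = instaloader.Instaloader()

SHORTCODE = "..."  # placeholder for a real post shortcode

# Creating a Post from a bare shortcode and reading its properties triggers
# _obtain_metadata(), which now issues the GraphQL shortcode query shown in
# the hunk above; the old rhx_gis handling disappears transparently.
post = instaloader.Post.from_shortcode(L.context, SHORTCODE)
print(post.date_utc, post.likes)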

@@ -161,11 +162,6 @@ class Post:
         assert self._full_metadata_dict is not None
         return self._full_metadata_dict
 
-    @property
-    def _rhx_gis(self) -> Optional[str]:
-        self._obtain_metadata()
-        return self._rhx_gis_str
-
     @property
     def _iphone_struct(self) -> Dict[str, Any]:
         if not self._context.is_logged_in:

@@ -392,7 +388,7 @@ class Post:
                                created_at_utc=datetime.utcfromtimestamp(node['created_at']),
                                text=node['text'],
                                owner=Profile(self._context, node['owner']),
-                               likes_count=node['edge_liked_by']['count'])
+                               likes_count=node.get('edge_liked_by', {}).get('count', 0))
 
         def _postcommentanswers(node):
             if 'edge_threaded_comments' not in node:

@@ -418,14 +414,9 @@ class Post:
         if self.comments == 0:
             # Avoid doing additional requests if there are no comments
             return
-        try:
-            comment_edges = self._field('edge_media_to_parent_comment', 'edges')
-            answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
-            threaded_comments_available = True
-        except KeyError:
-            comment_edges = self._field('edge_media_to_comment', 'edges')
-            answers_count = 0
-            threaded_comments_available = False
+        comment_edges = self._field('edge_media_to_comment', 'edges')
+        answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
 
         if self.comments == len(comment_edges) + answers_count:
             # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them

@@ -433,14 +424,10 @@ class Post:
             return
         yield from (_postcomment(node) for node in
                     self._context.graphql_node_list(
-                        "97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
-                        else "f0986789a5c5d17c2400faebf16efd0d",
+                        "97b41c52301f77ce508f55e66d17620e",
                         {'shortcode': self.shortcode},
                         'https://www.instagram.com/p/' + self.shortcode + '/',
-                        lambda d:
-                        d['data']['shortcode_media'][
-                            'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
-                        self._rhx_gis))
+                        lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
 
     def get_likes(self) -> Iterator['Profile']:
         """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
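
get_comments() now always uses the parent-comment GraphQL query; the try/except fallback to the plain comment edge, the threaded_comments_available switch, and the rhx_gis argument are gone. A minimal usage sketch, printing the fields that _postcomment() assembles above (SHORTCODE is again a placeholder):

import instaloader

L = instaloader.Instaloader()

SHORTCODE = "..."  # placeholder for a real post shortcode

post = instaloader.Post.from_shortcode(L.context, SHORTCODE)
for comment in post.get_comments():
    # owner, likes_count and text are the fields built by _postcomment()
    print(comment.owner.username, comment.likes_count, comment.text)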

@@ -455,8 +442,7 @@ class Post:
         yield from (Profile(self._context, node) for node in
                     self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
                                                     'https://www.instagram.com/p/' + self.shortcode + '/',
-                                                    lambda d: d['data']['shortcode_media']['edge_liked_by'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['shortcode_media']['edge_liked_by']))
 
     @property
     def is_sponsored(self) -> bool:

@@ -537,7 +523,6 @@ class Profile:
         self._has_public_story = None  # type: Optional[bool]
         self._node = node
         self._has_full_metadata = False
-        self._rhx_gis = None
         self._iphone_struct_ = None
         if 'iphone_struct' in node:
             # if loaded from JSON with load_structure_from_file()

@@ -599,10 +584,9 @@ class Profile:
     def _obtain_metadata(self):
         try:
             if not self._has_full_metadata:
-                metadata = self._context.get_json('{}/'.format(self.username), params={})
+                metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
                 self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
                 self._has_full_metadata = True
-                self._rhx_gis = metadata.get('rhx_gis')
         except (QueryReturnedNotFoundException, KeyError) as err:
             top_search_results = TopSearchResults(self._context, self.username)
             similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
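
Profile metadata is now requested from the profile's /feed/ page, and the rhx_gis value is no longer remembered. Nothing changes for callers; roughly:

import instaloader

L = instaloader.Instaloader()

# Profile.from_username() triggers _obtain_metadata(), i.e. the
# get_json('{}/feed/') call from the hunk above; an unknown username
# raises ProfileNotExistsException with similar-profile suggestions.
profile = instaloader.Profile.from_username(L.context, "instagram")
print(profile.userid, profile.followers, profile.biography)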

@@ -735,8 +719,7 @@ class Profile:
                                                'include_reel': False, 'include_suggested_users': False,
                                                'include_logged_out_extras': True,
                                                'include_highlight_reels': False},
-                                              'https://www.instagram.com/{}/'.format(self.username),
-                                              self._rhx_gis)
+                                              'https://www.instagram.com/{}/'.format(self.username))
             self._has_public_story = data['data']['user']['has_public_story']
         assert self._has_public_story is not None
         return self._has_public_story

@@ -795,8 +778,7 @@ class Profile:
                         {'id': self.userid},
                         'https://www.instagram.com/{0}/'.format(self.username),
                         lambda d: d['data']['user']['edge_owner_to_timeline_media'],
-                        self._rhx_gis,
-                        self._metadata('edge_owner_to_timeline_media')))
+                        first_data=self._metadata('edge_owner_to_timeline_media')))
 
     def get_saved_posts(self) -> Iterator[Post]:
         """Get Posts that are marked as saved by the user."""

@@ -810,8 +792,7 @@ class Profile:
                         {'id': self.userid},
                         'https://www.instagram.com/{0}/'.format(self.username),
                         lambda d: d['data']['user']['edge_saved_media'],
-                        self._rhx_gis,
-                        self._metadata('edge_saved_media')))
+                        first_data=self._metadata('edge_saved_media')))
 
     def get_tagged_posts(self) -> Iterator[Post]:
         """Retrieve all posts where a profile is tagged.

@@ -822,8 +803,7 @@ class Profile:
         self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
                                         {'id': self.userid},
                                         'https://www.instagram.com/{0}/'.format(self.username),
-                                        lambda d: d['data']['user']['edge_user_to_photos_of_you'],
-                                        self._rhx_gis))
+                                        lambda d: d['data']['user']['edge_user_to_photos_of_you']))
 
     def get_igtv_posts(self) -> Iterator[Post]:
         """Retrieve all IGTV posts.
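
The post-iterator methods (get_posts, get_saved_posts, get_tagged_posts, and get_igtv_posts below) keep their query hashes and merely stop passing rhx_gis; where a first page of data is already contained in the profile metadata it is now handed over via the explicit first_data keyword. Calling code is unaffected, for example in this sketch (the target name is chosen arbitrarily here):

import instaloader

L = instaloader.Instaloader()

profile = instaloader.Profile.from_username(L.context, "instagram")
for post in profile.get_tagged_posts():
    # download every post the profile is tagged in into its own directory
    L.download_post(post, target=profile.username + "_tagged")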

@@ -835,8 +815,7 @@ class Profile:
                         {'id': self.userid},
                         'https://www.instagram.com/{0}/channel/'.format(self.username),
                         lambda d: d['data']['user']['edge_felix_video_timeline'],
-                        self._rhx_gis,
-                        self._metadata('edge_felix_video_timeline')))
+                        first_data=self._metadata('edge_felix_video_timeline')))
 
     def get_followers(self) -> Iterator['Profile']:
         """

@@ -850,8 +829,7 @@ class Profile:
         self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
                                         {'id': str(self.userid)},
                                         'https://www.instagram.com/' + self.username + '/',
-                                        lambda d: d['data']['user']['edge_followed_by'],
-                                        self._rhx_gis))
+                                        lambda d: d['data']['user']['edge_followed_by']))
 
     def get_followees(self) -> Iterator['Profile']:
         """

@@ -865,8 +843,7 @@ class Profile:
         self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
                                         {'id': str(self.userid)},
                                         'https://www.instagram.com/' + self.username + '/',
-                                        lambda d: d['data']['user']['edge_follow'],
-                                        self._rhx_gis))
+                                        lambda d: d['data']['user']['edge_follow']))
 
     def get_similar_accounts(self) -> Iterator['Profile']:
         """
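
get_followers() and get_followees() likewise just lose the rhx_gis argument. Tying this back to the "Likes of a Profile / Ghost Followers" docs section touched above, here is a hedged sketch that lists followers who have not liked any of the profile's posts; the login credentials are placeholders, and a logged-in session is needed to list followers.

import instaloader

L = instaloader.Instaloader()
L.login("your_username", "your_password")  # placeholders; login is required

profile = instaloader.Profile.from_username(L.context, "your_username")

likers = set()
for post in profile.get_posts():
    likers.update(liker.username for liker in post.get_likes())

ghosts = {follower.username for follower in profile.get_followers()} - likers
print("Ghost followers:", ghosts)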

@@ -881,8 +858,8 @@ class Profile:
         yield from (Profile(self._context, edge["node"]) for edge in
                     self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
                                                 {"user_id": str(self.userid), "include_chaining": True},
-                                                "https://www.instagram.com/{0}/".format(self.username),
-                                                self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
+                                                "https://www.instagram.com/{0}/"
+                                                .format(self.username))["data"]["user"]["edge_chaining"]["edges"])
 
 
 class StoryItem: