Download pictures with #hashtag
Instaloader is now capable of downloading all pictures associated with one #hashtag with: `instaloader "#hashtag"`. This implements the feature requested in #18.
This commit is contained in:
parent
a7d1c5bbb0
commit
3e1360160d
12
README.rst
12
README.rst
@ -51,6 +51,12 @@ To later update your local copy of that profile, you may run
|
|||||||
When ``--fast-update`` is given, Instaloader terminates when arriving at
|
When ``--fast-update`` is given, Instaloader terminates when arriving at
|
||||||
the first already-downloaded picture.
|
the first already-downloaded picture.
|
||||||
|
|
||||||
|
You may also download the most recent pictures with one hashtag:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
instaloader "#hashtag"
|
||||||
|
|
||||||
Instaloader can also be used to **download private profiles**. To do so,
|
Instaloader can also be used to **download private profiles**. To do so,
|
||||||
invoke it with
|
invoke it with
|
||||||
|
|
||||||
@ -117,6 +123,12 @@ You could also download your last 20 liked pics with
|
|||||||
instaloader.download_feed_pics(session, max_count=20, fast_update=True,
|
instaloader.download_feed_pics(session, max_count=20, fast_update=True,
|
||||||
filter_func=lambda node: not node["likes"]["viewer_has_liked"])
|
filter_func=lambda node: not node["likes"]["viewer_has_liked"])
|
||||||
|
|
||||||
|
To download the last 20 pictures with hashtag #cat, do
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
instaloader.download_hashtag('cat', session=instaloader.get_anonymous_session(), max_count=20)
|
||||||
|
|
||||||
Each Instagram profile has its own unique ID which stays unmodified even
|
Each Instagram profile has its own unique ID which stays unmodified even
|
||||||
if a user changes his/her username. To get said ID, given the profile's
|
if a user changes his/her username. To get said ID, given the profile's
|
||||||
name, you may call
|
name, you may call
|
||||||
|
@ -64,9 +64,10 @@ def _log(*msg, sep='', end='\n', flush=False, quiet=False):
|
|||||||
print(*msg, sep=sep, end=end, flush=flush)
|
print(*msg, sep=sep, end=end, flush=flush)
|
||||||
|
|
||||||
|
|
||||||
def get_json(name: str, session: requests.Session, max_id: int = 0, sleep: bool = True) -> Optional[Dict[str, Any]]:
|
def get_json(name: str, session: requests.Session,
|
||||||
|
max_id: Optional[int] = None, sleep: bool = True) -> Optional[Dict[str, Any]]:
|
||||||
"""Return JSON of a profile"""
|
"""Return JSON of a profile"""
|
||||||
if max_id == 0:
|
if not max_id:
|
||||||
resp = session.get('https://www.instagram.com/'+name)
|
resp = session.get('https://www.instagram.com/'+name)
|
||||||
else:
|
else:
|
||||||
resp = session.get('https://www.instagram.com/'+name, params={'max_id': max_id})
|
resp = session.get('https://www.instagram.com/'+name, params={'max_id': max_id})
|
||||||
@ -462,7 +463,7 @@ def download_node(node: Dict[str, Any], session: requests.Session, name: str,
|
|||||||
:param quiet: Suppress output
|
:param quiet: Suppress output
|
||||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||||
"""
|
"""
|
||||||
if node['__typename'] == 'GraphSidecar':
|
if '__typename' in node and node['__typename'] == 'GraphSidecar':
|
||||||
sidecar_data = session.get('https://www.instagram.com/p/' + node['code'] + '/', params={'__a': 1}).json()
|
sidecar_data = session.get('https://www.instagram.com/p/' + node['code'] + '/', params={'__a': 1}).json()
|
||||||
edge_number = 1
|
edge_number = 1
|
||||||
downloaded = False
|
downloaded = False
|
||||||
@ -538,6 +539,56 @@ def download_feed_pics(session: requests.Session, max_count: int = None, fast_up
|
|||||||
sleep=sleep)
|
sleep=sleep)
|
||||||
|
|
||||||
|
|
||||||
|
def get_hashtag_json(hashtag: str, session: requests.Session,
|
||||||
|
max_id: Optional[int] = None, sleep: bool = True) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Return JSON of a #hashtag"""
|
||||||
|
return get_json(name='explore/tags/{0}/'.format(hashtag), session=session, max_id=max_id, sleep=sleep)
|
||||||
|
|
||||||
|
|
||||||
|
def download_hashtag(hashtag: str, session: requests.Session,
|
||||||
|
max_count: Optional[int] = None,
|
||||||
|
filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
|
||||||
|
fast_update: bool = False, download_videos: bool = True, geotags: bool = False,
|
||||||
|
shorter_output: bool = False, sleep: bool = True, quiet: bool = False) -> None:
|
||||||
|
"""Download pictures of one hashtag.
|
||||||
|
|
||||||
|
To download the last 30 pictures with hashtag #cat, do
|
||||||
|
>>> download_hashtag('cat', session=get_anonymous_session(), max_count=30)
|
||||||
|
|
||||||
|
:param hashtag: Hashtag to download, without leading '#'
|
||||||
|
:param session: Session to use for the requests; an anonymous session (see get_anonymous_session()) is sufficient
|
||||||
|
:param max_count: Maximum count of pictures to download
|
||||||
|
:param filter_func: function(node), which returns True if given picture should not be downloaded
|
||||||
|
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||||
|
:param download_videos: True, if videos should be downloaded
|
||||||
|
:param geotags: Download geotags
|
||||||
|
:param shorter_output: Shorten log output by not printing captions
|
||||||
|
:param sleep: Sleep between requests to instagram server
|
||||||
|
:param quiet: Suppress output
|
||||||
|
"""
|
||||||
|
data = get_hashtag_json(hashtag, session, sleep=sleep)
|
||||||
|
count = 1
|
||||||
|
while data:
|
||||||
|
for node in data['entry_data']['TagPage'][0]['tag']['media']['nodes']:
|
||||||
|
if max_count is not None and count > max_count:
|
||||||
|
return
|
||||||
|
_log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True, quiet=quiet)
|
||||||
|
count += 1
|
||||||
|
if filter_func is not None and filter_func(node):
|
||||||
|
_log('<skipped>', quiet=quiet)
|
||||||
|
continue
|
||||||
|
downloaded = download_node(node, session, '#{0}'.format(hashtag),
|
||||||
|
download_videos=download_videos, geotags=geotags, sleep=sleep,
|
||||||
|
shorter_output=shorter_output, quiet=quiet)
|
||||||
|
if fast_update and not downloaded:
|
||||||
|
return
|
||||||
|
if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
|
||||||
|
data = get_hashtag_json(hashtag, session, sleep=sleep,
|
||||||
|
max_id=data['entry_data']['TagPage'][0]['tag']['media']['page_info']['end_cursor'])
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
def check_id(profile: str, session: requests.Session, json_data: Dict[str, Any], quiet: bool = False) -> str:
|
def check_id(profile: str, session: requests.Session, json_data: Dict[str, Any], quiet: bool = False) -> str:
|
||||||
"""
|
"""
|
||||||
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
||||||
@ -575,9 +626,10 @@ def check_id(profile: str, session: requests.Session, json_data: Dict[str, Any],
|
|||||||
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
||||||
|
|
||||||
|
|
||||||
def download(name: str, session: requests.Session, profile_pic_only: bool = False, download_videos: bool = True,
|
def download(name: str, session: requests.Session,
|
||||||
geotags: bool = False,
|
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
|
||||||
fast_update: bool = False, shorter_output: bool = False, sleep: bool = True, quiet: bool = False) -> None:
|
fast_update: bool = False, shorter_output: bool = False, sleep: bool = True,
|
||||||
|
quiet: bool = False) -> None:
|
||||||
"""Download one profile"""
|
"""Download one profile"""
|
||||||
# pylint:disable=too-many-branches,too-many-locals
|
# pylint:disable=too-many-branches,too-many-locals
|
||||||
# Get profile main page json
|
# Get profile main page json
|
||||||
@ -667,7 +719,12 @@ def download_profiles(profilelist: List[str], username: Optional[str] = None, pa
|
|||||||
try:
|
try:
|
||||||
# Generate set of targets
|
# Generate set of targets
|
||||||
for pentry in profilelist:
|
for pentry in profilelist:
|
||||||
if pentry[0] == '@' and username is not None:
|
if pentry[0] == '#':
|
||||||
|
_log("Retrieving pictures with hashtag {0}".format(pentry), quiet=quiet)
|
||||||
|
download_hashtag(hashtag=pentry[1:], session=session, fast_update=fast_update,
|
||||||
|
download_videos=download_videos, geotags=geotags, shorter_output=shorter_output,
|
||||||
|
sleep=sleep, quiet=quiet)
|
||||||
|
elif pentry[0] == '@' and username is not None:
|
||||||
_log("Retrieving followees of %s..." % pentry[1:], quiet=quiet)
|
_log("Retrieving followees of %s..." % pentry[1:], quiet=quiet)
|
||||||
followees = get_followees(pentry[1:], session)
|
followees = get_followees(pentry[1:], session)
|
||||||
targets.update([followee['username'] for followee in followees])
|
targets.update([followee['username'] for followee in followees])
|
||||||
@ -684,9 +741,7 @@ def download_profiles(profilelist: List[str], username: Optional[str] = None, pa
|
|||||||
shorter_output=shorter_output, sleep=sleep, quiet=quiet)
|
shorter_output=shorter_output, sleep=sleep, quiet=quiet)
|
||||||
else:
|
else:
|
||||||
targets.add(pentry)
|
targets.add(pentry)
|
||||||
if len(targets) == 0:
|
if len(targets) > 1:
|
||||||
_log("No profiles to download given.", quiet=quiet)
|
|
||||||
elif len(targets) > 1:
|
|
||||||
_log("Downloading %i profiles..." % len(targets), quiet=quiet)
|
_log("Downloading %i profiles..." % len(targets), quiet=quiet)
|
||||||
# Iterate through targets list and download them
|
# Iterate through targets list and download them
|
||||||
for target in targets:
|
for target in targets:
|
||||||
@ -716,8 +771,9 @@ def download_profiles(profilelist: List[str], username: Optional[str] = None, pa
|
|||||||
def main():
|
def main():
|
||||||
parser = ArgumentParser(description=__doc__,
|
parser = ArgumentParser(description=__doc__,
|
||||||
epilog="Report issues at https://github.com/Thammus/instaloader/issues.")
|
epilog="Report issues at https://github.com/Thammus/instaloader/issues.")
|
||||||
parser.add_argument('profile', nargs='*',
|
parser.add_argument('profile', nargs='*', metavar='profile|#hashtag',
|
||||||
help='Name of profile to download; If --login is given: @<profile> to download all followees of '
|
help='Name of profile or #hashtag to download. '
|
||||||
|
'Alternatively, if --login is given: @<profile> to download all followees of '
|
||||||
'<profile>; or the special targets :feed-all or :feed-liked to '
|
'<profile>; or the special targets :feed-all or :feed-liked to '
|
||||||
'download pictures from your feed (using '
|
'download pictures from your feed (using '
|
||||||
'--fast-update is recommended).')
|
'--fast-update is recommended).')
|
||||||
|
Loading…
Reference in New Issue
Block a user