Save metadata JSON with --metadata-json
With --metadata-json, a JSON file for each post is created saving the Post properties defined in instaloader.Post class, i.e. caption, number of likes, people tagged in caption or the picture itself, etc. This closes #33 and closes #47.
This commit is contained in:
parent
e471bd5ad3
commit
e0ed4cf16c
@ -70,10 +70,13 @@ Instaloader supports the following targets:
|
||||
Instaloader goes through all media matching the specified targets and
|
||||
downloads the pictures and videos and their captions. You can specify
|
||||
|
||||
- :option:`--comments`, to also **download comments** of each post and
|
||||
- :option:`--comments`, to also **download comments** of each post,
|
||||
|
||||
- :option:`--geotags`, to **download geotags** of each post and save them as
|
||||
Google Maps link.
|
||||
Google Maps link,
|
||||
|
||||
- :option:`--metadata-json`, to store further post metadata in a separate JSON
|
||||
file.
|
||||
|
||||
.. _filename-specification:
|
||||
|
||||
|
@ -56,6 +56,13 @@ Instead of a *profile* or a *#hashtag*, the special targets
|
||||
Also **download stories** of each profile that is downloaded. Requires
|
||||
:option:`--login`.
|
||||
|
||||
.. option:: --metadata-json
|
||||
|
||||
Create a JSON file containing the metadata of each post. This does not
|
||||
include comments (see :option:`--comments`) nor geotags (see
|
||||
:option:`--geotags`). The JSON files contain the properties of
|
||||
:class:`instaloader.Post`.
|
||||
|
||||
.. option:: --stories-only
|
||||
|
||||
Rather than downloading regular posts of each specified profile, only
|
||||
|
@ -170,8 +170,8 @@ class Post:
|
||||
metadata, if required. This class unifies access to the properties associated with a post. It implements == and is
|
||||
hashable.
|
||||
|
||||
The properties defined here are accessable by the filter expressions specified with the :option:`--only-if`
|
||||
parameter.
|
||||
The properties defined here are accessible by the filter expressions specified with the :option:`--only-if`
|
||||
parameter and exported into JSON files with :option:`--metadata-json`.
|
||||
"""
|
||||
|
||||
LOGIN_REQUIRING_PROPERTIES = ["viewer_has_liked"]
|
||||
@ -361,6 +361,22 @@ class Post:
|
||||
params={'__a': 1})
|
||||
return location_json["location"]
|
||||
|
||||
@staticmethod
|
||||
def json_encoder(obj) -> Dict[str, Any]:
|
||||
"""Convert instance of :class:`Post` to a JSON-serializable dictionary."""
|
||||
if not isinstance(obj, Post):
|
||||
raise TypeError("Object of type {} is not a Post object.".format(obj.__class__.__name__))
|
||||
jsondict = {}
|
||||
for prop in dir(Post):
|
||||
if prop[0].isupper() or prop[0] == '_':
|
||||
# skip uppercase and private properties
|
||||
continue
|
||||
val = obj.__getattribute__(prop)
|
||||
if val is True or val is False or isinstance(val, (str, int, float, list)):
|
||||
jsondict[prop] = val
|
||||
elif isinstance(val, datetime):
|
||||
jsondict[prop] = val.isoformat()
|
||||
return jsondict
|
||||
|
||||
class Tristate(Enum):
|
||||
"""Tri-state to encode whether we should save certain information, i.e. videos, captions, comments or geotags.
|
||||
@ -387,8 +403,9 @@ class Instaloader:
|
||||
filename_pattern: Optional[str] = None,
|
||||
download_videos: Tristate = Tristate.always,
|
||||
download_geotags: Tristate = Tristate.no_extra_query,
|
||||
download_captions: Tristate = Tristate.no_extra_query,
|
||||
download_comments: Tristate = Tristate.no_extra_query):
|
||||
save_captions: Tristate = Tristate.no_extra_query,
|
||||
download_comments: Tristate = Tristate.no_extra_query,
|
||||
save_metadata: Tristate = Tristate.never):
|
||||
|
||||
# configuration parameters
|
||||
self.user_agent = user_agent if user_agent is not None else default_user_agent()
|
||||
@ -401,16 +418,15 @@ class Instaloader:
|
||||
if filename_pattern is not None else '{date:%Y-%m-%d_%H-%M-%S}'
|
||||
self.download_videos = download_videos
|
||||
self.download_geotags = download_geotags
|
||||
self.download_captions = download_captions
|
||||
self.save_captions = save_captions
|
||||
self.download_comments = download_comments
|
||||
self.previous_queries = dict()
|
||||
self.save_metadata = save_metadata
|
||||
|
||||
# error log, filled with error() and printed at the end of Instaloader.main()
|
||||
self.error_log = []
|
||||
|
||||
# For the adaption of sleep intervals (rate control)
|
||||
self.request_count = 0
|
||||
self.last_request_time = 0
|
||||
self.previous_queries = dict()
|
||||
|
||||
@property
|
||||
def is_logged_in(self) -> bool:
|
||||
@ -423,7 +439,7 @@ class Instaloader:
|
||||
new_loader = Instaloader(self.sleep, self.quiet, self.user_agent,
|
||||
self.dirname_pattern, self.filename_pattern,
|
||||
self.download_videos, self.download_geotags,
|
||||
self.download_captions, self.download_comments)
|
||||
self.save_captions, self.download_comments)
|
||||
new_loader.previous_queries = self.previous_queries
|
||||
yield new_loader
|
||||
self.error_log.extend(new_loader.error_log)
|
||||
@ -688,6 +704,12 @@ class Instaloader:
|
||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||
return True
|
||||
|
||||
def save_metadata_json(self, filename: str, post: Post) -> None:
|
||||
"""Saves metadata JSON file of a :class:`Post`."""
|
||||
filename += '.json'
|
||||
json.dump(post, fp=open(filename, 'w'), indent=4, default=Post.json_encoder)
|
||||
self._log('json', end=' ', flush=True)
|
||||
|
||||
def update_comments(self, filename: str, post: Post) -> None:
|
||||
filename += '_comments.json'
|
||||
try:
|
||||
@ -885,7 +907,7 @@ class Instaloader:
|
||||
downloaded = False
|
||||
|
||||
# Save caption if desired
|
||||
if self.download_captions is not Tristate.never:
|
||||
if self.save_captions is not Tristate.never:
|
||||
if post.caption:
|
||||
self.save_caption(filename, post.date, post.caption)
|
||||
else:
|
||||
@ -905,6 +927,11 @@ class Instaloader:
|
||||
if self.download_comments is Tristate.always:
|
||||
self.update_comments(filename, post)
|
||||
|
||||
# Save metadata as JSON if desired. It might require an extra query, depending on which information has been
|
||||
# already obtained. Regarding Tristate interpretation, we always assume that it requires an extra query.
|
||||
if self.save_metadata is Tristate.always:
|
||||
self.save_metadata_json(filename, post)
|
||||
|
||||
self._log()
|
||||
return downloaded
|
||||
|
||||
@ -989,7 +1016,7 @@ class Instaloader:
|
||||
self._log("Warning: Unable to find story image.")
|
||||
downloaded = False
|
||||
if "caption" in item and item["caption"] is not None and \
|
||||
self.download_captions is not Tristate.never:
|
||||
self.save_captions is not Tristate.never:
|
||||
caption = item["caption"]
|
||||
if isinstance(caption, dict) and "text" in caption:
|
||||
caption = caption["text"]
|
||||
@ -1385,6 +1412,9 @@ def main():
|
||||
'server for each post, which is why it is disabled by default.')
|
||||
g_what.add_argument('--no-captions', action='store_true',
|
||||
help='Do not store media captions, although no additional request is needed to obtain them.')
|
||||
g_what.add_argument('--metadata-json', action='store_true',
|
||||
help='Create a JSON file containing the metadata of each post. This does not include comments '
|
||||
'nor geotags.')
|
||||
g_what.add_argument('-s', '--stories', action='store_true',
|
||||
help='Also download stories of each profile that is downloaded. Requires --login.')
|
||||
g_what.add_argument('--stories-only', action='store_true',
|
||||
@ -1458,7 +1488,8 @@ def main():
|
||||
|
||||
download_videos = Tristate.always if not args.no_videos else Tristate.no_extra_query
|
||||
download_comments = Tristate.always if args.comments else Tristate.no_extra_query
|
||||
download_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never
|
||||
save_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never
|
||||
save_metadata = Tristate.always if args.metadata_json else Tristate.never
|
||||
|
||||
if args.geotags and args.no_geotags:
|
||||
raise SystemExit("--geotags and --no-geotags given. I am confused and refuse to work.")
|
||||
@ -1473,7 +1504,8 @@ def main():
|
||||
user_agent=args.user_agent,
|
||||
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
|
||||
download_videos=download_videos, download_geotags=download_geotags,
|
||||
download_captions=download_captions, download_comments=download_comments)
|
||||
save_captions=save_captions, download_comments=download_comments,
|
||||
save_metadata=save_metadata)
|
||||
loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password,
|
||||
args.sessionfile,
|
||||
int(args.count) if args.count is not None else None,
|
||||
|
Loading…
Reference in New Issue
Block a user