Saving and reimporting of JSON files
Metadata JSON files are now created by default. They can later be given as a target to redownload Posts, StoryItems or Profiles with new settings.
This commit is contained in:
parent
5d249c5401
commit
f0bebd0d96
@ -67,12 +67,13 @@ automatically **finds it by its unique ID** and renames the folder likewise.
|
||||
Also **download stories** of each profile that is downloaded. Requires
|
||||
:option:`--login`.
|
||||
|
||||
.. option:: --metadata-json
|
||||
.. option:: --no-metadata-json
|
||||
|
||||
Create a JSON file containing the metadata of each post. This does not
|
||||
include comments (see :option:`--comments`) nor geotags (see
|
||||
:option:`--geotags`). The JSON files contain the properties of
|
||||
:class:`instaloader.Post`.
|
||||
Do not create a JSON file containing the metadata of each post.
|
||||
|
||||
.. option:: --no-compress-json
|
||||
|
||||
Do not xz-compress JSON files; create pretty-formatted JSON files instead.
|
||||
|
||||
.. option:: --stories-only
|
||||
|
||||
|
@ -14,4 +14,5 @@ else:
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloader import Instaloader, Tristate
|
||||
from .structures import Post, Profile, Story, StoryItem, shortcode_to_mediaid, mediaid_to_shortcode
|
||||
from .structures import (Post, Profile, Story, StoryItem, load_structure_from_file, mediaid_to_shortcode,
|
||||
save_structure_to_file, shortcode_to_mediaid)
|
||||
|
@ -7,7 +7,7 @@ from argparse import ArgumentParser, SUPPRESS
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException,
|
||||
Tristate, __version__)
|
||||
StoryItem, Tristate, __version__, load_structure_from_file)
|
||||
from .instaloader import get_default_session_filename
|
||||
from .instaloadercontext import default_user_agent
|
||||
|
||||
@ -86,6 +86,22 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
try:
|
||||
# Generate set of profiles, already downloading non-profile targets
|
||||
for target in targetlist:
|
||||
if (target.endswith('.json') or target.endswith('.json.xz')) and os.path.isfile(target):
|
||||
with instaloader.context.error_catcher(target):
|
||||
structure = load_structure_from_file(instaloader.context, target)
|
||||
if isinstance(structure, Post):
|
||||
instaloader.context.log("Downloading {} ({})".format(structure, target))
|
||||
instaloader.download_post(structure, os.path.dirname(target))
|
||||
elif isinstance(structure, StoryItem):
|
||||
instaloader.context.log("Attempting to download {} ({})".format(structure, target))
|
||||
instaloader.download_story(structure, os.path.dirname(target))
|
||||
elif isinstance(structure, Profile):
|
||||
instaloader.context.log("Going to download {} ({})".format(structure.username, target))
|
||||
profiles.add(structure.username)
|
||||
else:
|
||||
raise InvalidArgumentException("{} JSON file not supported as target"
|
||||
.format(structure.__class__.__name__))
|
||||
continue
|
||||
# strip '/' characters to be more shell-autocompletion-friendly
|
||||
target = target.rstrip('/')
|
||||
with instaloader.context.error_catcher(target):
|
||||
@ -181,9 +197,12 @@ def main():
|
||||
'server for each post, which is why it is disabled by default.')
|
||||
g_what.add_argument('--no-captions', action='store_true',
|
||||
help='Do not store media captions, although no additional request is needed to obtain them.')
|
||||
g_what.add_argument('--no-metadata-json', action='store_true',
|
||||
help='Do not create a JSON file containing the metadata of each post.')
|
||||
g_what.add_argument('--metadata-json', action='store_true',
|
||||
help='Create a JSON file containing the metadata of each post. This does not include comments '
|
||||
'nor geotags.')
|
||||
help=SUPPRESS)
|
||||
g_what.add_argument('--no-compress-json', action='store_true',
|
||||
help='Do not xz compress JSON files, rather create pretty formatted JSONs.')
|
||||
g_what.add_argument('-s', '--stories', action='store_true',
|
||||
help='Also download stories of each profile that is downloaded. Requires --login.')
|
||||
g_what.add_argument('--stories-only', action='store_true',
|
||||
@ -264,7 +283,7 @@ def main():
|
||||
download_video_thumbnails = Tristate.always if not args.no_video_thumbnails else Tristate.never
|
||||
download_comments = Tristate.always if args.comments else Tristate.no_extra_query
|
||||
save_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never
|
||||
save_metadata = Tristate.always if args.metadata_json else Tristate.never
|
||||
save_metadata = Tristate.always if not args.no_metadata_json else Tristate.never
|
||||
|
||||
if args.geotags and args.no_geotags:
|
||||
raise SystemExit("--geotags and --no-geotags given. I am confused and refuse to work.")
|
||||
@ -281,7 +300,8 @@ def main():
|
||||
download_videos=download_videos, download_video_thumbnails=download_video_thumbnails,
|
||||
download_geotags=download_geotags,
|
||||
save_captions=save_captions, download_comments=download_comments,
|
||||
save_metadata=save_metadata, max_connection_attempts=args.max_connection_attempts)
|
||||
save_metadata=save_metadata, compress_json=not args.no_compress_json,
|
||||
max_connection_attempts=args.max_connection_attempts)
|
||||
_main(loader,
|
||||
args.profile,
|
||||
username=args.login.lower() if args.login is not None else None,
|
||||
|
@ -16,7 +16,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloadercontext import InstaloaderContext
|
||||
from .structures import Post, Profile, Story, StoryItem
|
||||
from .structures import JsonExportable, Post, Profile, Story, StoryItem, save_structure_to_file
|
||||
|
||||
|
||||
def get_default_session_filename(username: str) -> str:
|
||||
@ -83,7 +83,8 @@ class Instaloader:
|
||||
download_geotags: Tristate = Tristate.no_extra_query,
|
||||
save_captions: Tristate = Tristate.no_extra_query,
|
||||
download_comments: Tristate = Tristate.no_extra_query,
|
||||
save_metadata: Tristate = Tristate.never,
|
||||
save_metadata: Tristate = Tristate.no_extra_query,
|
||||
compress_json: bool = True,
|
||||
max_connection_attempts: int = 3):
|
||||
|
||||
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts)
|
||||
@ -108,6 +109,7 @@ class Instaloader:
|
||||
self.save_captions = save_captions
|
||||
self.download_comments = download_comments
|
||||
self.save_metadata = save_metadata
|
||||
self.compress_json = compress_json
|
||||
|
||||
@contextmanager
|
||||
def anonymous_copy(self):
|
||||
@ -118,7 +120,7 @@ class Instaloader:
|
||||
self.download_video_thumbnails,
|
||||
self.download_geotags,
|
||||
self.save_captions, self.download_comments,
|
||||
self.save_metadata, self.context.max_connection_attempts)
|
||||
self.save_metadata, self.compress_json, self.context.max_connection_attempts)
|
||||
new_loader.context.previous_queries = self.context.previous_queries
|
||||
yield new_loader
|
||||
self.context.error_log.extend(new_loader.context.error_log)
|
||||
@ -158,12 +160,16 @@ class Instaloader:
|
||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||
return True
|
||||
|
||||
def save_metadata_json(self, filename: str, post: Post) -> None:
|
||||
"""Saves metadata JSON file of a :class:`Post`."""
|
||||
filename += '.json'
|
||||
with open(filename, 'w') as fp:
|
||||
json.dump(post, fp=fp, indent=4, default=Post.json_encoder)
|
||||
self.context.log('json', end=' ', flush=True)
|
||||
def save_metadata_json(self, filename: str, structure: JsonExportable) -> None:
    """Write the metadata JSON file of *structure* next to its media files.

    :param filename: Target path without extension; ``.json.xz`` is appended
        when ``self.compress_json`` is set, ``.json`` otherwise.
    :param structure: The structure whose metadata is saved.
    """
    suffix = '.json.xz' if self.compress_json else '.json'
    save_structure_to_file(structure, filename + suffix)
    # Only Post and StoryItem saves emit the 'json ' progress token.
    if isinstance(structure, (Post, StoryItem)):
        self.context.log('json', end=' ', flush=True)
|
||||
|
||||
def update_comments(self, filename: str, post: Post, filename_alt: Optional[str] = None) -> None:
|
||||
try:
|
||||
@ -393,9 +399,8 @@ class Instaloader:
|
||||
if self.download_comments is Tristate.always:
|
||||
self.update_comments(filename=filename, filename_alt=filename_old, post=post)
|
||||
|
||||
# Save metadata as JSON if desired. It might require an extra query, depending on which information has been
|
||||
# already obtained. Regarding Tristate interpretation, we always assume that it requires an extra query.
|
||||
if self.save_metadata is Tristate.always:
|
||||
# Save metadata as JSON if desired.
|
||||
if self.save_metadata is not Tristate.never:
|
||||
self.save_metadata_json(filename, post)
|
||||
|
||||
self.context.log()
|
||||
@ -489,6 +494,9 @@ class Instaloader:
|
||||
filename_alt=filename_old,
|
||||
url=item.video_url,
|
||||
mtime=date_local)
|
||||
# Save metadata as JSON if desired.
|
||||
if self.save_metadata is not Tristate.never:
|
||||
self.save_metadata_json(filename, item)
|
||||
self.context.log()
|
||||
return downloaded
|
||||
|
||||
@ -698,6 +706,12 @@ class Instaloader:
|
||||
|
||||
profile_name = profile.username
|
||||
|
||||
# Save metadata as JSON if desired.
|
||||
if self.save_metadata is not Tristate.never:
|
||||
json_filename = '{0}/{1}_{2}'.format(self.dirname_pattern.format(profile=profile_name, target=profile_name),
|
||||
profile_name, profile.userid)
|
||||
self.save_metadata_json(json_filename, profile)
|
||||
|
||||
if self.context.is_logged_in and profile.has_blocked_viewer and not profile.is_private:
|
||||
# raising ProfileNotExistsException invokes "trying again anonymously" logic
|
||||
raise ProfileNotExistsException("Profile {} has blocked you".format(profile_name))
|
||||
|
@ -1,8 +1,11 @@
|
||||
import json
|
||||
import lzma
|
||||
import re
|
||||
from base64 import b64decode, b64encode
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from . import __version__
|
||||
from .exceptions import *
|
||||
from .instaloadercontext import InstaloaderContext
|
||||
|
||||
@ -65,6 +68,15 @@ class Post:
|
||||
"""Create a post object from a given mediaid"""
|
||||
return cls.from_shortcode(context, mediaid_to_shortcode(mediaid))
|
||||
|
||||
def get_node(self):
    """Return the metadata dict of this post for JSON export.

    The full metadata dict is used when it is already present, otherwise the
    node this object holds in ``_node``. If an owner profile is
    attached, its node is stored under the ``'owner'`` key.

    A shallow copy is returned, so adding ``'owner'`` does not modify the
    dict kept on this object.

    :return: Dict with the post's metadata.
    """
    # Copy first: writing 'owner' into the source dict would silently
    # change this object's internal state as a side effect of exporting.
    node = (self._full_metadata_dict or self._node).copy()
    if self._owner_profile:
        node['owner'] = self.owner_profile.get_node()
    return node
|
||||
|
||||
@property
|
||||
def shortcode(self) -> str:
|
||||
"""Media shortcode. URL of the post is instagram.com/p/<shortcode>/."""
|
||||
@ -283,23 +295,6 @@ class Post:
|
||||
params={'__a': 1})
|
||||
return location_json["location"] if "location" in location_json else location_json['graphql']['location']
|
||||
|
||||
@staticmethod
|
||||
def json_encoder(obj) -> Dict[str, Any]:
|
||||
"""Convert instance of :class:`Post` to a JSON-serializable dictionary."""
|
||||
if not isinstance(obj, Post):
|
||||
raise TypeError("Object of type {} is not a Post object.".format(obj.__class__.__name__))
|
||||
jsondict = {}
|
||||
for prop in dir(Post):
|
||||
if prop[0].isupper() or prop[0] == '_':
|
||||
# skip uppercase and private properties
|
||||
continue
|
||||
val = obj.__getattribute__(prop)
|
||||
if val is True or val is False or isinstance(val, (str, int, float, list)):
|
||||
jsondict[prop] = val
|
||||
elif isinstance(val, datetime):
|
||||
jsondict[prop] = val.isoformat()
|
||||
return jsondict
|
||||
|
||||
|
||||
class Profile:
|
||||
"""
|
||||
@ -342,6 +337,14 @@ class Profile:
|
||||
username = Post.from_mediaid(context, int(data['edges'][0]["node"]["id"])).owner_username
|
||||
return cls(context, {'username': username.lower(), 'id': profile_id})
|
||||
|
||||
def get_node(self):
    """Return a copy of this profile's node with the post listings removed.

    The keys ``edge_media_collections``, ``edge_owner_to_timeline_media`` and
    ``edge_saved_media`` are dropped from the copy if present; the node held
    by this object is left untouched.

    :return: Shallow copy of the profile node without those keys.
    """
    stripped = self._node.copy()
    # remove posts
    for post_key in ('edge_media_collections',
                     'edge_owner_to_timeline_media',
                     'edge_saved_media'):
        stripped.pop(post_key, None)
    return stripped
|
||||
|
||||
def _obtain_metadata(self):
|
||||
try:
|
||||
if not self._rhx_gis:
|
||||
@ -517,6 +520,12 @@ class StoryItem:
|
||||
self._node = node
|
||||
self._owner_profile = owner_profile
|
||||
|
||||
def get_node(self):
    """Return the metadata dict of this story item for JSON export.

    If an owner profile is attached, its node is stored under the ``'owner'``
    key. A shallow copy is returned, so adding ``'owner'`` does not modify
    the node kept on this object.

    :return: Dict with the story item's metadata.
    """
    # Copy first: writing 'owner' into self._node would silently change
    # this object's internal state as a side effect of exporting.
    node = self._node.copy()
    if self._owner_profile:
        node['owner'] = self._owner_profile.get_node()
    return node
|
||||
|
||||
@property
|
||||
def mediaid(self) -> int:
|
||||
"""The mediaid is a decimal representation of the media shortcode."""
|
||||
@ -684,3 +693,44 @@ class Story:
|
||||
def get_items(self) -> Iterator[StoryItem]:
|
||||
"""Retrieve all items from a story."""
|
||||
yield from (StoryItem(self._context, item, self.owner_profile) for item in self._node['items'])
|
||||
|
||||
|
||||
JsonExportable = Union[Post, Profile, StoryItem]
|
||||
|
||||
|
||||
def save_structure_to_file(structure: JsonExportable, filename: str):
    """Serialize *structure* to a JSON file.

    The file holds the structure's node under ``'node'`` and, under
    ``'instaloader'``, the Instaloader version and the structure's class name
    (``'node_type'``).

    :param structure: Object to save; its ``get_node()`` result is written.
    :param filename: Destination path. A name ending in ``.xz`` produces an
        xz-compressed, compact JSON; any other name produces an indented JSON
        with sorted keys.
    """
    payload = {
        'node': structure.get_node(),
        'instaloader': {'version': __version__, 'node_type': structure.__class__.__name__},
    }
    if not filename.endswith('.xz'):
        # Uncompressed output is pretty-printed.
        with open(filename, 'wt') as fp:
            json.dump(payload, fp=fp, indent=4, sort_keys=True)
        return
    # Compressed output uses the most compact separators.
    with lzma.open(filename, 'wt', check=lzma.CHECK_NONE) as fp:
        json.dump(payload, fp=fp, separators=(',', ':'))
|
||||
|
||||
|
||||
def load_structure_from_file(context: InstaloaderContext, filename: str) -> JsonExportable:
    """Load a :class:`Post`, :class:`Profile` or :class:`StoryItem` from a JSON file.

    :param context: Context handed to the constructed structure.
    :param filename: Path of the JSON file; a name ending in ``.xz`` is read
        as an xz-compressed file.
    :return: The structure named by the file's ``instaloader.node_type`` entry,
        or, for a file that only has a top-level ``'shortcode'`` key, a Post
        created from that shortcode.
    :raises InvalidArgumentException: If the file has neither layout or names
        an unknown node type.
    """
    opener = lzma.open if filename.endswith('.xz') else open
    # The with-block closes the file even when json.load() raises.
    with opener(filename, 'rt') as fp:
        json_structure = json.load(fp)
    if 'node' in json_structure and 'instaloader' in json_structure and \
            'node_type' in json_structure['instaloader']:
        node_type = json_structure['instaloader']['node_type']
        if node_type == "Post":
            return Post(context, json_structure['node'])
        elif node_type == "Profile":
            return Profile(context, json_structure['node'])
        elif node_type == "StoryItem":
            return StoryItem(context, json_structure['node'])
        else:
            raise InvalidArgumentException("{}: Not an Instaloader JSON.".format(filename))
    elif 'shortcode' in json_structure:
        # Post JSON created with Instaloader v3
        return Post.from_shortcode(context, json_structure['shortcode'])
    else:
        raise InvalidArgumentException("{}: Not an Instaloader JSON.".format(filename))
|
||||
|
Loading…
Reference in New Issue
Block a user