From 3dcfcff04ccccb6db7a958db2c2d77fcd1fcb95e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 28 Apr 2018 17:08:16 +0200 Subject: [PATCH] namedtuples PostLocation and PostSidecarNode Post.get_sidecar_edges() has been renamed to Post.get_sidecar_nodes(). Post.get_location() has been turned into a property, Post.location; the Post now stores the location struct internally, so that it can be saved to and loaded from JSON. --- instaloader/instaloader.py | 28 +++++++++++++--------------- instaloader/structures.py | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index a3c9de9..2efc299 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -11,11 +11,11 @@ from contextlib import contextmanager, suppress from datetime import datetime from functools import wraps from io import BytesIO -from typing import Callable, Dict, Iterator, List, Optional, Any +from typing import Callable, Iterator, List, Optional, Any from .exceptions import * from .instaloadercontext import InstaloaderContext -from .structures import JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file def get_default_session_filename(username: str) -> str: @@ -244,12 +244,12 @@ class Instaloader: shutil.copyfileobj(BytesIO(caption), text_file) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) - def save_location(self, filename: str, location_json: Dict[str, str], mtime: datetime) -> None: + def save_location(self, filename: str, location: PostLocation, mtime: datetime) -> None: """Save post location name and Google Maps link.""" filename += '_location.txt' - location_string = (location_json["name"] + "\n" + - "https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location_json["lat"], - location_json["lng"])) + location_string = (location.name +
"\n" + + "https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location.lat, + location.lng)) with open(filename, 'wb') as text_file: shutil.copyfileobj(BytesIO(location_string.encode()), text_file) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) @@ -335,14 +335,14 @@ class Instaloader: downloaded = False if post.typename == 'GraphSidecar': edge_number = 1 - for edge in post.get_sidecar_edges(): + for sidecar_node in post.get_sidecar_nodes(): # Download picture or video thumbnail - if not edge['node']['is_video'] or self.download_video_thumbnails is True: - downloaded |= self.download_pic(filename=filename, url=edge['node']['display_url'], + if not sidecar_node.is_video or self.download_video_thumbnails is True: + downloaded |= self.download_pic(filename=filename, url=sidecar_node.display_url, mtime=post.date_local, filename_suffix=str(edge_number)) # Additionally download video if available and desired - if edge['node']['is_video'] and self.download_videos is True: - downloaded |= self.download_pic(filename=filename, url=edge['node']['video_url'], + if sidecar_node.is_video and self.download_videos is True: + downloaded |= self.download_pic(filename=filename, url=sidecar_node.video_url, mtime=post.date_local, filename_suffix=str(edge_number)) edge_number += 1 elif post.typename == 'GraphImage': @@ -366,10 +366,8 @@ class Instaloader: downloaded |= self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local) # Download geotags if desired - if self.download_geotags is True: - location = post.get_location() - if location: - self.save_location(filename, location, post.date_local) + if self.download_geotags and post.location: + self.save_location(filename, post.location, post.date_local) # Update comments if desired if self.download_comments is True: diff --git a/instaloader/structures.py b/instaloader/structures.py index 9fe8ed9..78cffd4 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -2,6 +2,7 @@ 
import json import lzma import re from base64 import b64decode, b64encode +from collections import namedtuple from datetime import datetime from typing import Any, Dict, Iterator, List, Optional, Union @@ -23,6 +24,10 @@ def mediaid_to_shortcode(mediaid: int) -> str: return b64encode(mediaid.to_bytes(9, 'big'), b'-_').decode().replace('A', ' ').lstrip().replace(' ', 'A') +PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url']) +PostLocation = namedtuple('PostLocation', ['id', 'name', 'slug', 'has_public_page', 'lat', 'lng']) + + class Post: """ Structure containing information about an Instagram post. @@ -55,6 +60,7 @@ class Post: self._owner_profile = owner_profile self._full_metadata_dict = None self._rhx_gis_str = None + self._location = None @classmethod def from_shortcode(cls, context: InstaloaderContext, shortcode: str): @@ -76,6 +82,8 @@ class Post: node = self._node if self._owner_profile: node['owner'] = self.owner_profile.get_node() + if self._location: + node['location'] = self._location._asdict() return node @property @@ -189,8 +197,14 @@ class Post: # if __typename is not in node, it is an old image or video return 'GraphImage' - def get_sidecar_edges(self) -> List[Dict[str, Any]]: - return self._field('edge_sidecar_to_children', 'edges') + def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]: + """Sidecar nodes of a Post with typename==GraphSidecar.""" + if self.typename == 'GraphSidecar': + for edge in self._field('edge_sidecar_to_children', 'edges'): + node = edge['node'] + is_video = node['is_video'] + yield PostSidecarNode(is_video=is_video, display_url=node['display_url'], + video_url=node['video_url'] if is_video else None) @property def caption(self) -> Optional[str]: @@ -298,13 +312,19 @@ class Post: lambda d: d['data']['shortcode_media']['edge_liked_by'], self._rhx_gis) - def get_location(self) -> Optional[Dict[str, str]]: - """If the Post has a location, returns a dictionary with fields 'lat' and 'lng' 
and 'name'.""" - loc_dict = self._field("location") - if loc_dict is not None: - location_json = self._context.get_json("explore/locations/{0}/".format(loc_dict["id"]), - params={'__a': 1}) - return location_json["location"] if "location" in location_json else location_json['graphql']['location'] + @property + def location(self) -> Optional[PostLocation]: + """If the Post has a location, returns PostLocation namedtuple with fields 'id', 'lat' and 'lng' and 'name'.""" + loc = self._field("location") + if self._location or not loc: + return self._location + location_id = int(loc['id']) + if any(k not in loc for k in ('name', 'slug', 'has_public_page', 'lat', 'lng')): + loc = self._context.get_json("explore/locations/{0}/".format(location_id), + params={'__a': 1})['graphql']['location'] + self._location = PostLocation(location_id, loc['name'], loc['slug'], loc['has_public_page'], + loc['lat'], loc['lng']) + return self._location class Profile: