New formatter for filename pattern

This commit is contained in:
Alexander Graf 2018-04-13 21:19:04 +02:00
parent df1cdb5d48
commit 80f701150c
2 changed files with 75 additions and 117 deletions

View File

@ -11,7 +11,7 @@ from contextlib import contextmanager, suppress
from datetime import datetime from datetime import datetime
from functools import wraps from functools import wraps
from io import BytesIO from io import BytesIO
from typing import Any, Callable, Dict, Iterator, List, Optional from typing import Callable, Dict, Iterator, List, Optional, Union
from .exceptions import * from .exceptions import *
from .instaloadercontext import InstaloaderContext from .instaloadercontext import InstaloaderContext
@ -25,7 +25,7 @@ def get_default_session_filename(username: str) -> str:
return filename.lower() return filename.lower()
def format_string_contains_key(format_string: '_PathPattern', key: str) -> bool: def format_string_contains_key(format_string: str, key: str) -> bool:
# pylint:disable=unused-variable # pylint:disable=unused-variable
for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string): for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string):
if field_name and (field_name == key or field_name.startswith(key + '.')): if field_name and (field_name == key or field_name.startswith(key + '.')):
@ -45,13 +45,31 @@ def _requires_login(func: Callable) -> Callable:
return call return call
class _PathPattern(str): class _PostPathFormatter(string.Formatter):
"""Class overriding :meth:`str.format` for character substitution in paths for Windows, see issue #84.""" def __init__(self, post: Union[Post, StoryItem], target: str):
self._post = post
self._target = target
def format(self, *args: Any, **kwargs: Any) -> str: def vformat(self, format_string, args, kwargs):
ret = super().format(*args, **kwargs) """Override :meth:`string.Formatter.vformat` for character substitution in paths for Windows, see issue #84."""
ret = super().vformat(format_string, args, kwargs)
return ret.replace(':', '\ua789') if platform.system() == 'Windows' else ret return ret.replace(':', '\ua789') if platform.system() == 'Windows' else ret
def get_field(self, field_name, args, kwargs):
"""Override :meth:`string.Formatter.get_field` to substitue {target} and {<POST_ATTRIBUTE>}."""
if field_name == "target":
return self._target, None
if hasattr(Post, field_name) and hasattr(StoryItem, field_name):
return self._post.__getattribute__(field_name), None
return super().get_field(field_name, args, kwargs)
def format_field(self, value, format_spec):
"""Override :meth:`string.Formatter.format_field` to have our
default format_spec for :class:`datetime.Datetime` objects."""
if isinstance(value, datetime) and not format_spec:
return super().format_field(value, '%Y-%m-%d_%H-%M-%S')
return super().format_field(value, format_spec)
class Instaloader: class Instaloader:
@ -72,19 +90,8 @@ class Instaloader:
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts) self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts)
# configuration parameters # configuration parameters
self.dirname_pattern = _PathPattern(dirname_pattern if dirname_pattern is not None else '{target}') self.dirname_pattern = dirname_pattern or "{target}"
if filename_pattern is not None: self.filename_pattern = filename_pattern or "{date_utc}_UTC"
filename_pattern = re.sub(r"({(?:post\.)?date)([:}])", r"\1_utc\2", filename_pattern)
self.filename_pattern_old = filename_pattern.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S}')
self.filename_pattern_old = _PathPattern(re.sub(r"(?i)({(?:post\.)?date_utc:[^}]*?)_UTC",
r"\1", self.filename_pattern_old))
filename_pattern = re.sub(r"(?i)({(date_utc|post\.date_utc):(?![^}]*UTC[^}]*).*?)}",
r"\1_UTC}", filename_pattern)
self.filename_pattern = _PathPattern(filename_pattern.replace('{date_utc}',
'{date_utc:%Y-%m-%d_%H-%M-%S_UTC}'))
else:
self.filename_pattern = _PathPattern('{date_utc:%Y-%m-%d_%H-%M-%S_UTC}')
self.filename_pattern_old = _PathPattern('{date_utc:%Y-%m-%d_%H-%M-%S}')
self.download_videos = download_videos self.download_videos = download_videos
self.download_video_thumbnails = download_video_thumbnails self.download_video_thumbnails = download_video_thumbnails
self.download_geotags = download_geotags self.download_geotags = download_geotags
@ -119,25 +126,17 @@ class Instaloader:
def __exit__(self, *args): def __exit__(self, *args):
self.close() self.close()
def download_pic(self, filename: str, url: str, mtime: datetime, def download_pic(self, filename: str, url: str, mtime: datetime, filename_suffix: Optional[str] = None) -> bool:
filename_alt: Optional[str] = None, filename_suffix: Optional[str] = None) -> bool:
"""Downloads and saves picture with given url under given directory with given timestamp. """Downloads and saves picture with given url under given directory with given timestamp.
Returns true, if file was actually downloaded, i.e. updated.""" Returns true, if file was actually downloaded, i.e. updated."""
urlmatch = re.search('\\.[a-z0-9]*\\?', url) urlmatch = re.search('\\.[a-z0-9]*\\?', url)
file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1] file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
if filename_suffix is not None: if filename_suffix is not None:
filename += '_' + filename_suffix filename += '_' + filename_suffix
if filename_alt is not None:
filename_alt += '_' + filename_suffix
filename += '.' + file_extension filename += '.' + file_extension
if os.path.isfile(filename): if os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True) self.context.log(filename + ' exists', end=' ', flush=True)
return False return False
if filename_alt is not None:
filename_alt += '.' + file_extension
if os.path.isfile(filename_alt):
self.context.log(filename_alt + 'exists', end=' ', flush=True)
return False
self.context.get_and_write_raw(url, filename) self.context.get_and_write_raw(url, filename)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True return True
@ -153,20 +152,15 @@ class Instaloader:
# log 'json ' message when saving Post or StoryItem # log 'json ' message when saving Post or StoryItem
self.context.log('json', end=' ', flush=True) self.context.log('json', end=' ', flush=True)
def update_comments(self, filename: str, post: Post, filename_alt: Optional[str] = None) -> None: def update_comments(self, filename: str, post: Post) -> None:
filename += '_comments.json'
try: try:
filename_current = filename + '_comments.json' comments = json.load(open(filename))
comments = json.load(open(filename_current))
except FileNotFoundError: except FileNotFoundError:
try: comments = list()
filename_current = filename_alt + '_comments.json'
comments = json.load(open(filename_current))
except (FileNotFoundError, TypeError):
filename_current = filename + '_comments.json'
comments = list()
comments.extend(post.get_comments()) comments.extend(post.get_comments())
if comments: if comments:
with open(filename_current, 'w') as file: with open(filename, 'w') as file:
comments_list = sorted(sorted(list(comments), key=lambda t: t['id']), comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
key=lambda t: t['created_at'], reverse=True) key=lambda t: t['created_at'], reverse=True)
unique_comments_list = [comments_list[0]] unique_comments_list = [comments_list[0]]
@ -178,25 +172,17 @@ class Instaloader:
if x['id'] != y['id']: if x['id'] != y['id']:
unique_comments_list.append(y) unique_comments_list.append(y)
file.write(json.dumps(unique_comments_list, indent=4)) file.write(json.dumps(unique_comments_list, indent=4))
os.rename(filename_current, filename + '_comments.json')
self.context.log('comments', end=' ', flush=True) self.context.log('comments', end=' ', flush=True)
def save_caption(self, filename: str, mtime: datetime, caption: str, filename_alt: Optional[str] = None) -> None: def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
"""Updates picture caption""" """Updates picture caption"""
filename += '.txt' filename += '.txt'
if filename_alt is not None:
filename_alt += '.txt'
pcaption = caption.replace('\n', ' ').strip() pcaption = caption.replace('\n', ' ').strip()
caption = caption.encode("UTF-8") caption = caption.encode("UTF-8")
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']' pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
with suppress(FileNotFoundError): with suppress(FileNotFoundError):
try: with open(filename, 'rb') as file:
with open(filename, 'rb') as file: file_caption = file.read()
file_caption = file.read()
except FileNotFoundError:
if filename_alt is not None:
with open(filename_alt, 'rb') as file:
file_caption = file.read()
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'): if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
try: try:
self.context.log(pcaption + ' unchanged', end=' ', flush=True) self.context.log(pcaption + ' unchanged', end=' ', flush=True)
@ -204,22 +190,15 @@ class Instaloader:
self.context.log('txt unchanged', end=' ', flush=True) self.context.log('txt unchanged', end=' ', flush=True)
return None return None
else: else:
def get_filename(file, index): def get_filename(index):
return file if index == 0 else (file[:-4] + '_old_' + return filename if index == 0 else (filename[:-4] + '_old_' +
(str(0) if index < 10 else str()) + str(index) + file[-4:]) (str(0) if index < 10 else str()) + str(index) + filename[-4:])
i = 0 i = 0
file_exists_list = [] while os.path.isfile(get_filename(i)):
while True:
file_exists_list.append(1 if os.path.isfile(get_filename(filename, i)) else 0)
if not file_exists_list[i] and filename_alt is not None:
file_exists_list[i] = 2 if os.path.isfile(get_filename(filename_alt, i)) else 0
if not file_exists_list[i]:
break
i = i + 1 i = i + 1
for index in range(i, 0, -1): for index in range(i, 0, -1):
os.rename(get_filename(filename if file_exists_list[index - 1] % 2 else filename_alt, index - 1), os.rename(get_filename(index - 1), get_filename(index))
get_filename(filename, index))
try: try:
self.context.log(pcaption + ' updated', end=' ', flush=True) self.context.log(pcaption + ' updated', end=' ', flush=True)
except UnicodeEncodeError: except UnicodeEncodeError:
@ -311,21 +290,8 @@ class Instaloader:
:return: True if something was downloaded, False otherwise, i.e. file was already there :return: True if something was downloaded, False otherwise, i.e. file was already there
""" """
# Format dirname and filename. post.owner_username might do an additional request, so only access it, if dirname = _PostPathFormatter(post, target).format(self.dirname_pattern)
# {profile} is part of the dirname pattern or filename pattern. filename = dirname + '/' + _PostPathFormatter(post, target).format(self.filename_pattern)
needs_profilename = (format_string_contains_key(self.dirname_pattern, 'profile') or
format_string_contains_key(self.filename_pattern, 'profile'))
profilename = post.owner_username if needs_profilename else None
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
date_utc=post.date_utc,
shortcode=post.shortcode,
post=post)
filename_old = dirname + '/' + self.filename_pattern_old.replace("{post.date_utc", "{date_utc") \
.format(profile=profilename, target=target.lower(),
date_utc=post.date_local,
shortcode=post.shortcode,
post=post)
os.makedirs(os.path.dirname(filename), exist_ok=True) os.makedirs(os.path.dirname(filename), exist_ok=True)
# Download the image(s) / video thumbnail and videos within sidecars if desired # Download the image(s) / video thumbnail and videos within sidecars if desired
@ -335,41 +301,31 @@ class Instaloader:
for edge in post.get_sidecar_edges(): for edge in post.get_sidecar_edges():
# Download picture or video thumbnail # Download picture or video thumbnail
if not edge['node']['is_video'] or self.download_video_thumbnails is True: if not edge['node']['is_video'] or self.download_video_thumbnails is True:
downloaded |= self.download_pic(filename=filename, downloaded |= self.download_pic(filename=filename, url=edge['node']['display_url'],
filename_alt=filename_old, mtime=post.date_local, filename_suffix=str(edge_number))
url=edge['node']['display_url'],
mtime=post.date_local,
filename_suffix=str(edge_number))
# Additionally download video if available and desired # Additionally download video if available and desired
if edge['node']['is_video'] and self.download_videos is True: if edge['node']['is_video'] and self.download_videos is True:
downloaded |= self.download_pic(filename=filename, downloaded |= self.download_pic(filename=filename, url=edge['node']['video_url'],
filename_alt=filename_old, mtime=post.date_local, filename_suffix=str(edge_number))
url=edge['node']['video_url'],
mtime=post.date_local,
filename_suffix=str(edge_number))
edge_number += 1 edge_number += 1
elif post.typename == 'GraphImage': elif post.typename == 'GraphImage':
downloaded = self.download_pic(filename=filename, filename_alt=filename_old, downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local)
url=post.url, mtime=post.date_local)
elif post.typename == 'GraphVideo': elif post.typename == 'GraphVideo':
if self.download_video_thumbnails is True: if self.download_video_thumbnails is True:
downloaded = self.download_pic(filename=filename, filename_alt=filename_old, downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local)
url=post.url, mtime=post.date_local)
else: else:
self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename)) self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
# Save caption if desired # Save caption if desired
if self.save_captions is not False: if self.save_captions is not False:
if post.caption: if post.caption:
self.save_caption(filename=filename, filename_alt=filename_old, self.save_caption(filename=filename, mtime=post.date_local, caption=post.caption)
mtime=post.date_local, caption=post.caption)
else: else:
self.context.log("<no caption>", end=' ', flush=True) self.context.log("<no caption>", end=' ', flush=True)
# Download video if desired # Download video if desired
if post.is_video and self.download_videos is True: if post.is_video and self.download_videos is True:
downloaded |= self.download_pic(filename=filename, filename_alt=filename_old, downloaded |= self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local)
url=post.video_url, mtime=post.date_local)
# Download geotags if desired # Download geotags if desired
if self.download_geotags is True: if self.download_geotags is True:
@ -379,7 +335,7 @@ class Instaloader:
# Update comments if desired # Update comments if desired
if self.download_comments is True: if self.download_comments is True:
self.update_comments(filename=filename, filename_alt=filename_old, post=post) self.update_comments(filename=filename, post=post)
# Save metadata as JSON if desired. # Save metadata as JSON if desired.
if self.save_metadata is not False: if self.save_metadata is not False:
@ -427,10 +383,6 @@ class Instaloader:
if not userids: if not userids:
self.context.log("Retrieving all visible stories...") self.context.log("Retrieving all visible stories...")
if format_string_contains_key(self.filename_pattern, 'post'):
raise InvalidArgumentException("The \"post\" keyword is not supported in the filename pattern when "
"downloading stories.")
for user_story in self.get_stories(userids): for user_story in self.get_stories(userids):
name = user_story.owner_username name = user_story.owner_username
self.context.log("Retrieving stories from profile {}.".format(name)) self.context.log("Retrieving stories from profile {}.".format(name))
@ -452,30 +404,16 @@ class Instaloader:
:return: True if something was downloaded, False otherwise, i.e. file was already there :return: True if something was downloaded, False otherwise, i.e. file was already there
""" """
owner_name = item.owner_username
shortcode = item.shortcode
date_local = item.date_local date_local = item.date_local
date_utc = item.date_utc dirname = _PostPathFormatter(item, target).format(self.dirname_pattern)
dirname = self.dirname_pattern.format(profile=owner_name, target=target) filename = dirname + '/' + _PostPathFormatter(item, target).format(self.filename_pattern)
filename = dirname + '/' + self.filename_pattern.format(profile=owner_name, target=target,
date_utc=date_utc,
shortcode=shortcode)
filename_old = dirname + '/' + self.filename_pattern_old.format(profile=owner_name, target=target,
date_utc=date_local,
shortcode=shortcode)
os.makedirs(os.path.dirname(filename), exist_ok=True) os.makedirs(os.path.dirname(filename), exist_ok=True)
downloaded = False downloaded = False
if not item.is_video or self.download_video_thumbnails is True: if not item.is_video or self.download_video_thumbnails is True:
url = item.url url = item.url
downloaded = self.download_pic(filename=filename, downloaded = self.download_pic(filename=filename, url=url, mtime=date_local)
filename_alt=filename_old,
url=url,
mtime=date_local)
if item.is_video and self.download_videos is True: if item.is_video and self.download_videos is True:
downloaded |= self.download_pic(filename=filename, downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local)
filename_alt=filename_old,
url=item.video_url,
mtime=date_local)
# Save metadata as JSON if desired. # Save metadata as JSON if desired.
if self.save_metadata is not False: if self.save_metadata is not False:
self.save_metadata_json(filename, item) self.save_metadata_json(filename, item)

View File

@ -165,6 +165,16 @@ class Post:
"""Timestamp when the post was created (UTC).""" """Timestamp when the post was created (UTC)."""
return datetime.utcfromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"]) return datetime.utcfromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"])
@property
def date(self) -> datetime:
"""Synonym to :meth:`.date_utc`"""
return self.date_utc
@property
def profile(self) -> str:
"""Synonym to :meth:`.owner_username`"""
return self.owner_username
@property @property
def url(self) -> str: def url(self) -> str:
"""URL of the picture / video thumbnail of the post""" """URL of the picture / video thumbnail of the post"""
@ -573,6 +583,16 @@ class StoryItem:
"""Timestamp when the StoryItem was created (UTC).""" """Timestamp when the StoryItem was created (UTC)."""
return datetime.utcfromtimestamp(self._node['taken_at_timestamp']) return datetime.utcfromtimestamp(self._node['taken_at_timestamp'])
@property
def date(self) -> datetime:
"""Synonym to :meth:`.date_utc`"""
return self.date_utc
@property
def profile(self) -> str:
"""Synonym to :meth:`.owner_username`"""
return self.owner_username
@property @property
def expiring_local(self) -> datetime: def expiring_local(self) -> datetime:
"""Timestamp when the StoryItem will get unavailable (local time zone).""" """Timestamp when the StoryItem will get unavailable (local time zone)."""