Allow specification of what to write in txt files

This commit is contained in:
Alexander Graf 2018-04-13 22:54:40 +02:00
parent 80f701150c
commit 59152214cf
3 changed files with 62 additions and 37 deletions

View File

@ -54,8 +54,15 @@ automatically **finds it by its unique ID** and renames the folder likewise.
.. option:: --no-captions .. option:: --no-captions
Do not store media captions, although no additional request is needed to Do not create txt files.
obtain them.
.. option:: --post-metadata-txt
Template to write in txt file for each Post.
.. option:: --storyitem-metadata-txt
Template to write in txt file for each StoryItem.
.. option:: --stories .. option:: --stories

View File

@ -194,7 +194,11 @@ def main():
'This requires an additional request to the Instagram ' 'This requires an additional request to the Instagram '
'server for each post, which is why it is disabled by default.') 'server for each post, which is why it is disabled by default.')
g_what.add_argument('--no-captions', action='store_true', g_what.add_argument('--no-captions', action='store_true',
help='Do not store media captions, although no additional request is needed to obtain them.') help='Do not create txt files.')
g_what.add_argument('--post-metadata-txt', action='append',
help='Template to write in txt file for each Post.')
g_what.add_argument('--storyitem-metadata-txt', action='append',
help='Template to write in txt file for each StoryItem.')
g_what.add_argument('--no-metadata-json', action='store_true', g_what.add_argument('--no-metadata-json', action='store_true',
help='Do not create a JSON file containing the metadata of each post.') help='Do not create a JSON file containing the metadata of each post.')
g_what.add_argument('--metadata-json', action='store_true', g_what.add_argument('--metadata-json', action='store_true',
@ -277,13 +281,17 @@ def main():
raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and " raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
"eventually --only-if=viewer_has_liked.") "eventually --only-if=viewer_has_liked.")
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, post_metadata_txt_pattern = '\n'.join(args.post_metadata_txt) if args.post_metadata_txt else None
user_agent=args.user_agent, storyitem_metadata_txt_pattern = '\n'.join(args.storyitem_metadata_txt) if args.storyitem_metadata_txt else None
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent,
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
download_videos=not args.no_videos, download_video_thumbnails=not args.no_video_thumbnails, download_videos=not args.no_videos, download_video_thumbnails=not args.no_video_thumbnails,
download_geotags=args.geotags, download_geotags=args.geotags, save_captions=not args.no_captions,
save_captions=not args.no_captions, download_comments=args.comments, download_comments=args.comments, save_metadata=not args.no_metadata_json,
save_metadata=not args.no_metadata_json, compress_json=not args.no_compress_json, compress_json=not args.no_compress_json,
post_metadata_txt_pattern=post_metadata_txt_pattern,
storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern,
max_connection_attempts=args.max_connection_attempts) max_connection_attempts=args.max_connection_attempts)
_main(loader, _main(loader,
args.profile, args.profile,

View File

@ -11,7 +11,7 @@ from contextlib import contextmanager, suppress
from datetime import datetime from datetime import datetime
from functools import wraps from functools import wraps
from io import BytesIO from io import BytesIO
from typing import Callable, Dict, Iterator, List, Optional, Union from typing import Callable, Dict, Iterator, List, Optional, Any
from .exceptions import * from .exceptions import *
from .instaloadercontext import InstaloaderContext from .instaloadercontext import InstaloaderContext
@ -45,22 +45,14 @@ def _requires_login(func: Callable) -> Callable:
return call return call
class _PostPathFormatter(string.Formatter): class _ArbitraryItemFormatter(string.Formatter):
def __init__(self, post: Union[Post, StoryItem], target: str): def __init__(self, item: Any):
self._post = post self._item = item
self._target = target
def vformat(self, format_string, args, kwargs):
"""Override :meth:`string.Formatter.vformat` for character substitution in paths for Windows, see issue #84."""
ret = super().vformat(format_string, args, kwargs)
return ret.replace(':', '\ua789') if platform.system() == 'Windows' else ret
def get_field(self, field_name, args, kwargs): def get_field(self, field_name, args, kwargs):
"""Override :meth:`string.Formatter.get_field` to substitue {target} and {<POST_ATTRIBUTE>}.""" """Override to substitute {ATTRIBUTE} by attributes of our _item."""
if field_name == "target": if hasattr(self._item, field_name):
return self._target, None return self._item.__getattribute__(field_name), None
if hasattr(Post, field_name) and hasattr(StoryItem, field_name):
return self._post.__getattribute__(field_name), None
return super().get_field(field_name, args, kwargs) return super().get_field(field_name, args, kwargs)
def format_field(self, value, format_spec): def format_field(self, value, format_spec):
@ -71,6 +63,13 @@ class _PostPathFormatter(string.Formatter):
return super().format_field(value, format_spec) return super().format_field(value, format_spec)
class _PostPathFormatter(_ArbitraryItemFormatter):
def vformat(self, format_string, args, kwargs):
"""Override :meth:`string.Formatter.vformat` for character substitution in paths for Windows, see issue #84."""
ret = super().vformat(format_string, args, kwargs)
return ret.replace(':', '\ua789') if platform.system() == 'Windows' else ret
class Instaloader: class Instaloader:
def __init__(self, def __init__(self,
@ -85,6 +84,8 @@ class Instaloader:
download_comments: bool = True, download_comments: bool = True,
save_metadata: bool = True, save_metadata: bool = True,
compress_json: bool = True, compress_json: bool = True,
post_metadata_txt_pattern: str = None,
storyitem_metadata_txt_pattern: str = None,
max_connection_attempts: int = 3): max_connection_attempts: int = 3):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts) self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts)
@ -99,17 +100,17 @@ class Instaloader:
self.download_comments = download_comments self.download_comments = download_comments
self.save_metadata = save_metadata self.save_metadata = save_metadata
self.compress_json = compress_json self.compress_json = compress_json
self.post_metadata_txt_pattern = post_metadata_txt_pattern or '{caption}'
self.storyitem_metadata_txt_pattern = storyitem_metadata_txt_pattern or ''
@contextmanager @contextmanager
def anonymous_copy(self): def anonymous_copy(self):
"""Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""
new_loader = Instaloader(self.context.sleep, self.context.quiet, self.context.user_agent, new_loader = Instaloader(self.context.sleep, self.context.quiet, self.context.user_agent, self.dirname_pattern,
self.dirname_pattern, self.filename_pattern, self.filename_pattern, self.download_videos, self.download_video_thumbnails,
self.download_videos, self.download_geotags, self.save_captions, self.download_comments, self.save_metadata,
self.download_video_thumbnails, self.compress_json, self.post_metadata_txt_pattern,
self.download_geotags, self.storyitem_metadata_txt_pattern, self.context.max_connection_attempts)
self.save_captions, self.download_comments,
self.save_metadata, self.compress_json, self.context.max_connection_attempts)
new_loader.context.previous_queries = self.context.previous_queries new_loader.context.previous_queries = self.context.previous_queries
yield new_loader yield new_loader
self.context.error_log.extend(new_loader.context.error_log) self.context.error_log.extend(new_loader.context.error_log)
@ -177,6 +178,7 @@ class Instaloader:
def save_caption(self, filename: str, mtime: datetime, caption: str) -> None: def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
"""Updates picture caption""" """Updates picture caption"""
filename += '.txt' filename += '.txt'
caption += '\n'
pcaption = caption.replace('\n', ' ').strip() pcaption = caption.replace('\n', ' ').strip()
caption = caption.encode("UTF-8") caption = caption.encode("UTF-8")
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']' pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
@ -290,8 +292,8 @@ class Instaloader:
:return: True if something was downloaded, False otherwise, i.e. file was already there :return: True if something was downloaded, False otherwise, i.e. file was already there
""" """
dirname = _PostPathFormatter(post, target).format(self.dirname_pattern) dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
filename = dirname + '/' + _PostPathFormatter(post, target).format(self.filename_pattern) filename = dirname + '/' + _PostPathFormatter(post).format(self.filename_pattern, target=target)
os.makedirs(os.path.dirname(filename), exist_ok=True) os.makedirs(os.path.dirname(filename), exist_ok=True)
# Download the image(s) / video thumbnail and videos within sidecars if desired # Download the image(s) / video thumbnail and videos within sidecars if desired
@ -318,10 +320,11 @@ class Instaloader:
# Save caption if desired # Save caption if desired
if self.save_captions is not False: if self.save_captions is not False:
if post.caption: metadata_string = _ArbitraryItemFormatter(post).format(self.post_metadata_txt_pattern)
self.save_caption(filename=filename, mtime=post.date_local, caption=post.caption) if metadata_string:
self.save_caption(filename=filename, mtime=post.date_local, caption=metadata_string)
else: else:
self.context.log("<no caption>", end=' ', flush=True) self.context.log("<no txt>", end=' ', flush=True)
# Download video if desired # Download video if desired
if post.is_video and self.download_videos is True: if post.is_video and self.download_videos is True:
@ -405,8 +408,8 @@ class Instaloader:
""" """
date_local = item.date_local date_local = item.date_local
dirname = _PostPathFormatter(item, target).format(self.dirname_pattern) dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
filename = dirname + '/' + _PostPathFormatter(item, target).format(self.filename_pattern) filename = dirname + '/' + _PostPathFormatter(item).format(self.filename_pattern, target=target)
os.makedirs(os.path.dirname(filename), exist_ok=True) os.makedirs(os.path.dirname(filename), exist_ok=True)
downloaded = False downloaded = False
if not item.is_video or self.download_video_thumbnails is True: if not item.is_video or self.download_video_thumbnails is True:
@ -414,6 +417,13 @@ class Instaloader:
downloaded = self.download_pic(filename=filename, url=url, mtime=date_local) downloaded = self.download_pic(filename=filename, url=url, mtime=date_local)
if item.is_video and self.download_videos is True: if item.is_video and self.download_videos is True:
downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local) downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local)
# Save caption if desired
if self.save_captions is not False:
metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern)
if metadata_string:
self.save_caption(filename=filename, mtime=item.date_local, caption=metadata_string)
else:
self.context.log("<no txt>", end=' ', flush=True)
# Save metadata as JSON if desired. # Save metadata as JSON if desired.
if self.save_metadata is not False: if self.save_metadata is not False:
self.save_metadata_json(filename, item) self.save_metadata_json(filename, item)