From 9d4df0d5fb7fedace73a5229db9ef12eb27961ee Mon Sep 17 00:00:00 2001 From: fireattack Date: Fri, 22 May 2020 11:30:55 -0500 Subject: [PATCH] Only substitute characters from auto generated fields (#631) Only substitute characters from auto-generated fields in _PostPathFormatter This fixes path problems on Windows such as #304, #529, etc. Also made the following changes: 1. Changes the substitute for colon to full-width colon (U+FF1A) instead of MODIFIER LETTER COLON (U+A789), to be consistent with the others (we already use full-width pipe, asterisk, etc.). 2. Also replaces `\r` on Windows with a space, just like `\n`. --- instaloader/instaloader.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 62652e1..1ef41a3 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -103,22 +103,20 @@ class _ArbitraryItemFormatter(string.Formatter): class _PostPathFormatter(_ArbitraryItemFormatter): - def vformat(self, format_string, args, kwargs): - """Override :meth:`string.Formatter.vformat` for character substitution in paths for Windows, see issue #84.""" - ret = super().vformat(format_string, args, kwargs) - if platform.system() == 'Windows': - ret = ret.replace(':', '\ua789').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02') - ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a') - return ret - def get_value(self, key, args, kwargs): - """Replaces '/' with similar looking Division Slash and on windows newline with space""" ret = super().get_value(key, args, kwargs) if not isinstance(ret, str): return ret + return self.sanitize_path(ret) + + @staticmethod + def sanitize_path(ret: str) -> str: + """Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows.""" ret = ret.replace('/', '\u2215') if platform.system() == 'Windows': - ret = 
ret.replace('\n', ' ') + ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02') + ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a') + ret = ret.replace('\n', ' ').replace('\r', ' ') return ret @@ -681,7 +679,8 @@ class Instaloader: name = user_highlight.owner_username highlight_target = (filename_target if filename_target - else Path(name) / Path(user_highlight.title)) # type: Union[str, Path] + else (Path(_PostPathFormatter.sanitize_path(name)) / + _PostPathFormatter.sanitize_path(user_highlight.title))) # type: Union[str, Path] self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name)) self.download_highlight_cover(user_highlight, highlight_target) totalcount = user_highlight.itemcount @@ -942,8 +941,10 @@ class Instaloader: .. versionadded:: 4.1""" self.context.log("Retrieving tagged posts for profile {}.".format(profile.username)) self.posts_download_loop(profile.get_tagged_posts(), - target if target else Path(profile.username) / Path(':tagged'), fast_update, - post_filter) + target if target + else (Path(_PostPathFormatter.sanitize_path(profile.username)) / + _PostPathFormatter.sanitize_path(':tagged')), + fast_update, post_filter) def download_igtv(self, profile: Profile, fast_update: bool = False, post_filter: Optional[Callable[[Post], bool]] = None) -> None: