Merge branch 'bug_69_fix'
This commit is contained in:
commit
882d460a67
116
instaloader.py
116
instaloader.py
@ -267,7 +267,7 @@ class Post:
|
|||||||
return int(self._field('owner', 'id'))
|
return int(self._field('owner', 'id'))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def date(self) -> datetime:
|
def date_local(self) -> datetime:
|
||||||
"""Timestamp when the post was created (local time zone)."""
|
"""Timestamp when the post was created (local time zone)."""
|
||||||
return datetime.fromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"])
|
return datetime.fromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"])
|
||||||
|
|
||||||
@ -436,11 +436,16 @@ class Instaloader:
|
|||||||
self.quiet = quiet
|
self.quiet = quiet
|
||||||
self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}'
|
self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}'
|
||||||
if filename_pattern is not None:
|
if filename_pattern is not None:
|
||||||
self.filename_pattern = filename_pattern \
|
filename_pattern = re.sub(r"(\{(?:post\.)?date)([:}])", r"\1_utc\2", filename_pattern)
|
||||||
.replace('{date}', '{date:%Y-%m-%d_%H-%M-%S}') \
|
self.filename_pattern_old = filename_pattern.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S}')
|
||||||
.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S_UTC}')
|
self.filename_pattern_old = re.sub(r"(?i)(\{(?:post\.)?date_utc:[^}]*?)_UTC",
|
||||||
|
r"\1", self.filename_pattern_old)
|
||||||
|
filename_pattern = re.sub(r"(?i)(\{(date_utc|post\.date_utc):(?![^}]*UTC[^}]*).*?)}",
|
||||||
|
r"\1_UTC}", filename_pattern)
|
||||||
|
self.filename_pattern = filename_pattern.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S_UTC}')
|
||||||
else:
|
else:
|
||||||
self.filename_pattern = '{date:%Y-%m-%d_%H-%M-%S}'
|
self.filename_pattern = '{date_utc:%Y-%m-%d_%H-%M-%S_UTC}'
|
||||||
|
self.filename_pattern_old = '{date_utc:%Y-%m-%d_%H-%M-%S}'
|
||||||
self.download_videos = download_videos
|
self.download_videos = download_videos
|
||||||
self.download_video_thumbnails = download_video_thumbnails
|
self.download_video_thumbnails = download_video_thumbnails
|
||||||
self.download_geotags = download_geotags
|
self.download_geotags = download_geotags
|
||||||
@ -725,17 +730,24 @@ class Instaloader:
|
|||||||
lambda d: d['data']['user']['edge_follow'])
|
lambda d: d['data']['user']['edge_follow'])
|
||||||
|
|
||||||
def download_pic(self, filename: str, url: str, mtime: datetime,
|
def download_pic(self, filename: str, url: str, mtime: datetime,
|
||||||
filename_suffix: Optional[str] = None) -> bool:
|
filename_alt: Optional[str] = None, filename_suffix: Optional[str] = None) -> bool:
|
||||||
"""Downloads and saves picture with given url under given directory with given timestamp.
|
"""Downloads and saves picture with given url under given directory with given timestamp.
|
||||||
Returns true, if file was actually downloaded, i.e. updated."""
|
Returns true, if file was actually downloaded, i.e. updated."""
|
||||||
urlmatch = re.search('\\.[a-z0-9]*\\?', url)
|
urlmatch = re.search('\\.[a-z0-9]*\\?', url)
|
||||||
file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
|
file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
|
||||||
if filename_suffix is not None:
|
if filename_suffix is not None:
|
||||||
filename += '_' + filename_suffix
|
filename += '_' + filename_suffix
|
||||||
|
if filename_alt is not None:
|
||||||
|
filename_alt += '_' + filename_suffix
|
||||||
filename += '.' + file_extension
|
filename += '.' + file_extension
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
self._log(filename + ' exists', end=' ', flush=True)
|
self._log(filename + ' exists', end=' ', flush=True)
|
||||||
return False
|
return False
|
||||||
|
if filename_alt is not None:
|
||||||
|
filename_alt += '.' + file_extension
|
||||||
|
if os.path.isfile(filename_alt):
|
||||||
|
self._log(filename_alt + 'exists', end=' ', flush=True)
|
||||||
|
return False
|
||||||
self._get_and_write_raw(url, filename)
|
self._get_and_write_raw(url, filename)
|
||||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||||
return True
|
return True
|
||||||
@ -746,15 +758,20 @@ class Instaloader:
|
|||||||
json.dump(post, fp=open(filename, 'w'), indent=4, default=Post.json_encoder)
|
json.dump(post, fp=open(filename, 'w'), indent=4, default=Post.json_encoder)
|
||||||
self._log('json', end=' ', flush=True)
|
self._log('json', end=' ', flush=True)
|
||||||
|
|
||||||
def update_comments(self, filename: str, post: Post) -> None:
|
def update_comments(self, filename: str, post: Post, filename_alt: Optional[str] = None) -> None:
|
||||||
filename += '_comments.json'
|
|
||||||
try:
|
try:
|
||||||
comments = json.load(open(filename))
|
filename_current = filename + '_comments.json'
|
||||||
|
comments = json.load(open(filename_current))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
comments = list()
|
try:
|
||||||
|
filename_current = filename_alt + '_comments.json'
|
||||||
|
comments = json.load(open(filename_current))
|
||||||
|
except (FileNotFoundError, TypeError):
|
||||||
|
filename_current = filename + '_comments.json'
|
||||||
|
comments = list()
|
||||||
comments.extend(post.get_comments())
|
comments.extend(post.get_comments())
|
||||||
if comments:
|
if comments:
|
||||||
with open(filename, 'w') as file:
|
with open(filename_current, 'w') as file:
|
||||||
comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
|
comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
|
||||||
key=lambda t: t['created_at'], reverse=True)
|
key=lambda t: t['created_at'], reverse=True)
|
||||||
unique_comments_list = [comments_list[0]]
|
unique_comments_list = [comments_list[0]]
|
||||||
@ -767,17 +784,25 @@ class Instaloader:
|
|||||||
if x['id'] != y['id']:
|
if x['id'] != y['id']:
|
||||||
unique_comments_list.append(y)
|
unique_comments_list.append(y)
|
||||||
file.write(json.dumps(unique_comments_list, indent=4))
|
file.write(json.dumps(unique_comments_list, indent=4))
|
||||||
|
os.rename(filename_current, filename + '_comments.json')
|
||||||
self._log('comments', end=' ', flush=True)
|
self._log('comments', end=' ', flush=True)
|
||||||
|
|
||||||
def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
|
def save_caption(self, filename: str, mtime: datetime, caption: str, filename_alt: Optional[str] = None) -> None:
|
||||||
"""Updates picture caption"""
|
"""Updates picture caption"""
|
||||||
filename += '.txt'
|
filename += '.txt'
|
||||||
|
if filename_alt is not None:
|
||||||
|
filename_alt += '.txt'
|
||||||
pcaption = caption.replace('\n', ' ').strip()
|
pcaption = caption.replace('\n', ' ').strip()
|
||||||
caption = caption.encode("UTF-8")
|
caption = caption.encode("UTF-8")
|
||||||
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
|
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
|
||||||
with suppress(FileNotFoundError):
|
with suppress(FileNotFoundError):
|
||||||
with open(filename, 'rb') as file:
|
try:
|
||||||
file_caption = file.read()
|
with open(filename, 'rb') as file:
|
||||||
|
file_caption = file.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
if filename_alt is not None:
|
||||||
|
with open(filename_alt, 'rb') as file:
|
||||||
|
file_caption = file.read()
|
||||||
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
|
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
|
||||||
try:
|
try:
|
||||||
self._log(pcaption + ' unchanged', end=' ', flush=True)
|
self._log(pcaption + ' unchanged', end=' ', flush=True)
|
||||||
@ -785,15 +810,22 @@ class Instaloader:
|
|||||||
self._log('txt unchanged', end=' ', flush=True)
|
self._log('txt unchanged', end=' ', flush=True)
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
def get_filename(index):
|
def get_filename(file, index):
|
||||||
return filename if index == 0 else (filename[:-4] + '_old_' +
|
return file if index == 0 else (file[:-4] + '_old_' +
|
||||||
(str(0) if index < 10 else str()) + str(index) + filename[-4:])
|
(str(0) if index < 10 else str()) + str(index) + file[-4:])
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
while os.path.isfile(get_filename(i)):
|
file_exists_list = []
|
||||||
|
while True:
|
||||||
|
file_exists_list.append(1 if os.path.isfile(get_filename(filename, i)) else 0)
|
||||||
|
if not file_exists_list[i] and filename_alt is not None:
|
||||||
|
file_exists_list[i] = 2 if os.path.isfile(get_filename(filename_alt, i)) else 0
|
||||||
|
if not file_exists_list[i]:
|
||||||
|
break
|
||||||
i = i + 1
|
i = i + 1
|
||||||
for index in range(i, 0, -1):
|
for index in range(i, 0, -1):
|
||||||
os.rename(get_filename(index - 1), get_filename(index))
|
os.rename(get_filename(filename if file_exists_list[index - 1] % 2 else filename_alt, index - 1),
|
||||||
|
get_filename(filename, index))
|
||||||
try:
|
try:
|
||||||
self._log(pcaption + ' updated', end=' ', flush=True)
|
self._log(pcaption + ' updated', end=' ', flush=True)
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
@ -916,9 +948,14 @@ class Instaloader:
|
|||||||
profilename = post.owner_username if needs_profilename else None
|
profilename = post.owner_username if needs_profilename else None
|
||||||
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
|
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
|
||||||
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
|
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
|
||||||
date=post.date, date_utc=post.date_utc,
|
date_utc=post.date_utc,
|
||||||
shortcode=post.shortcode,
|
shortcode=post.shortcode,
|
||||||
post=post)
|
post=post)
|
||||||
|
filename_old = dirname + '/' + self.filename_pattern_old.replace("{post.date_utc", "{date_utc") \
|
||||||
|
.format(profile=profilename, target=target.lower(),
|
||||||
|
date_utc=post.date_local,
|
||||||
|
shortcode=post.shortcode,
|
||||||
|
post=post)
|
||||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||||
|
|
||||||
# Download the image(s) / video thumbnail and videos within sidecars if desired
|
# Download the image(s) / video thumbnail and videos within sidecars if desired
|
||||||
@ -929,44 +966,50 @@ class Instaloader:
|
|||||||
# Download picture or video thumbnail
|
# Download picture or video thumbnail
|
||||||
if not edge['node']['is_video'] or self.download_video_thumbnails is Tristate.always:
|
if not edge['node']['is_video'] or self.download_video_thumbnails is Tristate.always:
|
||||||
downloaded |= self.download_pic(filename=filename,
|
downloaded |= self.download_pic(filename=filename,
|
||||||
|
filename_alt=filename_old,
|
||||||
url=edge['node']['display_url'],
|
url=edge['node']['display_url'],
|
||||||
mtime=post.date,
|
mtime=post.date_local,
|
||||||
filename_suffix=str(edge_number))
|
filename_suffix=str(edge_number))
|
||||||
# Additionally download video if available and desired
|
# Additionally download video if available and desired
|
||||||
if edge['node']['is_video'] and self.download_videos is Tristate.always:
|
if edge['node']['is_video'] and self.download_videos is Tristate.always:
|
||||||
downloaded |= self.download_pic(filename=filename,
|
downloaded |= self.download_pic(filename=filename,
|
||||||
|
filename_alt=filename_old,
|
||||||
url=edge['node']['video_url'],
|
url=edge['node']['video_url'],
|
||||||
mtime=post.date,
|
mtime=post.date_local,
|
||||||
filename_suffix=str(edge_number))
|
filename_suffix=str(edge_number))
|
||||||
edge_number += 1
|
edge_number += 1
|
||||||
elif post.typename == 'GraphImage':
|
elif post.typename == 'GraphImage':
|
||||||
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date)
|
downloaded = self.download_pic(filename=filename, filename_alt=filename_old,
|
||||||
|
url=post.url, mtime=post.date_local)
|
||||||
elif post.typename == 'GraphVideo':
|
elif post.typename == 'GraphVideo':
|
||||||
if self.download_video_thumbnails is Tristate.always:
|
if self.download_video_thumbnails is Tristate.always:
|
||||||
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date)
|
downloaded = self.download_pic(filename=filename, filename_alt=filename_old,
|
||||||
|
url=post.url, mtime=post.date_local)
|
||||||
else:
|
else:
|
||||||
self.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
|
self.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
|
||||||
|
|
||||||
# Save caption if desired
|
# Save caption if desired
|
||||||
if self.save_captions is not Tristate.never:
|
if self.save_captions is not Tristate.never:
|
||||||
if post.caption:
|
if post.caption:
|
||||||
self.save_caption(filename, post.date, post.caption)
|
self.save_caption(filename=filename, filename_alt=filename_old,
|
||||||
|
mtime=post.date_local, caption=post.caption)
|
||||||
else:
|
else:
|
||||||
self._log("<no caption>", end=' ', flush=True)
|
self._log("<no caption>", end=' ', flush=True)
|
||||||
|
|
||||||
# Download video if desired
|
# Download video if desired
|
||||||
if post.is_video and self.download_videos is Tristate.always:
|
if post.is_video and self.download_videos is Tristate.always:
|
||||||
downloaded |= self.download_pic(filename=filename, url=post.video_url, mtime=post.date)
|
downloaded |= self.download_pic(filename=filename, filename_alt=filename_old,
|
||||||
|
url=post.video_url, mtime=post.date_local)
|
||||||
|
|
||||||
# Download geotags if desired
|
# Download geotags if desired
|
||||||
if self.download_geotags is Tristate.always:
|
if self.download_geotags is Tristate.always:
|
||||||
location = post.get_location()
|
location = post.get_location()
|
||||||
if location:
|
if location:
|
||||||
self.save_location(filename, location, post.date)
|
self.save_location(filename, location, post.date_local)
|
||||||
|
|
||||||
# Update comments if desired
|
# Update comments if desired
|
||||||
if self.download_comments is Tristate.always:
|
if self.download_comments is Tristate.always:
|
||||||
self.update_comments(filename, post)
|
self.update_comments(filename=filename, filename_alt=filename_old, post=post)
|
||||||
|
|
||||||
# Save metadata as JSON if desired. It might require an extra query, depending on which information has been
|
# Save metadata as JSON if desired. It might require an extra query, depending on which information has been
|
||||||
# already obtained. Regarding Tristate interpretation, we always assume that it requires an extra query.
|
# already obtained. Regarding Tristate interpretation, we always assume that it requires an extra query.
|
||||||
@ -1057,20 +1100,24 @@ class Instaloader:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
shortcode = item["code"] if "code" in item else "no_code"
|
shortcode = item["code"] if "code" in item else "no_code"
|
||||||
date = datetime.fromtimestamp(item["taken_at"])
|
date_local = datetime.fromtimestamp(item["taken_at"])
|
||||||
date_utc = datetime.utcfromtimestamp(item["taken_at"])
|
date_utc = datetime.utcfromtimestamp(item["taken_at"])
|
||||||
dirname = self.dirname_pattern.format(profile=profile, target=target)
|
dirname = self.dirname_pattern.format(profile=profile, target=target)
|
||||||
filename = dirname + '/' + self.filename_pattern.format(profile=profile, target=target,
|
filename = dirname + '/' + self.filename_pattern.format(profile=profile, target=target,
|
||||||
date=date, date_utc=date_utc,
|
date_utc=date_utc,
|
||||||
shortcode=shortcode)
|
shortcode=shortcode)
|
||||||
|
filename_old = dirname + '/' + self.filename_pattern_old.format(profile=profile, target=target,
|
||||||
|
date_utc=date_local,
|
||||||
|
shortcode=shortcode)
|
||||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||||
downloaded = False
|
downloaded = False
|
||||||
if "image_versions2" in item:
|
if "image_versions2" in item:
|
||||||
if "video_versions" not in item or self.download_video_thumbnails is Tristate.always:
|
if "video_versions" not in item or self.download_video_thumbnails is Tristate.always:
|
||||||
url = item["image_versions2"]["candidates"][0]["url"]
|
url = item["image_versions2"]["candidates"][0]["url"]
|
||||||
downloaded = self.download_pic(filename=filename,
|
downloaded = self.download_pic(filename=filename,
|
||||||
|
filename_alt=filename_old,
|
||||||
url=url,
|
url=url,
|
||||||
mtime=date)
|
mtime=date_local)
|
||||||
else:
|
else:
|
||||||
self._log("Warning: Unable to find story image.")
|
self._log("Warning: Unable to find story image.")
|
||||||
if "caption" in item and item["caption"] is not None and \
|
if "caption" in item and item["caption"] is not None and \
|
||||||
@ -1078,17 +1125,18 @@ class Instaloader:
|
|||||||
caption = item["caption"]
|
caption = item["caption"]
|
||||||
if isinstance(caption, dict) and "text" in caption:
|
if isinstance(caption, dict) and "text" in caption:
|
||||||
caption = caption["text"]
|
caption = caption["text"]
|
||||||
self.save_caption(filename, date, caption)
|
self.save_caption(filename=filename, filename_alt=filename_old, mtime=date_local, caption=caption)
|
||||||
else:
|
else:
|
||||||
self._log("<no caption>", end=' ', flush=True)
|
self._log("<no caption>", end=' ', flush=True)
|
||||||
if "video_versions" in item and self.download_videos is Tristate.always:
|
if "video_versions" in item and self.download_videos is Tristate.always:
|
||||||
downloaded |= self.download_pic(filename=filename,
|
downloaded |= self.download_pic(filename=filename,
|
||||||
|
filename_alt=filename_old,
|
||||||
url=item["video_versions"][0]["url"],
|
url=item["video_versions"][0]["url"],
|
||||||
mtime=date)
|
mtime=date_local)
|
||||||
if item["story_locations"] and self.download_geotags is not Tristate.never:
|
if item["story_locations"] and self.download_geotags is not Tristate.never:
|
||||||
location = item["story_locations"][0]["location"]
|
location = item["story_locations"][0]["location"]
|
||||||
if location:
|
if location:
|
||||||
self.save_location(filename, location, date)
|
self.save_location(filename, location, date_local)
|
||||||
self._log()
|
self._log()
|
||||||
return downloaded
|
return downloaded
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user