From 463ada519e92fc5688c7b229a1c247dcadc2a244 Mon Sep 17 00:00:00 2001
From: MiguelX413
Date: Thu, 17 Mar 2022 15:06:26 +0000
Subject: [PATCH] Add unicode normalization of captions (#1453)

Normalize post captions to Unicode NFC before returning them.

The raw node value was previously returned as-is, so a null caption
yielded None. Keep that behaviour instead of handing None to
unicodedata.normalize(), which only accepts str and raises TypeError
otherwise.
---
 instaloader/structures.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/instaloader/structures.py b/instaloader/structures.py
index 59f0b31..8b1f363 100644
--- a/instaloader/structures.py
+++ b/instaloader/structures.py
@@ -6,6 +6,7 @@ from collections import namedtuple
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
+from unicodedata import normalize
 
 from . import __version__
 from .exceptions import *
@@ -322,9 +323,12 @@ class Post:
     def caption(self) -> Optional[str]:
-        """Caption."""
+        """Caption, normalized to Unicode NFC, or None if the post has none."""
         if "edge_media_to_caption" in self._node and self._node["edge_media_to_caption"]["edges"]:
-            return self._node["edge_media_to_caption"]["edges"][0]["node"]["text"]
+            text = self._node["edge_media_to_caption"]["edges"][0]["node"]["text"]
         elif "caption" in self._node:
-            return self._node["caption"]
-        return None
+            text = self._node["caption"]
+        else:
+            return None
+        # Pass a null caption through unchanged: normalize() raises TypeError for non-str input.
+        return normalize("NFC", text) if text is not None else None
 
     @property