Fix and globalize regex
This commit is contained in:
parent
0c21da8c18
commit
8784ac7d9b
@ -73,6 +73,22 @@ PostLocation.has_public_page.__doc__ = "Whether location has a public page."
|
|||||||
PostLocation.lat.__doc__ = "Latitude (:class:`float` or None)."
|
PostLocation.lat.__doc__ = "Latitude (:class:`float` or None)."
|
||||||
PostLocation.lng.__doc__ = "Longitude (:class:`float` or None)."
|
PostLocation.lng.__doc__ = "Longitude (:class:`float` or None)."
|
||||||
|
|
||||||
|
# This regular expression is by MiguelX413
|
||||||
|
_hashtag_regex = re.compile(r"(?:#)((?:\w){1,150})")
|
||||||
|
|
||||||
|
# This regular expression is modified from jStassen, adjusted to use Python's \w to
|
||||||
|
# support Unicode and a word/beginning of string delimiter at the beginning to ensure
|
||||||
|
# that no email addresses join the list of mentions.
|
||||||
|
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
||||||
|
_mention_regex = re.compile(r"(?:^|[^\w\n]|_)(?:@)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)", re.ASCII)
|
||||||
|
|
||||||
|
|
||||||
|
def _optional_normalize(string: Optional[str]) -> Optional[str]:
|
||||||
|
if string is not None:
|
||||||
|
return normalize("NFC", string)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class Post:
|
class Post:
|
||||||
"""
|
"""
|
||||||
@ -396,16 +412,10 @@ class Post:
|
|||||||
@property
def caption(self) -> Optional[str]:
    """Caption text passed through ``_optional_normalize``, or None if the node has no caption."""
    node = self._node
    # The edge-style structure is checked first; the plain "caption" key is the fallback.
    caption_edges = node["edge_media_to_caption"]["edges"] if "edge_media_to_caption" in node else None
    if caption_edges:
        return _optional_normalize(caption_edges[0]["node"]["text"])
    if "caption" in node:
        return _optional_normalize(node["caption"])
    return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -413,22 +423,14 @@ class Post:
|
|||||||
"""List of all lowercased hashtags (without preceeding #) that occur in the Post's caption."""
|
"""List of all lowercased hashtags (without preceeding #) that occur in the Post's caption."""
|
||||||
if not self.caption:
|
if not self.caption:
|
||||||
return []
|
return []
|
||||||
# This regular expression is from jStassen, adjusted to use Python's \w to support Unicode
|
return _hashtag_regex.findall(self.caption.lower())
|
||||||
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
|
||||||
hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
|
|
||||||
return re.findall(hashtag_regex, self.caption.lower())
|
|
||||||
|
|
||||||
@property
def caption_mentions(self) -> List[str]:
    """List of all lowercased profiles that are mentioned in the Post's caption, without preceding @."""
    caption_text = self.caption
    # A missing or empty caption yields no mentions.
    if not caption_text:
        return []
    return _mention_regex.findall(caption_text.lower())
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pcaption(self) -> str:
|
def pcaption(self) -> str:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user