Fix and globalize regex
This commit is contained in:
parent
0c21da8c18
commit
8784ac7d9b
@ -73,6 +73,22 @@ PostLocation.has_public_page.__doc__ = "Whether location has a public page."
|
||||
PostLocation.lat.__doc__ = "Latitude (:class:`float` or None)."
|
||||
PostLocation.lng.__doc__ = "Longitude (:class:`float` or None)."
|
||||
|
||||
# This regular expression is by MiguelX413
|
||||
_hashtag_regex = re.compile(r"(?:#)((?:\w){1,150})")
|
||||
|
||||
# This regular expression is modified from jStassen, adjusted to use Python's \w to
|
||||
# support Unicode and a word/beginning of string delimiter at the beginning to ensure
|
||||
# that no email addresses join the list of mentions.
|
||||
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
||||
_mention_regex = re.compile(r"(?:^|[^\w\n]|_)(?:@)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)", re.ASCII)
|
||||
|
||||
|
||||
def _optional_normalize(string: Optional[str]) -> Optional[str]:
|
||||
if string is not None:
|
||||
return normalize("NFC", string)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class Post:
|
||||
"""
|
||||
@ -396,16 +412,10 @@ class Post:
|
||||
@property
|
||||
def caption(self) -> Optional[str]:
|
||||
"""Caption."""
|
||||
def _normalize(string: Optional[str]) -> Optional[str]:
|
||||
if string is not None:
|
||||
return normalize("NFC", string)
|
||||
else:
|
||||
return None
|
||||
|
||||
if "edge_media_to_caption" in self._node and self._node["edge_media_to_caption"]["edges"]:
|
||||
return _normalize(self._node["edge_media_to_caption"]["edges"][0]["node"]["text"])
|
||||
return _optional_normalize(self._node["edge_media_to_caption"]["edges"][0]["node"]["text"])
|
||||
elif "caption" in self._node:
|
||||
return _normalize(self._node["caption"])
|
||||
return _optional_normalize(self._node["caption"])
|
||||
return None
|
||||
|
||||
@property
|
||||
@ -413,22 +423,14 @@ class Post:
|
||||
"""List of all lowercased hashtags (without preceding #) that occur in the Post's caption."""
|
||||
if not self.caption:
|
||||
return []
|
||||
# This regular expression is from jStassen, adjusted to use Python's \w to support Unicode
|
||||
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
||||
hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
|
||||
return re.findall(hashtag_regex, self.caption.lower())
|
||||
return _hashtag_regex.findall(self.caption.lower())
|
||||
|
||||
@property
|
||||
def caption_mentions(self) -> List[str]:
|
||||
"""List of all lowercased profiles that are mentioned in the Post's caption, without preceding @."""
|
||||
if not self.caption:
|
||||
return []
|
||||
# This regular expression is modified from jStassen, adjusted to use Python's \w to
|
||||
# support Unicode and a word/beginning of string delimiter at the beginning to ensure
|
||||
# that no email addresses join the list of mentions.
|
||||
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
||||
mention_regex = re.compile(r"(?:^|\W|_)(?:@)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)", re.ASCII)
|
||||
return re.findall(mention_regex, self.caption.lower())
|
||||
return _mention_regex.findall(self.caption.lower())
|
||||
|
||||
@property
|
||||
def pcaption(self) -> str:
|
||||
|
Loading…
Reference in New Issue
Block a user