54 lines
2.2 KiB
Python
54 lines
2.2 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
from cadpoint.settings import *
|
||
import re
|
||
|
||
|
||
def check_cookies(request) -> bool:
    """Report whether the visitor has NOT yet accepted the cookie notice.

    Looks up the ``cookie_accept`` key in ``request.COOKIES``.

    :param request: object exposing a dict-like ``COOKIES`` attribute
        (presumably a Django ``HttpRequest`` -- confirm against callers)
    :return: ``False`` when ``cookie_accept`` is present with a truthy value,
        ``True`` when it is missing or empty
    """
    # NOTE(review): the result is "consent still missing", not "consent given".
    # Presumably callers use it as a "show the cookie banner" flag -- verify
    # before inverting anything here.
    return not request.COOKIES.get('cookie_accept')
|
||
|
||
|
||
# Ordered (needle, replacement) pairs applied one after another by
# safe_html_special_symbols().  Entities are listed in their escaped form
# (``&laquo;`` etc.); replacing a literal character with itself would be a
# no-op.  The raw soft-hyphen / no-break-space characters are handled too, so
# already-decoded input is cleaned the same way.
_TYPOGRAPH_REPLACEMENTS = (
    ('&shy;', ''),            # soft hyphen entity
    ('\u00ad', ''),           # soft hyphen character
    ('<span class="laquo">', ''),
    ('<span style="margin-right:0.44em;">', ''),
    ('<span style="margin-left:-0.44em;">', ''),
    ('<span class="raquo">', ''),
    ('<span class="point">', ''),
    ('<span class="thinsp">', ' '),
    ('<span class="ensp">', ''),
    # NOTE: removes every closing </span>, including those of unrelated spans.
    ('</span>', ''),
    ('&nbsp;', ' '),          # no-break space entity
    ('\u00a0', ' '),          # no-break space character
    ('&laquo;', '\u00ab'),    # «
    ('&raquo;', '\u00bb'),    # »
    ('&hellip;', '\u2026'),   # …
    ('<nobr>', ''),
    ('</nobr>', ''),
    ('&mdash;', '\u2014'),    # —
    ('&#8470;', '\u2116'),    # №
    ('&numero;', '\u2116'),   # № (named form)
    ('<br />', ' '),
    ('<br>', ' '),
)


def safe_html_special_symbols(s: str) -> str:
    """Strip typographer HTML markup from a string.

    Removes the typographer's ``<span>`` / ``<nobr>`` wrappers, turns line
    breaks and no-break spaces into ordinary spaces, drops soft hyphens and
    decodes a fixed set of HTML entities into their characters.

    :param s: string to clean
    :return: the cleaned string
    """
    result = s
    for needle, replacement in _TYPOGRAPH_REPLACEMENTS:
        result = result.replace(needle, replacement)
    return result
|
||
|
||
|
||
# Opening <p> tag with or without attributes.  The attribute part must start
# with whitespace so that <pre>, <param>, <picture> ... are NOT mistaken for a
# paragraph (a bare ``<p[^>]*>`` matches them as well).
_P_OPEN = r"<p(?:\s[^>]*)?>"

# Ordered (compiled pattern, replacement) rules for post_processing_html().
# Order matters: later rules rely on whitespace already being collapsed.
_POST_PROCESSING_RULES = tuple(
    (re.compile(pattern, re.IGNORECASE), replacement)
    for pattern, replacement in (
        # Collapse every whitespace run (incl. newlines) to one space.
        (r"\s+", " "),
        # Normalise what follows a tag end: whitespace or a leading &nbsp;.
        (r">\s+|>&nbsp;", "> "),
        # Drop paragraphs that contain only whitespace / &nbsp;.
        (_P_OPEN + r"(?:\s|&nbsp;)*</p>", ""),
        # A line break right after a paragraph end is redundant.
        (r"</p>\s*<br[^>]*>", "</p>"),
        # A line break before a paragraph, or a doubled paragraph opening.
        (r"<br[^>]*>\s*<p>|" + _P_OPEN + r"\s*" + _P_OPEN, "<p>"),
        # Doubled paragraph closing.
        (r"</p>\s*</p>", "</p>"),
        # Two consecutive line breaks become one.
        (r"<br[^>]*>\s*<br[^>]*>", "<br />"),
        # Block quotes must not be wrapped in a paragraph.
        (r"<p><blockquote>", "<blockquote>"),
        (r"</blockquote></p>", "</blockquote>"),
    )
)


def post_processing_html(s: str) -> str:
    """Normalise whitespace and tidy paragraph / line-break markup in HTML.

    Applies a fixed sequence of case-insensitive regex substitutions:
    collapses whitespace, removes empty paragraphs, removes line breaks
    adjacent to paragraph boundaries, merges doubled ``<p>`` / ``</p>`` /
    ``<br>`` tags and unwraps ``<blockquote>`` from a surrounding ``<p>``.

    :param s: HTML fragment to clean up
    :return: the cleaned HTML fragment
    """
    for pattern, replacement in _POST_PROCESSING_RULES:
        s = pattern.sub(replacement, s)
    return s
|