diff --git a/cadpoint/web/admin.py b/cadpoint/web/admin.py
index 12d026e..5b5fb45 100644
--- a/cadpoint/web/admin.py
+++ b/cadpoint/web/admin.py
@@ -5,11 +5,25 @@ from django.db import models
from django.forms import TextInput, Textarea
from django.urls import reverse
from django_select2.forms import Select2TagWidget
+from etpgrf.config import MODE_MIXED, MODE_MNEMONIC, MODE_UNICODE, SANITIZE_ALL_HTML, SANITIZE_ETPGRF
from web.models import TbContent
from web.add_function import safe_html_special_symbols
from cadpoint import settings
+# Choices for the admin "output mode" select; the stored values are the etpgrf mode constants.
+TYPOGRAPH_MODE_CHOICES = [
+ (MODE_MIXED, 'Смешанный (Mixed)'),
+ (MODE_UNICODE, 'Юникод (Unicode)'),
+ (MODE_MNEMONIC, 'Мнемоники'),
+]
+
+# Choices for the admin "sanitizer" select. The literal string 'None' stands for
+# "no sanitizing"; web.models._normalize_typograph_sanitizer maps it to SANITIZE_NONE.
+TYPOGRAPH_SANITIZER_CHOICES = [
+ (SANITIZE_ALL_HTML, 'Очистка от HTML на входе'),
+ (SANITIZE_ETPGRF, 'Очистка висячей пунктуации'),
+ ('None', 'Без очистки'),
+]
+
+
class AjaxCommaSeparatedSelect2TagWidget(Select2TagWidget):
"""
Select2-виджет для `taggit`.
@@ -48,9 +62,46 @@ class AjaxCommaSeparatedSelect2TagWidget(Select2TagWidget):
class AdminContentForm(forms.ModelForm):
+    # Virtual (non-model) checkbox that replaces the hidden model field `bTypograf`
+    # in the admin form.
+    typograph_enabled = forms.BooleanField(
+        label='Типограф etpgrf вкл.',
+        required=False,
+        initial=True,
+        # help_text is emitted as HTML by the admin, so line breaks must be
+        # explicit <br /> tags rather than raw newlines inside the literal.
+        help_text="Обработать через Типограф ETPRGF<br />"
+                  "СТАБИЛЬНЫЙ И СОВРЕМЕННЫЙ ТИПОГРАФ, РЕКОМЕНДУЕМ<br />"
+                  "«приклеивает» союзы и предлоги, поддерживает неразрывные конструкции, "
+                  "замена тире, кавычек и дефисов, расстановка «мягких переносов» "
+                  "в словах длиннее 14 символов, висячая "
+                  "пунктуация (только в заголовках) и т.п."
+    )
+    # Virtual checkbox: strip soft hyphens from the content before typographing.
+    typograph_strip_soft_hyphens = forms.BooleanField(
+        label='Удалять переносы',
+        required=False,
+        # Checked by default, in line with the model-side default
+        # (_TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS) and the save_model fallback.
+        initial=True,
+        # help_text is emitted as HTML by the admin: the entity names are written
+        # with `&amp;` so they are displayed literally instead of being rendered
+        # as invisible soft hyphens.
+        help_text='Убирает `&amp;shy;`, `&amp;#173;` и Unicode-символ мягкого переноса<br />'
+                  'перед типографом.',
+    )
+ # Typographer output mode; the choice values are the etpgrf MODE_* constants.
+ typograph_mode = forms.ChoiceField(
+ label='Режим вывода',
+ choices=TYPOGRAPH_MODE_CHOICES,
+ initial=MODE_MIXED,
+ )
+ # Hyphenation on/off switch (checked by default); optional so that an
+ # unchecked box validates as False.
+ typograph_hyphenation = forms.BooleanField(
+ label='Расстановка переносов',
+ required=False,
+ initial=True,
+ )
+ # Sanitizer choice; the string 'None' (the initial value) is the "no sanitizing" option.
+ typograph_sanitizer = forms.ChoiceField(
+ label='Санитайзинг',
+ choices=TYPOGRAPH_SANITIZER_CHOICES,
+ initial='None',
+ )
+
class Meta:
model = TbContent
- fields = '__all__'
+ exclude = ('bTypograf',)
class Media:
css = {
@@ -59,6 +110,7 @@ class AdminContentForm(forms.ModelForm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
+ self.fields['typograph_enabled'].initial = self.instance.bTypograf
# AJAX-виджет подгружает список тегов лениво, а здесь мы оставляем
# только уже выбранные значения, чтобы не тащить все теги из базы при
# открытии формы и не провоцировать лишние запросы к SQLite.
@@ -117,7 +169,11 @@ class AdminContent(admin.ModelAdmin):
'fields': ('tags', 'szContentHead', 'imgContentPreview', 'szContentIntro', 'szContentBody')
}),
('Типограф', {
- 'fields': ('bTypograf', ),
+ 'fields': (
+ ('typograph_enabled', ),
+ ('typograph_mode', 'typograph_sanitizer', ),
+ ('typograph_strip_soft_hyphens', 'typograph_hyphenation', ),
+ ),
'classes': ('collapse',),
}),
('Поля для SEO', {
@@ -130,6 +186,14 @@ class AdminContent(admin.ModelAdmin):
actions_on_top = False
actions_on_bottom = False
+    def save_model(self, request, obj, form, change):
+        """Copy the virtual typograph form fields onto ``obj`` and save it.
+
+        ``typograph_enabled`` is written to the model field ``bTypograf``. The
+        remaining options are attached as underscore-prefixed instance
+        attributes (``_typograph_mode`` and friends) before the parent
+        ``save_model`` is called.
+        """
+        cleaned = form.cleaned_data
+        obj.bTypograf = cleaned.get('typograph_enabled', False)
+        # (form field name, fallback used when the key is absent from cleaned_data)
+        transient_options = (
+            ('typograph_strip_soft_hyphens', True),
+            ('typograph_mode', MODE_MIXED),
+            ('typograph_hyphenation', True),
+            ('typograph_sanitizer', 'None'),
+        )
+        for field_name, fallback in transient_options:
+            setattr(obj, f'_{field_name}', cleaned.get(field_name, fallback))
+        super().save_model(request, obj, form, change)
+
def ContentHeadSafe(self, obj) -> str:
return safe_html_special_symbols(obj.szContentHead)
diff --git a/cadpoint/web/models.py b/cadpoint/web/models.py
index afa9e03..d55bfaf 100644
--- a/cadpoint/web/models.py
+++ b/cadpoint/web/models.py
@@ -6,6 +6,14 @@ import logging
from django.db import models
from django.utils.timezone import now
from etpgrf import Hyphenator, Typographer
+from etpgrf.config import (
+ MODE_MIXED,
+ MODE_MNEMONIC,
+ MODE_UNICODE,
+ SANITIZE_ALL_HTML,
+ SANITIZE_ETPGRF,
+ SANITIZE_NONE,
+)
from filer.fields.image import FilerFileField
from taggit.managers import TaggableManager
from taggit.models import Tag, TaggedItem
@@ -20,15 +28,61 @@ logger = logging.getLogger(__name__)
_TYPOGRAPHER_LANGS = 'ru+en'
_TYPOGRAPHER_MAX_UNHYPHENATED_LEN = 14
-def _build_typographer(hanging_punctuation=None) -> Typographer:
+# Fallbacks read by TbContent.save() when the matching per-save `_typograph_*`
+# attribute has not been set on the instance.
+_TYPOGRAPHER_DEFAULT_MODE = MODE_MIXED
+_TYPOGRAPHER_DEFAULT_HYPHENATION = True
+_TYPOGRAPHER_DEFAULT_SANITIZER = SANITIZE_NONE
+_TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS = True
+
+
+def _normalize_typograph_mode(value: str | None) -> str:
+    """Return ``value`` as ``str`` when it is a known etpgrf mode, else the default mode."""
+    known_modes = {MODE_MIXED, MODE_UNICODE, MODE_MNEMONIC}
+    return str(value) if value in known_modes else _TYPOGRAPHER_DEFAULT_MODE
+
+
+def _normalize_typograph_hyphenation(value) -> bool:
+    """Coerce the hyphenation flag to ``bool``; ``None`` selects the module default."""
+    if value is None:
+        return bool(_TYPOGRAPHER_DEFAULT_HYPHENATION)
+    return bool(value)
+
+
+def _normalize_typograph_sanitizer(value):
+    """Map a raw sanitizer choice onto one of the etpgrf sanitizer constants.
+
+    ``None``, ``''``, the string ``'None'`` and ``SANITIZE_NONE`` itself all
+    yield ``SANITIZE_NONE``, as does any unrecognised value.
+    """
+    blank_choices = (None, '', 'None', SANITIZE_NONE)
+    if value not in blank_choices:
+        for known in (SANITIZE_ALL_HTML, SANITIZE_ETPGRF):
+            if value == known:
+                return known
+    return SANITIZE_NONE
+
+
+def _strip_soft_hyphens(text: str) -> str:
+    """Remove soft hyphens in every spelling before the text is handed to etpgrf.
+
+    Strips the named entity ``&shy;``, the numeric entities ``&#173;`` and
+    ``&#xAD;`` (either hex-digit case) and the raw U+00AD character.
+    Falsy input (``None`` or ``""``) is returned unchanged.
+    """
+    if not text:
+        return text
+    # The entity spellings must be spelled out literally: replacing an empty
+    # search string is a no-op and would leave encoded soft hyphens in place.
+    return (
+        text
+        .replace("&shy;", "")
+        .replace("&#173;", "")
+        .replace("&#xAD;", "")
+        .replace("&#xad;", "")
+        .replace("\u00ad", "")
+    )
+
+
+def _build_typographer(mode=None, hyphenation=True, sanitizer=None, hanging_punctuation=None) -> Typographer:
"""Собирает `etpgrf` с едиными настройками для заголовка и текста."""
+ # Raw option values are normalised first, so missing or unknown values fall
+ # back to the module-level defaults instead of reaching Typographer as-is.
+ normalized_mode = _normalize_typograph_mode(mode)
+ normalized_hyphenation = _normalize_typograph_hyphenation(hyphenation)
+ normalized_sanitizer = _normalize_typograph_sanitizer(sanitizer)
return Typographer(
langs=_TYPOGRAPHER_LANGS,
+ mode=normalized_mode,
process_html=True,
- hyphenation=Hyphenator(
- langs=_TYPOGRAPHER_LANGS,
- max_unhyphenated_len=_TYPOGRAPHER_MAX_UNHYPHENATED_LEN,
+ # A configured Hyphenator is passed only when hyphenation is requested;
+ # otherwise the literal False is passed (presumably disabling it — confirm
+ # against the etpgrf Typographer API).
+ hyphenation=(
+ Hyphenator(
+ langs=_TYPOGRAPHER_LANGS,
+ max_unhyphenated_len=_TYPOGRAPHER_MAX_UNHYPHENATED_LEN,
+ ) if normalized_hyphenation else False
),
+ sanitizer=normalized_sanitizer,
hanging_punctuation=hanging_punctuation,
)
@@ -82,7 +136,7 @@ class TbContent(models.Model):
# | szContentTitle -- title для SEO | longtext NOT NULL,
# | szContentKeywords -- keywords для SEO | longtext NOT NULL,
# | szContentDescription -- Description для SEO | longtext NOT NULL,
- # | dtContentCreate -- дата и время создания | datetime(6) NOT NULL,
+ # | dtContentCreate -- дата и время создания | datetime(6) NOT NULL,
# | dtContentTimeStamp -- штамп времени (время последнего обновления в базе) | datetime(6) NOT NULL
# ============================================================
bContentPublish = models.BooleanField(
@@ -145,6 +199,7 @@ class TbContent(models.Model):
verbose_name="◉",
help_text="Число просмотров"
)
+ # Поле для удаления. Все будет делаться с помощью виртуальных полей админки
bTypograf = models.BooleanField(
default=False,
verbose_name="Типограф etpgrf",
@@ -193,6 +248,14 @@ class TbContent(models.Model):
def save(self, *args, **kwargs):
# Переопределяем save(), чтобы автоматически типографировать контент перед сохранением.
+ typograph_mode = getattr(self, '_typograph_mode', _TYPOGRAPHER_DEFAULT_MODE)
+ typograph_hyphenation = getattr(self, '_typograph_hyphenation', _TYPOGRAPHER_DEFAULT_HYPHENATION)
+ typograph_sanitizer = getattr(self, '_typograph_sanitizer', _TYPOGRAPHER_DEFAULT_SANITIZER)
+ typograph_strip_soft_hyphens = getattr(
+ self,
+ '_typograph_strip_soft_hyphens',
+ _TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS,
+ )
if self.szContentSlug is None or self.szContentSlug == "" or " " in self.szContentSlug:
# print("ку-ку", self.szContentHead)
base_slug = clean_text_to_slug(self.szContentHead)
@@ -205,11 +268,29 @@ class TbContent(models.Model):
if self.bTypograf:
# `etpgrf` уже умеет HTML-режим и висячую пунктуацию, поэтому здесь
# не нужен старый локальный fallback.
+ # Мягкие переносы убираем заранее: `etpgrf` не очищает их сам, а они
+ # потом мешают и типографу, и последующей нормализации текста.
# Для заголовка включаем левую висячую пунктуацию, а для анонса и
# тела текста оставляем обычную обработку без hanging punctuation.
- self.szContentHead = _typograph_text(self.szContentHead, _TYPOGRAPHER_HEAD)
- self.szContentIntro = _typograph_text(self.szContentIntro, _TYPOGRAPHER_TEXT)
- self.szContentBody = _typograph_text(self.szContentBody, _TYPOGRAPHER_TEXT)
+ if typograph_strip_soft_hyphens:
+ self.szContentHead = _strip_soft_hyphens(self.szContentHead)
+ self.szContentIntro = _strip_soft_hyphens(self.szContentIntro)
+ self.szContentBody = _strip_soft_hyphens(self.szContentBody)
+ head_typographer = _build_typographer(
+ mode=typograph_mode,
+ hyphenation=typograph_hyphenation,
+ sanitizer=typograph_sanitizer,
+ hanging_punctuation='left',
+ )
+ text_typographer = _build_typographer(
+ mode=typograph_mode,
+ hyphenation=typograph_hyphenation,
+ sanitizer=typograph_sanitizer,
+ hanging_punctuation=False,
+ )
+ self.szContentHead = _typograph_text(self.szContentHead, head_typographer)
+ self.szContentIntro = _typograph_text(self.szContentIntro, text_typographer)
+ self.szContentBody = _typograph_text(self.szContentBody, text_typographer)
self.bTypograf = False
if self.dtContentCreate is None:
self.dtContentCreate = datetime.datetime.now()
diff --git a/cadpoint/web/tests.py b/cadpoint/web/tests.py
index a44cde5..5824fad 100644
--- a/cadpoint/web/tests.py
+++ b/cadpoint/web/tests.py
@@ -3,8 +3,10 @@ from unittest.mock import patch
from django.contrib.auth import get_user_model
from django.test import SimpleTestCase, TestCase
from django.urls import reverse
+from etpgrf.config import MODE_UNICODE, SANITIZE_ETPGRF
from taggit.models import Tag
+from web.admin import AdminContentForm
from web.add_function import clean_text_to_slug, safe_html_special_symbols
from web.legacy_links import build_canonical_url, replace_legacy_links
from web.models import TbContent
@@ -50,6 +52,22 @@ class SafeHtmlSpecialSymbolsTests(SimpleTestCase):
self.assertEqual(clean_text_to_slug('αβγ ΔΩ'), 'content')
self.assertEqual(clean_text_to_slug('₽ € $ ₴ ₿'), 'content')
+
+class AdminTypographFormTests(SimpleTestCase):
+ """Checks the virtual typograph fields declared on ``AdminContentForm``."""
+
+ def test_admin_form_exposes_virtual_typograph_fields(self):
+ """``bTypograf`` is absent from the form; the virtual fields exist with the expected initial values."""
+ form = AdminContentForm()
+
+ self.assertNotIn('bTypograf', form.fields)
+ self.assertIn('typograph_enabled', form.fields)
+ self.assertIn('typograph_strip_soft_hyphens', form.fields)
+ self.assertIn('typograph_mode', form.fields)
+ self.assertIn('typograph_hyphenation', form.fields)
+ self.assertIn('typograph_sanitizer', form.fields)
+ self.assertEqual(form.fields['typograph_mode'].initial, 'mixed')
+ self.assertTrue(form.fields['typograph_strip_soft_hyphens'].initial)
+ self.assertTrue(form.fields['typograph_hyphenation'].initial)
+ self.assertEqual(form.fields['typograph_sanitizer'].initial, 'None')
+
def test_tbcontent_str_uses_clean_text(self):
item = TbContent(id=7, szContentHead='«Привет мир»')
@@ -129,17 +147,85 @@ class TypographTests(TestCase):
bTypograf=True,
)
- with patch('web.models._TYPOGRAPHER_HEAD.process') as head_process_mock, \
- patch('web.models._TYPOGRAPHER_TEXT.process') as text_process_mock:
- head_process_mock.side_effect = lambda text: f'HEAD[{text}]'
- text_process_mock.side_effect = lambda text: f'TEXT[{text}]'
+ with patch('web.models._build_typographer') as build_mock:
+ build_mock.return_value.process.side_effect = lambda text: f'[{text}]'
item.save()
- self.assertEqual(head_process_mock.call_count, 1)
- self.assertEqual(text_process_mock.call_count, 2)
- self.assertEqual(item.szContentHead, 'HEAD[«Привет»]')
-        self.assertEqual(item.szContentIntro, 'TEXT[<p>Абзац</p>]')
-        self.assertEqual(item.szContentBody, 'TEXT[<p>Тело</p>]')
+        self.assertEqual(build_mock.call_count, 2)
+        self.assertEqual(item.szContentHead, '[«Привет»]')
+        self.assertEqual(item.szContentIntro, '[<p>Абзац</p>]')
+        self.assertEqual(item.szContentBody, '[<p>Тело</p>]')
         self.assertFalse(item.bTypograf)
 
+    def test_save_uses_virtual_typograph_options(self):
+        """Per-save `_typograph_*` attributes are forwarded to both typographer builds."""
+        item = TbContent(
+            szContentHead='Привет',
+            szContentIntro='Текст',
+            szContentBody='Тело',
+            bTypograf=True,
+        )
+        item._typograph_mode = MODE_UNICODE
+        item._typograph_hyphenation = False
+        item._typograph_sanitizer = SANITIZE_ETPGRF
+
+        with patch('web.models._build_typographer') as build_mock:
+            fake_typographer = build_mock.return_value
+            fake_typographer.process.side_effect = lambda text: text
+            item.save()
+
+        self.assertEqual(build_mock.call_count, 2)
+        self.assertEqual(
+            build_mock.call_args_list[0].kwargs,
+            {
+                'mode': MODE_UNICODE,
+                'hyphenation': False,
+                'sanitizer': SANITIZE_ETPGRF,
+                'hanging_punctuation': 'left',
+            },
+        )
+        self.assertEqual(
+            build_mock.call_args_list[1].kwargs,
+            {
+                'mode': MODE_UNICODE,
+                'hyphenation': False,
+                'sanitizer': SANITIZE_ETPGRF,
+                'hanging_punctuation': False,
+            },
+        )
+
+    def test_save_strips_soft_hyphens_before_typograph(self):
+        """With no explicit option set, U+00AD is removed before the typographer runs."""
+        item = TbContent(
+            szContentHead='Привет\u00ad',
+            szContentIntro='Анонс',
+            szContentBody='Тело\u00ad',
+            bTypograf=True,
+        )
+
+        with patch('web.models._build_typographer') as build_mock:
+            build_mock.return_value.process.side_effect = lambda text: f'[{text}]'
+            item.save()
+
+        self.assertEqual(build_mock.call_count, 2)
+        self.assertEqual(item.szContentHead, '[Привет]')
+        self.assertEqual(item.szContentIntro, '[Анонс]')
+        self.assertEqual(item.szContentBody, '[Тело]')
+        self.assertFalse(item.bTypograf)
+
+    def test_show_item_increments_hits_without_touching_timestamp(self):
+        """Viewing an item bumps ``iContentHits`` and leaves ``dtContentTimeStamp`` unchanged."""
+        item = TbContent.objects.create(
+            szContentHead='Проверка просмотра',
+            szContentIntro='Короткий анонс',
+            szContentBody='Полный текст',
+            szContentSlug='proverka-prosmotra',
+            bContentPublish=True,
+        )
+        timestamp_before = item.dtContentTimeStamp
+
+        response = self.client.get(f'/item/{item.id}-{item.szContentSlug}')
+
+        self.assertEqual(response.status_code, 200)
+        item.refresh_from_db()
+        self.assertEqual(item.iContentHits, 1)
+        self.assertEqual(item.dtContentTimeStamp, timestamp_before)
+