diff --git a/cadpoint/web/admin.py b/cadpoint/web/admin.py index 12d026e..5b5fb45 100644 --- a/cadpoint/web/admin.py +++ b/cadpoint/web/admin.py @@ -5,11 +5,25 @@ from django.db import models from django.forms import TextInput, Textarea from django.urls import reverse from django_select2.forms import Select2TagWidget +from etpgrf.config import MODE_MIXED, MODE_MNEMONIC, MODE_UNICODE, SANITIZE_ALL_HTML, SANITIZE_ETPGRF from web.models import TbContent from web.add_function import safe_html_special_symbols from cadpoint import settings +TYPOGRAPH_MODE_CHOICES = [ + (MODE_MIXED, 'Смешанный (Mixed)'), + (MODE_UNICODE, 'Юникод (Unicode)'), + (MODE_MNEMONIC, 'Мнемоники'), +] + +TYPOGRAPH_SANITIZER_CHOICES = [ + (SANITIZE_ALL_HTML, 'Очистка от HTML на входе'), + (SANITIZE_ETPGRF, 'Очистка висячей пунктуации'), + ('None', 'Без очистки'), +] + + class AjaxCommaSeparatedSelect2TagWidget(Select2TagWidget): """ Select2-виджет для `taggit`. @@ -48,9 +62,46 @@ class AjaxCommaSeparatedSelect2TagWidget(Select2TagWidget): class AdminContentForm(forms.ModelForm): + typograph_enabled = forms.BooleanField( + label='Типограф etpgrf вкл.', + required=False, + initial=True, + help_text="Обработать через Типограф ETPRGF
" + "СТАБИЛЬНЫЙ И СОВРЕМЕННЫЙ ТИПОГРАФ, РЕКОМЕНДУЕМ
" + "«приклеивает» союзы и предлоги, поддерживает неразрывные конструкции, " + "замена тире, кавычек и дефисов, расстановка «мягких переносов» " + "в словах длиннее 14 символов, висячая " + "пунктуация (только в заголовках) и т.п.
" + ) + typograph_strip_soft_hyphens = forms.BooleanField( + label='Удалять переносы', + required=False, + initial=False, + help_text='Убирает `­`, `­` и Unicode-символ мягкого переноса
' + 'перед типографом.', + ) + typograph_mode = forms.ChoiceField( + label='Режим вывода', + choices=TYPOGRAPH_MODE_CHOICES, + initial=MODE_MIXED, + ) + typograph_hyphenation = forms.BooleanField( + label='Расстановка переносов', + required=False, + initial=True, + ) + typograph_sanitizer = forms.ChoiceField( + label='Санитайзинг', + choices=TYPOGRAPH_SANITIZER_CHOICES, + initial='None', + ) + class Meta: model = TbContent - fields = '__all__' + exclude = ('bTypograf',) class Media: css = { @@ -59,6 +110,7 @@ class AdminContentForm(forms.ModelForm): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.fields['typograph_enabled'].initial = self.instance.bTypograf # AJAX-виджет подгружает список тегов лениво, а здесь мы оставляем # только уже выбранные значения, чтобы не тащить все теги из базы при # открытии формы и не провоцировать лишние запросы к SQLite. @@ -117,7 +169,11 @@ class AdminContent(admin.ModelAdmin): 'fields': ('tags', 'szContentHead', 'imgContentPreview', 'szContentIntro', 'szContentBody') }), ('Типограф', { - 'fields': ('bTypograf', ), + 'fields': ( + ('typograph_enabled', ), + ('typograph_mode', 'typograph_sanitizer', ), + ('typograph_strip_soft_hyphens', 'typograph_hyphenation', ), + ), 'classes': ('collapse',), }), ('Поля для SEO', { @@ -130,6 +186,14 @@ class AdminContent(admin.ModelAdmin): actions_on_top = False actions_on_bottom = False + def save_model(self, request, obj, form, change): + obj.bTypograf = form.cleaned_data.get('typograph_enabled', False) + obj._typograph_strip_soft_hyphens = form.cleaned_data.get('typograph_strip_soft_hyphens', True) + obj._typograph_mode = form.cleaned_data.get('typograph_mode', MODE_MIXED) + obj._typograph_hyphenation = form.cleaned_data.get('typograph_hyphenation', True) + obj._typograph_sanitizer = form.cleaned_data.get('typograph_sanitizer', 'None') + super().save_model(request, obj, form, change) + def ContentHeadSafe(self, obj) -> str: return safe_html_special_symbols(obj.szContentHead) diff --git a/cadpoint/web/models.py b/cadpoint/web/models.py index afa9e03..d55bfaf 100644 --- a/cadpoint/web/models.py +++ b/cadpoint/web/models.py @@ -6,6 +6,14 @@ import logging from django.db import models from django.utils.timezone import now from etpgrf import Hyphenator, Typographer +from etpgrf.config import ( + MODE_MIXED, + MODE_MNEMONIC, + MODE_UNICODE, + SANITIZE_ALL_HTML, + SANITIZE_ETPGRF, + SANITIZE_NONE, +) from filer.fields.image import FilerFileField from taggit.managers import TaggableManager from taggit.models import Tag, TaggedItem @@ -20,15 +28,61 @@ logger = logging.getLogger(__name__) _TYPOGRAPHER_LANGS = 'ru+en' _TYPOGRAPHER_MAX_UNHYPHENATED_LEN = 14 -def _build_typographer(hanging_punctuation=None) -> Typographer: +_TYPOGRAPHER_DEFAULT_MODE = MODE_MIXED +_TYPOGRAPHER_DEFAULT_HYPHENATION = True +_TYPOGRAPHER_DEFAULT_SANITIZER = SANITIZE_NONE +_TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS = True + + +def _normalize_typograph_mode(value: str | None) -> str: + if value in {MODE_MIXED, MODE_UNICODE, MODE_MNEMONIC}: + return str(value) + return _TYPOGRAPHER_DEFAULT_MODE + + +def _normalize_typograph_hyphenation(value) -> bool: + return bool(_TYPOGRAPHER_DEFAULT_HYPHENATION if value is None else value) + + +def _normalize_typograph_sanitizer(value): + if value in (None, '', 'None', SANITIZE_NONE): + return SANITIZE_NONE + if value == SANITIZE_ALL_HTML: + return SANITIZE_ALL_HTML + if value == SANITIZE_ETPGRF: + return SANITIZE_ETPGRF + return SANITIZE_NONE + + +def _strip_soft_hyphens(text: str) -> str: + """Удаляет мягкие переносы в любом виде перед передачей текста в etpgrf.""" + if not text: + return text + return ( + text + .replace("­", "") + .replace("­", "") + .replace("­", "") + .replace("\u00ad", "") + ) + + +def _build_typographer(mode=None, hyphenation=True, sanitizer=None, hanging_punctuation=None) -> Typographer: """Собирает `etpgrf` с едиными настройками для заголовка и текста.""" + normalized_mode = _normalize_typograph_mode(mode) + normalized_hyphenation = _normalize_typograph_hyphenation(hyphenation) + normalized_sanitizer = _normalize_typograph_sanitizer(sanitizer) return Typographer( langs=_TYPOGRAPHER_LANGS, + mode=normalized_mode, process_html=True, - hyphenation=Hyphenator( - langs=_TYPOGRAPHER_LANGS, - max_unhyphenated_len=_TYPOGRAPHER_MAX_UNHYPHENATED_LEN, + hyphenation=( + Hyphenator( + langs=_TYPOGRAPHER_LANGS, + max_unhyphenated_len=_TYPOGRAPHER_MAX_UNHYPHENATED_LEN, + ) if normalized_hyphenation else False ), + sanitizer=normalized_sanitizer, hanging_punctuation=hanging_punctuation, ) @@ -82,7 +136,7 @@ class TbContent(models.Model): # | szContentTitle -- title для SEO | longtext NOT NULL, # | szContentKeywords -- keywords для SEO | longtext NOT NULL, # | szContentDescription -- Description для SEO | longtext NOT NULL, - # | dtContentCreate -- дата и время создания | datetime(6) NOT NULL, + # | dtContentCreate -- дата и время создания | datetime(6) NOT NULL, # | dtContentTimeStamp -- штамп времени (время последнего обновления в базе) | datetime(6) NOT NULL # ============================================================ bContentPublish = models.BooleanField( @@ -145,6 +199,7 @@ class TbContent(models.Model): verbose_name="◉", help_text="Число просмотров" ) + # Поле для удаления. Все будет делаться с помощью виртуальных полей админки bTypograf = models.BooleanField( default=False, verbose_name="Типограф etpgrf", @@ -193,6 +248,14 @@ class TbContent(models.Model): def save(self, *args, **kwargs): # Переопределяем save(), чтобы автоматически типографировать контент перед сохранением. + typograph_mode = getattr(self, '_typograph_mode', _TYPOGRAPHER_DEFAULT_MODE) + typograph_hyphenation = getattr(self, '_typograph_hyphenation', _TYPOGRAPHER_DEFAULT_HYPHENATION) + typograph_sanitizer = getattr(self, '_typograph_sanitizer', _TYPOGRAPHER_DEFAULT_SANITIZER) + typograph_strip_soft_hyphens = getattr( + self, + '_typograph_strip_soft_hyphens', + _TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS, + ) if self.szContentSlug is None or self.szContentSlug == "" or " " in self.szContentSlug: # print("ку-ку", self.szContentHead) base_slug = clean_text_to_slug(self.szContentHead) @@ -205,11 +268,29 @@ class TbContent(models.Model): if self.bTypograf: # `etpgrf` уже умеет HTML-режим и висячую пунктуацию, поэтому здесь # не нужен старый локальный fallback. + # Мягкие переносы убираем заранее: `etpgrf` не очищает их сам, а они + # потом мешают и типографу, и последующей нормализации текста. # Для заголовка включаем левую висячую пунктуацию, а для анонса и # тела текста оставляем обычную обработку без hanging punctuation. - self.szContentHead = _typograph_text(self.szContentHead, _TYPOGRAPHER_HEAD) - self.szContentIntro = _typograph_text(self.szContentIntro, _TYPOGRAPHER_TEXT) - self.szContentBody = _typograph_text(self.szContentBody, _TYPOGRAPHER_TEXT) + if typograph_strip_soft_hyphens: + self.szContentHead = _strip_soft_hyphens(self.szContentHead) + self.szContentIntro = _strip_soft_hyphens(self.szContentIntro) + self.szContentBody = _strip_soft_hyphens(self.szContentBody) + head_typographer = _build_typographer( + mode=typograph_mode, + hyphenation=typograph_hyphenation, + sanitizer=typograph_sanitizer, + hanging_punctuation='left', + ) + text_typographer = _build_typographer( + mode=typograph_mode, + hyphenation=typograph_hyphenation, + sanitizer=typograph_sanitizer, + hanging_punctuation=False, + ) + self.szContentHead = _typograph_text(self.szContentHead, head_typographer) + self.szContentIntro = _typograph_text(self.szContentIntro, text_typographer) + self.szContentBody = _typograph_text(self.szContentBody, text_typographer) self.bTypograf = False if self.dtContentCreate is None: self.dtContentCreate = datetime.datetime.now() diff --git a/cadpoint/web/tests.py b/cadpoint/web/tests.py index a44cde5..5824fad 100644 --- a/cadpoint/web/tests.py +++ b/cadpoint/web/tests.py @@ -3,8 +3,10 @@ from unittest.mock import patch from django.contrib.auth import get_user_model from django.test import SimpleTestCase, TestCase from django.urls import reverse +from etpgrf.config import MODE_UNICODE, SANITIZE_ETPGRF from taggit.models import Tag +from web.admin import AdminContentForm from web.add_function import clean_text_to_slug, safe_html_special_symbols from web.legacy_links import build_canonical_url, replace_legacy_links from web.models import TbContent @@ -50,6 +52,22 @@ class SafeHtmlSpecialSymbolsTests(SimpleTestCase): self.assertEqual(clean_text_to_slug('αβγ ΔΩ'), 'content') self.assertEqual(clean_text_to_slug('₽ € $ ₴ ₿'), 'content') + +class AdminTypographFormTests(SimpleTestCase): + def test_admin_form_exposes_virtual_typograph_fields(self): + form = AdminContentForm() + + self.assertNotIn('bTypograf', form.fields) + self.assertIn('typograph_enabled', form.fields) + self.assertIn('typograph_strip_soft_hyphens', form.fields) + self.assertIn('typograph_mode', form.fields) + self.assertIn('typograph_hyphenation', form.fields) + self.assertIn('typograph_sanitizer', form.fields) + self.assertEqual(form.fields['typograph_mode'].initial, 'mixed') + self.assertTrue(form.fields['typograph_strip_soft_hyphens'].initial) + self.assertTrue(form.fields['typograph_hyphenation'].initial) + self.assertEqual(form.fields['typograph_sanitizer'].initial, 'None') + def test_tbcontent_str_uses_clean_text(self): item = TbContent(id=7, szContentHead='«Привет мир»') @@ -129,17 +147,85 @@ class TypographTests(TestCase): bTypograf=True, ) - with patch('web.models._TYPOGRAPHER_HEAD.process') as head_process_mock, \ - patch('web.models._TYPOGRAPHER_TEXT.process') as text_process_mock: - head_process_mock.side_effect = lambda text: f'HEAD[{text}]' - text_process_mock.side_effect = lambda text: f'TEXT[{text}]' + with patch('web.models._build_typographer') as build_mock: + build_mock.return_value.process.side_effect = lambda text: f'[{text}]' item.save() - self.assertEqual(head_process_mock.call_count, 1) - self.assertEqual(text_process_mock.call_count, 2) - self.assertEqual(item.szContentHead, 'HEAD[«Привет»]') - self.assertEqual(item.szContentIntro, 'TEXT[

Абзац

]') - self.assertEqual(item.szContentBody, 'TEXT[

Тело

]') + self.assertEqual(build_mock.call_count, 2) + self.assertEqual(item.szContentHead, '[«Привет»]') + self.assertEqual(item.szContentIntro, '[

Абзац

]') + self.assertEqual(item.szContentBody, '[

Тело

]') self.assertFalse(item.bTypograf) + def test_save_uses_virtual_typograph_options(self): + item = TbContent( + szContentHead='Привет', + szContentIntro='Текст', + szContentBody='Тело', + bTypograf=True, + ) + item._typograph_mode = MODE_UNICODE + item._typograph_hyphenation = False + item._typograph_sanitizer = SANITIZE_ETPGRF + + with patch('web.models._build_typographer') as build_mock: + fake_typographer = build_mock.return_value + fake_typographer.process.side_effect = lambda text: text + item.save() + + self.assertEqual(build_mock.call_count, 2) + self.assertEqual( + build_mock.call_args_list[0].kwargs, + { + 'mode': MODE_UNICODE, + 'hyphenation': False, + 'sanitizer': SANITIZE_ETPGRF, + 'hanging_punctuation': 'left', + }, + ) + self.assertEqual( + build_mock.call_args_list[1].kwargs, + { + 'mode': MODE_UNICODE, + 'hyphenation': False, + 'sanitizer': SANITIZE_ETPGRF, + 'hanging_punctuation': False, + }, + ) + + def test_save_strips_soft_hyphens_before_typograph(self): + item = TbContent( + szContentHead='При­вет\u00ad', + szContentIntro='А­нонс', + szContentBody='Те­ло\u00ad', + bTypograf=True, + ) + + with patch('web.models._build_typographer') as build_mock: + build_mock.return_value.process.side_effect = lambda text: f'[{text}]' + item.save() + + self.assertEqual(build_mock.call_count, 2) + self.assertEqual(item.szContentHead, '[Привет]') + self.assertEqual(item.szContentIntro, '[Анонс]') + self.assertEqual(item.szContentBody, '[Тело]') + self.assertFalse(item.bTypograf) + + def test_show_item_increments_hits_without_touching_timestamp(self): + item = TbContent.objects.create( + szContentHead='Проверка просмотра', + szContentIntro='Короткий анонс', + szContentBody='Полный текст', + szContentSlug='proverka-prosmotra', + bContentPublish=True, + ) + timestamp_before = item.dtContentTimeStamp + + response = self.client.get(f'/item/{item.id}-{item.szContentSlug}') + + self.assertEqual(response.status_code, 200) + item.refresh_from_db() + self.assertEqual(item.iContentHits, 1) + self.assertEqual(item.dtContentTimeStamp, timestamp_before) +