mod: виртуальные поля типографа

This commit is contained in:
2026-04-11 16:09:38 +03:00
parent 50067b9bd2
commit 117af4b125
3 changed files with 250 additions and 19 deletions

View File

@@ -5,11 +5,25 @@ from django.db import models
from django.forms import TextInput, Textarea
from django.urls import reverse
from django_select2.forms import Select2TagWidget
from etpgrf.config import MODE_MIXED, MODE_MNEMONIC, MODE_UNICODE, SANITIZE_ALL_HTML, SANITIZE_ETPGRF
from web.models import TbContent
from web.add_function import safe_html_special_symbols
from cadpoint import settings
# (value, label) pairs for the admin "output mode" select.
# The values are the mode constants imported from `etpgrf.config`.
TYPOGRAPH_MODE_CHOICES = [
(MODE_MIXED, 'Смешанный (Mixed)'),
(MODE_UNICODE, 'Юникод (Unicode)'),
(MODE_MNEMONIC, 'Мнемоники'),
]
# (value, label) pairs for the admin sanitizer select.
# The last entry uses the literal string 'None' as its value (label: "no cleaning").
TYPOGRAPH_SANITIZER_CHOICES = [
(SANITIZE_ALL_HTML, 'Очистка от HTML на входе'),
(SANITIZE_ETPGRF, 'Очистка висячей пунктуации'),
('None', 'Без очистки'),
]
class AjaxCommaSeparatedSelect2TagWidget(Select2TagWidget):
"""
Select2-виджет для `taggit`.
@@ -48,9 +62,46 @@ class AjaxCommaSeparatedSelect2TagWidget(Select2TagWidget):
class AdminContentForm(forms.ModelForm):
# Form-level checkbox that switches typographing on for the next save.
# It is declared on the form (not the model); the link text uses the
# spelling "ETPGRF" so that it matches the library name used in the label.
typograph_enabled = forms.BooleanField(
    label='Типограф etpgrf вкл.',
    required=False,
    initial=True,
    help_text="Обработать через <a href=\"https://typograph.cube2.ru/\""
              " target=\"_blank\">Типограф ETPGRF</a><br />"
              "<small><u>СТАБИЛЬНЫЙ И СОВРЕМЕННЫЙ ТИПОГРАФ, РЕКОМЕНДУЕМ</u><br />"
              "&laquo;приклеивает&raquo; союзы и предлоги, поддерживает неразрывные конструкции, "
              "замена тире, кавычек и дефисов, расстановка &laquo;мягких переносов&raquo; "
              "в словах длиннее 14 символов, <!-- убирает &laquo;вдовы&raquo; &laquo;сироты&raquo; (кроме "
              "заголовков), расставляет абзацы (кроме заголовков), расшифровывает "
              "аббревиатуры (те, что знает и кроме заголовков), --> висячая "
              "пунктуация (только в заголовках) и т.п.</small>"
)
# Form-level checkbox: remove soft hyphens from the text before typographing.
# Checked by default so the form agrees with the `True` fallback that
# `save_model` uses for this option.
typograph_strip_soft_hyphens = forms.BooleanField(
    label='Удалять переносы',
    required=False,
    initial=True,
    help_text='Убирает `&amp;shy;`, `&amp;#173;` и Unicode-символ мягкого переноса<br />'
              'перед типографом.',
)
# Form-level select for the typographer output mode; options come from
# TYPOGRAPH_MODE_CHOICES and the mixed mode is preselected.
typograph_mode = forms.ChoiceField(
label='Режим вывода',
choices=TYPOGRAPH_MODE_CHOICES,
initial=MODE_MIXED,
)
# Form-level checkbox for hyphenation; optional and checked by default.
typograph_hyphenation = forms.BooleanField(
label='Расстановка переносов',
required=False,
initial=True,
)
# Form-level select for the sanitizer; options come from
# TYPOGRAPH_SANITIZER_CHOICES and the 'None' entry is preselected.
typograph_sanitizer = forms.ChoiceField(
label='Санитайзинг',
choices=TYPOGRAPH_SANITIZER_CHOICES,
initial='None',
)
class Meta:
# Build the form from every TbContent field except `bTypograf`,
# which is left out of the generated form fields.
model = TbContent
fields = '__all__'
exclude = ('bTypograf',)
class Media:
css = {
@@ -59,6 +110,7 @@ class AdminContentForm(forms.ModelForm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.fields['typograph_enabled'].initial = self.instance.bTypograf
# AJAX-виджет подгружает список тегов лениво, а здесь мы оставляем
# только уже выбранные значения, чтобы не тащить все теги из базы при
# открытии формы и не провоцировать лишние запросы к SQLite.
@@ -117,7 +169,11 @@ class AdminContent(admin.ModelAdmin):
'fields': ('tags', 'szContentHead', 'imgContentPreview', 'szContentIntro', 'szContentBody')
}),
('Типограф', {
'fields': ('bTypograf', ),
'fields': (
('typograph_enabled', ),
('typograph_mode', 'typograph_sanitizer', ),
('typograph_strip_soft_hyphens', 'typograph_hyphenation', ),
),
'classes': ('collapse',),
}),
('Поля для SEO', {
@@ -130,6 +186,14 @@ class AdminContent(admin.ModelAdmin):
actions_on_top = False
actions_on_bottom = False
def save_model(self, request, obj, form, change):
    """Copy the typograph form options onto the object, then save it.

    `typograph_enabled` is written to the `bTypograf` model field; the other
    options are attached as underscore-prefixed attributes on `obj` before
    the parent `save_model` is called.
    """
    cleaned = form.cleaned_data
    obj.bTypograf = cleaned.get('typograph_enabled', False)

    # (form field name, fallback used when the key is absent from cleaned_data)
    option_fallbacks = (
        ('typograph_strip_soft_hyphens', True),
        ('typograph_mode', MODE_MIXED),
        ('typograph_hyphenation', True),
        ('typograph_sanitizer', 'None'),
    )
    for field_name, fallback in option_fallbacks:
        setattr(obj, f'_{field_name}', cleaned.get(field_name, fallback))

    super().save_model(request, obj, form, change)
def ContentHeadSafe(self, obj) -> str:
    """Return the object's headline passed through `safe_html_special_symbols`."""
    raw_head = obj.szContentHead
    return safe_html_special_symbols(raw_head)

View File

@@ -6,6 +6,14 @@ import logging
from django.db import models
from django.utils.timezone import now
from etpgrf import Hyphenator, Typographer
from etpgrf.config import (
MODE_MIXED,
MODE_MNEMONIC,
MODE_UNICODE,
SANITIZE_ALL_HTML,
SANITIZE_ETPGRF,
SANITIZE_NONE,
)
from filer.fields.image import FilerFileField
from taggit.managers import TaggableManager
from taggit.models import Tag, TaggedItem
@@ -20,15 +28,61 @@ logger = logging.getLogger(__name__)
_TYPOGRAPHER_LANGS = 'ru+en'
_TYPOGRAPHER_MAX_UNHYPHENATED_LEN = 14
def _build_typographer(hanging_punctuation=None) -> Typographer:
# Module-level defaults for the typograph options (output mode, hyphenation
# on/off, sanitizer, and whether soft hyphens are stripped first).
_TYPOGRAPHER_DEFAULT_MODE = MODE_MIXED
_TYPOGRAPHER_DEFAULT_HYPHENATION = True
_TYPOGRAPHER_DEFAULT_SANITIZER = SANITIZE_NONE
_TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS = True
def _normalize_typograph_mode(value: str | None) -> str:
    """Return `value` as a string when it is a known mode, else the default mode."""
    supported_modes = {MODE_MIXED, MODE_UNICODE, MODE_MNEMONIC}
    return str(value) if value in supported_modes else _TYPOGRAPHER_DEFAULT_MODE
def _normalize_typograph_hyphenation(value) -> bool:
return bool(_TYPOGRAPHER_DEFAULT_HYPHENATION if value is None else value)
def _normalize_typograph_sanitizer(value):
    """Map a raw sanitizer choice onto one of the `etpgrf` sanitizer constants.

    `None`, the empty string, the string 'None' and `SANITIZE_NONE` itself all
    mean "no sanitizing"; any unrecognised value is treated the same way.
    """
    if value in (None, '', 'None', SANITIZE_NONE):
        return SANITIZE_NONE
    recognised = (SANITIZE_ALL_HTML, SANITIZE_ETPGRF)
    # Return the constant itself (not the caller's object) for a known value.
    return next((known for known in recognised if value == known), SANITIZE_NONE)
def _strip_soft_hyphens(text: str) -> str:
"""Удаляет мягкие переносы в любом виде перед передачей текста в etpgrf."""
if not text:
return text
return (
text
.replace("&shy;", "")
.replace("&#173;", "")
.replace("&#xad;", "")
.replace("\u00ad", "")
)
def _build_typographer(mode=None, hyphenation=True, sanitizer=None, hanging_punctuation=None) -> Typographer:
"""Build an `etpgrf` Typographer with shared settings for the headline and the text.

`mode`, `hyphenation` and `sanitizer` go through the module's `_normalize_*`
helpers first; `hanging_punctuation` is forwarded to `Typographer` unchanged.
"""
# Normalise the caller-supplied options before handing them to etpgrf.
normalized_mode = _normalize_typograph_mode(mode)
normalized_hyphenation = _normalize_typograph_hyphenation(hyphenation)
normalized_sanitizer = _normalize_typograph_sanitizer(sanitizer)
return Typographer(
langs=_TYPOGRAPHER_LANGS,
mode=normalized_mode,
process_html=True,
# NOTE(review): the next three lines look like the removed (pre-change) side
# of the diff, shown next to its replacement below — confirm before applying.
hyphenation=Hyphenator(
langs=_TYPOGRAPHER_LANGS,
max_unhyphenated_len=_TYPOGRAPHER_MAX_UNHYPHENATED_LEN,
# Pass a configured Hyphenator when hyphenation is enabled, otherwise False.
hyphenation=(
Hyphenator(
langs=_TYPOGRAPHER_LANGS,
max_unhyphenated_len=_TYPOGRAPHER_MAX_UNHYPHENATED_LEN,
) if normalized_hyphenation else False
),
sanitizer=normalized_sanitizer,
hanging_punctuation=hanging_punctuation,
)
@@ -82,7 +136,7 @@ class TbContent(models.Model):
# | szContentTitle -- title для SEO | longtext NOT NULL,
# | szContentKeywords -- keywords для SEO | longtext NOT NULL,
# | szContentDescription -- Description для SEO | longtext NOT NULL,
# | dtContentCreate -- дата и время создания | datetime(6) NOT NULL,
# | dtContentCreate -- дата и время создания | datetime(6) NOT NULL,
# | dtContentTimeStamp -- штамп времени (время последнего обновления в базе) | datetime(6) NOT NULL
# ============================================================
bContentPublish = models.BooleanField(
@@ -145,6 +199,7 @@ class TbContent(models.Model):
verbose_name="",
help_text="Число просмотров"
)
# Поле для удаления. Все будет делаться с помощью виртуальных полей админки
bTypograf = models.BooleanField(
default=False,
verbose_name="Типограф etpgrf",
@@ -193,6 +248,14 @@ class TbContent(models.Model):
def save(self, *args, **kwargs):
# Переопределяем save(), чтобы автоматически типографировать контент перед сохранением.
typograph_mode = getattr(self, '_typograph_mode', _TYPOGRAPHER_DEFAULT_MODE)
typograph_hyphenation = getattr(self, '_typograph_hyphenation', _TYPOGRAPHER_DEFAULT_HYPHENATION)
typograph_sanitizer = getattr(self, '_typograph_sanitizer', _TYPOGRAPHER_DEFAULT_SANITIZER)
typograph_strip_soft_hyphens = getattr(
self,
'_typograph_strip_soft_hyphens',
_TYPOGRAPHER_DEFAULT_STRIP_SOFT_HYPHENS,
)
if self.szContentSlug is None or self.szContentSlug == "" or " " in self.szContentSlug:
# print("ку-ку", self.szContentHead)
base_slug = clean_text_to_slug(self.szContentHead)
@@ -205,11 +268,29 @@ class TbContent(models.Model):
if self.bTypograf:
# `etpgrf` уже умеет HTML-режим и висячую пунктуацию, поэтому здесь
# не нужен старый локальный fallback.
# Мягкие переносы убираем заранее: `etpgrf` не очищает их сам, а они
# потом мешают и типографу, и последующей нормализации текста.
# Для заголовка включаем левую висячую пунктуацию, а для анонса и
# тела текста оставляем обычную обработку без hanging punctuation.
self.szContentHead = _typograph_text(self.szContentHead, _TYPOGRAPHER_HEAD)
self.szContentIntro = _typograph_text(self.szContentIntro, _TYPOGRAPHER_TEXT)
self.szContentBody = _typograph_text(self.szContentBody, _TYPOGRAPHER_TEXT)
if typograph_strip_soft_hyphens:
self.szContentHead = _strip_soft_hyphens(self.szContentHead)
self.szContentIntro = _strip_soft_hyphens(self.szContentIntro)
self.szContentBody = _strip_soft_hyphens(self.szContentBody)
head_typographer = _build_typographer(
mode=typograph_mode,
hyphenation=typograph_hyphenation,
sanitizer=typograph_sanitizer,
hanging_punctuation='left',
)
text_typographer = _build_typographer(
mode=typograph_mode,
hyphenation=typograph_hyphenation,
sanitizer=typograph_sanitizer,
hanging_punctuation=False,
)
self.szContentHead = _typograph_text(self.szContentHead, head_typographer)
self.szContentIntro = _typograph_text(self.szContentIntro, text_typographer)
self.szContentBody = _typograph_text(self.szContentBody, text_typographer)
self.bTypograf = False
if self.dtContentCreate is None:
self.dtContentCreate = datetime.datetime.now()

View File

@@ -3,8 +3,10 @@ from unittest.mock import patch
from django.contrib.auth import get_user_model
from django.test import SimpleTestCase, TestCase
from django.urls import reverse
from etpgrf.config import MODE_UNICODE, SANITIZE_ETPGRF
from taggit.models import Tag
from web.admin import AdminContentForm
from web.add_function import clean_text_to_slug, safe_html_special_symbols
from web.legacy_links import build_canonical_url, replace_legacy_links
from web.models import TbContent
@@ -50,6 +52,22 @@ class SafeHtmlSpecialSymbolsTests(SimpleTestCase):
self.assertEqual(clean_text_to_slug('αβγ ΔΩ'), 'content')
self.assertEqual(clean_text_to_slug('₽ € $ ₴ ₿'), 'content')
class AdminTypographFormTests(SimpleTestCase):
def test_admin_form_exposes_virtual_typograph_fields(self):
    """The admin form hides `bTypograf` and exposes the typograph option fields."""
    fields = AdminContentForm().fields

    self.assertNotIn('bTypograf', fields)
    for field_name in (
        'typograph_enabled',
        'typograph_strip_soft_hyphens',
        'typograph_mode',
        'typograph_hyphenation',
        'typograph_sanitizer',
    ):
        self.assertIn(field_name, fields)

    # Declared initial values of the option fields.
    self.assertEqual(fields['typograph_mode'].initial, 'mixed')
    self.assertTrue(fields['typograph_strip_soft_hyphens'].initial)
    self.assertTrue(fields['typograph_hyphenation'].initial)
    self.assertEqual(fields['typograph_sanitizer'].initial, 'None')
def test_tbcontent_str_uses_clean_text(self):
item = TbContent(id=7, szContentHead='<b>&laquo;Привет&nbsp;мир&raquo;</b>')
@@ -129,17 +147,85 @@ class TypographTests(TestCase):
bTypograf=True,
)
with patch('web.models._TYPOGRAPHER_HEAD.process') as head_process_mock, \
patch('web.models._TYPOGRAPHER_TEXT.process') as text_process_mock:
head_process_mock.side_effect = lambda text: f'HEAD[{text}]'
text_process_mock.side_effect = lambda text: f'TEXT[{text}]'
with patch('web.models._build_typographer') as build_mock:
build_mock.return_value.process.side_effect = lambda text: f'[{text}]'
item.save()
self.assertEqual(head_process_mock.call_count, 1)
self.assertEqual(text_process_mock.call_count, 2)
self.assertEqual(item.szContentHead, 'HEAD[«Привет»]')
self.assertEqual(item.szContentIntro, 'TEXT[<p>Абзац</p>]')
self.assertEqual(item.szContentBody, 'TEXT[<p>Тело</p>]')
self.assertEqual(build_mock.call_count, 2)
self.assertEqual(item.szContentHead, '[«Привет»]')
self.assertEqual(item.szContentIntro, '[<p>Абзац</p>]')
self.assertEqual(item.szContentBody, '[<p>Тело</p>]')
self.assertFalse(item.bTypograf)
def test_save_uses_virtual_typograph_options(self):
    """Options attached to the instance are forwarded to `_build_typographer`."""
    item = TbContent(
        szContentHead='Привет',
        szContentIntro='Текст',
        szContentBody='Тело',
        bTypograf=True,
    )
    item._typograph_mode = MODE_UNICODE
    item._typograph_hyphenation = False
    item._typograph_sanitizer = SANITIZE_ETPGRF

    # Keyword arguments expected in both builder calls.
    shared_kwargs = {
        'mode': MODE_UNICODE,
        'hyphenation': False,
        'sanitizer': SANITIZE_ETPGRF,
    }

    with patch('web.models._build_typographer') as build_mock:
        build_mock.return_value.process.side_effect = lambda text: text
        item.save()

    self.assertEqual(build_mock.call_count, 2)
    head_call, text_call = build_mock.call_args_list
    self.assertEqual(head_call.kwargs, {**shared_kwargs, 'hanging_punctuation': 'left'})
    self.assertEqual(text_call.kwargs, {**shared_kwargs, 'hanging_punctuation': False})
def test_save_strips_soft_hyphens_before_typograph(self):
    """Soft hyphens are gone from the text by the time the typographer sees it."""
    item = TbContent(
        szContentHead='При&shy;вет\u00ad',
        szContentIntro='А&#173;нонс',
        szContentBody='Те&shy;ло\u00ad',
        bTypograf=True,
    )

    def bracket(text):
        # Wrap the input so the assertions show exactly what `process` received.
        return f'[{text}]'

    with patch('web.models._build_typographer') as build_mock:
        build_mock.return_value.process.side_effect = bracket
        item.save()

    self.assertEqual(build_mock.call_count, 2)
    expected_values = (
        ('szContentHead', '[Привет]'),
        ('szContentIntro', '[Анонс]'),
        ('szContentBody', '[Тело]'),
    )
    for attr_name, expected in expected_values:
        self.assertEqual(getattr(item, attr_name), expected)
    self.assertFalse(item.bTypograf)
def test_show_item_increments_hits_without_touching_timestamp(self):
item = TbContent.objects.create(
szContentHead='Проверка просмотра',
szContentIntro='Короткий анонс',
szContentBody='Полный текст',
szContentSlug='proverka-prosmotra',
bContentPublish=True,
)
timestamp_before = item.dtContentTimeStamp
response = self.client.get(f'/item/{item.id}-{item.szContentSlug}')
self.assertEqual(response.status_code, 200)
item.refresh_from_db()
self.assertEqual(item.iContentHits, 1)
self.assertEqual(item.dtContentTimeStamp, timestamp_before)