max-telegram-bridge-bot/markup.go
Andrey Lugovskoy f4baf9b211
Some checks failed
Build / build (push) Has been cancelled
Fix overlapping entities (bold+italic+strike) duplicating text
tgEntitiesToMarkdown used a linear walk that output each entity's text
independently — when multiple entities covered the same range (e.g.
bold+italic), the text was duplicated for each entity.

Rewrote to tag-insertion approach (matching maxMarkupsToHTML): collect
open/close markers at UTF-16 positions, sort with proper nesting order,
then walk text inserting markers without duplication.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 22:31:12 +03:00

178 lines
4.7 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"fmt"
"html"
"sort"
"strings"
"unicode/utf16"
maxschemes "github.com/max-messenger/max-bot-api-client-go/schemes"
)
// --- TG Entities → Markdown (для MAX) ---
// tgEntitiesToMarkdown конвертирует TG text + entities в markdown-текст для MAX.
// Использует tag-insertion подход для корректной обработки вложенных/перекрывающихся entities
// (например bold+italic на одном тексте).
func tgEntitiesToMarkdown(text string, entities []Entity) string {
if len(entities) == 0 {
return text
}
// Конвертируем в UTF-16 для корректных offsets (TG использует UTF-16)
runes := []rune(text)
utf16units := utf16.Encode(runes)
type tag struct {
pos int
open bool
idx int // индекс entity — для правильного порядка вложенных тегов
text string
}
var tags []tag
for i, e := range entities {
var open, close string
switch e.Type {
case "bold":
open, close = "**", "**"
case "italic":
open, close = "_", "_"
case "code":
open, close = "`", "`"
case "pre":
open, close = "```\n", "\n```"
case "strikethrough":
open, close = "~~", "~~"
case "underline":
// MAX markdown не поддерживает underline — пропускаем
continue
case "text_link":
open = "["
close = fmt.Sprintf("](%s)", e.URL)
default:
continue
}
end := e.Offset + e.Length
if end > len(utf16units) {
end = len(utf16units)
}
tags = append(tags, tag{pos: e.Offset, open: true, idx: i, text: open})
tags = append(tags, tag{pos: end, open: false, idx: i, text: close})
}
if len(tags) == 0 {
return text
}
sort.Slice(tags, func(i, j int) bool {
if tags[i].pos != tags[j].pos {
return tags[i].pos < tags[j].pos
}
// На одной позиции: close перед open (для смежных entities)
if tags[i].open != tags[j].open {
return !tags[i].open
}
// Среди open на одной позиции: по порядку entity
if tags[i].open {
return tags[i].idx < tags[j].idx
}
// Среди close на одной позиции: в обратном порядке (правильная вложенность)
return tags[i].idx > tags[j].idx
})
var sb strings.Builder
tagIdx := 0
for i := 0; i <= len(utf16units); i++ {
for tagIdx < len(tags) && tags[tagIdx].pos == i {
sb.WriteString(tags[tagIdx].text)
tagIdx++
}
if i < len(utf16units) {
if utf16.IsSurrogate(rune(utf16units[i])) && i+1 < len(utf16units) {
r := utf16.DecodeRune(rune(utf16units[i]), rune(utf16units[i+1]))
sb.WriteRune(r)
i++
} else {
sb.WriteRune(rune(utf16units[i]))
}
}
}
return sb.String()
}
// utf16ToString конвертирует UTF-16 slice обратно в Go string.
func utf16ToString(units []uint16) string {
runes := utf16.Decode(units)
return string(runes)
}
// --- MAX Markups → TG HTML ---
// maxMarkupsToHTML конвертирует MAX text + markups в TG-совместимый HTML.
func maxMarkupsToHTML(text string, markups []maxschemes.MarkUp) string {
if len(markups) == 0 {
return html.EscapeString(text)
}
runes := []rune(text)
utf16units := utf16.Encode(runes)
type tag struct {
pos int
open bool
order int
tag string
}
var tags []tag
for _, m := range markups {
var openTag, closeTag string
switch m.Type {
case maxschemes.MarkupStrong:
openTag, closeTag = "<b>", "</b>"
case maxschemes.MarkupEmphasized:
openTag, closeTag = "<i>", "</i>"
case maxschemes.MarkupMonospaced:
openTag, closeTag = "<code>", "</code>"
case maxschemes.MarkupStrikethrough:
openTag, closeTag = "<s>", "</s>"
case maxschemes.MarkupUnderline:
openTag, closeTag = "<u>", "</u>"
case maxschemes.MarkupLink:
openTag = `<a href="` + html.EscapeString(m.URL) + `">`
closeTag = "</a>"
default:
continue
}
tags = append(tags, tag{pos: m.From, open: true, order: 0, tag: openTag})
tags = append(tags, tag{pos: m.From + m.Length, open: false, order: 1, tag: closeTag})
}
sort.Slice(tags, func(i, j int) bool {
if tags[i].pos != tags[j].pos {
return tags[i].pos < tags[j].pos
}
return tags[i].order > tags[j].order
})
var sb strings.Builder
tagIdx := 0
for i := 0; i <= len(utf16units); i++ {
for tagIdx < len(tags) && tags[tagIdx].pos == i {
sb.WriteString(tags[tagIdx].tag)
tagIdx++
}
if i < len(utf16units) {
if utf16.IsSurrogate(rune(utf16units[i])) && i+1 < len(utf16units) {
r := utf16.DecodeRune(rune(utf16units[i]), rune(utf16units[i+1]))
sb.WriteString(html.EscapeString(string(r)))
i++
} else {
sb.WriteString(html.EscapeString(string(rune(utf16units[i]))))
}
}
}
return sb.String()
}