mirror of
https://github.com/BEARlogin/max-telegram-bridge-bot.git
synced 2026-04-26 10:50:57 +00:00
Some checks failed
Build / build (push) Has been cancelled
tgEntitiesToMarkdown used a linear walk that output each entity's text independently — when multiple entities covered the same range (e.g. bold+italic), the text was duplicated for each entity. Rewrote to tag-insertion approach (matching maxMarkupsToHTML): collect open/close markers at UTF-16 positions, sort with proper nesting order, then walk text inserting markers without duplication. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
178 lines
4.7 KiB
Go
178 lines
4.7 KiB
Go
package main
|
||
|
||
import (
|
||
"fmt"
|
||
"html"
|
||
"sort"
|
||
"strings"
|
||
"unicode/utf16"
|
||
|
||
maxschemes "github.com/max-messenger/max-bot-api-client-go/schemes"
|
||
)
|
||
|
||
// --- TG Entities → Markdown (для MAX) ---
|
||
|
||
// tgEntitiesToMarkdown конвертирует TG text + entities в markdown-текст для MAX.
|
||
// Использует tag-insertion подход для корректной обработки вложенных/перекрывающихся entities
|
||
// (например bold+italic на одном тексте).
|
||
func tgEntitiesToMarkdown(text string, entities []Entity) string {
|
||
if len(entities) == 0 {
|
||
return text
|
||
}
|
||
|
||
// Конвертируем в UTF-16 для корректных offsets (TG использует UTF-16)
|
||
runes := []rune(text)
|
||
utf16units := utf16.Encode(runes)
|
||
|
||
type tag struct {
|
||
pos int
|
||
open bool
|
||
idx int // индекс entity — для правильного порядка вложенных тегов
|
||
text string
|
||
}
|
||
|
||
var tags []tag
|
||
for i, e := range entities {
|
||
var open, close string
|
||
switch e.Type {
|
||
case "bold":
|
||
open, close = "**", "**"
|
||
case "italic":
|
||
open, close = "_", "_"
|
||
case "code":
|
||
open, close = "`", "`"
|
||
case "pre":
|
||
open, close = "```\n", "\n```"
|
||
case "strikethrough":
|
||
open, close = "~~", "~~"
|
||
case "underline":
|
||
// MAX markdown не поддерживает underline — пропускаем
|
||
continue
|
||
case "text_link":
|
||
open = "["
|
||
close = fmt.Sprintf("](%s)", e.URL)
|
||
default:
|
||
continue
|
||
}
|
||
end := e.Offset + e.Length
|
||
if end > len(utf16units) {
|
||
end = len(utf16units)
|
||
}
|
||
tags = append(tags, tag{pos: e.Offset, open: true, idx: i, text: open})
|
||
tags = append(tags, tag{pos: end, open: false, idx: i, text: close})
|
||
}
|
||
|
||
if len(tags) == 0 {
|
||
return text
|
||
}
|
||
|
||
sort.Slice(tags, func(i, j int) bool {
|
||
if tags[i].pos != tags[j].pos {
|
||
return tags[i].pos < tags[j].pos
|
||
}
|
||
// На одной позиции: close перед open (для смежных entities)
|
||
if tags[i].open != tags[j].open {
|
||
return !tags[i].open
|
||
}
|
||
// Среди open на одной позиции: по порядку entity
|
||
if tags[i].open {
|
||
return tags[i].idx < tags[j].idx
|
||
}
|
||
// Среди close на одной позиции: в обратном порядке (правильная вложенность)
|
||
return tags[i].idx > tags[j].idx
|
||
})
|
||
|
||
var sb strings.Builder
|
||
tagIdx := 0
|
||
for i := 0; i <= len(utf16units); i++ {
|
||
for tagIdx < len(tags) && tags[tagIdx].pos == i {
|
||
sb.WriteString(tags[tagIdx].text)
|
||
tagIdx++
|
||
}
|
||
if i < len(utf16units) {
|
||
if utf16.IsSurrogate(rune(utf16units[i])) && i+1 < len(utf16units) {
|
||
r := utf16.DecodeRune(rune(utf16units[i]), rune(utf16units[i+1]))
|
||
sb.WriteRune(r)
|
||
i++
|
||
} else {
|
||
sb.WriteRune(rune(utf16units[i]))
|
||
}
|
||
}
|
||
}
|
||
return sb.String()
|
||
}
|
||
|
||
// utf16ToString конвертирует UTF-16 slice обратно в Go string.
|
||
func utf16ToString(units []uint16) string {
|
||
runes := utf16.Decode(units)
|
||
return string(runes)
|
||
}
|
||
|
||
// --- MAX Markups → TG HTML ---
|
||
|
||
// maxMarkupsToHTML конвертирует MAX text + markups в TG-совместимый HTML.
|
||
func maxMarkupsToHTML(text string, markups []maxschemes.MarkUp) string {
|
||
if len(markups) == 0 {
|
||
return html.EscapeString(text)
|
||
}
|
||
|
||
runes := []rune(text)
|
||
utf16units := utf16.Encode(runes)
|
||
|
||
type tag struct {
|
||
pos int
|
||
open bool
|
||
order int
|
||
tag string
|
||
}
|
||
|
||
var tags []tag
|
||
for _, m := range markups {
|
||
var openTag, closeTag string
|
||
switch m.Type {
|
||
case maxschemes.MarkupStrong:
|
||
openTag, closeTag = "<b>", "</b>"
|
||
case maxschemes.MarkupEmphasized:
|
||
openTag, closeTag = "<i>", "</i>"
|
||
case maxschemes.MarkupMonospaced:
|
||
openTag, closeTag = "<code>", "</code>"
|
||
case maxschemes.MarkupStrikethrough:
|
||
openTag, closeTag = "<s>", "</s>"
|
||
case maxschemes.MarkupUnderline:
|
||
openTag, closeTag = "<u>", "</u>"
|
||
case maxschemes.MarkupLink:
|
||
openTag = `<a href="` + html.EscapeString(m.URL) + `">`
|
||
closeTag = "</a>"
|
||
default:
|
||
continue
|
||
}
|
||
tags = append(tags, tag{pos: m.From, open: true, order: 0, tag: openTag})
|
||
tags = append(tags, tag{pos: m.From + m.Length, open: false, order: 1, tag: closeTag})
|
||
}
|
||
|
||
sort.Slice(tags, func(i, j int) bool {
|
||
if tags[i].pos != tags[j].pos {
|
||
return tags[i].pos < tags[j].pos
|
||
}
|
||
return tags[i].order > tags[j].order
|
||
})
|
||
|
||
var sb strings.Builder
|
||
tagIdx := 0
|
||
for i := 0; i <= len(utf16units); i++ {
|
||
for tagIdx < len(tags) && tags[tagIdx].pos == i {
|
||
sb.WriteString(tags[tagIdx].tag)
|
||
tagIdx++
|
||
}
|
||
if i < len(utf16units) {
|
||
if utf16.IsSurrogate(rune(utf16units[i])) && i+1 < len(utf16units) {
|
||
r := utf16.DecodeRune(rune(utf16units[i]), rune(utf16units[i+1]))
|
||
sb.WriteString(html.EscapeString(string(r)))
|
||
i++
|
||
} else {
|
||
sb.WriteString(html.EscapeString(string(rune(utf16units[i]))))
|
||
}
|
||
}
|
||
}
|
||
return sb.String()
|
||
}
|