2020-01-09 21:16:40 +00:00
|
|
|
package formatter
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sort"
|
2022-03-12 01:27:15 +00:00
|
|
|
"unicode"
|
2020-01-09 21:16:40 +00:00
|
|
|
|
|
|
|
log "github.com/sirupsen/logrus"
|
2022-01-17 20:45:40 +00:00
|
|
|
"github.com/zelenin/go-tdlib/client"
|
2020-01-09 21:16:40 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Insertion describes a run of markup runes that has to be spliced into the
// text at a particular position.
type Insertion struct {
	// Offset is the position in the text at which Runes are inserted.
	Offset int32
	// Runes is the markup to insert at Offset.
	Runes []rune
}
|
|
|
|
|
|
|
|
// InsertionStack contains the sequence of insertions
|
|
|
|
// from the start or from the end
|
|
|
|
type InsertionStack []*Insertion
|
|
|
|
|
2021-12-18 16:04:24 +00:00
|
|
|
// Markup delimiters used by the entity converters. Bold and strikethrough
// differ between Markdown and XEP-0393; the remaining ones are shared.
var (
	boldRunesMarkdown   = []rune("**")
	boldRunesXEP0393    = []rune("*")
	italicRunes         = []rune("_")
	strikeRunesMarkdown = []rune("~~")
	strikeRunesXEP0393  = []rune("~")
	codeRunes           = []rune("`")
	preRuneStart        = []rune("```\n")
	preRuneEnd          = []rune("\n```")
)
|
2020-01-09 21:16:40 +00:00
|
|
|
|
2022-03-11 16:12:36 +00:00
|
|
|
// rebalance pumps all the values until the given offset to current stack (growing
|
2020-01-09 21:16:40 +00:00
|
|
|
// from start) from given stack (growing from end); should be called
|
|
|
|
// before any insertions to the current stack at the given offset
|
|
|
|
func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
|
|
|
|
for len(s2) > 0 && s2[len(s2)-1].Offset <= offset {
|
|
|
|
s = append(s, s2[len(s2)-1])
|
|
|
|
s2 = s2[:len(s2)-1]
|
|
|
|
}
|
|
|
|
|
|
|
|
return s, s2
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewIterator is a second order function that sequentially scans and returns
|
|
|
|
// stack elements; starts returning nil when elements are ended
|
|
|
|
func (s InsertionStack) NewIterator() func() *Insertion {
|
|
|
|
i := -1
|
|
|
|
|
|
|
|
return func() *Insertion {
|
|
|
|
i++
|
|
|
|
if i < len(s) {
|
|
|
|
return s[i]
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// SortEntities arranges the entities in traversal-ready order
|
|
|
|
func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
|
|
|
|
sortedEntities := make([]*client.TextEntity, len(entities))
|
|
|
|
copy(sortedEntities, entities)
|
|
|
|
|
|
|
|
sort.Slice(sortedEntities, func(i int, j int) bool {
|
2022-03-13 12:55:59 +00:00
|
|
|
entity1 := sortedEntities[i]
|
|
|
|
entity2 := sortedEntities[j]
|
2020-01-09 21:16:40 +00:00
|
|
|
if entity1.Offset < entity2.Offset {
|
|
|
|
return true
|
|
|
|
} else if entity1.Offset == entity2.Offset {
|
|
|
|
return entity1.Length > entity2.Length
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
})
|
|
|
|
return sortedEntities
|
|
|
|
}
|
|
|
|
|
2022-03-11 16:12:36 +00:00
|
|
|
// MergeAdjacentEntities merges entities of a same kind
|
|
|
|
func MergeAdjacentEntities(entities []*client.TextEntity) []*client.TextEntity {
|
|
|
|
mergedEntities := make([]*client.TextEntity, 0, len(entities))
|
|
|
|
excludedIndices := make(map[int]bool)
|
|
|
|
|
|
|
|
for i, entity := range entities {
|
2022-03-14 20:00:00 +00:00
|
|
|
if excludedIndices[i] || entity.Type == nil {
|
2022-03-11 16:12:36 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
typ := entity.Type.TextEntityTypeType()
|
|
|
|
start := entity.Offset
|
|
|
|
end := start + entity.Length
|
|
|
|
ei := make(map[int]bool)
|
|
|
|
|
|
|
|
// collect continuations
|
|
|
|
for j, entity2 := range entities[i+1:] {
|
2022-03-14 20:00:00 +00:00
|
|
|
if entity2.Type != nil && entity2.Type.TextEntityTypeType() == typ && entity2.Offset == end {
|
2022-03-11 16:12:36 +00:00
|
|
|
end += entity2.Length
|
|
|
|
ei[j+i+1] = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// check for intersections with other entities
|
|
|
|
var isIntersecting bool
|
|
|
|
if len(ei) > 0 {
|
|
|
|
for _, entity2 := range entities {
|
|
|
|
entity2End := entity2.Offset + entity2.Length
|
|
|
|
if (entity2.Offset < start && entity2End > start && entity2End < end) ||
|
|
|
|
(entity2.Offset > start && entity2.Offset < end && entity2End > end) {
|
|
|
|
isIntersecting = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !isIntersecting {
|
|
|
|
entity.Length = end - start
|
|
|
|
for j := range ei {
|
|
|
|
excludedIndices[j] = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mergedEntities = append(mergedEntities, entity)
|
|
|
|
}
|
|
|
|
|
|
|
|
return mergedEntities
|
|
|
|
}
|
|
|
|
|
2022-03-12 01:27:15 +00:00
|
|
|
// ClaspDirectives to the following span as required by XEP-0393
|
|
|
|
func ClaspDirectives(text string, entities []*client.TextEntity) []*client.TextEntity {
|
|
|
|
alignedEntities := make([]*client.TextEntity, len(entities))
|
|
|
|
copy(alignedEntities, entities)
|
|
|
|
|
|
|
|
// transform the source text into a form with uniform runes and code points,
|
|
|
|
// by duplicating the Basic Multilingual Plane
|
|
|
|
doubledRunes := make([]rune, 0, len(text)*2)
|
|
|
|
|
|
|
|
for _, cp := range text {
|
|
|
|
if cp > 0x0000ffff {
|
|
|
|
doubledRunes = append(doubledRunes, cp, cp)
|
|
|
|
} else {
|
|
|
|
doubledRunes = append(doubledRunes, cp)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for i, entity := range alignedEntities {
|
|
|
|
var dirty bool
|
|
|
|
endOffset := entity.Offset + entity.Length
|
|
|
|
|
|
|
|
if unicode.IsSpace(doubledRunes[entity.Offset]) {
|
|
|
|
for j, r := range doubledRunes[entity.Offset+1:endOffset] {
|
|
|
|
if !unicode.IsSpace(r) {
|
|
|
|
dirty = true
|
|
|
|
entity.Offset += int32(j+1)
|
|
|
|
entity.Length -= int32(j+1)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if unicode.IsSpace(doubledRunes[endOffset-1]) {
|
|
|
|
for j := endOffset-2; j >= entity.Offset; j-- {
|
|
|
|
if !unicode.IsSpace(doubledRunes[j]) {
|
|
|
|
dirty = true
|
|
|
|
entity.Length = j+1-entity.Offset
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if dirty {
|
|
|
|
alignedEntities[i] = entity
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return alignedEntities
|
|
|
|
}
|
|
|
|
|
2020-01-09 21:16:40 +00:00
|
|
|
func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
|
|
|
|
return &Insertion{
|
|
|
|
Offset: entity.Offset,
|
|
|
|
Runes: lbrace,
|
|
|
|
}, &Insertion{
|
|
|
|
Offset: entity.Offset + entity.Length,
|
|
|
|
Runes: rbrace,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// EntityToMarkdown generates the wrapping Markdown tags
|
|
|
|
func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
|
|
|
|
switch entity.Type.TextEntityTypeType() {
|
|
|
|
case client.TypeTextEntityTypeBold:
|
2021-12-18 16:04:24 +00:00
|
|
|
return markupBraces(entity, boldRunesMarkdown, boldRunesMarkdown)
|
2020-01-09 21:16:40 +00:00
|
|
|
case client.TypeTextEntityTypeItalic:
|
|
|
|
return markupBraces(entity, italicRunes, italicRunes)
|
2022-03-11 17:54:03 +00:00
|
|
|
case client.TypeTextEntityTypeStrikethrough:
|
|
|
|
return markupBraces(entity, strikeRunesMarkdown, strikeRunesMarkdown)
|
2022-03-11 17:01:38 +00:00
|
|
|
case client.TypeTextEntityTypeCode:
|
2020-01-09 21:16:40 +00:00
|
|
|
return markupBraces(entity, codeRunes, codeRunes)
|
2022-03-11 17:01:38 +00:00
|
|
|
case client.TypeTextEntityTypePre:
|
|
|
|
return markupBraces(entity, preRuneStart, preRuneEnd)
|
2020-01-09 21:16:40 +00:00
|
|
|
case client.TypeTextEntityTypePreCode:
|
|
|
|
preCode, _ := entity.Type.(*client.TextEntityTypePreCode)
|
|
|
|
return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
|
|
|
|
case client.TypeTextEntityTypeTextUrl:
|
|
|
|
textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl)
|
2021-12-18 16:04:24 +00:00
|
|
|
return markupBraces(entity, []rune("["), []rune("]("+textURL.Url+")"))
|
2020-01-09 21:16:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2021-12-18 16:04:24 +00:00
|
|
|
// EntityToXEP0393 generates the wrapping XEP-0393 tags
|
|
|
|
func EntityToXEP0393(entity *client.TextEntity) (*Insertion, *Insertion) {
|
2022-02-18 23:41:08 +00:00
|
|
|
if entity == nil || entity.Type == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2021-12-18 16:04:24 +00:00
|
|
|
switch entity.Type.TextEntityTypeType() {
|
|
|
|
case client.TypeTextEntityTypeBold:
|
|
|
|
return markupBraces(entity, boldRunesXEP0393, boldRunesXEP0393)
|
|
|
|
case client.TypeTextEntityTypeItalic:
|
|
|
|
return markupBraces(entity, italicRunes, italicRunes)
|
2022-03-11 17:54:03 +00:00
|
|
|
case client.TypeTextEntityTypeStrikethrough:
|
|
|
|
return markupBraces(entity, strikeRunesXEP0393, strikeRunesXEP0393)
|
2022-03-11 17:01:38 +00:00
|
|
|
case client.TypeTextEntityTypeCode:
|
2021-12-18 16:04:24 +00:00
|
|
|
return markupBraces(entity, codeRunes, codeRunes)
|
2022-03-11 17:01:38 +00:00
|
|
|
case client.TypeTextEntityTypePre:
|
|
|
|
return markupBraces(entity, preRuneStart, preRuneEnd)
|
2021-12-18 16:04:24 +00:00
|
|
|
case client.TypeTextEntityTypePreCode:
|
|
|
|
preCode, _ := entity.Type.(*client.TextEntityTypePreCode)
|
|
|
|
return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
|
|
|
|
case client.TypeTextEntityTypeTextUrl:
|
|
|
|
textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl)
|
|
|
|
// non-standard, Pidgin-specific
|
|
|
|
return markupBraces(entity, []rune{}, []rune(" <"+textURL.Url+">"))
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Format traverses an already sorted list of entities and wraps the text in a markup
|
2020-01-09 21:16:40 +00:00
|
|
|
func Format(
|
|
|
|
sourceText string,
|
|
|
|
entities []*client.TextEntity,
|
|
|
|
entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
|
|
|
|
) string {
|
|
|
|
if len(entities) == 0 {
|
|
|
|
return sourceText
|
|
|
|
}
|
|
|
|
|
2022-03-12 01:27:15 +00:00
|
|
|
mergedEntities := SortEntities(ClaspDirectives(sourceText, MergeAdjacentEntities(SortEntities(entities))))
|
2022-03-11 16:12:36 +00:00
|
|
|
|
2020-01-09 21:16:40 +00:00
|
|
|
startStack := make(InsertionStack, 0, len(sourceText))
|
|
|
|
endStack := make(InsertionStack, 0, len(sourceText))
|
|
|
|
|
|
|
|
// convert entities to a stack of brackets
|
|
|
|
var maxEndOffset int32
|
2022-03-11 16:12:36 +00:00
|
|
|
for _, entity := range mergedEntities {
|
2020-01-09 21:16:40 +00:00
|
|
|
log.Debugf("%#v", entity)
|
|
|
|
if entity.Length <= 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
endOffset := entity.Offset + entity.Length
|
|
|
|
if endOffset > maxEndOffset {
|
|
|
|
maxEndOffset = endOffset
|
|
|
|
}
|
|
|
|
|
|
|
|
startStack, endStack = startStack.rebalance(endStack, entity.Offset)
|
|
|
|
|
|
|
|
startInsertion, endInsertion := entityToMarkup(entity)
|
|
|
|
if startInsertion != nil {
|
|
|
|
startStack = append(startStack, startInsertion)
|
|
|
|
}
|
|
|
|
if endInsertion != nil {
|
|
|
|
endStack = append(endStack, endInsertion)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// flush the closing brackets that still remain in endStack
|
|
|
|
startStack, endStack = startStack.rebalance(endStack, maxEndOffset)
|
|
|
|
|
|
|
|
// merge brackets into text
|
|
|
|
markupRunes := make([]rune, 0, len(sourceText))
|
|
|
|
|
|
|
|
nextInsertion := startStack.NewIterator()
|
|
|
|
insertion := nextInsertion()
|
|
|
|
var runeI int32
|
|
|
|
|
|
|
|
for _, cp := range sourceText {
|
|
|
|
for insertion != nil && insertion.Offset <= runeI {
|
|
|
|
markupRunes = append(markupRunes, insertion.Runes...)
|
|
|
|
insertion = nextInsertion()
|
|
|
|
}
|
|
|
|
|
|
|
|
markupRunes = append(markupRunes, cp)
|
|
|
|
// skip two UTF-16 code units (not points actually!) if needed
|
|
|
|
if cp > 0x0000ffff {
|
|
|
|
runeI += 2
|
|
|
|
} else {
|
|
|
|
runeI++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for insertion != nil {
|
|
|
|
markupRunes = append(markupRunes, insertion.Runes...)
|
|
|
|
insertion = nextInsertion()
|
|
|
|
}
|
|
|
|
|
|
|
|
return string(markupRunes)
|
|
|
|
}
|