Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Markdown Processor #75

Merged
merged 3 commits into from
Mar 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions src/main/kotlin/com/jaoafa/vcspeaker/tts/TextProcessor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,34 @@ import com.jaoafa.vcspeaker.tools.Emoji.replaceEmojiToName
import com.jaoafa.vcspeaker.tools.getObjectsIn
import com.jaoafa.vcspeaker.tts.api.Emotion
import com.jaoafa.vcspeaker.tts.api.Speaker
import com.jaoafa.vcspeaker.tts.markdown.toMarkdown
import com.jaoafa.vcspeaker.tts.replacers.BaseReplacer
import com.kotlindiscord.kord.extensions.utils.capitalizeWords
import dev.kord.common.entity.Snowflake

object TextProcessor {
suspend fun processText(guildId: Snowflake, text: String): String? {
if (shouldIgnore(text, guildId)) return null
val replacers = getObjectsIn<BaseReplacer>("com.jaoafa.vcspeaker.tts.replacers")
.filterNotNull()
.sortedByDescending { it.priority.level }

val replacers =
getObjectsIn<BaseReplacer>("com.jaoafa.vcspeaker.tts.replacers")
.filterNotNull()
.sortedByDescending { it.priority.level }
private fun String.shouldIgnoreOn(guildId: Snowflake) =
IgnoreStore.filter(guildId).any {
when (it.type) {
IgnoreType.Exact -> this == it.text
IgnoreType.Contains -> contains(it.text)
}
}

suspend fun processText(guildId: Snowflake, text: String): String? {
if (text.shouldIgnoreOn(guildId)) return null

val replacedText = replacers.fold(text) { replacedText, replacer ->
replacer.replace(replacedText, guildId)
}.replaceEmojiToName()

return replacedText.let { if (it.length > 180) it.substring(0, 180) else it }
val markdown = replacedText.toMarkdown().joinToString("") { it.toReadable() }

return markdown.let { if (it.length > 180) it.substring(0, 180) else it }
}

fun extractInlineVoice(text: String, voice: Voice): Pair<String, Voice> {
Expand All @@ -50,12 +60,4 @@ object TextProcessor {

return newText to newVoice
}

private fun shouldIgnore(text: String, guildId: Snowflake) =
IgnoreStore.filter(guildId).any {
when (it.type) {
IgnoreType.Exact -> text == it.text
IgnoreType.Contains -> text.contains(it.text)
}
}
}
76 changes: 76 additions & 0 deletions src/main/kotlin/com/jaoafa/vcspeaker/tts/markdown/Inline.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package com.jaoafa.vcspeaker.tts.markdown

/**
 * A single hit of an [InlineEffect] pattern inside a paragraph.
 *
 * @property match the marked-up substring that gets cut out of the paragraph
 * @property text the content captured by the pattern's `text` group
 * @property range the position of [match] within the paragraph
 * @property effect the effect whose pattern produced this hit
 */
data class InlineMatch(
    val match: String,
    val text: String,
    val range: IntRange,
    val effect: InlineEffect,
)

/**
 * A run of paragraph text together with the inline effects that apply to it.
 *
 * @property text the run's text with the matched markup removed
 * @property effects every [InlineEffect] whose match encloses this run
 */
data class Inline(val text: String, val effects: Set<InlineEffect>) {
    companion object {
        /**
         * Splits [paragraph] into consecutive [Inline] runs.
         *
         * Every [InlineEffect] pattern is searched in the paragraph, matches that cross
         * another match or merely duplicate an enclosing one are dropped, and the paragraph
         * is then cut at each remaining match. Runs with empty text are omitted, except that
         * a paragraph without any applicable match is returned as a single run (even if empty).
         *
         * @param paragraph a single line of raw text
         * @return the runs in their original order
         */
        fun from(paragraph: String): List<Inline> {
            val allInlines = InlineEffect.entries.flatMap { effect ->
                effect.regex.findAll(paragraph).map { result ->
                    // Not every pattern declares an "all" group, and asking for an undeclared
                    // group name throws; in that case the whole match is used instead.
                    val whole = try {
                        result.groups["all"]
                    } catch (e: Exception) {
                        null
                    }

                    InlineMatch(
                        match = whole?.value ?: result.value,
                        text = result.groups["text"]?.value ?: "",
                        range = whole?.range ?: result.range,
                        effect = effect,
                    )
                }
            }.sortedBy { it.range.first }.toMutableList()

            val removedInlines = mutableListOf<InlineMatch>()

            // Remove non-effective inline effects.
            // Iterate over a snapshot because allInlines is mutated inside the loop.
            for (testerMatch in allInlines.toList()) {
                if (testerMatch in removedInlines) continue

                val range = testerMatch.range

                fun predicateRemove(match: InlineMatch): Boolean {
                    // Starts inside the tester but ends outside of it.
                    val crossesTester = range.contains(match.range.first) && !range.contains(match.range.last)
                    // Sits exactly one character inside the tester and captures the same text.
                    val redundant =
                        match.range == ((range.first + 1) until range.last) && match.text == testerMatch.text
                    return crossesTester || redundant
                }

                removedInlines.addAll(allInlines.filter(::predicateRemove))
                allInlines.removeIf(::predicateRemove)
            }

            // Split paragraph into inlines
            val inlines = mutableListOf(Inline(paragraph, emptySet()))

            for (inline in allInlines) {
                val index = inlines.indexOfFirst { inline.match in it.text }

                // An earlier match may already have consumed part of this match's markup
                // (e.g. "***a***": Bold takes "***a**", so Italic's "*a*" is in no run any more).
                // Such a match can no longer be applied; skip it instead of failing.
                if (index == -1) continue

                val targetInline = inlines.removeAt(index)
                val (beforeMatch, afterMatch) = targetInline.text.split(inline.match, limit = 2)
                val effects = targetInline.effects

                inlines.addAll(
                    index,
                    listOf(
                        Inline(beforeMatch, effects),
                        Inline(inline.text, effects + inline.effect),
                        Inline(afterMatch, effects),
                    ).filter { it.text.isNotEmpty() },
                )
            }

            return inlines
        }
    }
}

/**
 * Inline markup kinds recognised inside a paragraph.
 *
 * @property regex pattern locating the markup; the affected content is captured in the `text` group
 * @property replacer optional transformation applied to the affected text when it is made readable;
 * `null` leaves the text unchanged
 */
enum class InlineEffect(val regex: Regex, val replacer: ((String) -> String)? = null) {
    /** `[text](url)`, where the text itself must not contain an http(s) URL. */
    Link(regex = Regex("\\[(?<text>((?!https?://).)+?)]\\(<?(?<url>https?://.+?)>?\\)")),

    /** Text wrapped in single backticks. */
    Code(regex = Regex("`(?<text>.+?)`")),

    /** Text wrapped in double asterisks. */
    Bold(regex = Regex("\\*\\*(?<text>.+?)\\*\\*")),

    /**
     * Text wrapped in a single `*` or `_`. The pattern is a lookahead, so the match
     * itself is empty and the marked-up span is exposed through the `all` group.
     */
    Italic(regex = Regex("(?=(?<all>(?<literal>[*_])(?<text>((?!\\k<literal>).)+?)\\k<literal>))")),

    /** Text wrapped in double underscores. */
    Underline(regex = Regex("__(?<text>.+?)__")),

    /** Text wrapped in double tildes; replaced by a fixed string. */
    Strikethrough(regex = Regex("~~(?<text>.+?)~~"), replacer = { "パー" }),

    /** Text wrapped in double pipes; replaced by a fixed string. */
    Spoiler(regex = Regex("\\|\\|(?<text>.+?)\\|\\|"), replacer = { "ピー" }),
}
37 changes: 37 additions & 0 deletions src/main/kotlin/com/jaoafa/vcspeaker/tts/markdown/Line.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.jaoafa.vcspeaker.tts.markdown

/**
 * One line of a message: its inline runs plus the line-level prefixes found at its start.
 *
 * @property inlines the runs produced by [Inline.from] for this line
 * @property effects the [LineEffect]s whose markers lead the line
 */
data class Line(val inlines: List<Inline>, val effects: Set<LineEffect>) {
    /**
     * Renders the line as plain text: each run's text is passed through the replacer of
     * every inline effect on that run (effects without a replacer leave it untouched),
     * and the results are concatenated. Line effects are not consulted here.
     */
    fun toReadable(): String = buildString {
        for (inline in inlines) {
            var spoken = inline.text
            for (effect in inline.effects) {
                spoken = effect.replacer?.invoke(spoken) ?: spoken
            }
            append(spoken)
        }
    }

    companion object {
        /**
         * Builds a [Line] from a single [paragraph].
         *
         * The text of all inline runs is joined and split on spaces; the leading run of
         * non-empty tokens that fully match a [LineEffect] pattern determines the line's
         * effects. Detection stops at the first token that is not a prefix.
         */
        fun from(paragraph: String): Line {
            val runs = Inline.from(paragraph)

            val leadingEffects = runs
                .joinToString("") { it.text }
                .split(" ")
                .asSequence()
                .filter { it.isNotEmpty() }
                // null if this token is not a prefix
                .map { token -> LineEffect.entries.firstOrNull { it.regex.matches(token) } }
                .takeWhile { it != null }
                .filterNotNull()
                .toSet()

            return Line(runs, leadingEffects)
        }
    }
}

/**
 * Line-level markers that may appear as space-separated tokens at the start of a line.
 *
 * @property regex pattern that a whole token must match to count as this marker
 */
enum class LineEffect(val regex: Regex) {
    /** One to three `#` characters. */
    Header(regex = Regex("^#{1,3}$")),

    /** A single `>`. */
    Quote(regex = Regex("^>$")),

    /** A single `*` or `-`. */
    BulletList(regex = Regex("^[*-]$")),

    /** One or more digits followed by a dot. */
    NumberedList(regex = Regex("^\\d+\\.$")),
}
12 changes: 12 additions & 0 deletions src/main/kotlin/com/jaoafa/vcspeaker/tts/markdown/Markdown.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.jaoafa.vcspeaker.tts.markdown

/**
 * Returns this text with every section enclosed by a pair of triple-backtick fences
 * removed, and with all empty lines dropped.
 *
 * A trailing fence that is never closed is kept, together with the text after it.
 */
fun String.removeCodeBlock(): String {
    val fence = "```"
    val segments = split(fence)

    val outsideFences = buildString {
        // Segments at even positions lie outside of any fenced section.
        for (position in segments.indices step 2) append(segments[position])

        // An even segment count means the last fence was never closed:
        // restore that fence and the text following it.
        if (segments.size % 2 == 0) {
            append(fence)
            append(segments.last())
        }
    }

    return outsideFences.lineSequence()
        .filter { it.isNotEmpty() }
        .joinToString("\n")
}

/**
 * Parses this text into [Line]s: code blocks are stripped with [removeCodeBlock],
 * then each remaining line is converted with [Line.from].
 */
fun String.toMarkdown(): List<Line> {
    val withoutCodeBlocks = removeCodeBlock()
    return withoutCodeBlocks.lines().map { paragraph -> Line.from(paragraph) }
}

This file was deleted.

Loading