tts: add native tts plugin for Android (#1376)

This commit is contained in:
Huang Xin 2025-06-09 22:54:12 +08:00 committed by GitHub
parent 69d418aa61
commit f8ac30adf1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
45 changed files with 2256 additions and 3 deletions

View file

@ -0,0 +1,2 @@
/build
/.tauri

View file

@ -0,0 +1,44 @@
// Gradle plugins applied to this module: the Android library plugin and the Kotlin Android plugin.
// No versions are given here.
plugins {
id("com.android.library")
id("org.jetbrains.kotlin.android")
}
// Android library configuration for the native TTS plugin module.
android {
// Namespace of this module; the plugin's main Kotlin sources use the same package name.
namespace = "com.readest.native_tts"
compileSdk = 34
defaultConfig {
minSdk = 21
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
// ProGuard rules shipped to consumers of this library.
consumerProguardFiles("consumer-rules.pro")
}
buildTypes {
release {
// Code shrinking is turned off for the release build type; the rule files below are still listed.
isMinifyEnabled = false
proguardFiles(
getDefaultProguardFile("proguard-android-optimize.txt"),
"proguard-rules.pro"
)
}
}
// Java and Kotlin are both compiled against the 1.8 language/bytecode level.
compileOptions {
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = "1.8"
}
}
// Module dependencies: AndroidX/Material libraries, test libraries, and the local Tauri Android project.
dependencies {
implementation("androidx.core:core-ktx:1.9.0")
implementation("androidx.appcompat:appcompat:1.6.0")
implementation("com.google.android.material:material:1.7.0")
// Local unit tests.
testImplementation("junit:junit:4.13.2")
// Instrumented (on-device) tests.
androidTestImplementation("androidx.test.ext:junit:1.1.5")
androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
// Project dependency on the ':tauri-android' module (provides the app.tauri.* classes used by the plugin).
implementation(project(":tauri-android"))
}

View file

@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

View file

@ -0,0 +1,31 @@
// Plugin resolution: where Gradle plugins are fetched from and which versions are used.
pluginManagement {
repositories {
mavenCentral()
gradlePluginPortal()
google()
}
resolutionStrategy {
eachPlugin {
// Pin the version of each known plugin id; other plugin ids are left untouched.
switch (requested.id.id) {
case "com.android.library":
useVersion("8.0.2")
break
case "org.jetbrains.kotlin.android":
useVersion("1.8.20")
break
}
}
}
}
// Dependency repositories are declared centrally here; FAIL_ON_PROJECT_REPOS makes the build
// fail if a project declares its own repositories.
dependencyResolutionManagement {
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
repositories {
mavenCentral()
google()
}
}
// Include the ':tauri-android' project and point it at the ./.tauri/tauri-api directory.
include ':tauri-android'
project(':tauri-android').projectDir = new File('./.tauri/tauri-api')

View file

@ -0,0 +1,24 @@
package com.readest.native_tts

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test. For a library module the instrumentation
        // target is the generated test APK, whose package is the module namespace
        // ("com.readest.native_tts") with a ".test" suffix.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.readest.native_tts.test", appContext.packageName)
    }
}

View file

@ -0,0 +1,3 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Library manifest with no declarations: no permissions, components, or other elements. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
</manifest>

View file

@ -0,0 +1,485 @@
package com.readest.native_tts
import android.os.Bundle
import android.app.Activity
import android.content.Context
import android.speech.tts.TextToSpeech
import android.speech.tts.UtteranceProgressListener
import android.speech.tts.Voice
import android.util.Log
import app.tauri.annotation.Command
import app.tauri.annotation.InvokeArg
import app.tauri.annotation.TauriPlugin
import app.tauri.plugin.Invoke
import app.tauri.plugin.JSObject
import app.tauri.plugin.Plugin
import app.tauri.plugin.PluginResult
import kotlinx.coroutines.*
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import org.json.JSONArray
import org.json.JSONObject
import java.util.*
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.atomic.AtomicReference
/**
 * Value holder describing one TTS voice.
 *
 * @property id identifier of the voice
 * @property name display name of the voice
 * @property lang language of the voice (presumably a language tag such as "en-US"; confirm with callers)
 * @property disabled whether the voice is flagged as disabled; defaults to `false`
 */
data class TTSVoiceData(
    val id: String,
    val name: String,
    val lang: String,
    val disabled: Boolean = false,
)
/**
 * A single event emitted for an utterance while it is being spoken.
 *
 * @property code event type: `"boundary"`, `"error"` or `"end"`
 * @property message optional detail for the event (for example `"start"`, `"range"` or an error text)
 * @property mark optional position marker (range events use the form `"pos:<start>-<end>"`)
 */
data class TTSMessageEvent(
    val code: String,
    val message: String? = null,
    val mark: String? = null,
)
/**
 * Granularity levels reported to the frontend.
 *
 * @property value string form of the granularity as sent to the frontend
 */
enum class TTSGranularity(val value: String) {
    WORD("word"),
    SENTENCE("sentence"),
    PARAGRAPH("paragraph"),
}
@TauriPlugin
class NativeTTSPlugin(private val activity: Activity) : Plugin(activity) {
companion object {
    // Log tag used for all messages written by this plugin.
    private const val TAG = "NativeTTSPlugin"

    // Name of the event that carries per-utterance TTS events to the frontend.
    private const val CHANNEL_NAME = "tts_events"
}

// Platform TTS engine; null until `init` has created it.
private var textToSpeech: TextToSpeech? = null

// State holders. The references never change (only the values inside do), so they are `val`.
private val isInitialized = AtomicBoolean(false)
private val isPaused = AtomicBoolean(false)
private val isSpeaking = AtomicBoolean(false)
private val currentVoiceId = AtomicReference<String>("")
private val currentLang = AtomicReference<String>("en-US")

// Rate and pitch are stored here and applied to the engine each time an utterance is spoken.
private val currentRate = AtomicReference<Float>(1.0f)
private val currentPitch = AtomicReference<Float>(1.0f)

// Event channels for each speaking session, keyed by utterance id.
private val eventChannels = ConcurrentHashMap<String, Channel<TTSMessageEvent>>()

// Jobs that issue the speak request, keyed by utterance id.
private val speakingJobs = ConcurrentHashMap<String, Job>()

// Scope for all plugin coroutines: main dispatcher, with a SupervisorJob so one failed child
// does not cancel the others.
private val coroutineScope = CoroutineScope(Dispatchers.Main + SupervisorJob())
/**
 * Command: initializes the TTS engine.
 *
 * Resolves with `{ "success": <Boolean> }` once initialization has finished, or rejects if
 * initialization throws.
 */
@Command
fun init(invoke: Invoke) {
    coroutineScope.launch {
        try {
            val initialized = initializeTTS()
            val payload = JSObject()
            payload.put("success", initialized)
            invoke.resolve(payload)
        } catch (e: Exception) {
            Log.e(TAG, "Failed to initialize TTS", e)
            invoke.reject("Failed to initialize TTS: ${e.message}")
        }
    }
}
/**
 * Creates the [TextToSpeech] engine and suspends until its init callback reports a status.
 *
 * Any engine left over from an earlier call is shut down first, so a repeated `init` does
 * not leave an orphaned engine behind.
 *
 * @return `true` when the engine reported [TextToSpeech.SUCCESS], `false` on any other
 *   status or when creating the engine threw.
 */
private suspend fun initializeTTS(): Boolean = suspendCancellableCoroutine { continuation ->
    // Resume at most once: resuming a continuation that is already completed or cancelled
    // would throw, so late or duplicate callbacks are ignored.
    fun finish(success: Boolean) {
        if (continuation.isActive) {
            continuation.resume(success) {}
        }
    }
    try {
        // Release the previous engine (if any) before replacing the reference; the plugin
        // is not usable again until the new engine reports success.
        textToSpeech?.shutdown()
        isInitialized.set(false)
        textToSpeech = TextToSpeech(activity) { status ->
            when (status) {
                TextToSpeech.SUCCESS -> {
                    setupTTSListener()
                    isInitialized.set(true)
                    finish(true)
                }
                else -> {
                    Log.e(TAG, "TTS initialization failed with status: $status")
                    finish(false)
                }
            }
        }
    } catch (e: Exception) {
        Log.e(TAG, "Exception during TTS initialization", e)
        finish(false)
    }
}
/**
 * Registers the utterance progress listener that turns engine callbacks into
 * [TTSMessageEvent]s on the utterance's event channel.
 *
 * - start: `boundary` / `"start"`
 * - range: `boundary` / `"range"` with mark `"pos:<start>-<end>"`
 * - done: `end`, then the channel is closed
 * - error: `error`, then the channel is closed
 * - stop (utterance interrupted or flushed from the queue): the channel is closed without an event
 */
private fun setupTTSListener() {
    textToSpeech?.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
        override fun onStart(utteranceId: String?) {
            utteranceId?.let { id ->
                isSpeaking.set(true)
                sendEvent(id, TTSMessageEvent("boundary", "start"))
            }
        }

        override fun onDone(utteranceId: String?) {
            utteranceId?.let { id ->
                isSpeaking.set(false)
                sendEvent(id, TTSMessageEvent("end"))
                closeEventChannel(id)
            }
        }

        override fun onError(utteranceId: String?) {
            utteranceId?.let { id ->
                isSpeaking.set(false)
                sendEvent(id, TTSMessageEvent("error", "TTS playback error"))
                closeEventChannel(id)
            }
        }

        override fun onStop(utteranceId: String?, interrupted: Boolean) {
            // A stopped or flushed utterance never reaches onDone/onError. Close its channel
            // here so the channel and its forwarding coroutine are not leaked. No event is
            // sent, so the frontend sees the same events as before.
            utteranceId?.let { id ->
                if (interrupted) {
                    isSpeaking.set(false)
                }
                closeEventChannel(id)
            }
        }

        override fun onRangeStart(utteranceId: String?, start: Int, end: Int, frame: Int) {
            utteranceId?.let { id ->
                sendEvent(id, TTSMessageEvent("boundary", "range", "pos:$start-$end"))
            }
        }
    })
}
/**
 * Command: starts speaking the given SSML.
 *
 * Rejects with "TTS not initialized" if the engine is not ready. Otherwise creates an event
 * channel for a fresh utterance id, launches the speak request, resolves with
 * `{ "utteranceId": <id> }`, and then starts forwarding the utterance's events to the frontend.
 */
@Command
fun speak(invoke: Invoke) {
    val request = invoke.parseArgs(SpeakArgs::class.java)
    if (!isInitialized.get()) {
        invoke.reject("TTS not initialized")
        return
    }
    val id = UUID.randomUUID().toString()
    coroutineScope.launch {
        try {
            // The channel is registered before the speak job is launched.
            eventChannels[id] = Channel<TTSMessageEvent>(Channel.UNLIMITED)
            speakingJobs[id] = launch {
                speakText(request.ssml, id, request.preload ?: false)
            }
            // Return the utterance ID so frontend can listen to events
            val payload = JSObject()
            payload.put("utteranceId", id)
            invoke.resolve(payload)
            // Start sending events to the frontend
            startEventStream(id)
        } catch (e: Exception) {
            Log.e(TAG, "Failed to start speaking", e)
            invoke.reject("Failed to start speaking: ${e.message}")
        }
    }
}
/**
 * Submits one utterance to the TTS engine on the main dispatcher.
 *
 * The SSML is reduced to plain text, the stored rate and pitch are applied, and the text is
 * queued. If the request cannot be started, an `error` event is sent and the utterance's
 * event channel is closed.
 *
 * @param ssml SSML markup to speak
 * @param utteranceId id under which the engine reports progress for this utterance
 * @param preload when `true` the text is appended to the queue ([TextToSpeech.QUEUE_ADD]);
 *   otherwise the queue is flushed first ([TextToSpeech.QUEUE_FLUSH])
 */
private suspend fun speakText(ssml: String, utteranceId: String, preload: Boolean) {
    withContext(Dispatchers.Main) {
        try {
            // Parse SSML and extract text
            val text = parseSSML(ssml)
            textToSpeech?.apply {
                setSpeechRate(currentRate.get())
                setPitch(currentPitch.get())
            }
            val params = Bundle().apply {
                putString(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, utteranceId)
            }
            val result = textToSpeech?.speak(
                text,
                if (preload) TextToSpeech.QUEUE_ADD else TextToSpeech.QUEUE_FLUSH,
                params,
                utteranceId,
            )
            // `result` is null when there is no engine; that is treated as a failure too.
            if (result != TextToSpeech.SUCCESS) {
                sendEvent(utteranceId, TTSMessageEvent("error", "Failed to start speech"))
                // The engine will never report on this utterance, so close the channel here
                // to end its forwarding coroutine.
                closeEventChannel(utteranceId)
            }
        } catch (e: Exception) {
            sendEvent(utteranceId, TTSMessageEvent("error", "Exception during speech: ${e.message}"))
            closeEventChannel(utteranceId)
        }
    }
}
/**
 * Reduces SSML markup to the plain text that should be spoken.
 *
 * Tags are replaced by spaces, the predefined XML entities (`&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;`) and numeric character references are decoded, and whitespace runs are
 * collapsed to single spaces. Unknown or malformed entities are left untouched.
 *
 * @param ssml SSML (or plain text) input
 * @return trimmed plain text; an empty string if nothing remains
 */
private fun parseSSML(ssml: String): String {
    // Strip tags first so that decoded "&lt;...&gt;" text is never mistaken for markup.
    val withoutTags = ssml.replace(Regex("<[^>]*>"), " ")

    // Decode in a single pass so "&amp;lt;" becomes the literal text "&lt;", not "<".
    val entityPattern = Regex("&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z]+);")
    val decoded = entityPattern.replace(withoutTags) { match ->
        val entity = match.groupValues[1]
        val codePoint: Int? = when {
            entity == "amp" -> '&'.code
            entity == "lt" -> '<'.code
            entity == "gt" -> '>'.code
            entity == "quot" -> '"'.code
            entity == "apos" -> '\''.code
            entity.startsWith("#x") || entity.startsWith("#X") -> entity.substring(2).toIntOrNull(16)
            entity.startsWith("#") -> entity.substring(1).toIntOrNull()
            else -> null
        }
        if (codePoint != null && Character.isValidCodePoint(codePoint)) {
            String(Character.toChars(codePoint))
        } else {
            match.value
        }
    }

    return decoded
        .replace(Regex("\\s+"), " ")
        .trim()
}
/**
 * Forwards every event from the utterance's channel to the frontend as a `tts_events`
 * trigger, until the channel is closed. Does nothing if no channel is registered for
 * [utteranceId].
 *
 * Each payload contains `utteranceId` and `code`, plus `message` and `mark` when present.
 */
private fun startEventStream(utteranceId: String) {
    coroutineScope.launch {
        val channel = eventChannels[utteranceId] ?: return@launch
        try {
            for (event in channel) {
                val eventData = JSObject().apply {
                    put("utteranceId", utteranceId)
                    put("code", event.code)
                    event.message?.let { put("message", it) }
                    event.mark?.let { put("mark", it) }
                }
                // Send event to frontend via Tauri event system
                trigger(CHANNEL_NAME, eventData)
            }
        } catch (e: CancellationException) {
            // Cancellation is normal shutdown, not an error: do not log it as one, and
            // rethrow so the coroutine completes as cancelled.
            throw e
        } catch (e: Exception) {
            Log.e(TAG, "Error in event stream for $utteranceId", e)
        }
    }
}
/**
 * Queues [event] on the channel registered for [utteranceId].
 *
 * The send is performed from a coroutine launched on the plugin scope; if no channel is
 * registered for the id at that point, the event is dropped.
 */
private fun sendEvent(utteranceId: String, event: TTSMessageEvent) {
    coroutineScope.launch {
        val target = eventChannels[utteranceId]
        target?.trySend(event)
    }
}
/**
 * Closes and unregisters the event channel for [utteranceId], and cancels and unregisters
 * its speak job. Runs from a coroutine launched on the plugin scope; unknown ids are a no-op.
 */
private fun closeEventChannel(utteranceId: String) {
    coroutineScope.launch {
        val channel = eventChannels[utteranceId]
        channel?.close()
        eventChannels.remove(utteranceId)

        val job = speakingJobs[utteranceId]
        job?.cancel()
        speakingJobs.remove(utteranceId)
    }
}
/**
 * Command: pauses speech by stopping the engine.
 *
 * Resolves and marks the plugin as paused when the engine reports success; rejects when the
 * stop call does not succeed (including when no engine exists) or throws.
 */
@Command
fun pause(invoke: Invoke) {
    try {
        val stopped = textToSpeech?.stop() == TextToSpeech.SUCCESS
        if (!stopped) {
            invoke.reject("Failed to pause TTS")
            return
        }
        isPaused.set(true)
        invoke.resolve()
    } catch (e: Exception) {
        invoke.reject("Exception while pausing: ${e.message}")
    }
}
/**
 * Command: clears the paused flag and resolves.
 *
 * Android TTS doesn't have native resume, so this only updates the tracked state; no speech
 * is restarted here.
 */
@Command
fun resume(invoke: Invoke) {
    try {
        isPaused.set(false)
        invoke.resolve()
    } catch (e: Exception) {
        val reason = "Exception while resuming: ${e.message}"
        invoke.reject(reason)
    }
}
/**
 * Command: stops speech and discards all per-utterance state.
 *
 * When the engine reports a successful stop, the speaking and paused flags are cleared, every
 * speak job is cancelled, every event channel is closed, both maps are emptied, and the call
 * resolves. Otherwise (including when no engine exists) the call is rejected.
 */
@Command
fun stop(invoke: Invoke) {
    try {
        if (textToSpeech?.stop() != TextToSpeech.SUCCESS) {
            invoke.reject("Failed to stop TTS")
            return
        }
        isSpeaking.set(false)
        isPaused.set(false)
        // Cancel all active speaking jobs and close channels
        for (job in speakingJobs.values) {
            job.cancel()
        }
        for (channel in eventChannels.values) {
            channel.close()
        }
        speakingJobs.clear()
        eventChannels.clear()
        invoke.resolve()
    } catch (e: Exception) {
        invoke.reject("Exception while stopping: ${e.message}")
    }
}
/**
 * Command: sets the engine language from the `lang` argument (a language tag).
 *
 * Resolves and records the language when the engine reports it as available (language,
 * country, or country+variant level); rejects otherwise, including when no engine exists.
 */
@Command
fun set_primary_lang(invoke: Invoke) {
    val args = invoke.parseArgs(SetLangArgs::class.java)
    try {
        val status = textToSpeech?.setLanguage(Locale.forLanguageTag(args.lang))
        val supported = status == TextToSpeech.LANG_AVAILABLE ||
            status == TextToSpeech.LANG_COUNTRY_AVAILABLE ||
            status == TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE
        if (supported) {
            currentLang.set(args.lang)
            invoke.resolve()
        } else {
            invoke.reject("Language not supported: ${args.lang}")
        }
    } catch (e: Exception) {
        invoke.reject("Exception setting language: ${e.message}")
    }
}
/**
 * Command: stores the speech rate from the `rate` argument and resolves.
 *
 * The value is only recorded here; it is handed to the engine when an utterance is spoken.
 */
@Command
fun set_rate(invoke: Invoke) {
    val args = invoke.parseArgs(SetRateArgs::class.java)
    try {
        val requestedRate = args.rate
        currentRate.set(requestedRate)
        invoke.resolve()
    } catch (e: Exception) {
        invoke.reject("Exception setting rate: ${e.message}")
    }
}
/**
 * Command: stores the speech pitch from the `pitch` argument and resolves.
 *
 * The value is only recorded here; it is handed to the engine when an utterance is spoken.
 */
@Command
fun set_pitch(invoke: Invoke) {
    val args = invoke.parseArgs(SetPitchArgs::class.java)
    try {
        val requestedPitch = args.pitch
        currentPitch.set(requestedPitch)
        invoke.resolve()
    } catch (e: Exception) {
        invoke.reject("Exception setting pitch: ${e.message}")
    }
}
/**
 * Command: selects the engine voice whose name equals the `voice` argument.
 *
 * Rejects with "Voice not found" when no such voice exists (or there is no engine), rejects
 * with "Failed to set voice" when the engine refuses it, and otherwise records the voice id
 * and resolves.
 */
@Command
fun set_voice(invoke: Invoke) {
    val args = invoke.parseArgs(SetVoiceArgs::class.java)
    try {
        val match = textToSpeech?.voices?.find { candidate -> candidate.name == args.voice }
        if (match == null) {
            invoke.reject("Voice not found: ${args.voice}")
            return
        }
        val applied = textToSpeech?.setVoice(match) == TextToSpeech.SUCCESS
        if (applied) {
            currentVoiceId.set(args.voice)
            invoke.resolve()
        } else {
            invoke.reject("Failed to set voice: ${args.voice}")
        }
    } catch (e: Exception) {
        invoke.reject("Exception setting voice: ${e.message}")
    }
}
/**
 * Command: lists every voice the engine exposes.
 *
 * Resolves with `{ "voices": [ { id, name, lang, disabled } ... ] }`, where `id` and `name`
 * are both the voice name, `lang` is the voice locale as a language tag, and `disabled` is
 * always `false`. The list is empty when there is no engine or it reports no voices.
 */
@Command
fun get_all_voices(invoke: Invoke) {
    try {
        val entries = textToSpeech?.voices.orEmpty().map { voice ->
            val entry = JSObject()
            entry.put("id", voice.name)
            entry.put("name", voice.name)
            entry.put("lang", voice.locale.toLanguageTag())
            entry.put("disabled", false)
            entry
        }
        val payload = JSObject()
        payload.put("voices", JSONArray(entries))
        invoke.resolve(payload)
    } catch (e: Exception) {
        invoke.reject("Exception getting voices: ${e.message}")
    }
}
/**
 * Command: lists the engine voices whose locale language matches the `lang` argument.
 *
 * Only the language part of the tag is compared (region and variant are ignored). Resolves
 * with `{ "voices": [ { id, name, lang, disabled } ... ] }` in the same shape as
 * `get_all_voices`; the list is empty when nothing matches or there is no engine.
 */
@Command
fun get_voices(invoke: Invoke) {
    val args = invoke.parseArgs(GetVoicesArgs::class.java)
    try {
        val wanted = Locale.forLanguageTag(args.lang)
        val entries = textToSpeech?.voices.orEmpty()
            .filter { voice -> voice.locale.language == wanted.language }
            .map { voice ->
                val entry = JSObject()
                entry.put("id", voice.name)
                entry.put("name", voice.name)
                entry.put("lang", voice.locale.toLanguageTag())
                entry.put("disabled", false)
                entry
            }
        val payload = JSObject()
        payload.put("voices", JSONArray(entries))
        invoke.resolve(payload)
    } catch (e: Exception) {
        invoke.reject("Exception getting voices for language: ${e.message}")
    }
}
/**
 * Command: resolves with `{ "granularities": [...] }`, the string values of every
 * [TTSGranularity] constant in declaration order.
 */
@Command
fun get_granularities(invoke: Invoke) {
    try {
        val names = TTSGranularity.values().map { granularity -> granularity.value }
        val payload = JSObject()
        payload.put("granularities", JSONArray(names))
        invoke.resolve(payload)
    } catch (e: Exception) {
        invoke.reject("Exception getting granularities: ${e.message}")
    }
}
/**
 * Command: resolves with `{ "voiceId": <id> }`, the id recorded by the last successful
 * `set_voice` (an empty string if none was set).
 */
@Command
fun get_voice_id(invoke: Invoke) {
    try {
        val payload = JSObject()
        payload.put("voiceId", currentVoiceId.get())
        invoke.resolve(payload)
    } catch (e: Exception) {
        invoke.reject("Exception getting voice ID: ${e.message}")
    }
}
/**
 * Command: resolves with `{ "lang": <tag> }`, the language recorded by the last successful
 * `set_primary_lang` ("en-US" if none was set).
 */
@Command
fun get_speaking_lang(invoke: Invoke) {
    try {
        val payload = JSObject()
        payload.put("lang", currentLang.get())
        invoke.resolve(payload)
    } catch (e: Exception) {
        invoke.reject("Exception getting speaking language: ${e.message}")
    }
}
/**
 * Releases everything the plugin holds: cancels the coroutine scope, stops and shuts down the
 * TTS engine, and closes/cancels all per-utterance channels and jobs.
 *
 * The state flags are reset and the engine reference is cleared, so a later `speak` is
 * rejected with "TTS not initialized" instead of launching work on the cancelled scope and
 * never settling its invoke.
 */
fun destroy() {
    coroutineScope.cancel()

    isInitialized.set(false)
    isSpeaking.set(false)
    isPaused.set(false)

    textToSpeech?.let { engine ->
        engine.stop()
        engine.shutdown()
    }
    // Drop the reference so no command can touch the shut-down engine.
    textToSpeech = null

    eventChannels.values.forEach { it.close() }
    eventChannels.clear()
    speakingJobs.values.forEach { it.cancel() }
    speakingJobs.clear()
}
}
// Data classes for command arguments
/**
 * Arguments of the `speak` command.
 *
 * Annotated with [InvokeArg] and given mutable properties with defaults, which yields a
 * no-argument constructor plus setters for populating the object from the invoke payload.
 *
 * @property ssml SSML markup to speak
 * @property preload when `true` the utterance is appended to the queue instead of flushing it
 */
@InvokeArg
data class SpeakArgs(
    var ssml: String = "",
    var preload: Boolean? = false,
)
/**
 * Arguments of the `set_primary_lang` command.
 *
 * Annotated with [InvokeArg] and given a mutable property with a default, which yields a
 * no-argument constructor plus a setter for populating the object from the invoke payload.
 *
 * @property lang language tag to select
 */
@InvokeArg
data class SetLangArgs(
    var lang: String = "",
)
/**
 * Arguments of the `set_rate` command.
 *
 * Annotated with [InvokeArg] and given a mutable property with a default, which yields a
 * no-argument constructor plus a setter for populating the object from the invoke payload.
 *
 * @property rate speech rate; defaults to 1.0, matching the plugin's initial rate
 */
@InvokeArg
data class SetRateArgs(
    var rate: Float = 1.0f,
)
/**
 * Arguments of the `set_pitch` command.
 *
 * Annotated with [InvokeArg] and given a mutable property with a default, which yields a
 * no-argument constructor plus a setter for populating the object from the invoke payload.
 *
 * @property pitch speech pitch; defaults to 1.0, matching the plugin's initial pitch
 */
@InvokeArg
data class SetPitchArgs(
    var pitch: Float = 1.0f,
)
/**
 * Arguments of the `set_voice` command.
 *
 * Annotated with [InvokeArg] and given a mutable property with a default, which yields a
 * no-argument constructor plus a setter for populating the object from the invoke payload.
 *
 * @property voice name of the voice to select
 */
@InvokeArg
data class SetVoiceArgs(
    var voice: String = "",
)
/**
 * Arguments of the `get_voices` command.
 *
 * Annotated with [InvokeArg] and given a mutable property with a default, which yields a
 * no-argument constructor plus a setter for populating the object from the invoke payload.
 *
 * @property lang language tag whose language part is used to filter voices
 */
@InvokeArg
data class GetVoicesArgs(
    var lang: String = "",
)

View file

@ -0,0 +1,17 @@
package com.readest.native_tts

import org.junit.Test
import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}