mirror of
https://github.com/AstrBotDevs/AstrBot
synced 2026-07-01 01:10:21 +08:00
feat: add ElevenLabs TTS API provider
This commit is contained in:
@@ -1807,6 +1807,25 @@ CONFIG_METADATA_2 = {
|
||||
"gemini_tts_voice_name": "Leda",
|
||||
"proxy": "",
|
||||
},
|
||||
"ElevenLabs TTS(API)": {
|
||||
"hint": "API Key 从 https://elevenlabs.io/app/settings/api-keys 获取。Voice ID 可在 https://elevenlabs.io/app/voice-library 浏览选择。",
|
||||
"id": "elevenlabs_tts",
|
||||
"type": "elevenlabs_tts_api",
|
||||
"provider": "elevenlabs",
|
||||
"provider_type": "text_to_speech",
|
||||
"enable": False,
|
||||
"api_key": "",
|
||||
"api_base": "https://api.elevenlabs.io/v1",
|
||||
"model": "eleven_multilingual_v2",
|
||||
"elevenlabs-tts-voice-id": "JBFqnCBsd6RMkjVDRZzb",
|
||||
"elevenlabs-tts-output-format": "mp3_44100_128",
|
||||
"elevenlabs-tts-stability": "",
|
||||
"elevenlabs-tts-similarity-boost": "",
|
||||
"elevenlabs-tts-style": "",
|
||||
"elevenlabs-tts-use-speaker-boost": True,
|
||||
"timeout": "20",
|
||||
"proxy": "",
|
||||
},
|
||||
"OpenAI Embedding": {
|
||||
"id": "openai_embedding",
|
||||
"type": "openai_embedding",
|
||||
|
||||
@@ -467,6 +467,10 @@ class ProviderManager:
|
||||
from .sources.gemini_tts_source import (
|
||||
ProviderGeminiTTSAPI as ProviderGeminiTTSAPI,
|
||||
)
|
||||
case "elevenlabs_tts_api":
|
||||
from .sources.elevenlabs_tts_source import (
|
||||
ProviderElevenLabsTTSAPI as ProviderElevenLabsTTSAPI,
|
||||
)
|
||||
case "openai_embedding":
|
||||
from .sources.openai_embedding_source import (
|
||||
OpenAIEmbeddingProvider as OpenAIEmbeddingProvider,
|
||||
|
||||
173
astrbot/core/provider/sources/elevenlabs_tts_source.py
Normal file
173
astrbot/core/provider/sources/elevenlabs_tts_source.py
Normal file
@@ -0,0 +1,173 @@
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
from astrbot import logger
|
||||
from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
|
||||
|
||||
from ..entities import ProviderType
|
||||
from ..provider import TTSProvider
|
||||
from ..register import register_provider_adapter
|
||||
|
||||
# Output-format prefixes this provider accepts. _validate_output_format()
# rejects any configured format that does not start with one of these.
SUPPORTED_CONTAINER_OUTPUT_PREFIXES = ("mp3", "wav", "opus")
|
||||
# Output-format prefixes that _validate_output_format() rejects up front with a
# dedicated "raw audio ... not supported" error message.
RAW_AUDIO_OUTPUT_PREFIXES = ("pcm", "ulaw", "alaw")
|
||||
|
||||
|
||||
def _parse_optional_float(
|
||||
provider_config: dict,
|
||||
cfg_name: str,
|
||||
) -> float | None:
|
||||
value = provider_config.get(cfg_name, "")
|
||||
if value in ("", None):
|
||||
return None
|
||||
try:
|
||||
parsed = float(value)
|
||||
except (TypeError, ValueError) as exc:
|
||||
raise ValueError(f"{cfg_name} must be a number between 0 and 1.") from exc
|
||||
if not 0 <= parsed <= 1:
|
||||
raise ValueError(f"{cfg_name} must be between 0 and 1.")
|
||||
return parsed
|
||||
|
||||
|
||||
def _parse_bool(provider_config: dict, cfg_name: str) -> bool:
|
||||
value = provider_config[cfg_name]
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, int):
|
||||
return bool(value)
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip().lower()
|
||||
if normalized in {"true", "1", "yes", "y", "on"}:
|
||||
return True
|
||||
if normalized in {"false", "0", "no", "n", "off"}:
|
||||
return False
|
||||
raise ValueError(f"{cfg_name} must be a boolean value.")
|
||||
|
||||
|
||||
def _normalize_timeout(value: int | str | None) -> int:
|
||||
if value in ("", None):
|
||||
return 20
|
||||
try:
|
||||
timeout = int(value)
|
||||
except (TypeError, ValueError) as exc:
|
||||
raise ValueError("timeout must be a positive integer.") from exc
|
||||
if timeout <= 0:
|
||||
raise ValueError("timeout must be a positive integer.")
|
||||
return timeout
|
||||
|
||||
|
||||
def _validate_output_format(output_format: str) -> None:
    """Raise if ``output_format`` is not a format this provider can handle.

    The check is case-insensitive and prefix-based: formats starting with a
    prefix in ``RAW_AUDIO_OUTPUT_PREFIXES`` are rejected with a dedicated
    message, and anything not starting with a prefix in
    ``SUPPORTED_CONTAINER_OUTPUT_PREFIXES`` is rejected as unsupported.

    Raises:
        ValueError: If the format is a raw audio format or otherwise
            unsupported.
    """
    normalized = output_format.lower()

    is_raw = normalized.startswith(RAW_AUDIO_OUTPUT_PREFIXES)
    if is_raw:
        raise ValueError(
            "ElevenLabs raw audio output formats are not supported by this provider. "
            "Use an mp3, wav, or opus output format instead."
        )

    if normalized.startswith(SUPPORTED_CONTAINER_OUTPUT_PREFIXES):
        return

    raise ValueError(
        "Unsupported ElevenLabs output format. "
        "Use an mp3, wav, or opus output format."
    )
|
||||
|
||||
|
||||
@register_provider_adapter(
    "elevenlabs_tts_api",
    "ElevenLabs TTS API",
    provider_type=ProviderType.TEXT_TO_SPEECH,
)
class ProviderElevenLabsTTSAPI(TTSProvider):
    """Text-to-speech provider that calls the ElevenLabs HTTP API.

    The synthesized audio returned by the API is written to a file in the
    AstrBot temp directory and the file path is handed back to the caller.
    """

    # Fallbacks used when a setting is missing OR saved as a blank value.
    _DEFAULT_API_BASE = "https://api.elevenlabs.io/v1"
    _DEFAULT_VOICE_ID = "JBFqnCBsd6RMkjVDRZzb"
    _DEFAULT_MODEL = "eleven_multilingual_v2"
    _DEFAULT_OUTPUT_FORMAT = "mp3_44100_128"

    # (API field name, config key) pairs for the optional float voice settings.
    _FLOAT_VOICE_SETTINGS = (
        ("stability", "elevenlabs-tts-stability"),
        ("similarity_boost", "elevenlabs-tts-similarity-boost"),
        ("style", "elevenlabs-tts-style"),
    )

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        """Read and validate the provider configuration and build the client.

        Args:
            provider_config: Provider configuration mapping (API key, base
                URL, model, voice, output format, voice settings, timeout,
                proxy).
            provider_settings: Passed through to the base class unchanged.

        Raises:
            ValueError: If the output format, a voice setting, or the timeout
                is invalid.
        """
        super().__init__(provider_config, provider_settings)
        self.api_key = provider_config.get("api_key", "")

        # dict.get(key, default) only falls back when the key is absent. A
        # field that was cleared and saved arrives as "" (or None), which
        # would otherwise yield a relative URL or an empty path segment.
        api_base = str(provider_config.get("api_base") or "").strip().rstrip("/")
        self.api_base = api_base or self._DEFAULT_API_BASE

        voice_id = str(provider_config.get("elevenlabs-tts-voice-id") or "").strip()
        self.voice_id = voice_id or self._DEFAULT_VOICE_ID

        self.model_id = provider_config.get("model") or self._DEFAULT_MODEL
        self.set_model(self.model_id)

        self.output_format = (
            provider_config.get("elevenlabs-tts-output-format")
            or self._DEFAULT_OUTPUT_FORMAT
        )
        _validate_output_format(self.output_format)

        # Only send explicitly configured voice settings so the API can apply defaults.
        self.voice_settings: dict = {}
        for key, cfg_name in self._FLOAT_VOICE_SETTINGS:
            value = _parse_optional_float(provider_config, cfg_name)
            if value is not None:
                self.voice_settings[key] = value

        # Treat a blank speaker-boost value as "not configured", consistent
        # with the float settings above, instead of failing initialization.
        boost_key = "elevenlabs-tts-use-speaker-boost"
        if provider_config.get(boost_key) not in ("", None):
            self.voice_settings["use_speaker_boost"] = _parse_bool(
                provider_config,
                boost_key,
            )

        timeout = _normalize_timeout(provider_config.get("timeout", 20))

        proxy = provider_config.get("proxy", "")
        if proxy:
            logger.info(f"[ElevenLabs TTS] 使用代理: {proxy}")
        # The client is created last, after all validation, so a configuration
        # error never leaves an unclosed client behind.
        self.client = httpx.AsyncClient(
            timeout=timeout,
            proxy=proxy or None,
            trust_env=False,
        )

    def _output_extension(self) -> str:
        """Infer the audio file extension from the configured output format."""
        fmt = self.output_format.lower()
        for extension in ("mp3", "opus", "wav"):
            if fmt.startswith(extension):
                return extension
        return "mp3"

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` and return the path of the saved audio file.

        Args:
            text: Text to convert to speech.

        Returns:
            Path (as a string) of the audio file written to the AstrBot temp
            directory.

        Raises:
            Exception: If the API responds with a status code other than 200.
                The message carries the status code and up to 1024 characters
                of the response body.
        """
        url = f"{self.api_base}/text-to-speech/{self.voice_id}"
        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        payload: dict = {
            "text": text,
            # presumably populated by set_model() in __init__ — TODO confirm
            # against the TTSProvider base class.
            "model_id": self.model_name,
        }
        if self.voice_settings:
            payload["voice_settings"] = self.voice_settings

        response = await self.client.post(
            url,
            headers=headers,
            params={"output_format": self.output_format},
            json=payload,
        )
        if response.status_code != 200:
            # Truncate the body so a large error page does not flood the logs.
            error_text = response.text[:1024]
            raise Exception(
                f"ElevenLabs TTS API 请求失败: {response.status_code}, {error_text}"
            )

        temp_dir = Path(get_astrbot_temp_path())
        temp_dir.mkdir(parents=True, exist_ok=True)
        # A random UUID in the file name keeps concurrent requests from
        # overwriting each other's output.
        path = (
            temp_dir / f"elevenlabs_tts_api_{uuid.uuid4()}.{self._output_extension()}"
        )
        path.write_bytes(response.content)
        return str(path)

    async def terminate(self):
        """Close the underlying HTTP client."""
        if self.client:
            await self.client.aclose()
|
||||
@@ -1590,6 +1590,30 @@
|
||||
"description": "voice",
|
||||
"hint": "OpenAI TTS voice. OpenAI defaults: 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'."
|
||||
},
|
||||
"elevenlabs-tts-voice-id": {
|
||||
"description": "Voice ID",
|
||||
"hint": "ElevenLabs voice ID. Browse and copy voice IDs at https://elevenlabs.io/app/voice-library. Default 'JBFqnCBsd6RMkjVDRZzb' (George)."
|
||||
},
|
||||
"elevenlabs-tts-output-format": {
|
||||
"description": "Output format",
|
||||
"hint": "Audio output format, e.g. 'mp3_44100_128', 'mp3_22050_32', 'wav_44100', or 'opus_48000_128'. Raw PCM/u-law/a-law formats are not supported. Default 'mp3_44100_128'."
|
||||
},
|
||||
"elevenlabs-tts-stability": {
|
||||
"description": "Stability",
|
||||
"hint": "Voice stability, range [0, 1]. Higher is more consistent, lower is more expressive. Leave empty to use the server default."
|
||||
},
|
||||
"elevenlabs-tts-similarity-boost": {
|
||||
"description": "Similarity boost",
|
||||
"hint": "How closely the output matches the original voice, range [0, 1]. Leave empty to use the server default."
|
||||
},
|
||||
"elevenlabs-tts-style": {
|
||||
"description": "Style exaggeration",
|
||||
"hint": "Style exaggeration of the voice, range [0, 1]. Higher values increase latency. Leave empty to use the server default."
|
||||
},
|
||||
"elevenlabs-tts-use-speaker-boost": {
|
||||
"description": "Speaker boost",
|
||||
"hint": "Boost similarity to the original speaker. May slightly increase latency."
|
||||
},
|
||||
"mimo-tts-voice": {
|
||||
"description": "Voice",
|
||||
"hint": "MiMo TTS voice name. Supported values include 'mimo_default', 'default_en', and 'default_zh'."
|
||||
|
||||
@@ -1587,6 +1587,30 @@
|
||||
"description": "API Base URL",
|
||||
"hint": "Голоса OpenAI TTS: alloy, echo и др."
|
||||
},
|
||||
"elevenlabs-tts-voice-id": {
|
||||
"description": "ID голоса",
|
||||
"hint": "ID голоса ElevenLabs. Просмотрите и скопируйте ID на https://elevenlabs.io/app/voice-library. По умолчанию 'JBFqnCBsd6RMkjVDRZzb' (George)."
|
||||
},
|
||||
"elevenlabs-tts-output-format": {
|
||||
"description": "Формат вывода",
|
||||
"hint": "Формат аудио, например 'mp3_44100_128', 'mp3_22050_32', 'wav_44100' или 'opus_48000_128'. Raw PCM/u-law/a-law форматы не поддерживаются. По умолчанию 'mp3_44100_128'."
|
||||
},
|
||||
"elevenlabs-tts-stability": {
|
||||
"description": "Стабильность",
|
||||
"hint": "Стабильность голоса, диапазон [0, 1]. Оставьте пустым для значения по умолчанию."
|
||||
},
|
||||
"elevenlabs-tts-similarity-boost": {
|
||||
"description": "Усиление сходства",
|
||||
"hint": "Насколько вывод соответствует исходному голосу, диапазон [0, 1]. Оставьте пустым для значения по умолчанию."
|
||||
},
|
||||
"elevenlabs-tts-style": {
|
||||
"description": "Выразительность стиля",
|
||||
"hint": "Выразительность стиля голоса, диапазон [0, 1]. Высокие значения увеличивают задержку. Оставьте пустым для значения по умолчанию."
|
||||
},
|
||||
"elevenlabs-tts-use-speaker-boost": {
|
||||
"description": "Усиление диктора",
|
||||
"hint": "Усиливает сходство с исходным диктором. Может немного увеличить задержку."
|
||||
},
|
||||
"mimo-tts-voice": {
|
||||
"description": "Голос",
|
||||
"hint": "Имя голоса MiMo TTS. Поддерживаются значения 'mimo_default', 'default_en' и 'default_zh'."
|
||||
|
||||
@@ -1592,6 +1592,30 @@
|
||||
"description": "voice",
|
||||
"hint": "OpenAI TTS 的声音。OpenAI 默认支持:'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'"
|
||||
},
|
||||
"elevenlabs-tts-voice-id": {
|
||||
"description": "音色 ID",
|
||||
"hint": "ElevenLabs 音色 ID。可在 https://elevenlabs.io/app/voice-library 浏览并复制音色 ID。默认 'JBFqnCBsd6RMkjVDRZzb'(George)。"
|
||||
},
|
||||
"elevenlabs-tts-output-format": {
|
||||
"description": "输出格式",
|
||||
"hint": "音频输出格式,例如 'mp3_44100_128'、'mp3_22050_32'、'wav_44100'、'opus_48000_128'。不支持裸 PCM/u-law/a-law 格式。默认 'mp3_44100_128'。"
|
||||
},
|
||||
"elevenlabs-tts-stability": {
|
||||
"description": "稳定性",
|
||||
"hint": "音色稳定性,范围 [0, 1]。值越高越稳定,越低越富有表现力。留空则使用服务端默认值。"
|
||||
},
|
||||
"elevenlabs-tts-similarity-boost": {
|
||||
"description": "相似度增强",
|
||||
"hint": "输出与原始音色的接近程度,范围 [0, 1]。留空则使用服务端默认值。"
|
||||
},
|
||||
"elevenlabs-tts-style": {
|
||||
"description": "风格夸张度",
|
||||
"hint": "音色风格的夸张程度,范围 [0, 1]。值越高延迟越大。留空则使用服务端默认值。"
|
||||
},
|
||||
"elevenlabs-tts-use-speaker-boost": {
|
||||
"description": "说话人增强",
|
||||
"hint": "增强与原始说话人的相似度,可能略微增加延迟。"
|
||||
},
|
||||
"mimo-tts-voice": {
|
||||
"description": "音色",
|
||||
"hint": "MiMo TTS 的音色名称。可选值包括 'mimo_default'、'default_en'、'default_zh'。"
|
||||
|
||||
Reference in New Issue
Block a user