Update app.py

app.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
+import pyttsx3  # Importing pyttsx3 for text-to-speech
 
 # Replace 'your_huggingface_token' with your actual Hugging Face access token
 access_token = os.getenv('token')
@@ -19,6 +20,29 @@ model.eval()  # Set the model to evaluation mode
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
 
+# Initialize the text-to-speech engine
+tts_engine = pyttsx3.init()
+
+# Import required modules for E2-F5-TTS (gradio_client provides Client and handle_file)
+from gradio_client import Client, handle_file
+
+# Initialize the E2-F5-TTS client
+client_tts = Client("mrfakename/E2-F5-TTS")
+
+def text_to_speech(text, sample):
+    result = client_tts.predict(
+        ref_audio_input=handle_file(f'input/{sample}.mp3'),
+        ref_text_input="",
+        gen_text_input=text,
+        remove_silence=False,
+        cross_fade_duration_slider=0.15,
+        speed_slider=1,
+        api_name="/basic_tts"
+    )
+    with open(result[0], "rb") as audio_file:  # result[0] is the path of the generated audio
+        audio_bytes = audio_file.read()
+    return audio_bytes
+
 def conversation_predict(input_text):
     """Generate a response for single-turn input using the model."""
     # Tokenize the input text
@@ -28,7 +52,12 @@ def conversation_predict(input_text):
     outputs = model.generate(input_ids, max_new_tokens=2048)
 
     # Decode and return the generated response
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Convert the text response to speech using E2-F5-TTS
+    audio_bytes = text_to_speech(response, sample="input")
+
+    return response, audio_bytes
 
 def respond(
     message: str,
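For readers who want to exercise the change, below is a minimal usage sketch. It is not part of the commit: the `from app import ...` line, the prompt string, and the out.wav path are illustrative assumptions, and it presumes a reference voice sample exists at input/input.mp3, as text_to_speech expects.

    # Hypothetical usage sketch (not part of the diff above): call the new
    # conversation_predict, which now returns both text and synthesized audio.
    from app import conversation_predict  # assumes this Space's app.py is importable

    response, audio_bytes = conversation_predict("Hello, how are you?")
    print(response)

    # Persist the E2-F5-TTS output, returned as raw audio bytes.
    with open("out.wav", "wb") as f:
        f.write(audio_bytes)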