import os
import streamlit as st
import speech_recognition as sr
from gtts import gTTS
from groq import Groq
from dotenv import load_dotenv
import tempfile
import base64

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)
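# Note (not in the original flow): if GROQ_API_KEY is unset, the Groq client will
# typically raise as soon as it is constructed or first used. A small guard such as
# the sketch below would surface the problem directly in the UI instead:
#   if not GROQ_API_KEY:
#       st.error("GROQ_API_KEY is not set. Add it to a .env file or the environment.")
#       st.stop()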

# Function to transcribe voice to text
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data)
        return text
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError:
        return "Error with speech recognition service"

# Function to generate speech from text
def text_to_speech(text):
    tts = gTTS(text=text, lang="en")
    temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
    tts.save(temp_audio_path)
    return temp_audio_path
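# Note: delete=False keeps each generated MP3 in the temp directory so it can be
# reopened later; the caller could call os.remove(audio_path) after reading the bytes
# (cleanup sketch, not part of the original flow). gTTS also requires network access.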

# Function to get AI insights from Llama 3.3 70B via Groq
def get_ai_response(text):
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": "You are an advanced AI that helps with speech processing."},
            {"role": "user", "content": f"Analyze this text: {text}"}
        ]
    )
    return response.choices[0].message.content
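# Note: API errors (invalid key, rate limits, network issues) are not caught here and
# would surface as a Streamlit exception trace; wrapping the create() call in a
# try/except that returns a short error string is one possible refinement (sketch,
# not in the original).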

# Streamlit UI
st.title("🎙️ AI Voice Converter: Speech-to-Text & Text-to-Speech")
st.write("Convert voice into text and generate AI-powered speech.")

# Voice-to-Text Section
st.subheader("🎤 Voice-to-Text")
audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])

if audio_file:
    with open("temp.wav", "wb") as f:
        f.write(audio_file.getbuffer())
    st.audio(audio_file, format="audio/wav")

    if st.button("Transcribe Audio"):
        transcribed_text = transcribe_audio("temp.wav")
        st.write("**Transcribed Text:**", transcribed_text)

        # AI insights
        ai_insights = get_ai_response(transcribed_text)
        st.write("**AI Analysis:**", ai_insights)

# Text-to-Speech Section
st.subheader("🔊 Text-to-Speech")
input_text = st.text_area("Enter text to convert into speech")

if st.button("Generate Speech"):
    if input_text.strip():
        audio_path = text_to_speech(input_text)

        # Convert audio file to base64 for download
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        b64 = base64.b64encode(audio_bytes).decode()

        # Audio player and download button
        st.audio(audio_path, format="audio/mp3")
        st.markdown(f'<a href="data:audio/mp3;base64,{b64}" download="output.mp3">Download Speech</a>', unsafe_allow_html=True)
    else:
        st.error("Please enter text to generate speech.")