File size: 2,897 Bytes
ab9dfb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import streamlit as st
import speech_recognition as sr
from gtts import gTTS
from groq import Groq
from dotenv import load_dotenv
import tempfile
import base64

# Load environment variables (a local .env file, if present, is read by load_dotenv)
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Stop with a readable message instead of letting the client constructor
# fail with a raw traceback when the key is not configured.
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()

# Initialize Groq Client (shared by every request made from this script)
client = Groq(api_key=GROQ_API_KEY)

# Function to transcribe voice to text
def transcribe_audio(audio_file):
    """Transcribe an audio file to text using ``Recognizer.recognize_google``.

    Args:
        audio_file: Handed unchanged to ``sr.AudioFile``; the UI in this file
            passes the path of a WAV file.

    Returns:
        The recognized text on success. On failure a plain message string is
        returned instead of raising: "Could not understand the audio" when
        the recognizer raises ``sr.UnknownValueError``, or "Error with speech
        recognition service" when it raises ``sr.RequestError``.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as wav_source:
        # Read the entire file into memory before sending it for recognition.
        captured = engine.record(wav_source)
        try:
            outcome = engine.recognize_google(captured)
        except sr.UnknownValueError:
            outcome = "Could not understand the audio"
        except sr.RequestError:
            outcome = "Error with speech recognition service"
    return outcome

# Function to generate speech from text
def text_to_speech(text, lang="en"):
    """Synthesize ``text`` to an MP3 file with gTTS and return the file's path.

    Args:
        text: The text to convert into speech.
        lang: Language code forwarded to gTTS. Defaults to "en", the value
            that was previously hard-coded.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is created
        with ``delete=False``, so it persists until the caller removes it.

    Raises:
        Any exception raised by gTTS while building or saving the speech is
        propagated; the temporary file is removed first in the saving case.
    """
    tts = gTTS(text=text, lang=lang)

    # Reserve a unique path and close the handle immediately so the file is
    # not left open while gTTS writes to it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        temp_audio_path = handle.name

    try:
        tts.save(temp_audio_path)
    except BaseException:
        # Do not leave an orphaned (empty or partial) temp file behind.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path

# Function to get AI insights from Llama 3.3 70B (requested through the Groq client)
def get_ai_response(text):
    """Send ``text`` to the chat model for analysis and return the reply.

    Args:
        text: Text to analyze; it is embedded in the user message after the
            prefix "Analyze this text: ".

    Returns:
        The message content of the first choice in the completion returned
        by the module-level ``client``.
    """
    system_turn = {
        "role": "system",
        "content": "You are an advanced AI that helps with speech processing.",
    }
    user_turn = {"role": "user", "content": f"Analyze this text: {text}"}

    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[system_turn, user_turn],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Streamlit UI
# Emoji in the headings were mis-encoded (mojibake) and are restored here.
st.title("🎙️ AI Voice Converter: Speech-to-Text & Text-to-Speech")
st.write("Convert voice into text and generate AI-powered speech.")

# Voice-to-Text Section
st.subheader("🎤 Voice-to-Text")
audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])

# transcribe_audio reports failures by returning one of these messages rather
# than raising, so they must not be treated as a real transcript.
_TRANSCRIPTION_FAILURES = (
    "Could not understand the audio",
    "Error with speech recognition service",
)

if audio_file:
    st.audio(audio_file, format="audio/wav")

    if st.button("Transcribe Audio"):
        # Use a uniquely named temp file: a fixed "temp.wav" in the working
        # directory would be overwritten by other sessions using the app at
        # the same time, and would be rewritten on every script rerun.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_tmp:
            wav_tmp.write(audio_file.getbuffer())
            wav_path = wav_tmp.name
        try:
            transcribed_text = transcribe_audio(wav_path)
        finally:
            os.remove(wav_path)

        st.write("**Transcribed Text:**", transcribed_text)

        # AI insights (skipped when transcription failed, so the error
        # message itself is not sent to the model for "analysis")
        if transcribed_text in _TRANSCRIPTION_FAILURES:
            st.warning("Skipping AI analysis because the audio could not be transcribed.")
        else:
            ai_insights = get_ai_response(transcribed_text)
            st.write("**AI Analysis:**", ai_insights)

# Text-to-Voice Section
st.subheader("📝 Text-to-Speech")
input_text = st.text_area("Enter text to convert into speech")

if st.button("Generate Speech"):
    if input_text.strip():
        audio_path = text_to_speech(input_text)

        # Load the MP3 into memory, then delete the temp file so generated
        # audio does not accumulate on disk.
        try:
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
        finally:
            os.remove(audio_path)

        # Convert audio to base64 for the download link
        b64 = base64.b64encode(audio_bytes).decode()

        # Audio player (fed from memory, since the file is gone) and download link
        st.audio(audio_bytes, format="audio/mp3")
        st.markdown(f'<a href="data:audio/mp3;base64,{b64}" download="output.mp3">Download Speech</a>', unsafe_allow_html=True)
    else:
        st.error("Please enter text to generate speech.")