Spaces:

Nick021402
/

Voice2PersonaAI

Running

App Files Files Community

Nick021402 committed on May 20

Commit

66a19be

verified ·

1 Parent(s): 9bb4922

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -53

app.py CHANGED Viewed

@@ -1,53 +1,37 @@
-import gradio as gr
-import torchaudio
-from transformers import pipeline
-import numpy as np
-# Load model for speech emotion recognition
-emotion_pipeline = pipeline("audio-classification", model="superb/hubert-large-superb-er")
-# Voice Analysis Logic
-def analyze_voice(audio):
-    if audio is None:
-        return "Please record or upload your voice."
-    # Get emotion
-    results = emotion_pipeline(audio["array"])
-    top = max(results, key=lambda x: x["score"])
-    emotion = top["label"].capitalize()
-    confidence = round(top["score"] * 100, 2)
-    # Personality interpretation (rule-based)
-    personality_map = {
-        "Happy": "You sound energetic, warm, and approachable. Possibly a social butterfly or a natural leader.",
-        "Sad": "You're deep and thoughtful. Often empathetic and emotionally intelligent.",
-        "Angry": "You radiate power and confidence. A strong-willed individual who’s not afraid to speak out.",
-        "Neutral": "Balanced and calm. Likely introspective and logical in decision-making.",
-        "Fearful": "You might be cautious, observant, and sensitive to surroundings.",
-        "Disgust": "Sharp-witted with a strong sense of right and wrong.",
-        "Surprise": "Curious and spontaneous. You embrace the unexpected with open arms."
-    }
-    persona = personality_map.get(emotion, "A unique voice with a unique mind. Hard to label, easy to admire.")
-    return f"""
-## 🎙️ Voice2Persona-AI Report
-**Detected Emotion**: {emotion}
-**Confidence**: {confidence}%
-**Inferred Personality Vibe**:
-{persona}
-    """
-# Gradio UI
-demo = gr.Interface(
-    fn=analyze_voice,
-    inputs=gr.Audio(source="microphone", type="numpy", label="🎤 Record or Upload Your Voice"),
-    outputs=gr.Markdown(),
-    title="🔊 Voice2Persona-AI",
-    description="Speak for 5–10 seconds and get a fun AI-generated personality profile based on your tone & emotion.",
-    theme="soft",
-    live=True
-)
-if __name__ == "__main__":
-    demo.launch()

+import gradio as gr
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+import numpy as np
+# Load the pretrained processor and CTC model once, at import time; both are
+# fetched from the same "facebook/wav2vec2-base-960h" checkpoint.
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+# Use the GPU when torch reports CUDA as available; otherwise fall back to CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Move the model to the chosen device; transcribe() moves its inputs there too.
+model.to(device)
+# Transcription function
+def transcribe(audio):
+    """Transcribe a Gradio audio value to lower-case text with Wav2Vec2.
+
+    Args:
+        audio: Value emitted by ``gr.Audio(type="numpy")``: a
+            ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
+            recorded or uploaded. ``samples`` may be integer PCM or float,
+            mono ``(n,)`` or multi-channel ``(n, channels)``.
+
+    Returns:
+        The lower-cased transcription, or a prompt asking for audio when the
+        input is missing or empty.
+    """
+    if audio is None:
+        return "Please upload or record an audio file."
+    # Gradio hands over (sample_rate, data); the processor wants only the
+    # waveform, so unpack instead of forwarding the tuple.
+    sample_rate, samples = audio
+    samples = np.asarray(samples)
+    if samples.size == 0:
+        return "Please upload or record an audio file."
+    # Integer PCM (e.g. int16) is scaled into [-1, 1]; float data is kept as is.
+    if np.issubdtype(samples.dtype, np.integer):
+        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
+    else:
+        samples = samples.astype(np.float32)
+    # Downmix (n, channels) recordings to mono; the model takes one channel.
+    if samples.ndim > 1:
+        samples = samples.mean(axis=1)
+    # The checkpoint was trained on 16 kHz audio, so resample anything else.
+    # Linear interpolation keeps this dependency-free; swap in a proper
+    # band-limited resampler if transcription quality matters.
+    target_rate = 16000
+    if sample_rate != target_rate:
+        n_target = max(1, int(round(samples.shape[0] * target_rate / sample_rate)))
+        src_times = np.arange(samples.shape[0]) / sample_rate
+        dst_times = np.arange(n_target) / target_rate
+        samples = np.interp(dst_times, src_times, samples).astype(np.float32)
+    input_values = processor(samples, sampling_rate=target_rate, return_tensors="pt").input_values.to(device)
+    # Inference only: skip autograd bookkeeping.
+    with torch.no_grad():
+        logits = model(input_values).logits
+    # Greedy CTC decoding: take the most likely token at every frame.
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.decode(predicted_ids[0])
+    return transcription.lower()
+# Gradio interface: a Blocks layout built with the Soft theme.
+with gr.Blocks(theme=gr.themes.Soft()) as app:
+    # Page heading followed by a one-line description of the app.
+    gr.Markdown("# Voice2PersonaAI")
+    gr.Markdown("Upload or record your voice, and this app will transcribe what you say.")
+    # The audio input and the transcript output are placed in the same row.
+    with gr.Row():
+        # NOTE(review): per the Gradio docs, type="numpy" delivers a
+        # (sample_rate, data) tuple to the callback - verify against the
+        # installed Gradio version.
+        audio_input = gr.Audio(label="🎤 Record or Upload Your Voice", type="numpy")
+        output_text = gr.Textbox(label="📝 Transcribed Text")
+    transcribe_button = gr.Button("Transcribe")
+    # Clicking the button calls transcribe() with the audio component's value
+    # and writes the returned string into the textbox.
+    transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=output_text)
+# Launched unconditionally when the module is executed (no __main__ guard).
+app.launch()