File size: 4,254 Bytes
6f6cd40
0241b19
 
2b3dccf
0241b19
 
 
 
 
6f6cd40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0241b19
0d4d906
81bedca
6f6cd40
 
81bedca
6f6cd40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os

# Redirect the XDG cache and the Streamlit config/cache/static/runtime
# directories to locations under /tmp. This runs before `streamlit` is
# imported below, so the variables are already set when it loads
# (intended to avoid permission errors).
os.environ.update(
    {
        "XDG_CACHE_HOME": "/tmp/.cache",
        "STREAMLIT_CONFIG_DIR": "/tmp/streamlit/config",
        "STREAMLIT_CACHE_DIR": "/tmp/streamlit/cache",
        "STREAMLIT_STATIC_DIR": "/tmp/streamlit/static",
        "STREAMLIT_RUNTIME_DIR": "/tmp/streamlit/runtime",
    }
)

import requests
import tempfile
import streamlit as st
from moviepy.editor import VideoFileClip
from speechbrain.inference.classifiers import EncoderClassifier

# Page metadata. The icon is the "speaking head" emoji; it was previously a
# mis-encoded byte sequence that rendered as garbage in the browser tab.
st.set_page_config(page_title="Accent Classifier", page_icon="🗣️", layout="centered")

# Custom CSS: light grey app background, and white text inputs with black
# text and a thin grey border. Injected as raw HTML, hence unsafe_allow_html.
st.markdown(
    """

    <style>

    .stApp {

        background-color: #f0f0f0;

    }

    input[type="text"] {

        background-color: white;

        color: black;

        border: 1px solid #ccc;

        padding: 0.5rem;

        border-radius: 5px;

    }

    .stTextInput > div > div > input {

        background-color: white !important;

        color: black !important;

        border: 1px solid #ccc !important;

    }

    </style>

    """,
    unsafe_allow_html=True
)

# Model identifier handed to EncoderClassifier.from_hparams as `source` in
# load_model(); presumably a Hugging Face Hub repo id — confirm.
MODEL_ID = "Jzuluaga/accent-id-commonaccent_ecapa"

def download_video(url, output_path, timeout=30, chunk_size=64 * 1024):
    """Stream the resource at *url* into the file *output_path*.

    Args:
        url: Address of the video file to fetch.
        output_path: Destination path; the file is created or overwritten.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            server that stops responding cannot hang the app indefinitely.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        Exception: If the server does not answer with HTTP 200, or if the
            saved file is missing or smaller than 1024 bytes.
        requests.RequestException: On connection errors or timeouts
            (propagated from ``requests``).
    """
    # The context manager releases the connection even if writing fails;
    # with stream=True it is otherwise held until the body is fully consumed.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception("❌ Failed to download video.")
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)

    # Sanity check: anything under 1 KiB is treated as a failed download.
    if not os.path.exists(output_path) or os.path.getsize(output_path) < 1024:
        raise Exception("❌ Video download failed or file too small.")

def extract_audio(video_path, audio_path):
    """Write the audio track of *video_path* to *audio_path*.

    The audio is written at 16000 Hz with 2 bytes per sample using the
    ``pcm_s16le`` codec, and ffmpeg is asked for a single channel
    (``-ac 1``).

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the audio file to create.

    Raises:
        ValueError: If the clip exposes no audio track (``clip.audio`` is
            ``None``).
    """
    clip = VideoFileClip(video_path)
    audio = None
    try:
        audio = clip.audio
        if audio is None:
            raise ValueError("The video does not contain an audio track.")
        audio.write_audiofile(
            audio_path,
            fps=16000,
            nbytes=2,
            codec="pcm_s16le",
            ffmpeg_params=["-ac", "1"],
        )
    finally:
        # Always release the readers, even when extraction fails midway.
        clip.close()
        if audio is not None:
            audio.close()

@st.cache_resource(show_spinner="Loading model...")
def load_model():
    """Build the accent classifier from ``MODEL_ID`` and return it.

    The model files are saved under ``/tmp/accent-id-model`` (created if
    missing). The function is wrapped in ``st.cache_resource``, which shows
    a "Loading model..." spinner while it runs.
    """
    save_dir = "/tmp/accent-id-model"
    os.makedirs(save_dir, exist_ok=True)
    return EncoderClassifier.from_hparams(source=MODEL_ID, savedir=save_dir)

def classify_accent(audio_path, classifier):
    """Classify the audio file and return ``(label, confidence)``.

    The path is made absolute and its backslashes are replaced with forward
    slashes before it is handed to ``classifier.classify_file``.

    Args:
        audio_path: Path of the audio file to classify.
        classifier: Object exposing ``classify_file(path)`` that returns a
            4-tuple whose second item has ``.item()`` (the score) and whose
            fourth item is the label.

    Returns:
        A tuple of the label exactly as the classifier returned it and the
        score expressed as a percentage rounded to two decimals.

    Raises:
        FileNotFoundError: If the normalized path does not exist.
    """
    normalized = os.path.abspath(audio_path).replace('\\', '/')
    if os.path.exists(normalized):
        _probs, top_score, _idx, predicted = classifier.classify_file(normalized)
        percent = round(top_score.item() * 100, 2)
        return predicted, percent
    raise FileNotFoundError(f"Audio file not found: {normalized}")

def _direct_dropbox_url(url):
    """Return *url* with Dropbox share links rewritten to serve the raw file.

    URLs whose host is not dropbox.com (or a subdomain), and Dropbox URLs
    that already carry ``raw=1``, are returned unchanged. Otherwise any
    ``dl`` query parameter is dropped and ``raw=1`` is appended.
    """
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    parts = urlsplit(url)
    host = (parts.hostname or "").lower()
    if host != "dropbox.com" and not host.endswith(".dropbox.com"):
        return url
    pairs = parse_qsl(parts.query, keep_blank_values=True)
    if ("raw", "1") in pairs:
        return url
    pairs = [(key, value) for key, value in pairs if key not in ("dl", "raw")]
    pairs.append(("raw", "1"))
    return urlunsplit(parts._replace(query=urlencode(pairs)))


# ---------------- UI ----------------
st.title("🗣️ Accent Classifier from Video")
st.markdown("Paste a direct **video URL (MP4)** and then press **Enter** or click **Identify the Accent**.")

# A form is used so that pressing Enter in the text box submits it as well.
with st.form("url_form", clear_on_submit=False):
    video_url = st.text_input("🔗 Video URL", placeholder="https://...")
    submitted = st.form_submit_button("🗣️ Identify the Accent")

if submitted:
    # Pasted URLs often carry stray whitespace; treat whitespace-only as empty.
    video_url = (video_url or "").strip()
    if not video_url:
        st.warning("⚠️ Please enter a video URL.")
    else:
        try:
            video_url = _direct_dropbox_url(video_url)

            with st.spinner("🔄 Downloading and processing video..."):
                # Intermediate files live in a temp dir removed on exit.
                with tempfile.TemporaryDirectory() as tmpdir:
                    video_path = os.path.join(tmpdir, "input_video.mp4")
                    audio_path = os.path.join(tmpdir, "output_audio.wav")

                    download_video(video_url, video_path)
                    extract_audio(video_path, audio_path)
                    classifier = load_model()
                    label, confidence = classify_accent(audio_path, classifier)

            # SpeechBrain's classify_file is documented to return the text
            # labels as a list; unwrap a single-item list so the UI shows
            # "us" rather than "['us']".
            if isinstance(label, (list, tuple)) and len(label) == 1:
                label = label[0]

            st.success("✅ Accent classified successfully!")
            st.markdown(f"### 🎯 Prediction: **{label}**")
            st.markdown(f"🧠 Confidence: **{confidence}%**")
            st.info(f"The speaker's accent is predicted to be **{label}** with **{confidence}%** confidence.")
        except Exception as e:
            # Top-level UI boundary: report any failure to the user instead
            # of letting Streamlit render a raw traceback.
            st.error(f"❌ Error: {e}")