Spaces:
Running
Running
added readme
Browse files
app.py
CHANGED
@@ -12,8 +12,10 @@ from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
|
12 |
# Load Whisper model to confirm English
|
13 |
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
|
14 |
|
|
|
15 |
classifier = foreign_class(source="Jzuluaga/accent-id-commonaccent_xlsr-en-english", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
|
16 |
|
|
|
17 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
|
18 |
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
|
19 |
|
@@ -38,7 +40,6 @@ ACCENT_LABELS = {
|
|
38 |
}
|
39 |
|
40 |
|
41 |
-
# Placeholder accent classifier (replace with real one or your own logic)
|
42 |
def classify_accent(audio_tensor, sample_rate):
|
43 |
if sample_rate != 16000:
|
44 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
@@ -143,22 +144,6 @@ def analyze_accent(url_or_file):
|
|
143 |
return output
|
144 |
except Exception as e:
|
145 |
return f"❌ Error: {str(e)}"
|
146 |
-
|
147 |
-
|
148 |
-
# gr.Interface(
|
149 |
-
# fn=analyze_accent,
|
150 |
-
# inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
|
151 |
-
# outputs=gr.Markdown(label="Accent Analysis Result"),
|
152 |
-
# title="English Accent Classifier",
|
153 |
-
# description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
|
154 |
-
|
155 |
-
# examples=[
|
156 |
-
# ["https://example.com/sample.mp4"], # example URL
|
157 |
-
# [open("cleo-abram.mp4", "rb")] # local file example
|
158 |
-
# ],
|
159 |
-
# live=True
|
160 |
-
# ).launch()
|
161 |
-
|
162 |
|
163 |
|
164 |
with gr.Blocks() as demo:
|
|
|
12 |
# Load Whisper model to confirm English
|
13 |
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
|
14 |
|
15 |
+
# Load the accent classifier
|
16 |
classifier = foreign_class(source="Jzuluaga/accent-id-commonaccent_xlsr-en-english", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
|
17 |
|
18 |
+
# Fallback model and processor, used in case the transformers whisper-tiny pipeline doesn't return a language
|
19 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
|
20 |
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
|
21 |
|
|
|
40 |
}
|
41 |
|
42 |
|
|
|
43 |
def classify_accent(audio_tensor, sample_rate):
|
44 |
if sample_rate != 16000:
|
45 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
|
|
144 |
return output
|
145 |
except Exception as e:
|
146 |
return f"❌ Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
|
149 |
with gr.Blocks() as demo:
|