Spaces:
Running
Running
Bouaziz-bad
committed on
Commit
·
aab61cd
1
Parent(s):
3697d7f
Add Kabyle ASR for free tier (GPL-3.0)
Browse files- LICENSE.txt +8 -0
- README.md +26 -14
- app.py +22 -12
- app_full.py +0 -137
- backend.py +42 -0
- frontend/package.js +0 -210
- requirements.txt +10 -2
- requirements_full.txt +0 -7
LICENSE.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
GNU GENERAL PUBLIC LICENSE
|
2 |
+
Version 3, 29 June 2007
|
3 |
+
|
4 |
+
Copyright (C) 2025 [Your Name or Organization]
|
5 |
+
Everyone is permitted to copy and distribute verbatim copies
|
6 |
+
of this license document, but changing it is not allowed.
|
7 |
+
|
8 |
+
(See full text at: https://www.gnu.org/licenses/gpl-3.0.txt)
|
README.md
CHANGED
@@ -1,21 +1,33 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk:
|
7 |
-
|
|
|
8 |
license: gpl-3.0
|
9 |
-
short_description: Backend of Kab ASR using nemo Nvidia
|
10 |
-
---
|
11 |
-
# Kabyle ASR Web App on Hugging Face Spaces
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
|
|
|
|
16 |
|
17 |
-
|
|
|
18 |
|
19 |
-
|
|
|
20 |
|
21 |
-
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Tanti - Kabyle ASR
|
3 |
+
emoji: 🎤
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
app_file: app.py
|
8 |
+
python_version: "3.10"
|
9 |
license: gpl-3.0
|
|
|
|
|
|
|
10 |
|
11 |
+
short_description: "Kabyle speech-to-text using NeMo on CPU (free tier)."
|
12 |
+
|
13 |
+
tags:
|
14 |
+
- asr
|
15 |
+
- kabyle
|
16 |
+
- nemo
|
17 |
+
- speech-to-text
|
18 |
+
- cpu
|
19 |
+
- gpl
|
20 |
|
21 |
+
# Preload the large NeMo model during build
|
22 |
+
preload_from_hub:
|
23 |
+
- nvidia/stt_kab_conformer_transducer_large
|
24 |
|
25 |
+
# Allow up to 30 minutes startup (critical for CPU + large model)
|
26 |
+
startup_duration_timeout: 30m
|
27 |
|
28 |
+
# Allow embedding in Google Sites
|
29 |
+
disable_embedding: false
|
30 |
|
31 |
+
# No GPU (free tier)
|
32 |
+
# Do NOT include suggested_hardware to default to cpu-basic
|
33 |
+
---
|
app.py
CHANGED
@@ -1,16 +1,26 @@
|
|
1 |
-
|
|
|
|
|
2 |
|
3 |
-
|
|
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
#
|
14 |
-
|
15 |
-
|
16 |
-
return "Backend received audio and is ready to transcribe!"
|
|
|
1 |
+
# app.py
|
2 |
+
import gradio as gr
|
3 |
+
from backend import KabyleASR
|
4 |
|
5 |
+
# Initialize ASR (happens once at startup)
|
6 |
+
asr = KabyleASR()
|
7 |
|
8 |
+
def transcribe_audio(audio):
    """Gradio callback that turns an uploaded audio file into text.

    Args:
        audio: Value supplied by the audio input component, or ``None`` when
            nothing has been uploaded.

    Returns:
        The result of ``asr.transcribe(audio)``, or a prompt asking the user
        to upload a file when ``audio`` is ``None``.
    """
    if audio is not None:
        return asr.transcribe(audio)
    return "Please upload an audio file."
|
12 |
|
13 |
+
# Gradio Interface
# NOTE(review): `flagging_mode` and `launch(ssr_mode=...)` look like Gradio 5
# parameters, while requirements.txt pins gradio==4.25.0 -- confirm which
# Gradio version the Space actually runs.
# `allow_screenshot` was dropped from the call below: it is not a parameter of
# current `gr.Interface`.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(label="Kabyle Transcription", lines=6),
    title="🎙️ Tanti: Kabyle ASR (Free Tier)",
    description="Upload a Kabyle audio file. Transcription may take 1–2 minutes per 30 seconds of audio. Powered by NeMo on CPU.",
    flagging_mode="never",
)

# Launch without SSR
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|
|
app_full.py
DELETED
@@ -1,137 +0,0 @@
|
|
1 |
-
# app.py - Flask server to handle ASR requests using the NeMo model (Corrected)
|
2 |
-
|
3 |
-
import os
|
4 |
-
import tempfile
|
5 |
-
import logging
|
6 |
-
import sys
|
7 |
-
from flask import Flask, request, jsonify
|
8 |
-
from flask_cors import CORS
|
9 |
-
import nemo.collections.asr as nemo_asr
|
10 |
-
from pydub import AudioSegment
|
11 |
-
import re
|
12 |
-
import datetime
|
13 |
-
|
14 |
-
# --- Suppress verbose NeMo logging ---
|
15 |
-
logging.getLogger('nemo_logger').setLevel(logging.ERROR)
|
16 |
-
|
17 |
-
app = Flask(__name__)
|
18 |
-
CORS(app)
|
19 |
-
|
20 |
-
# --- Post-processing function to correct annexation in Kabyle transcription ---
|
21 |
-
# Particles that get hyphen-attached to the word before them, grouped under the
# category names used by the original author.
_KABYLE_PARTICLE_GROUPS = {
    'PoPro': {'inu', 'inem', 'ines', 'nneɣ', 'ntex', 'nwen', 'nwent', 'nsen', 'nsent',
              'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 's', 'tneɣ', 'tentex', 'tsen', 'tsent'},
    'SpWo': {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'ɣid', 'aql',
             'sɣur', 'sennig', 'deffir', 'sdat'},
    'StPaSp': {'i', 'am', 'at', 's', 'neɣ', 'aɣ'},
    'StPa': {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
             'k', 'm', 'ntex', 'wen', 'went', 'sen', 'sent', 'akem', 'att',
             'aken', 'akent', 'aten', 'atent'},
    'DePa': {'a', 'agi', 'nni', 'ihin', 'nniden'},
    'DiPa': {'id', 'in'},
    'FuPa': {'ad', 'ara'},
    'DiObPa': {'yi', 'k', 'kem', 't', 'tt', 'ay', 'ken', 'kent', 'ten', 'tent',
               'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'},
    'InObPa': {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'},
}

# Built once at import time instead of on every call. Longest particles come
# first, with ties broken alphabetically so the pattern text is reproducible.
_KABYLE_ANNEXATION_RE = re.compile(
    r'\b(\w{2,})\s+('
    + '|'.join(sorted(set().union(*_KABYLE_PARTICLE_GROUPS.values()),
                      key=lambda particle: (-len(particle), particle)))
    + r')\b'
)


def post_process_kabyle_text(text):
    """Hyphenate annexed particles in a Kabyle transcription.

    The text is lowercased, then every "<word of 2+ characters> <particle>"
    pair has its separating whitespace replaced by a single hyphen. Matches do
    not overlap, so in a run of several particles only the first is attached
    to the preceding word.

    Args:
        text: Raw transcription.

    Returns:
        The processed string with whitespace runs collapsed to one space,
        surrounding whitespace stripped and repeated hyphens merged. An empty
        string yields ``""``. A non-string input is returned unchanged after a
        warning is printed.
    """
    # Defensive check: anything that is not a string is passed through as-is.
    if not isinstance(text, str):
        print(f"Warning: Expected string for post-processing, but received type: {type(text)}. Skipping post-processing.")
        return text

    if not text:
        return ""

    # Lowercase first so the (lowercase) particle list matches consistently.
    text = _KABYLE_ANNEXATION_RE.sub(r'\1-\2', text.lower())

    # Normalize whitespace, then merge any run of hyphens into a single one.
    text = re.sub(r'\s+', ' ', text).strip()
    return re.sub(r'-+', '-', text)
|
65 |
-
|
66 |
-
# --- Load the ASR model once at the beginning to avoid reloading on every request ---
|
67 |
-
print("Loading NeMo ASR model...")
|
68 |
-
try:
|
69 |
-
asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_kab_conformer_transducer_large")
|
70 |
-
print("NeMo ASR model loaded successfully.")
|
71 |
-
except Exception as e:
|
72 |
-
print(f"Error loading NeMo ASR model: {e}")
|
73 |
-
print("Please check your internet connection and ensure nemo_toolkit[asr] is correctly installed.")
|
74 |
-
asr_model = None
|
75 |
-
|
76 |
-
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Handle POST /transcribe: convert the uploaded audio and transcribe it.

    Expects a multipart form upload with the file under the ``audio`` field.

    Returns:
        A JSON response: ``{"transcription": ...}`` on success, otherwise
        ``{"error": ...}`` with status 503 (model not loaded), 400 (missing or
        unnamed upload) or 500 (conversion, model or unexpected failure).
    """
    if asr_model is None:
        return jsonify({"error": "ASR model is not loaded."}), 503

    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    if audio_file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    temp_input_file = None
    processed_file_path = None
    try:
        # Record the path BEFORE writing so the `finally` block can delete the
        # file even when saving fails, and close the handle first so the
        # upload is not written by name while it is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
            temp_input_file = tmp_audio.name
        audio_file.save(temp_input_file)

        try:
            # The model requires the audio to be in a specific format (16kHz mono).
            input_audio = AudioSegment.from_file(temp_input_file)
            processed_audio = input_audio.set_frame_rate(16000).set_channels(1)

            # Same ordering as above: remember the path, then export to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as processed_tmp:
                processed_file_path = processed_tmp.name
            processed_audio.export(processed_file_path, format="wav")
        except Exception as audio_e:
            print(f"Error during audio processing with pydub: {audio_e}", file=sys.stderr)
            return jsonify({"error": "Failed to process audio file. Please ensure it's a valid audio format."}), 500

        try:
            # Transcribe the processed file using the loaded model
            transcription_list = asr_model.transcribe([processed_file_path])
        except Exception as asr_e:
            print(f"Error during transcription with NeMo model: {asr_e}", file=sys.stderr)
            return jsonify({"error": "Transcription failed due to a model error."}), 500

        # Only results exposing a `.text` attribute are accepted.
        if transcription_list and transcription_list[0] and hasattr(transcription_list[0], 'text'):
            raw_transcription = transcription_list[0].text
            final_transcription = post_process_kabyle_text(raw_transcription)

            return jsonify({"transcription": final_transcription})
        else:
            print("ASR model returned an empty, invalid, or unexpected transcription object.")
            return jsonify({"error": "Transcription failed. No text returned."}), 500

    except Exception as e:
        print(f"An unhandled server error occurred: {e}", file=sys.stderr)
        return jsonify({"error": "An internal server error occurred."}), 500
    finally:
        # Cleanup temporary files, whichever of them were created.
        if temp_input_file and os.path.exists(temp_input_file):
            os.remove(temp_input_file)
        if processed_file_path and os.path.exists(processed_file_path):
            os.remove(processed_file_path)
|
133 |
-
|
134 |
-
if __name__ == '__main__':
|
135 |
-
print("Starting Flask server...")
|
136 |
-
print("Server running at http://127.0.0.1:5000")
|
137 |
-
app.run(debug=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# backend.py
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
from nemo.collections.asr.models import EncDecRNNTBPEModel
|
5 |
+
|
6 |
+
class KabyleASR:
    """CPU-only wrapper around the pretrained NeMo Kabyle transducer model.

    The model is loaded once when the instance is constructed; ``transcribe``
    then turns an audio file path into plain text.
    """

    def __init__(self):
        """Create the wrapper and load the model immediately.

        Raises:
            RuntimeError: If the pretrained model cannot be loaded.
        """
        self.device = "cpu"  # Force CPU
        self.model = None
        self.load_model()

    def load_model(self):
        """Load the NeMo Kabyle model on CPU.

        Raises:
            RuntimeError: If loading fails; the original exception is chained
                as ``__cause__``.
        """
        print("Loading NeMo ASR model for Kabyle (CPU mode)...")
        try:
            # Load from Hugging Face Hub
            self.model = EncDecRNNTBPEModel.from_pretrained(
                "nvidia/stt_kab_conformer_transducer_large"
            )
            self.model = self.model.to(self.device)
            # Disable dithering and frame padding on the feature extractor.
            self.model.preprocessor.featurizer.dither = 0.0
            self.model.preprocessor.featurizer.pad_to = 0
            print("Model loaded successfully on CPU.")
        except Exception as e:
            raise RuntimeError(f"Failed to load model: {e}") from e

    @staticmethod
    def _extract_text(result):
        """Reduce whatever ``model.transcribe`` returned to one stripped string.

        Handles a ``(best_hypotheses, all_hypotheses)`` tuple, a list of
        strings, and a list of objects carrying a ``.text`` attribute. An
        empty result yields ``""``.
        """
        # Transducer models may return a (best, all) pair; keep the best list.
        if isinstance(result, tuple):
            result = result[0] if result else None
        # Only one file is submitted, so only the first entry is relevant.
        if isinstance(result, (list, tuple)):
            result = result[0] if result else None
        if result is None:
            return ""
        # Hypothesis-style objects expose the transcript as `.text`.
        return str(getattr(result, "text", result)).strip()

    def transcribe(self, audio_file):
        """Transcribe one audio file and return the recognized text.

        Args:
            audio_file: Path to the audio file on disk.

        Returns:
            The stripped transcription. Failures are reported as a string
            rather than raised: ``"Error: Audio file not found."`` for a
            missing path (including ``None``/empty), or
            ``"Transcription error: ..."`` when the model call fails.
        """
        if not audio_file or not os.path.exists(audio_file):
            return "Error: Audio file not found."

        try:
            # Transcribe (this will be slow on CPU)
            with torch.no_grad():
                transcriptions = self.model.transcribe(
                    [audio_file],
                    batch_size=1,
                    num_workers=0,  # CPU-friendly
                )
            return self._extract_text(transcriptions)
        except Exception as e:
            return f"Transcription error: {e}"
|
frontend/package.js
DELETED
@@ -1,210 +0,0 @@
|
|
1 |
-
import React, { useState, useRef } from 'react';
|
2 |
-
|
3 |
-
// The URL of our new Flask backend server
|
4 |
-
const BACKEND_URL = 'http://127.0.0.1:5000/transcribe';
|
5 |
-
|
6 |
-
const App = () => {
|
7 |
-
const [isRecording, setIsRecording] = useState(false);
|
8 |
-
const [isLoading, setIsLoading] = useState(false);
|
9 |
-
const [statusMessage, setStatusMessage] = useState('Ready to transcribe Kabyle.');
|
10 |
-
const [transcription, setTranscription] = useState('');
|
11 |
-
const [audioURL, setAudioURL] = useState('');
|
12 |
-
const mediaRecorderRef = useRef(null);
|
13 |
-
const audioChunksRef = useRef([]);
|
14 |
-
|
15 |
-
// Ask for microphone access and start recording. When the recorder stops, the
// captured chunks are combined into one blob, exposed for playback and sent
// for transcription.
const startRecording = async () => {
  let stream = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const recorder = new MediaRecorder(stream);
    mediaRecorderRef.current = recorder;
    audioChunksRef.current = [];
    recorder.ondataavailable = event => {
      audioChunksRef.current.push(event.data);
    };
    recorder.onstop = () => {
      // Label the blob with the container the recorder actually produced
      // instead of hard-coding 'audio/wav'.
      const audioBlob = new Blob(audioChunksRef.current, { type: recorder.mimeType });
      const url = URL.createObjectURL(audioBlob);
      setAudioURL(url);
      stream.getTracks().forEach(track => track.stop()); // Stop microphone stream
      handleTranscription(audioBlob);
    };
    recorder.start();
    setIsRecording(true);
    setStatusMessage('Recording started... Click again to stop.');
  } catch (err) {
    // If setup failed after the microphone was granted, release it so the
    // device does not stay open.
    if (stream) {
      stream.getTracks().forEach(track => track.stop());
    }
    console.error("Error accessing microphone:", err);
    setStatusMessage('Error: Could not access microphone. Please check permissions.');
  }
};
|
38 |
-
|
39 |
-
// Stop the active recording, if there is one; otherwise do nothing.
const stopRecording = () => {
  const recorder = mediaRecorderRef.current;
  if (!recorder || recorder.state !== 'recording') {
    return;
  }
  recorder.stop();
  setIsRecording(false);
  setStatusMessage('Recording stopped. Processing audio...');
};
|
46 |
-
|
47 |
-
// File-input change handler: expose the chosen file for playback and send it
// for transcription. Does nothing when no file was selected.
const handleFileUpload = (event) => {
  const selectedFile = event.target.files[0];
  if (!selectedFile) {
    return;
  }
  const audioBlob = new Blob([selectedFile], { type: selectedFile.type });
  setAudioURL(URL.createObjectURL(audioBlob));
  handleTranscription(audioBlob);
};
|
56 |
-
|
57 |
-
// Send the audio to the backend and reflect the outcome in the UI state.
const handleTranscription = async (audioBlob) => {
  setIsLoading(true);
  setStatusMessage('Transcribing audio...');
  setTranscription('');

  try {
    const transcribedText = await sendAudioToServer(audioBlob);

    // Any string that is not an "Error:" message counts as success. This
    // includes the empty string, which the backend can legitimately return.
    const succeeded =
      typeof transcribedText === 'string' && !transcribedText.startsWith("Error:");

    // The server is responsible for post-processing, so the text is shown
    // as-is. On failure the error text is shown; never store `undefined`.
    setTranscription(transcribedText || '');
    setStatusMessage(succeeded ? 'Transcription complete.' : 'Transcription failed.');
  } finally {
    // Always clear the loading state, even if the request helper throws.
    setIsLoading(false);
  }
};
|
76 |
-
|
77 |
-
// --- THIS IS THE NEW FUNCTION THAT SENDS AUDIO TO THE FLASK SERVER ---
|
78 |
-
// POST the audio blob to the Flask backend as multipart form data.
// Resolves to the transcription string, or to a message starting with
// "Error:" when the request or the response is unusable. Never rejects.
const sendAudioToServer = async (audioBlob) => {
  const formData = new FormData();
  formData.append('audio', audioBlob, 'audio.wav');

  try {
    const response = await fetch(BACKEND_URL, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // The backend reports failures as JSON `{ error: "..." }`; include that
      // text when it is available instead of only the status line.
      let detail = '';
      try {
        const errorBody = await response.json();
        if (errorBody && errorBody.error) {
          detail = ` - ${errorBody.error}`;
        }
      } catch (parseError) {
        // Error body was not JSON; fall back to the status line alone.
      }
      throw new Error(`Server error: ${response.status} ${response.statusText}${detail}`);
    }

    const data = await response.json();
    // Guard against a success response without the expected field, which
    // would otherwise surface as `undefined` in the caller.
    if (!data || typeof data.transcription !== 'string') {
      throw new Error('Response did not contain a transcription.');
    }
    return data.transcription;
  } catch (error) {
    console.error("Error sending audio to server:", error);
    return `Error: Failed to get transcription from server. ${error.message}`;
  }
};
|
99 |
-
|
100 |
-
// Play back the current audio from its object URL.
const handlePlayAudio = () => {
  new Audio(audioURL).play();
};
|
104 |
-
|
105 |
-
return (
|
106 |
-
<div className="min-h-screen bg-gray-100 flex flex-col items-center justify-center font-sans p-4">
|
107 |
-
<style>
|
108 |
-
{`
|
109 |
-
@keyframes spin {
|
110 |
-
from {
|
111 |
-
transform: rotate(0deg);
|
112 |
-
}
|
113 |
-
to {
|
114 |
-
transform: rotate(360deg);
|
115 |
-
}
|
116 |
-
}
|
117 |
-
.animate-spin {
|
118 |
-
animation: spin 1s linear infinite;
|
119 |
-
}
|
120 |
-
`}
|
121 |
-
</style>
|
122 |
-
<div className="bg-white shadow-xl rounded-2xl p-8 max-w-2xl w-full text-center space-y-6">
|
123 |
-
<h1 className="text-4xl font-extrabold text-gray-800">Kabyle ASR Web App</h1>
|
124 |
-
<p className="text-gray-600">Record or upload audio to get a transcription.</p>
|
125 |
-
|
126 |
-
<div className="flex flex-col sm:flex-row justify-center items-center space-y-4 sm:space-y-0 sm:space-x-4 mt-6">
|
127 |
-
<button
|
128 |
-
onClick={isRecording ? stopRecording : startRecording}
|
129 |
-
className={`flex items-center justify-center px-6 py-3 rounded-xl font-bold text-lg transition-all duration-300 transform hover:scale-105 shadow-md
|
130 |
-
${isRecording ? 'bg-red-500 text-white hover:bg-red-600' : 'bg-blue-600 text-white hover:bg-blue-700'}`}
|
131 |
-
disabled={isLoading}
|
132 |
-
>
|
133 |
-
{isRecording ? (
|
134 |
-
<>
|
135 |
-
<svg className="w-6 h-6 mr-2 animate-pulse" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
136 |
-
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a4 4 0 01-4-4V5a4 4 0 118 0v4a4 4 0 01-4 4z"></path>
|
137 |
-
</svg>
|
138 |
-
Stop Recording
|
139 |
-
</>
|
140 |
-
) : (
|
141 |
-
<>
|
142 |
-
<svg className="w-6 h-6 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
143 |
-
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a4 4 0 01-4-4V5a4 4 0 118 0v4a4 4 0 01-4 4z"></path>
|
144 |
-
</svg>
|
145 |
-
Start Recording
|
146 |
-
</>
|
147 |
-
)}
|
148 |
-
</button>
|
149 |
-
|
150 |
-
<label htmlFor="file-upload" className={`flex items-center justify-center px-6 py-3 rounded-xl font-bold text-lg transition-all duration-300 transform hover:scale-105 shadow-md
|
151 |
-
bg-gray-200 text-gray-800 hover:bg-gray-300 cursor-pointer ${isLoading || isRecording ? 'opacity-50 pointer-events-none' : ''}`}>
|
152 |
-
<svg className="w-6 h-6 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
153 |
-
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-8l-4-4m0 0L8 8m4-4v12"></path>
|
154 |
-
</svg>
|
155 |
-
Upload Audio
|
156 |
-
<input
|
157 |
-
id="file-upload"
|
158 |
-
type="file"
|
159 |
-
accept="audio/*"
|
160 |
-
className="hidden"
|
161 |
-
onChange={handleFileUpload}
|
162 |
-
disabled={isLoading || isRecording}
|
163 |
-
/>
|
164 |
-
</label>
|
165 |
-
</div>
|
166 |
-
|
167 |
-
{audioURL && (
|
168 |
-
<div className="flex items-center justify-center mt-4">
|
169 |
-
<button
|
170 |
-
onClick={handlePlayAudio}
|
171 |
-
className="flex items-center px-4 py-2 rounded-lg bg-green-500 text-white font-semibold shadow-md hover:bg-green-600 transition-colors"
|
172 |
-
>
|
173 |
-
<svg className="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
|
174 |
-
<path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z" clipRule="evenodd"></path>
|
175 |
-
</svg>
|
176 |
-
Play Audio
|
177 |
-
</button>
|
178 |
-
</div>
|
179 |
-
)}
|
180 |
-
|
181 |
-
<div className="mt-6 text-xl font-medium text-gray-700 h-8">
|
182 |
-
{isLoading ? (
|
183 |
-
<div className="flex items-center justify-center">
|
184 |
-
<svg className="w-6 h-6 animate-spin text-blue-500 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
185 |
-
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M4 4v5h.582m15.418 5v5h.582M18 10V4.5a2.5 2.5 0 00-2.5-2.5h-8A2.5 2.5 0 005 4.5V10m13 0l-3 3m0 0l-3 3m3-3v14m0-14H10"></path>
|
186 |
-
</svg>
|
187 |
-
<span className="text-blue-500">{statusMessage}</span>
|
188 |
-
</div>
|
189 |
-
) : (
|
190 |
-
<span className="text-gray-500">{statusMessage}</span>
|
191 |
-
)}
|
192 |
-
</div>
|
193 |
-
|
194 |
-
{transcription && (
|
195 |
-
<div className="mt-8 p-6 bg-gray-50 rounded-xl shadow-inner text-left">
|
196 |
-
<h3 className="text-2xl font-bold text-gray-800 flex items-center mb-4">
|
197 |
-
<svg className="w-6 h-6 text-green-500 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
198 |
-
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"></path>
|
199 |
-
</svg>
|
200 |
-
Transcription
|
201 |
-
</h3>
|
202 |
-
<p className="text-gray-800 text-xl leading-relaxed">{transcription}</p>
|
203 |
-
</div>
|
204 |
-
)}
|
205 |
-
</div>
|
206 |
-
</div>
|
207 |
-
);
|
208 |
-
};
|
209 |
-
|
210 |
-
export default App;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
-
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch==1.13.1
|
2 |
+
torchaudio==0.13.1
|
3 |
+
pytorch-lightning==1.9.5
|
4 |
+
omegaconf>=2.0
|
5 |
+
hydra-core
|
6 |
+
numpy<1.24.0
|
7 |
+
gradio==4.25.0
|
8 |
+
|
9 |
+
# Install NeMo ASR only (lighter)
|
10 |
+
git+https://github.com/NVIDIA/[email protected]#egg=nemo_toolkit[asr]&subdirectory=.
|
requirements_full.txt
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
numpy
|
2 |
-
typing_extensions
|
3 |
-
Flask
|
4 |
-
Flask-CORS
|
5 |
-
pydub
|
6 |
-
gunicorn
|
7 |
-
nemo_toolkit[asr]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|