Bouaziz-bad committed on
Commit
aab61cd
·
1 Parent(s): 3697d7f

Add Kabyle ASR for free tier (GPL-3.0)

Browse files
Files changed (8) hide show
  1. LICENSE.txt +8 -0
  2. README.md +26 -14
  3. app.py +22 -12
  4. app_full.py +0 -137
  5. backend.py +42 -0
  6. frontend/package.js +0 -210
  7. requirements.txt +10 -2
  8. requirements_full.txt +0 -7
LICENSE.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2025 [Your Name or Organization]
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ (See full text at: https://www.gnu.org/licenses/gpl-3.0.txt)
README.md CHANGED
@@ -1,21 +1,33 @@
1
  ---
2
- title: Kab Asr Tanti
3
- emoji: 👀
4
- colorFrom: pink
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
 
8
  license: gpl-3.0
9
- short_description: Backend of Kab ASR using nemo Nvidia
10
- ---
11
- # Kabyle ASR Web App on Hugging Face Spaces
12
 
13
- This is a Hugging Face Space for a Kabyle Automatic Speech Recognition (ASR) web application.
 
 
 
 
 
 
 
 
14
 
15
- The backend is a Flask app that uses the `nvidia/stt_kab_conformer_transducer_large` NeMo ASR model to transcribe Kabyle speech. The frontend is a separate React application that communicates with this backend.
 
 
16
 
17
- The application is deployed using Docker on Hugging Face Spaces, leveraging its generous free-tier memory to accommodate the large ASR model.
 
18
 
19
- [Add more details about the project, how to use it, etc.]
 
20
 
21
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: Tanti - Kabyle ASR
3
+ emoji: 🎤
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ app_file: app.py
8
+ python_version: "3.10"
9
  license: gpl-3.0
 
 
 
10
 
11
+ short_description: "Kabyle speech-to-text using NeMo on CPU (free tier)."
12
+
13
+ tags:
14
+ - asr
15
+ - kabyle
16
+ - nemo
17
+ - speech-to-text
18
+ - cpu
19
+ - gpl
20
 
21
+ # Preload the large NeMo model during build
22
+ preload_from_hub:
23
+ - nvidia/stt_kab_conformer_transducer_large
24
 
25
+ # Allow up to 30 minutes startup (critical for CPU + large model)
26
+ startup_duration_timeout: 30m
27
 
28
+ # Allow embedding in Google Sites
29
+ disable_embedding: false
30
 
31
+ # No GPU (free tier)
32
+ # Do NOT include suggested_hardware to default to cpu-basic
33
+ ---
app.py CHANGED
@@ -1,16 +1,26 @@
1
- from flask import Flask
 
 
2
 
3
- app = Flask(__name__)
 
4
 
5
- @app.route("/")
6
- def hello_world():
7
- return "Hello from the backend!"
 
8
 
9
- @app.route("/health")
10
- def health_check():
11
- return "Healthy", 200
 
 
 
 
 
 
 
12
 
13
- # Add a simple test route for transcription logic
14
- @app.route("/transcribe", methods=['POST'])
15
- def transcribe_test():
16
- return "Backend received audio and is ready to transcribe!"
 
1
# app.py — Gradio front-end for the Kabyle ASR Space.
import gradio as gr
from backend import KabyleASR

# Initialize ASR once at startup (model download/restore is the slow part;
# the Space config allows a 30-minute startup for exactly this reason).
asr = KabyleASR()

def transcribe_audio(audio):
    """Return the Kabyle transcription for an uploaded audio file.

    Args:
        audio: filepath string from the gr.Audio component, or None when
            the user submits without uploading anything.
    """
    if audio is None:
        return "Please upload an audio file."
    return asr.transcribe(audio)

# Gradio Interface.
# NOTE: requirements.txt pins gradio==4.25.0, so the 4.x API must be used:
#  - flagging is disabled via `allow_flagging="never"` (the `flagging_mode`
#    keyword only exists from gradio 5.x and raises TypeError on 4.x);
#  - `allow_screenshot` was removed from gr.Interface in 4.x;
#  - `launch(ssr_mode=...)` is a gradio 5.x parameter.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(label="Kabyle Transcription", lines=6),
    title="🎙️ Tanti: Kabyle ASR (Free Tier)",
    description="Upload a Kabyle audio file. Transcription may take 1–2 minutes per 30 seconds of audio. Powered by NeMo on CPU.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
 
app_full.py DELETED
@@ -1,137 +0,0 @@
1
- # app.py - Flask server to handle ASR requests using the NeMo model (Corrected)
2
-
3
- import os
4
- import tempfile
5
- import logging
6
- import sys
7
- from flask import Flask, request, jsonify
8
- from flask_cors import CORS
9
- import nemo.collections.asr as nemo_asr
10
- from pydub import AudioSegment
11
- import re
12
- import datetime
13
-
14
- # --- Suppress verbose NeMo logging ---
15
- logging.getLogger('nemo_logger').setLevel(logging.ERROR)
16
-
17
- app = Flask(__name__)
18
- CORS(app)
19
-
20
- # --- Post-processing function to correct annexation in Kabyle transcription ---
21
- def post_process_kabyle_text(text):
22
- """
23
- Corrects annexation in Kabyle transcription by replacing spaces with dashes.
24
- This version uses regular expressions for more robust pattern matching.
25
- """
26
- # Defensive check to ensure 'text' is a string before processing
27
- if not isinstance(text, str):
28
- print(f"Warning: Expected string for post-processing, but received type: {type(text)}. Skipping post-processing.")
29
- return text
30
-
31
- if not text:
32
- return ""
33
-
34
- # Ensure text is lowercase for consistent matching
35
- text = text.lower()
36
-
37
- # Define the sets of particles
38
- PoPro = {'inu', 'inem', 'ines', 'nneɣ', 'ntex', 'nwen', 'nwent', 'nsen', 'nsent',
39
- 'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 's', 'tneɣ', 'tentex', 'tsen', 'tsent'}
40
- SpWo = {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'ɣid', 'aql', 'sɣur', 'sennig', 'deffir', 'sdat'}
41
- StPaSp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
42
- StPa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
43
- 'k', 'm', 'ntex', 'wen', 'went', 'sen', 'sent', 'akem', 'att',
44
- 'aken', 'akent', 'aten', 'atent'}
45
- DePa = {'a', 'agi', 'nni', 'ihin', 'nniden'}
46
- DiPa = {'id', 'in'}
47
- FuPa = {'ad', 'ara'}
48
- DiObPa = {'yi', 'k', 'kem', 't', 'tt', 'ay', 'ken', 'kent', 'ten', 'tent',
49
- 'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
50
- InObPa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}
51
-
52
- # Combine all particles that can be annexed.
53
- all_annexable_particles = PoPro.union(SpWo, StPa, StPaSp, DePa, DiPa, FuPa, DiObPa, InObPa)
54
- sorted_all_annexable = sorted(list(all_annexable_particles), key=len, reverse=True)
55
-
56
- # Create a single regex pattern to handle all annexations in one go.
57
- annexation_pattern = r'\b(\w{2,})\s+(' + '|'.join(sorted_all_annexable) + r')\b'
58
- text = re.sub(annexation_pattern, r'\1-\2', text)
59
-
60
- # Final cleanup for any remaining double spaces or trailing hyphens
61
- text = re.sub(r'\s+', ' ', text).strip()
62
- text = re.sub(r'-+', '-', text)
63
-
64
- return text
65
-
66
- # --- Load the ASR model once at the beginning to avoid reloading on every request ---
67
- print("Loading NeMo ASR model...")
68
- try:
69
- asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_kab_conformer_transducer_large")
70
- print("NeMo ASR model loaded successfully.")
71
- except Exception as e:
72
- print(f"Error loading NeMo ASR model: {e}")
73
- print("Please check your internet connection and ensure nemo_toolkit[asr] is correctly installed.")
74
- asr_model = None
75
-
76
- @app.route('/transcribe', methods=['POST'])
77
- def transcribe():
78
- if asr_model is None:
79
- return jsonify({"error": "ASR model is not loaded."}), 503
80
-
81
- if 'audio' not in request.files:
82
- return jsonify({"error": "No audio file provided"}), 400
83
-
84
- audio_file = request.files['audio']
85
- if audio_file.filename == '':
86
- return jsonify({"error": "No selected file"}), 400
87
-
88
- temp_input_file = None
89
- processed_file_path = None
90
- try:
91
- # Save the uploaded file to a temporary location
92
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
93
- audio_file.save(tmp_audio.name)
94
- temp_input_file = tmp_audio.name
95
-
96
- try:
97
- # The model requires the audio to be in a specific format (16kHz mono).
98
- input_audio = AudioSegment.from_file(temp_input_file)
99
- processed_audio = input_audio.set_frame_rate(16000).set_channels(1)
100
-
101
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as processed_tmp:
102
- processed_audio.export(processed_tmp.name, format="wav")
103
- processed_file_path = processed_tmp.name
104
- except Exception as audio_e:
105
- print(f"Error during audio processing with pydub: {audio_e}", file=sys.stderr)
106
- return jsonify({"error": "Failed to process audio file. Please ensure it's a valid audio format."}), 500
107
-
108
- try:
109
- # Transcribe the processed file using the loaded model
110
- transcription_list = asr_model.transcribe([processed_file_path])
111
- except Exception as asr_e:
112
- print(f"Error during transcription with NeMo model: {asr_e}", file=sys.stderr)
113
- return jsonify({"error": "Transcription failed due to a model error."}), 500
114
-
115
- if transcription_list and transcription_list[0] and hasattr(transcription_list[0], 'text'):
116
- raw_transcription = transcription_list[0].text
117
- final_transcription = post_process_kabyle_text(raw_transcription)
118
-
119
- return jsonify({"transcription": final_transcription})
120
- else:
121
- print("ASR model returned an empty, invalid, or unexpected transcription object.")
122
- return jsonify({"error": "Transcription failed. No text returned."}), 500
123
-
124
- except Exception as e:
125
- print(f"An unhandled server error occurred: {e}", file=sys.stderr)
126
- return jsonify({"error": "An internal server error occurred."}), 500
127
- finally:
128
- # Cleanup temporary files
129
- if temp_input_file and os.path.exists(temp_input_file):
130
- os.remove(temp_input_file)
131
- if processed_file_path and os.path.exists(processed_file_path):
132
- os.remove(processed_file_path)
133
-
134
- if __name__ == '__main__':
135
- print("Starting Flask server...")
136
- print("Server running at http://127.0.0.1:5000")
137
- app.run(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend.py
2
+ import os
3
+ import torch
4
+ from nemo.collections.asr.models import EncDecRNNTBPEModel
5
+
6
class KabyleASR:
    """Kabyle speech-to-text wrapper around NVIDIA's NeMo Conformer-Transducer.

    The model is loaded once at construction time and kept on CPU — this
    Space runs on the free cpu-basic tier (no GPU).
    """

    def __init__(self):
        self.device = "cpu"  # Force CPU: no GPU on the free tier
        self.model = None
        self.load_model()

    def load_model(self):
        """Load the NeMo Kabyle model on CPU.

        Raises:
            RuntimeError: if the checkpoint cannot be downloaded or restored.
        """
        print("Loading NeMo ASR model for Kabyle (CPU mode)...")
        try:
            # Fetched from the Hugging Face Hub (pre-downloaded at build
            # time via the Space's `preload_from_hub` setting).
            self.model = EncDecRNNTBPEModel.from_pretrained(
                "nvidia/stt_kab_conformer_transducer_large"
            )
            self.model = self.model.to(self.device)
            # Disable dither/padding in the preprocessor so inference is
            # deterministic for identical input audio.
            self.model.preprocessor.featurizer.dither = 0.0
            self.model.preprocessor.featurizer.pad_to = 0
            print("Model loaded successfully on CPU.")
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Failed to load model: {str(e)}") from e

    def transcribe(self, audio_file):
        """Transcribe one audio file.

        Args:
            audio_file: path to an audio file on disk (or None/empty).

        Returns:
            The transcribed text, or a human-readable error string — this
            method never raises, since its result is shown directly in the UI.
        """
        # Guard None/empty first: os.path.exists(None) raises TypeError,
        # which previously escaped because it ran outside the try block.
        if not audio_file or not os.path.exists(audio_file):
            return "Error: Audio file not found."

        try:
            # Transcription is slow on CPU; keep batch_size=1 / num_workers=0.
            with torch.no_grad():
                transcriptions = self.model.transcribe(
                    [audio_file],
                    batch_size=1,
                    num_workers=0,  # CPU-friendly
                )
            # RNNT models in several NeMo releases return a
            # (best_hypotheses, all_hypotheses) tuple — unwrap it so we
            # don't stringify a list.
            if isinstance(transcriptions, tuple):
                transcriptions = transcriptions[0]
            text = transcriptions[0] if transcriptions else ""
            # Newer NeMo returns Hypothesis objects; take their `.text`
            # instead of the object repr (the pre-rewrite Flask app in this
            # repo read `.text` as well).
            if hasattr(text, "text"):
                text = text.text
            return str(text).strip()
        except Exception as e:
            return f"Transcription error: {str(e)}"
frontend/package.js DELETED
@@ -1,210 +0,0 @@
1
- import React, { useState, useRef } from 'react';
2
-
3
- // The URL of our new Flask backend server
4
- const BACKEND_URL = 'http://127.0.0.1:5000/transcribe';
5
-
6
- const App = () => {
7
- const [isRecording, setIsRecording] = useState(false);
8
- const [isLoading, setIsLoading] = useState(false);
9
- const [statusMessage, setStatusMessage] = useState('Ready to transcribe Kabyle.');
10
- const [transcription, setTranscription] = useState('');
11
- const [audioURL, setAudioURL] = useState('');
12
- const mediaRecorderRef = useRef(null);
13
- const audioChunksRef = useRef([]);
14
-
15
- const startRecording = async () => {
16
- try {
17
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
18
- mediaRecorderRef.current = new MediaRecorder(stream);
19
- audioChunksRef.current = [];
20
- mediaRecorderRef.current.ondataavailable = event => {
21
- audioChunksRef.current.push(event.data);
22
- };
23
- mediaRecorderRef.current.onstop = () => {
24
- const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/wav' });
25
- const url = URL.createObjectURL(audioBlob);
26
- setAudioURL(url);
27
- stream.getTracks().forEach(track => track.stop()); // Stop microphone stream
28
- handleTranscription(audioBlob);
29
- };
30
- mediaRecorderRef.current.start();
31
- setIsRecording(true);
32
- setStatusMessage('Recording started... Click again to stop.');
33
- } catch (err) {
34
- console.error("Error accessing microphone:", err);
35
- setStatusMessage('Error: Could not access microphone. Please check permissions.');
36
- }
37
- };
38
-
39
- const stopRecording = () => {
40
- if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
41
- mediaRecorderRef.current.stop();
42
- setIsRecording(false);
43
- setStatusMessage('Recording stopped. Processing audio...');
44
- }
45
- };
46
-
47
- const handleFileUpload = (event) => {
48
- const file = event.target.files[0];
49
- if (file) {
50
- const audioBlob = new Blob([file], { type: file.type });
51
- const url = URL.createObjectURL(audioBlob);
52
- setAudioURL(url);
53
- handleTranscription(audioBlob);
54
- }
55
- };
56
-
57
- const handleTranscription = async (audioBlob) => {
58
- setIsLoading(true);
59
- setStatusMessage('Transcribing audio...');
60
- setTranscription('');
61
-
62
- const transcribedText = await sendAudioToServer(audioBlob);
63
-
64
- // Check if the transcription was successful
65
- if (transcribedText && !transcribedText.startsWith("Error:")) {
66
- // The server is now responsible for post-processing, so we display the text as-is.
67
- setTranscription(transcribedText);
68
- setStatusMessage('Transcription complete.');
69
- } else {
70
- setTranscription(transcribedText);
71
- setStatusMessage('Transcription failed.');
72
- }
73
-
74
- setIsLoading(false);
75
- };
76
-
77
- // --- THIS IS THE NEW FUNCTION THAT SENDS AUDIO TO THE FLASK SERVER ---
78
- const sendAudioToServer = async (audioBlob) => {
79
- const formData = new FormData();
80
- formData.append('audio', audioBlob, 'audio.wav');
81
-
82
- try {
83
- const response = await fetch(BACKEND_URL, {
84
- method: 'POST',
85
- body: formData,
86
- });
87
-
88
- if (!response.ok) {
89
- throw new Error(`Server error: ${response.status} ${response.statusText}`);
90
- }
91
-
92
- const data = await response.json();
93
- return data.transcription;
94
- } catch (error) {
95
- console.error("Error sending audio to server:", error);
96
- return `Error: Failed to get transcription from server. ${error.message}`;
97
- }
98
- };
99
-
100
- const handlePlayAudio = () => {
101
- const audio = new Audio(audioURL);
102
- audio.play();
103
- };
104
-
105
- return (
106
- <div className="min-h-screen bg-gray-100 flex flex-col items-center justify-center font-sans p-4">
107
- <style>
108
- {`
109
- @keyframes spin {
110
- from {
111
- transform: rotate(0deg);
112
- }
113
- to {
114
- transform: rotate(360deg);
115
- }
116
- }
117
- .animate-spin {
118
- animation: spin 1s linear infinite;
119
- }
120
- `}
121
- </style>
122
- <div className="bg-white shadow-xl rounded-2xl p-8 max-w-2xl w-full text-center space-y-6">
123
- <h1 className="text-4xl font-extrabold text-gray-800">Kabyle ASR Web App</h1>
124
- <p className="text-gray-600">Record or upload audio to get a transcription.</p>
125
-
126
- <div className="flex flex-col sm:flex-row justify-center items-center space-y-4 sm:space-y-0 sm:space-x-4 mt-6">
127
- <button
128
- onClick={isRecording ? stopRecording : startRecording}
129
- className={`flex items-center justify-center px-6 py-3 rounded-xl font-bold text-lg transition-all duration-300 transform hover:scale-105 shadow-md
130
- ${isRecording ? 'bg-red-500 text-white hover:bg-red-600' : 'bg-blue-600 text-white hover:bg-blue-700'}`}
131
- disabled={isLoading}
132
- >
133
- {isRecording ? (
134
- <>
135
- <svg className="w-6 h-6 mr-2 animate-pulse" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
136
- <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a4 4 0 01-4-4V5a4 4 0 118 0v4a4 4 0 01-4 4z"></path>
137
- </svg>
138
- Stop Recording
139
- </>
140
- ) : (
141
- <>
142
- <svg className="w-6 h-6 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
143
- <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a4 4 0 01-4-4V5a4 4 0 118 0v4a4 4 0 01-4 4z"></path>
144
- </svg>
145
- Start Recording
146
- </>
147
- )}
148
- </button>
149
-
150
- <label htmlFor="file-upload" className={`flex items-center justify-center px-6 py-3 rounded-xl font-bold text-lg transition-all duration-300 transform hover:scale-105 shadow-md
151
- bg-gray-200 text-gray-800 hover:bg-gray-300 cursor-pointer ${isLoading || isRecording ? 'opacity-50 pointer-events-none' : ''}`}>
152
- <svg className="w-6 h-6 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
153
- <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-8l-4-4m0 0L8 8m4-4v12"></path>
154
- </svg>
155
- Upload Audio
156
- <input
157
- id="file-upload"
158
- type="file"
159
- accept="audio/*"
160
- className="hidden"
161
- onChange={handleFileUpload}
162
- disabled={isLoading || isRecording}
163
- />
164
- </label>
165
- </div>
166
-
167
- {audioURL && (
168
- <div className="flex items-center justify-center mt-4">
169
- <button
170
- onClick={handlePlayAudio}
171
- className="flex items-center px-4 py-2 rounded-lg bg-green-500 text-white font-semibold shadow-md hover:bg-green-600 transition-colors"
172
- >
173
- <svg className="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
174
- <path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z" clipRule="evenodd"></path>
175
- </svg>
176
- Play Audio
177
- </button>
178
- </div>
179
- )}
180
-
181
- <div className="mt-6 text-xl font-medium text-gray-700 h-8">
182
- {isLoading ? (
183
- <div className="flex items-center justify-center">
184
- <svg className="w-6 h-6 animate-spin text-blue-500 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
185
- <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M4 4v5h.582m15.418 5v5h.582M18 10V4.5a2.5 2.5 0 00-2.5-2.5h-8A2.5 2.5 0 005 4.5V10m13 0l-3 3m0 0l-3 3m3-3v14m0-14H10"></path>
186
- </svg>
187
- <span className="text-blue-500">{statusMessage}</span>
188
- </div>
189
- ) : (
190
- <span className="text-gray-500">{statusMessage}</span>
191
- )}
192
- </div>
193
-
194
- {transcription && (
195
- <div className="mt-8 p-6 bg-gray-50 rounded-xl shadow-inner text-left">
196
- <h3 className="text-2xl font-bold text-gray-800 flex items-center mb-4">
197
- <svg className="w-6 h-6 text-green-500 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
198
- <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"></path>
199
- </svg>
200
- Transcription
201
- </h3>
202
- <p className="text-gray-800 text-xl leading-relaxed">{transcription}</p>
203
- </div>
204
- )}
205
- </div>
206
- </div>
207
- );
208
- };
209
-
210
- export default App;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,2 +1,10 @@
1
- Flask==2.3.3
2
- gunicorn==21.2.0
 
 
 
 
 
 
 
 
 
1
+ torch==1.13.1
2
+ torchaudio==0.13.1
3
+ pytorch-lightning==1.9.5
4
+ omegaconf>=2.0
5
+ hydra-core
6
+ numpy<1.24.0
7
+ gradio==4.25.0
8
+
9
+ # Install NeMo ASR only (lighter)
10
+ git+https://github.com/NVIDIA/NeMo.git@r1.23.0#egg=nemo_toolkit[asr]
requirements_full.txt DELETED
@@ -1,7 +0,0 @@
1
- numpy
2
- typing_extensions
3
- Flask
4
- Flask-CORS
5
- pydub
6
- gunicorn
7
- nemo_toolkit[asr]