Spaces:
Running
Running
Bouaziz-bad
committed on
Commit
·
6810fc4
1
Parent(s):
41394f5
Test: Backend sanity check with simple Flask app
Browse files- app.py +11 -133
- app_full.py +137 -0
- requirements.txt +2 -7
- requirements_full.txt +7 -0
app.py
CHANGED
@@ -1,137 +1,15 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
import os
|
4 |
-
import tempfile
|
5 |
-
import logging
|
6 |
-
import sys
|
7 |
-
from flask import Flask, request, jsonify
|
8 |
-
from flask_cors import CORS
|
9 |
-
import nemo.collections.asr as nemo_asr
|
10 |
-
from pydub import AudioSegment
|
11 |
-
import re
|
12 |
-
import datetime
|
13 |
-
|
14 |
-
# --- Suppress verbose NeMo logging ---
|
15 |
-
logging.getLogger('nemo_logger').setLevel(logging.ERROR)
|
16 |
|
|
|
17 |
app = Flask(__name__)
|
18 |
-
CORS(app)
|
19 |
-
|
20 |
-
# --- Post-processing function to correct annexation in Kabyle transcription ---
|
21 |
-
def post_process_kabyle_text(text):
|
22 |
-
"""
|
23 |
-
Corrects annexation in Kabyle transcription by replacing spaces with dashes.
|
24 |
-
This version uses regular expressions for more robust pattern matching.
|
25 |
-
"""
|
26 |
-
# Defensive check to ensure 'text' is a string before processing
|
27 |
-
if not isinstance(text, str):
|
28 |
-
print(f"Warning: Expected string for post-processing, but received type: {type(text)}. Skipping post-processing.")
|
29 |
-
return text
|
30 |
-
|
31 |
-
if not text:
|
32 |
-
return ""
|
33 |
-
|
34 |
-
# Ensure text is lowercase for consistent matching
|
35 |
-
text = text.lower()
|
36 |
-
|
37 |
-
# Define the sets of particles
|
38 |
-
PoPro = {'inu', 'inem', 'ines', 'nneɣ', 'ntex', 'nwen', 'nwent', 'nsen', 'nsent',
|
39 |
-
'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 's', 'tneɣ', 'tentex', 'tsen', 'tsent'}
|
40 |
-
SpWo = {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'ɣid', 'aql', 'sɣur', 'sennig', 'deffir', 'sdat'}
|
41 |
-
StPaSp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
|
42 |
-
StPa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
|
43 |
-
'k', 'm', 'ntex', 'wen', 'went', 'sen', 'sent', 'akem', 'att',
|
44 |
-
'aken', 'akent', 'aten', 'atent'}
|
45 |
-
DePa = {'a', 'agi', 'nni', 'ihin', 'nniden'}
|
46 |
-
DiPa = {'id', 'in'}
|
47 |
-
FuPa = {'ad', 'ara'}
|
48 |
-
DiObPa = {'yi', 'k', 'kem', 't', 'tt', 'ay', 'ken', 'kent', 'ten', 'tent',
|
49 |
-
'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
|
50 |
-
InObPa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}
|
51 |
-
|
52 |
-
# Combine all particles that can be annexed.
|
53 |
-
all_annexable_particles = PoPro.union(SpWo, StPa, StPaSp, DePa, DiPa, FuPa, DiObPa, InObPa)
|
54 |
-
sorted_all_annexable = sorted(list(all_annexable_particles), key=len, reverse=True)
|
55 |
-
|
56 |
-
# Create a single regex pattern to handle all annexations in one go.
|
57 |
-
annexation_pattern = r'\b(\w{2,})\s+(' + '|'.join(sorted_all_annexable) + r')\b'
|
58 |
-
text = re.sub(annexation_pattern, r'\1-\2', text)
|
59 |
-
|
60 |
-
# Final cleanup for any remaining double spaces or trailing hyphens
|
61 |
-
text = re.sub(r'\s+', ' ', text).strip()
|
62 |
-
text = re.sub(r'-+', '-', text)
|
63 |
-
|
64 |
-
return text
|
65 |
-
|
66 |
-
# --- Load the ASR model once at the beginning to avoid reloading on every request ---
|
67 |
-
print("Loading NeMo ASR model...")
|
68 |
-
try:
|
69 |
-
asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_kab_conformer_transducer_large")
|
70 |
-
print("NeMo ASR model loaded successfully.")
|
71 |
-
except Exception as e:
|
72 |
-
print(f"Error loading NeMo ASR model: {e}")
|
73 |
-
print("Please check your internet connection and ensure nemo_toolkit[asr] is correctly installed.")
|
74 |
-
asr_model = None
|
75 |
-
|
76 |
-
@app.route('/transcribe', methods=['POST'])
|
77 |
-
def transcribe():
|
78 |
-
if asr_model is None:
|
79 |
-
return jsonify({"error": "ASR model is not loaded."}), 503
|
80 |
-
|
81 |
-
if 'audio' not in request.files:
|
82 |
-
return jsonify({"error": "No audio file provided"}), 400
|
83 |
-
|
84 |
-
audio_file = request.files['audio']
|
85 |
-
if audio_file.filename == '':
|
86 |
-
return jsonify({"error": "No selected file"}), 400
|
87 |
-
|
88 |
-
temp_input_file = None
|
89 |
-
processed_file_path = None
|
90 |
-
try:
|
91 |
-
# Save the uploaded file to a temporary location
|
92 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
|
93 |
-
audio_file.save(tmp_audio.name)
|
94 |
-
temp_input_file = tmp_audio.name
|
95 |
-
|
96 |
-
try:
|
97 |
-
# The model requires the audio to be in a specific format (16kHz mono).
|
98 |
-
input_audio = AudioSegment.from_file(temp_input_file)
|
99 |
-
processed_audio = input_audio.set_frame_rate(16000).set_channels(1)
|
100 |
-
|
101 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as processed_tmp:
|
102 |
-
processed_audio.export(processed_tmp.name, format="wav")
|
103 |
-
processed_file_path = processed_tmp.name
|
104 |
-
except Exception as audio_e:
|
105 |
-
print(f"Error during audio processing with pydub: {audio_e}", file=sys.stderr)
|
106 |
-
return jsonify({"error": "Failed to process audio file. Please ensure it's a valid audio format."}), 500
|
107 |
-
|
108 |
-
try:
|
109 |
-
# Transcribe the processed file using the loaded model
|
110 |
-
transcription_list = asr_model.transcribe([processed_file_path])
|
111 |
-
except Exception as asr_e:
|
112 |
-
print(f"Error during transcription with NeMo model: {asr_e}", file=sys.stderr)
|
113 |
-
return jsonify({"error": "Transcription failed due to a model error."}), 500
|
114 |
-
|
115 |
-
if transcription_list and transcription_list[0] and hasattr(transcription_list[0], 'text'):
|
116 |
-
raw_transcription = transcription_list[0].text
|
117 |
-
final_transcription = post_process_kabyle_text(raw_transcription)
|
118 |
-
|
119 |
-
return jsonify({"transcription": final_transcription})
|
120 |
-
else:
|
121 |
-
print("ASR model returned an empty, invalid, or unexpected transcription object.")
|
122 |
-
return jsonify({"error": "Transcription failed. No text returned."}), 500
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
finally:
|
128 |
-
# Cleanup temporary files
|
129 |
-
if temp_input_file and os.path.exists(temp_input_file):
|
130 |
-
os.remove(temp_input_file)
|
131 |
-
if processed_file_path and os.path.exists(processed_file_path):
|
132 |
-
os.remove(processed_file_path)
|
133 |
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
app
|
|
|
|
|
|
1 |
+
from flask import Flask
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
# Create a Flask application instance
|
4 |
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
@app.route("/")
def hello_world():
    """Answer the root URL with a fixed greeting string."""
    greeting = "Hello from the backend!"
    return greeting
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
# Add a simple test route for transcription logic
|
11 |
+
@app.route("/transcribe", methods=['POST'])
def transcribe_test():
    """Stand-in for the transcription endpoint, used only as a sanity check.

    The request is not inspected: no audio is read from it, and every POST
    receives the same fixed acknowledgement string.
    """
    acknowledgement = "Backend received audio and is ready to transcribe!"
    return acknowledgement
|
app_full.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app_full.py - Flask server to handle ASR requests using the NeMo model (Corrected)
|
2 |
+
|
3 |
+
import os
|
4 |
+
import tempfile
|
5 |
+
import logging
|
6 |
+
import sys
|
7 |
+
from flask import Flask, request, jsonify
|
8 |
+
from flask_cors import CORS
|
9 |
+
import nemo.collections.asr as nemo_asr
|
10 |
+
from pydub import AudioSegment
|
11 |
+
import re
|
12 |
+
import datetime
|
13 |
+
|
14 |
+
# --- Suppress verbose NeMo logging ---
# Only records at ERROR level or above pass through the 'nemo_logger' logger.
_nemo_logger = logging.getLogger('nemo_logger')
_nemo_logger.setLevel(logging.ERROR)

# Flask application object, with Flask-CORS applied to it.
app = Flask(__name__)
CORS(app)
|
19 |
+
|
20 |
+
# --- Post-processing function to correct annexation in Kabyle transcription ---
def _build_annexation_regex():
    """Compile the pattern matching a host word plus the particles after it.

    Group 1 is the host word (two or more word characters). Group 2 is the
    run of one or more whitespace-separated particles that directly follow it.
    """
    # Particle groups. The grouping mirrors the original code's sets
    # (presumably grammatical categories of Kabyle clitics -- TODO confirm).
    po_pro = {'inu', 'inem', 'ines', 'nneɣ', 'ntex', 'nwen', 'nwent', 'nsen', 'nsent',
              'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 's', 'tneɣ', 'tentex', 'tsen', 'tsent'}
    sp_wo = {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'ɣid', 'aql',
             'sɣur', 'sennig', 'deffir', 'sdat'}
    st_pa_sp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
    st_pa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
             'k', 'm', 'ntex', 'wen', 'went', 'sen', 'sent', 'akem', 'att',
             'aken', 'akent', 'aten', 'atent'}
    de_pa = {'a', 'agi', 'nni', 'ihin', 'nniden'}
    di_pa = {'id', 'in'}
    fu_pa = {'ad', 'ara'}
    di_ob_pa = {'yi', 'k', 'kem', 't', 'tt', 'ay', 'ken', 'kent', 'ten', 'tent',
                'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
    in_ob_pa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}

    particles = (po_pro | sp_wo | st_pa | st_pa_sp | de_pa
                 | di_pa | fu_pa | di_ob_pa | in_ob_pa)

    # Longest first, ties alphabetical, so the generated pattern is the same
    # on every run (set iteration order is not). The trailing \b already
    # prevents a short particle from matching inside a longer word.
    ordered = sorted(particles, key=lambda p: (-len(p), p))
    alternation = '|'.join(re.escape(p) for p in ordered)
    return re.compile(r'\b(\w{2,})((?:\s+(?:' + alternation + r')\b)+)')


# Built once at import time instead of on every call.
_ANNEXATION_RE = _build_annexation_regex()
_WHITESPACE_RUN_RE = re.compile(r'\s+')
_HYPHEN_RUN_RE = re.compile(r'-+')


def _hyphenate_particles(match):
    """Rebuild a match as host-particle-particle..., turning each gap into '-'."""
    return match.group(1) + _WHITESPACE_RUN_RE.sub('-', match.group(2))


def post_process_kabyle_text(text):
    """Join annexable particles to the preceding word with hyphens.

    The text is lowercased, then every word of two or more characters that is
    followed by one or more known particles has the separating whitespace
    replaced by '-'. A whole run of consecutive particles is attached
    ("awi t id" -> "awi-t-id"); matching one host/particle pair at a time
    would leave every particle after the first unattached, because regex
    substitutions never overlap.

    Args:
        text: The raw transcription. Expected to be a string.

    Returns:
        The processed string; "" for an empty string. A non-string argument
        is returned unchanged after a warning is printed.
    """
    # Defensive check: leave unexpected types untouched rather than crash.
    if not isinstance(text, str):
        print(f"Warning: Expected string for post-processing, but received type: {type(text)}. Skipping post-processing.")
        return text

    if not text:
        return ""

    # Lowercase first so the (lowercase) particle list matches consistently.
    text = _ANNEXATION_RE.sub(_hyphenate_particles, text.lower())

    # Final cleanup: collapse whitespace runs, trim the ends, and squeeze
    # repeated hyphens into one.
    text = _WHITESPACE_RUN_RE.sub(' ', text).strip()
    return _HYPHEN_RUN_RE.sub('-', text)
|
65 |
+
|
66 |
+
# --- Load the ASR model a single time at import, so requests never reload it ---
_ASR_MODEL_NAME = "nvidia/stt_kab_conformer_transducer_large"

print("Loading NeMo ASR model...")
asr_model = None
try:
    asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(_ASR_MODEL_NAME)
    print("NeMo ASR model loaded successfully.")
except Exception as e:
    # Best effort: the server still starts; asr_model stays None so the
    # failure is visible to code that checks it.
    print(f"Error loading NeMo ASR model: {e}")
    print("Please check your internet connection and ensure nemo_toolkit[asr] is correctly installed.")
    asr_model = None
|
75 |
+
|
76 |
+
def _extract_transcription_text(result):
    """Return the text of the first item of a ``transcribe`` result, or None.

    Accepts a list whose first item is either a plain string or an object
    with a ``text`` attribute. A tuple result is unwrapped to its first
    element first (NOTE(review): older NeMo transducer models reportedly
    return ``(best_hypotheses, all_hypotheses)`` -- confirm against the
    installed NeMo version).
    """
    if isinstance(result, tuple):
        result = result[0] if result else None
    if not result:
        return None
    first = result[0]
    if isinstance(first, str):
        return first
    if first and hasattr(first, 'text'):
        return first.text
    return None


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file and return the post-processed text.

    Expects a multipart POST with the audio in the ``audio`` file field.

    Returns:
        200 ``{"transcription": ...}`` on success;
        400 when the ``audio`` field is missing or has an empty filename;
        503 when the ASR model is not loaded;
        500 when audio conversion or transcription fails, or on any other
        unexpected error.
    """
    if asr_model is None:
        return jsonify({"error": "ASR model is not loaded."}), 503

    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    if audio_file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    temp_input_file = None
    processed_file_path = None
    try:
        # Record the path BEFORE writing, so the finally-block can remove the
        # file even if saving fails. Close the handle before writing by path:
        # a still-open temp file cannot be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
            temp_input_file = tmp_audio.name
        audio_file.save(temp_input_file)

        try:
            # The model requires the audio to be in a specific format (16kHz mono).
            input_audio = AudioSegment.from_file(temp_input_file)
            processed_audio = input_audio.set_frame_rate(16000).set_channels(1)

            # Same pattern as above: reserve the path, close, then export.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as processed_tmp:
                processed_file_path = processed_tmp.name
            processed_audio.export(processed_file_path, format="wav")
        except Exception as audio_e:
            print(f"Error during audio processing with pydub: {audio_e}", file=sys.stderr)
            return jsonify({"error": "Failed to process audio file. Please ensure it's a valid audio format."}), 500

        try:
            # Transcribe the processed file using the loaded model
            transcription_list = asr_model.transcribe([processed_file_path])
        except Exception as asr_e:
            print(f"Error during transcription with NeMo model: {asr_e}", file=sys.stderr)
            return jsonify({"error": "Transcription failed due to a model error."}), 500

        raw_transcription = _extract_transcription_text(transcription_list)
        if raw_transcription is None:
            print("ASR model returned an empty, invalid, or unexpected transcription object.")
            return jsonify({"error": "Transcription failed. No text returned."}), 500

        final_transcription = post_process_kabyle_text(raw_transcription)
        return jsonify({"transcription": final_transcription})

    except Exception as e:
        print(f"An unhandled server error occurred: {e}", file=sys.stderr)
        return jsonify({"error": "An internal server error occurred."}), 500
    finally:
        # Cleanup temporary files (runs on every return path above).
        if temp_input_file and os.path.exists(temp_input_file):
            os.remove(temp_input_file)
        if processed_file_path and os.path.exists(processed_file_path):
            os.remove(processed_file_path)
|
133 |
+
|
134 |
+
if __name__ == '__main__':
    # Debug mode turns on the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code, so it is opt-in via FLASK_DEBUG
    # rather than always on. (It also enables the reloader, which re-imports
    # this module in a child process -- presumably loading the ASR model a
    # second time; verify if startup time or memory matters.)
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    print("Starting Flask server...")
    print("Server running at http://127.0.0.1:5000")
    app.run(debug=debug_enabled)
|
requirements.txt
CHANGED
@@ -1,7 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
Flask
|
4 |
-
Flask-CORS
|
5 |
-
pydub
|
6 |
-
gunicorn
|
7 |
-
nemo_toolkit[asr]
|
|
|
1 |
+
Flask==2.3.3
|
2 |
+
gunicorn==21.2.0
|
|
|
|
|
|
|
|
|
|
requirements_full.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
typing_extensions
|
3 |
+
Flask
|
4 |
+
Flask-CORS
|
5 |
+
pydub
|
6 |
+
gunicorn
|
7 |
+
nemo_toolkit[asr]
|