Bouaziz-bad committed on
Commit
6810fc4
·
1 Parent(s): 41394f5

Test: Backend sanity check with simple Flask app

Browse files
Files changed (4) hide show
  1. app.py +11 -133
  2. app_full.py +137 -0
  3. requirements.txt +2 -7
  4. requirements_full.txt +7 -0
app.py CHANGED
@@ -1,137 +1,15 @@
1
- # app.py - Flask server to handle ASR requests using the NeMo model (Corrected)
2
-
3
- import os
4
- import tempfile
5
- import logging
6
- import sys
7
- from flask import Flask, request, jsonify
8
- from flask_cors import CORS
9
- import nemo.collections.asr as nemo_asr
10
- from pydub import AudioSegment
11
- import re
12
- import datetime
13
-
14
- # --- Suppress verbose NeMo logging ---
15
- logging.getLogger('nemo_logger').setLevel(logging.ERROR)
16
 
 
17
  app = Flask(__name__)
18
- CORS(app)
19
-
20
- # --- Post-processing function to correct annexation in Kabyle transcription ---
21
- def post_process_kabyle_text(text):
22
- """
23
- Corrects annexation in Kabyle transcription by replacing spaces with dashes.
24
- This version uses regular expressions for more robust pattern matching.
25
- """
26
- # Defensive check to ensure 'text' is a string before processing
27
- if not isinstance(text, str):
28
- print(f"Warning: Expected string for post-processing, but received type: {type(text)}. Skipping post-processing.")
29
- return text
30
-
31
- if not text:
32
- return ""
33
-
34
- # Ensure text is lowercase for consistent matching
35
- text = text.lower()
36
-
37
- # Define the sets of particles
38
- PoPro = {'inu', 'inem', 'ines', 'nneɣ', 'ntex', 'nwen', 'nwent', 'nsen', 'nsent',
39
- 'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 's', 'tneɣ', 'tentex', 'tsen', 'tsent'}
40
- SpWo = {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'ɣid', 'aql', 'sɣur', 'sennig', 'deffir', 'sdat'}
41
- StPaSp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
42
- StPa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
43
- 'k', 'm', 'ntex', 'wen', 'went', 'sen', 'sent', 'akem', 'att',
44
- 'aken', 'akent', 'aten', 'atent'}
45
- DePa = {'a', 'agi', 'nni', 'ihin', 'nniden'}
46
- DiPa = {'id', 'in'}
47
- FuPa = {'ad', 'ara'}
48
- DiObPa = {'yi', 'k', 'kem', 't', 'tt', 'ay', 'ken', 'kent', 'ten', 'tent',
49
- 'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
50
- InObPa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}
51
-
52
- # Combine all particles that can be annexed.
53
- all_annexable_particles = PoPro.union(SpWo, StPa, StPaSp, DePa, DiPa, FuPa, DiObPa, InObPa)
54
- sorted_all_annexable = sorted(list(all_annexable_particles), key=len, reverse=True)
55
-
56
- # Create a single regex pattern to handle all annexations in one go.
57
- annexation_pattern = r'\b(\w{2,})\s+(' + '|'.join(sorted_all_annexable) + r')\b'
58
- text = re.sub(annexation_pattern, r'\1-\2', text)
59
-
60
- # Final cleanup for any remaining double spaces or trailing hyphens
61
- text = re.sub(r'\s+', ' ', text).strip()
62
- text = re.sub(r'-+', '-', text)
63
-
64
- return text
65
-
66
- # --- Load the ASR model once at the beginning to avoid reloading on every request ---
67
- print("Loading NeMo ASR model...")
68
- try:
69
- asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_kab_conformer_transducer_large")
70
- print("NeMo ASR model loaded successfully.")
71
- except Exception as e:
72
- print(f"Error loading NeMo ASR model: {e}")
73
- print("Please check your internet connection and ensure nemo_toolkit[asr] is correctly installed.")
74
- asr_model = None
75
-
76
- @app.route('/transcribe', methods=['POST'])
77
- def transcribe():
78
- if asr_model is None:
79
- return jsonify({"error": "ASR model is not loaded."}), 503
80
-
81
- if 'audio' not in request.files:
82
- return jsonify({"error": "No audio file provided"}), 400
83
-
84
- audio_file = request.files['audio']
85
- if audio_file.filename == '':
86
- return jsonify({"error": "No selected file"}), 400
87
-
88
- temp_input_file = None
89
- processed_file_path = None
90
- try:
91
- # Save the uploaded file to a temporary location
92
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
93
- audio_file.save(tmp_audio.name)
94
- temp_input_file = tmp_audio.name
95
-
96
- try:
97
- # The model requires the audio to be in a specific format (16kHz mono).
98
- input_audio = AudioSegment.from_file(temp_input_file)
99
- processed_audio = input_audio.set_frame_rate(16000).set_channels(1)
100
-
101
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as processed_tmp:
102
- processed_audio.export(processed_tmp.name, format="wav")
103
- processed_file_path = processed_tmp.name
104
- except Exception as audio_e:
105
- print(f"Error during audio processing with pydub: {audio_e}", file=sys.stderr)
106
- return jsonify({"error": "Failed to process audio file. Please ensure it's a valid audio format."}), 500
107
-
108
- try:
109
- # Transcribe the processed file using the loaded model
110
- transcription_list = asr_model.transcribe([processed_file_path])
111
- except Exception as asr_e:
112
- print(f"Error during transcription with NeMo model: {asr_e}", file=sys.stderr)
113
- return jsonify({"error": "Transcription failed due to a model error."}), 500
114
-
115
- if transcription_list and transcription_list[0] and hasattr(transcription_list[0], 'text'):
116
- raw_transcription = transcription_list[0].text
117
- final_transcription = post_process_kabyle_text(raw_transcription)
118
-
119
- return jsonify({"transcription": final_transcription})
120
- else:
121
- print("ASR model returned an empty, invalid, or unexpected transcription object.")
122
- return jsonify({"error": "Transcription failed. No text returned."}), 500
123
 
124
- except Exception as e:
125
- print(f"An unhandled server error occurred: {e}", file=sys.stderr)
126
- return jsonify({"error": "An internal server error occurred."}), 500
127
- finally:
128
- # Cleanup temporary files
129
- if temp_input_file and os.path.exists(temp_input_file):
130
- os.remove(temp_input_file)
131
- if processed_file_path and os.path.exists(processed_file_path):
132
- os.remove(processed_file_path)
133
 
134
- if __name__ == '__main__':
135
- print("Starting Flask server...")
136
- print("Server running at http://127.0.0.1:5000")
137
- app.run(debug=True)
 
 
 
1
+ from flask import Flask
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ # Create a Flask application instance
4
  app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ @app.route("/")
7
+ def hello_world():
8
+ return "Hello from the backend!"
 
 
 
 
 
 
9
 
10
+ # Add a simple test route for transcription logic
11
+ @app.route("/transcribe", methods=['POST'])
12
+ def transcribe_test():
13
+ # In a real app, you would get audio from the request here
14
+ # For this test, we'll just return a success message
15
+ return "Backend received audio and is ready to transcribe!"
app_full.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_full.py - Flask server to handle ASR requests using the NeMo model (Corrected)
2
+
3
+ import os
4
+ import tempfile
5
+ import logging
6
+ import sys
7
+ from flask import Flask, request, jsonify
8
+ from flask_cors import CORS
9
+ import nemo.collections.asr as nemo_asr
10
+ from pydub import AudioSegment
11
+ import re
12
+ import datetime
13
+
14
+ # --- Suppress verbose NeMo logging ---
15
+ logging.getLogger('nemo_logger').setLevel(logging.ERROR)
16
+
17
+ app = Flask(__name__)
18
+ CORS(app)
19
+
20
+ # --- Post-processing function to correct annexation in Kabyle transcription ---
21
+ def post_process_kabyle_text(text):
22
+ """
23
+ Corrects annexation in Kabyle transcription by replacing spaces with dashes.
24
+ This version uses regular expressions for more robust pattern matching.
25
+ """
26
+ # Defensive check to ensure 'text' is a string before processing
27
+ if not isinstance(text, str):
28
+ print(f"Warning: Expected string for post-processing, but received type: {type(text)}. Skipping post-processing.")
29
+ return text
30
+
31
+ if not text:
32
+ return ""
33
+
34
+ # Ensure text is lowercase for consistent matching
35
+ text = text.lower()
36
+
37
+ # Define the sets of particles
38
+ PoPro = {'inu', 'inem', 'ines', 'nneɣ', 'ntex', 'nwen', 'nwent', 'nsen', 'nsent',
39
+ 'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 's', 'tneɣ', 'tentex', 'tsen', 'tsent'}
40
+ SpWo = {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'ɣid', 'aql', 'sɣur', 'sennig', 'deffir', 'sdat'}
41
+ StPaSp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
42
+ StPa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
43
+ 'k', 'm', 'ntex', 'wen', 'went', 'sen', 'sent', 'akem', 'att',
44
+ 'aken', 'akent', 'aten', 'atent'}
45
+ DePa = {'a', 'agi', 'nni', 'ihin', 'nniden'}
46
+ DiPa = {'id', 'in'}
47
+ FuPa = {'ad', 'ara'}
48
+ DiObPa = {'yi', 'k', 'kem', 't', 'tt', 'ay', 'ken', 'kent', 'ten', 'tent',
49
+ 'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
50
+ InObPa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}
51
+
52
+ # Combine all particles that can be annexed.
53
+ all_annexable_particles = PoPro.union(SpWo, StPa, StPaSp, DePa, DiPa, FuPa, DiObPa, InObPa)
54
+ sorted_all_annexable = sorted(list(all_annexable_particles), key=len, reverse=True)
55
+
56
+ # Create a single regex pattern to handle all annexations in one go.
57
+ annexation_pattern = r'\b(\w{2,})\s+(' + '|'.join(sorted_all_annexable) + r')\b'
58
+ text = re.sub(annexation_pattern, r'\1-\2', text)
59
+
60
+ # Final cleanup: collapse runs of whitespace into single spaces and repeated hyphens into one
61
+ text = re.sub(r'\s+', ' ', text).strip()
62
+ text = re.sub(r'-+', '-', text)
63
+
64
+ return text
65
+
66
+ # --- Load the ASR model once at the beginning to avoid reloading on every request ---
67
+ print("Loading NeMo ASR model...")
68
+ try:
69
+ asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_kab_conformer_transducer_large")
70
+ print("NeMo ASR model loaded successfully.")
71
+ except Exception as e:
72
+ print(f"Error loading NeMo ASR model: {e}")
73
+ print("Please check your internet connection and ensure nemo_toolkit[asr] is correctly installed.")
74
+ asr_model = None
75
+
76
+ @app.route('/transcribe', methods=['POST'])
77
+ def transcribe():
78
+ if asr_model is None:
79
+ return jsonify({"error": "ASR model is not loaded."}), 503
80
+
81
+ if 'audio' not in request.files:
82
+ return jsonify({"error": "No audio file provided"}), 400
83
+
84
+ audio_file = request.files['audio']
85
+ if audio_file.filename == '':
86
+ return jsonify({"error": "No selected file"}), 400
87
+
88
+ temp_input_file = None
89
+ processed_file_path = None
90
+ try:
91
+ # Save the uploaded file to a temporary location
92
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
93
+ audio_file.save(tmp_audio.name)
94
+ temp_input_file = tmp_audio.name
95
+
96
+ try:
97
+ # The model requires the audio to be in a specific format (16kHz mono).
98
+ input_audio = AudioSegment.from_file(temp_input_file)
99
+ processed_audio = input_audio.set_frame_rate(16000).set_channels(1)
100
+
101
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as processed_tmp:
102
+ processed_audio.export(processed_tmp.name, format="wav")
103
+ processed_file_path = processed_tmp.name
104
+ except Exception as audio_e:
105
+ print(f"Error during audio processing with pydub: {audio_e}", file=sys.stderr)
106
+ return jsonify({"error": "Failed to process audio file. Please ensure it's a valid audio format."}), 500
107
+
108
+ try:
109
+ # Transcribe the processed file using the loaded model
110
+ transcription_list = asr_model.transcribe([processed_file_path])
111
+ except Exception as asr_e:
112
+ print(f"Error during transcription with NeMo model: {asr_e}", file=sys.stderr)
113
+ return jsonify({"error": "Transcription failed due to a model error."}), 500
114
+
115
+ if transcription_list and transcription_list[0] and hasattr(transcription_list[0], 'text'):
116
+ raw_transcription = transcription_list[0].text
117
+ final_transcription = post_process_kabyle_text(raw_transcription)
118
+
119
+ return jsonify({"transcription": final_transcription})
120
+ else:
121
+ print("ASR model returned an empty, invalid, or unexpected transcription object.")
122
+ return jsonify({"error": "Transcription failed. No text returned."}), 500
123
+
124
+ except Exception as e:
125
+ print(f"An unhandled server error occurred: {e}", file=sys.stderr)
126
+ return jsonify({"error": "An internal server error occurred."}), 500
127
+ finally:
128
+ # Cleanup temporary files
129
+ if temp_input_file and os.path.exists(temp_input_file):
130
+ os.remove(temp_input_file)
131
+ if processed_file_path and os.path.exists(processed_file_path):
132
+ os.remove(processed_file_path)
133
+
134
+ if __name__ == '__main__':
135
+ print("Starting Flask server...")
136
+ print("Server running at http://127.0.0.1:5000")
137
+ app.run(debug=True)
requirements.txt CHANGED
@@ -1,7 +1,2 @@
1
- numpy
2
- typing_extensions
3
- Flask
4
- Flask-CORS
5
- pydub
6
- gunicorn
7
- nemo_toolkit[asr]
 
1
+ Flask==2.3.3
2
+ gunicorn==21.2.0
 
 
 
 
 
requirements_full.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ numpy
2
+ typing_extensions
3
+ Flask
4
+ Flask-CORS
5
+ pydub
6
+ gunicorn
7
+ nemo_toolkit[asr]