deepugaur committed on
Commit
8dcb583
·
verified ·
1 Parent(s): 5233da6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -50
app.py CHANGED
@@ -1,51 +1,55 @@
1
import librosa
import numpy as np
import tensorflow as tf

def load_audio(file_path):
    # Decode the file at a fixed 16 kHz sample rate (librosa resamples as needed).
    audio, sr = librosa.load(file_path, sr=16000)
    return audio, sr

def extract_features(audio, sr):
    # 13 MFCC coefficients per frame as the acoustic feature matrix.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    return mfccs

# Load pre-trained model (example: DeepSpeech)
model = tf.keras.models.load_model('deepspeech_model.h5')

def speech_to_text(audio):
    # NOTE(review): sample rate hard-coded to 16000 — assumes load_audio's rate; confirm.
    features = extract_features(audio, 16000)
    features = np.expand_dims(features, axis=0)  # Model expects batch dimension
    # NOTE(review): predict() returns the raw network output, not a decoded
    # string — presumably a CTC/greedy decode step is missing here; confirm.
    text = model.predict(features)
    return text

from transformers import MarianMTModel, MarianTokenizer

# Load pre-trained translation model
model_name = 'Helsinki-NLP/opus-mt-en-hi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

def translate_text(text, tokenizer, model):
    # Tokenize, generate the translation, and decode the first hypothesis.
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

import datetime

def process_audio_for_translation(audio_file_path):
    # NOTE(review): datetime.now() is the server's LOCAL time, not IST as the
    # message below claims — verify the deployment timezone.
    current_time = datetime.datetime.now().time()
    if current_time < datetime.time(18, 0):
        return "Translation service is available only after 6 PM IST"

    audio, sr = load_audio(audio_file_path)
    english_text = speech_to_text(audio)
    hindi_text = translate_text(english_text, tokenizer, translation_model)
    return hindi_text

# Example usage
audio_file_path = 'path_to_audio_file.wav'
translated_text = process_audio_for_translation(audio_file_path)
print(translated_text)
 
 
 
 
51
 
 
1
import os
import tempfile
from datetime import datetime, time, timedelta, timezone

import pytz
import torch
import torchaudio
from flask import Flask, request, jsonify
from pydub import AudioSegment
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer, pipeline

9
app = Flask(__name__)

# Load speech recognition model and tokenizer.
# NOTE(review): loaded at import time — the first start blocks on the
# download/cache of the pretrained weights.
tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

# Load the English->Hindi translation pipeline (model chosen by transformers'
# default for this task).
translation_pipeline = pipeline("translation_en_to_hi")
17
+
18
# Function to preprocess audio
def preprocess_audio(audio_file):
    """Resample an uploaded audio file to 16 kHz and return its waveform tensor.

    Fixes a concurrency bug in the original: it exported every request to the
    fixed path ``processed.wav``, so simultaneous requests clobbered each
    other's file and the file was never cleaned up. A per-request temporary
    file is used instead and removed after loading.

    Args:
        audio_file: file path or file-like object readable by pydub.

    Returns:
        torch.Tensor waveform as produced by ``torchaudio.load``.
    """
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_frame_rate(16000)  # Wav2Vec2 expects 16 kHz input
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        tmp.close()  # close the handle; pydub reopens the path itself (needed on Windows)
        audio.export(tmp.name, format="wav")
        waveform, _sample_rate = torchaudio.load(tmp.name)
    finally:
        os.remove(tmp.name)  # always clean up the per-request temp file
    return waveform
25
+
26
# Function to check if the current time is after 6 PM IST
def is_after_6pm_ist(now=None):
    """Return True if it is 18:00 or later in Indian Standard Time.

    Args:
        now: optional timezone-aware ``datetime`` to evaluate instead of the
             current moment (backward-compatible: existing callers pass
             nothing). Naive-aware mixing is avoided by converting to IST.

    IST is a fixed UTC+05:30 offset with no daylight saving, so a stdlib
    fixed-offset timezone is exact and removes the pytz dependency here.
    """
    ist = timezone(timedelta(hours=5, minutes=30))
    if now is None:
        now = datetime.now(ist)
    return now.astimezone(ist).time() >= time(18, 0)
31
+
32
@app.route('/translate', methods=['POST'])
def translate():
    """POST /translate — transcribe an uploaded 'audio' file and translate it to Hindi.

    Returns:
        200 JSON {"transcription": ..., "translation": ...} on success.
        403 if called outside the service window (before 6 PM IST).
        400 if no 'audio' file part is present in the request.
    """
    # Business rule: the service window opens at 18:00 IST.
    if not is_after_6pm_ist():
        return jsonify({"error": "Service is available only after 6 PM IST"}), 403

    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    waveform = preprocess_audio(audio_file)

    input_values = tokenizer(waveform.squeeze().numpy(), return_tensors="pt").input_values
    # Inference only: disable autograd so no gradient graph is built per
    # request (saves memory/CPU; the logits are unchanged).
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = tokenizer.batch_decode(predicted_ids)[0]

    translation = translation_pipeline(transcription)
    translated_text = translation[0]['translation_text']

    return jsonify({"transcription": transcription, "translation": translated_text})
52
+
53
if __name__ == '__main__':
    # Security: the original ran with debug=True while bound to 0.0.0.0 —
    # that exposes the Werkzeug interactive debugger (remote code execution)
    # to the network. Debug mode is now opt-in via FLASK_DEBUG=1 for local use.
    app.run(host='0.0.0.0', port=8080, debug=os.environ.get('FLASK_DEBUG') == '1')
55