mulasagg committed
Commit 27acc7d · Parent: 8031a8f
Files changed (3)
  1. app.py +8 -11
  2. transcribe.py +14 -20
  3. vers/compute_vers_score.py +3 -0
app.py CHANGED
```diff
@@ -18,7 +18,7 @@ from vps.vps_api import main as analyze_vps_main
 from ves.ves import calc_voice_engagement_score
 from transcribe import transcribe_audio
 from filler_count.filler_score import analyze_fillers
-#from emotion.emo_predict import predict_emotion
+from emotion.emo_predict import predict_emotion
 
 app = FastAPI()
 
@@ -290,9 +290,9 @@ import time
 
 
 @app.post('/transcribe/')
-async def transcribe(file: UploadFile):
+async def transcribe(file: UploadFile, language: str = Form(...)):
     """
-    Endpoint to transcribe an uploaded audio file ('.wav', '.mp3','mp4','.m4a','.flac' ).
+    Endpoint to transcribe an uploaded audio file (.wav or .mp3).
     """
     #calculate time to transcribe
     start_time = time.time()
@@ -311,7 +311,7 @@ async def transcribe(file: UploadFile):
         shutil.copyfileobj(file.file, buffer)
 
     # Transcribe using your custom function
-    result = transcribe_audio(temp_filepath, model_size="base")
+    result = transcribe_audio(temp_filepath, language=language, model_size="base")
     end_time = time.time()
     transcription_time = end_time - start_time
     response = {
@@ -329,14 +329,12 @@ async def transcribe(file: UploadFile):
     if os.path.exists(temp_filepath):
         os.remove(temp_filepath)
 
-import datetime
 
 @app.post('/analyze_all/')
-async def analyze_all(file: UploadFile):
+async def analyze_all(file: UploadFile, language: str = Form(...)):
     """
     Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
     """
-    print(f"Received request at {datetime.datetime.now()} for file: {file.filename}")
     if not file.filename.endswith(('.wav', '.mp3', '.m4a', '.mp4', '.flac')):
         raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
 
@@ -360,8 +358,8 @@ async def analyze_all(file: UploadFile):
     vps_result = analyze_vps_main(temp_filepath)
     ves_result = calc_voice_engagement_score(temp_filepath)
     filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
-    transcript, language, _ = transcribe_audio(temp_filepath, "base") #fix this
-    #emotion = predict_emotion(temp_filepath)
+    transcript = transcribe_audio(temp_filepath, language, "base") #fix this
+    emotion = predict_emotion(temp_filepath)
     avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
 
 
@@ -376,8 +374,7 @@ async def analyze_all(file: UploadFile):
         "ves": ves_result,
         "filler_words": filler_count,
         "transcript": transcript,
-        "Detected Language": language,
-        #"emotion": emotion ,
+        "emotion": emotion,
         "sank_score": avg_score
     }
 
```
transcribe.py CHANGED
```diff
@@ -1,32 +1,26 @@
 import assemblyai as aai
 
-aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4" # Use env var in production
+# Set your AssemblyAI API key once
+aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4" # Replace with env var for production
 
-def transcribe_audio(file_path: str, model_size=None) -> tuple[str, str, float]:
-    print(f"Transcribing audio file: {file_path} with language detection")
+def transcribe_audio(file_path: str, language, model_size=None) -> str:
 
+    print(f"Transcribing audio file: {file_path} with language: {language}")
+    # Configure the requested language code
     config = aai.TranscriptionConfig(
         speech_model=aai.SpeechModel.nano,
-        language_detection=True,
-        language_confidence_threshold=0.4
+        language_code=language
     )
 
-    transcriber = aai.Transcriber()
-
-    transcript = transcriber.transcribe(file_path, config)
+    # Create transcriber instance
+    transcriber = aai.Transcriber(config=config)
 
+    # Perform transcription
+    transcript = transcriber.transcribe(file_path)
+
+    # Check if successful
     if transcript.status == "error":
         raise RuntimeError(f"Transcription failed: {transcript.error}")
 
-    # Access detected language and confidence from json_response
-    response = transcript.json_response
-    language = response.get("language_code")
-    confidence = response.get("language_confidence")
-
-    result = {
-        "transcript": transcript.text,
-        "language": language,
-        "confidence": confidence
-    }
-
-    return transcript.text, language, confidence
+
+    return transcript.text
```
 
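The inline comment above says to replace the hardcoded key with an environment variable in production. A sketch of that, where ASSEMBLYAI_API_KEY is a hypothetical variable name:

```python
# Sketch: load the AssemblyAI key from the environment instead of
# hardcoding it. ASSEMBLYAI_API_KEY is a hypothetical name, not from
# this commit.
import os
import assemblyai as aai

api_key = os.environ.get("ASSEMBLYAI_API_KEY")
if not api_key:
    raise RuntimeError("ASSEMBLYAI_API_KEY is not set")
aai.settings.api_key = api_key
```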
vers/compute_vers_score.py CHANGED
```diff
@@ -79,4 +79,7 @@ def compute_vers_score(file_path: str, whisper_model) -> dict:
         volume_std=volume_std,
         valence_scores=valence_scores
     )
+
+    # Include the transcript in the result
+    vers_result["transcript"] = transcript
     return vers_result
```
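After this change, callers of compute_vers_score receive the transcript alongside the scores. A hypothetical usage sketch (the file name and model are placeholders):

```python
# Hypothetical usage; "sample.wav" and whisper_model are placeholders.
result = compute_vers_score("sample.wav", whisper_model)
print(result["VERS Score"])   # score key as consumed in app.py
print(result["transcript"])   # new key added by this commit
```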