mulasagg committed · Commit e6cd41c · 1 Parent(s): df039e9

recover from backup

.gitignore ADDED
@@ -0,0 +1 @@
+ **/__pycache__/
app.py CHANGED
@@ -18,7 +18,7 @@ from vps.vps_api import main as analyze_vps_main
  from ves.ves import calc_voice_engagement_score
  from transcribe import transcribe_audio
  from filler_count.filler_score import analyze_fillers
- from emotion.emo_predict import predict_emotion
+ #from emotion.emo_predict import predict_emotion

  app = FastAPI()

@@ -290,9 +290,9 @@ import time

  @app.post('/transcribe/')
- async def transcribe(file: UploadFile, language: str = Form(...)):
+ async def transcribe(file: UploadFile):
      """
-     Endpoint to transcribe an uploaded audio file (.wav or .mp3).
+     Endpoint to transcribe an uploaded audio file ('.wav', '.mp3', '.mp4', '.m4a', '.flac').
      """
      #calculate time to transcribe
      start_time = time.time()
@@ -311,7 +311,7 @@ async def transcribe(file: UploadFile, language: str = Form(...)):
          shutil.copyfileobj(file.file, buffer)

      # Transcribe using your custom function
-     result = transcribe_audio(temp_filepath, language=language, model_size="base")
+     result = transcribe_audio(temp_filepath, model_size="base")
      end_time = time.time()
      transcription_time = end_time - start_time
      response = {
@@ -329,12 +329,14 @@ async def transcribe(file: UploadFile, language: str = Form(...)):
      if os.path.exists(temp_filepath):
          os.remove(temp_filepath)

+ import datetime

  @app.post('/analyze_all/')
- async def analyze_all(file: UploadFile, language: str = Form(...)):
+ async def analyze_all(file: UploadFile):
      """
      Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
      """
-
+     print(f"Received request at {datetime.datetime.now()} for file: {file.filename}")
      if not file.filename.endswith(('.wav', '.mp3', '.m4a', '.mp4', '.flac')):
          raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")

@@ -358,8 +360,8 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
      vps_result = analyze_vps_main(temp_filepath)
      ves_result = calc_voice_engagement_score(temp_filepath)
      filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
-     transcript = transcribe_audio(temp_filepath, language, "base") #fix this
-     emotion = predict_emotion(temp_filepath)
+     transcript, language, _ = transcribe_audio(temp_filepath, "base") #fix this
+     #emotion = predict_emotion(temp_filepath)
      avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7

@@ -374,7 +376,8 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
          "ves": ves_result,
          "filler_words": filler_count,
          "transcript": transcript,
-         "emotion": emotion ,
+         "Detected Language": language,
+         #"emotion": emotion ,
          "sank_score": avg_score
      }

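Note on the app.py changes above: both endpoints drop the `language` form field, since `transcribe_audio` now detects the language itself. A minimal client sketch of the new call pattern (the base URL and filename are illustrative assumptions, not part of this commit):

```python
# Hypothetical client for the updated endpoints; BASE_URL and sample.wav
# are assumptions for illustration only.
import requests

BASE_URL = "http://localhost:8000"

with open("sample.wav", "rb") as f:
    # No `language` form field any more -- only the file itself is sent.
    resp = requests.post(f"{BASE_URL}/analyze_all/",
                         files={"file": ("sample.wav", f, "audio/wav")})
resp.raise_for_status()
report = resp.json()
print(report["Detected Language"], report["sank_score"])
```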
emotion/__pycache__/emo_predict.cpython-312.pyc CHANGED
Binary files a/emotion/__pycache__/emo_predict.cpython-312.pyc and b/emotion/__pycache__/emo_predict.cpython-312.pyc differ
 
filler_count/__pycache__/filler_score.cpython-312.pyc CHANGED
Binary files a/filler_count/__pycache__/filler_score.cpython-312.pyc and b/filler_count/__pycache__/filler_score.cpython-312.pyc differ
 
tone_modulation/__pycache__/tone_api.cpython-312.pyc CHANGED
Binary files a/tone_modulation/__pycache__/tone_api.cpython-312.pyc and b/tone_modulation/__pycache__/tone_api.cpython-312.pyc differ
 
transcribe.py CHANGED
@@ -1,26 +1,32 @@
  import assemblyai as aai

- # Set your AssemblyAI API key once
- aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4"  # Replace with env var for production
+ aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4"  # Use env var in production

- def transcribe_audio(file_path: str, language, model_size=None) -> str:
+ def transcribe_audio(file_path: str, model_size=None) -> tuple[str, str, float]:
+     print(f"Transcribing audio file: {file_path} with language detection")

-     print(f"Transcribing audio file: {file_path} with language: {language}")
-     # Configure for Hindi language
      config = aai.TranscriptionConfig(
          speech_model=aai.SpeechModel.nano,
-         language_code=language
+         language_detection=True,
+         language_confidence_threshold=0.4
      )

-     # Create transcriber instance
-     transcriber = aai.Transcriber(config=config)
+     transcriber = aai.Transcriber()
+
+     transcript = transcriber.transcribe(file_path, config)

-     # Perform transcription
-     transcript = transcriber.transcribe(file_path)
-
-     # Check if successful
      if transcript.status == "error":
          raise RuntimeError(f"Transcription failed: {transcript.error}")

-
-     return transcript.text
+     # Access detected language and confidence from json_response
+     response = transcript.json_response
+     language = response.get("language_code")
+     confidence = response.get("language_confidence")
+
+     result = {
+         "transcript": transcript.text,
+         "language": language,
+         "confidence": confidence
+     }
+
+     return transcript.text, language, confidence
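`transcribe_audio` now returns a `(text, language, confidence)` tuple instead of a bare string (note the intermediate `result` dict is built but never returned), so existing callers must unpack three values. A minimal usage sketch, assuming a valid AssemblyAI key and a local `sample.wav`:

```python
# Sketch only: sample.wav is an assumed local file. `model_size` is still
# accepted for backward compatibility but is unused by the AssemblyAI path.
from transcribe import transcribe_audio

text, language, confidence = transcribe_audio("sample.wav", model_size="base")
print(f"detected {language} (confidence {confidence}): {text[:80]}")
```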
vers/__pycache__/compute_vers_score.cpython-312.pyc CHANGED
Binary files a/vers/__pycache__/compute_vers_score.cpython-312.pyc and b/vers/__pycache__/compute_vers_score.cpython-312.pyc differ
 
vers/compute_vers_score.py CHANGED
@@ -79,7 +79,4 @@ def compute_vers_score(file_path: str, whisper_model) -> dict:
          volume_std=volume_std,
          valence_scores=valence_scores
      )
-
-     # Include transcript optionally
-     vers_result["transcript"] = transcript
      return vers_result
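With the transcript no longer attached to `vers_result`, any caller that previously read `vers_result["transcript"]` must obtain it elsewhere, as `/analyze_all/` now does via `transcribe_audio`. A sketch of the adjusted wiring (the `whisper_model` placeholder below is an assumption; how that model object is loaded is not shown in this diff):

```python
# Post-commit wiring sketch: transcript comes from transcribe_audio, not
# from compute_vers_score.
from vers.compute_vers_score import compute_vers_score
from transcribe import transcribe_audio

whisper_model = None  # placeholder; the real model object is loaded elsewhere
vers_result = compute_vers_score("sample.wav", whisper_model)
transcript, language, _ = transcribe_audio("sample.wav", "base")
print(vers_result["VERS Score"], language)
```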