sachin committed on
Commit
7220fec
·
1 Parent(s): cf4b677

update-tansc

Browse files
Files changed (1) hide show
  1. src/server/main.py +14 -177
src/server/main.py CHANGED
@@ -444,73 +444,6 @@ async def chat(
444
  logger.error(f"Error processing request: {str(e)}")
445
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
446
 
447
- @app.post("/v1/process_audio/",
448
- response_model=AudioProcessingResponse,
449
- summary="Process Audio File",
450
- description="Process an uploaded audio file in the specified language. Rate limited to 100 requests per minute per user. Requires authentication.",
451
- tags=["Audio"],
452
- responses={
453
- 200: {"description": "Processed result", "model": AudioProcessingResponse},
454
- 401: {"description": "Unauthorized - Token required"},
455
- 429: {"description": "Rate limit exceeded"},
456
- 504: {"description": "Audio processing timeout"}
457
- })
458
- @limiter.limit(settings.chat_rate_limit)
459
- async def process_audio(
460
- request: Request,
461
- file: UploadFile = File(..., description="Audio file to process"),
462
- language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
463
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
464
- x_session_key: str = Header(..., alias="X-Session-Key")
465
- ):
466
- user_id = await get_current_user(credentials)
467
- session_key = base64.b64decode(x_session_key)
468
-
469
- # Decrypt the language
470
- try:
471
- encrypted_language = base64.b64decode(language)
472
- decrypted_language = decrypt_data(encrypted_language, session_key).decode("utf-8")
473
- except Exception as e:
474
- logger.error(f"Language decryption failed: {str(e)}")
475
- raise HTTPException(status_code=400, detail="Invalid encrypted language")
476
-
477
- # Validate language
478
- allowed_languages = ["kannada", "hindi", "tamil"]
479
- if decrypted_language not in allowed_languages:
480
- raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
481
-
482
- logger.debug("Processing audio processing request", extra={
483
- "endpoint": "/v1/process_audio",
484
- "filename": file.filename,
485
- "language": decrypted_language,
486
- "client_ip": get_remote_address(request),
487
- "user_id": user_id
488
- })
489
-
490
- start_time = time()
491
- try:
492
- file_content = await file.read()
493
- files = {"file": (file.filename, file_content, file.content_type)}
494
-
495
- external_url = f"{settings.external_api_base_url}/process_audio/?language={decrypted_language}"
496
- response = requests.post(
497
- external_url,
498
- files=files,
499
- headers={"accept": "application/json"},
500
- timeout=60
501
- )
502
- response.raise_for_status()
503
-
504
- processed_result = response.json().get("result", "")
505
- logger.debug(f"Audio processing completed in {time() - start_time:.2f} seconds")
506
- return AudioProcessingResponse(result=processed_result)
507
-
508
- except requests.Timeout:
509
- raise HTTPException(status_code=504, detail="Audio processing service timeout")
510
- except requests.RequestException as e:
511
- logger.error(f"Audio processing request failed: {str(e)}")
512
- raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
513
-
514
  @app.post("/v1/transcribe/",
515
  response_model=TranscriptionResponse,
516
  summary="Transcribe Audio File",
@@ -548,6 +481,15 @@ async def transcribe_audio(
548
  try:
549
  encrypted_content = await file.read()
550
  file_content = decrypt_data(encrypted_content, session_key)
 
 
 
 
 
 
 
 
 
551
  files = {"file": (file.filename, file_content, file.content_type)}
552
 
553
  external_url = f"{settings.external_api_base_url}/v1/transcribe/?language={decrypted_language}"
@@ -560,6 +502,11 @@ async def transcribe_audio(
560
  response.raise_for_status()
561
 
562
  transcription = response.json().get("text", "")
 
 
 
 
 
563
  logger.debug(f"Transcription completed in {time() - start_time:.2f} seconds")
564
  return TranscriptionResponse(text=transcription)
565
 
@@ -572,44 +519,6 @@ async def transcribe_audio(
572
  logger.error(f"Transcription request failed: {str(e)}")
573
  raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
574
 
575
- @app.post("/v1/chat_v2",
576
- response_model=TranscriptionResponse,
577
- summary="Chat with Image (V2)",
578
- description="Generate a response from a text prompt and optional image. Rate limited to 100 requests per minute per user. Requires authentication.",
579
- tags=["Chat"],
580
- responses={
581
- 200: {"description": "Chat response", "model": TranscriptionResponse},
582
- 400: {"description": "Invalid prompt"},
583
- 401: {"description": "Unauthorized - Token required"},
584
- 429: {"description": "Rate limit exceeded"}
585
- })
586
- @limiter.limit(settings.chat_rate_limit)
587
- async def chat_v2(
588
- request: Request,
589
- prompt: str = Form(..., description="Text prompt for chat"),
590
- image: UploadFile = File(default=None, description="Optional image to accompany the prompt"),
591
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)
592
- ):
593
- user_id = await get_current_user(credentials)
594
- if not prompt:
595
- raise HTTPException(status_code=400, detail="Prompt cannot be empty")
596
-
597
- logger.debug("Processing chat_v2 request", extra={
598
- "endpoint": "/v1/chat_v2",
599
- "prompt_length": len(prompt),
600
- "has_image": bool(image),
601
- "client_ip": get_remote_address(request),
602
- "user_id": user_id
603
- })
604
-
605
- try:
606
- image_data = Image.open(await image.read()) if image else None
607
- response_text = f"Processed: {prompt}" + (" with image" if image_data else "")
608
- return TranscriptionResponse(text=response_text)
609
- except Exception as e:
610
- logger.error(f"Chat_v2 processing failed: {str(e)}", exc_info=True)
611
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
612
-
613
  @app.post("/v1/translate",
614
  response_model=TranslationResponse,
615
  summary="Translate Text",
@@ -1030,78 +939,6 @@ async def speech_to_speech(
1030
  raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
1031
 
1032
 
1033
- @app.post("/v1/speech_to_speech_v2",
1034
- summary="Speech-to-Speech Conversion",
1035
- description="Convert input encrypted speech to processed speech in the specified encrypted language by calling an external speech-to-speech API. Rate limited to 5 requests per minute per user. Requires authentication and X-Session-Key header.",
1036
- tags=["Audio"],
1037
- responses={
1038
- 200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
1039
- 400: {"description": "Invalid input, encrypted audio, or language"},
1040
- 401: {"description": "Unauthorized - Token required"},
1041
- 429: {"description": "Rate limit exceeded"},
1042
- 504: {"description": "External API timeout"},
1043
- 500: {"description": "External API error"}
1044
- })
1045
- async def speech_to_speech_v2(
1046
- request: Request,
1047
- file: UploadFile = File(..., description="Encrypted audio file to process"),
1048
- language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
1049
- ) -> StreamingResponse:
1050
-
1051
- # Decrypt the language
1052
- try:
1053
- encrypted_language = language
1054
- decrypted_language = encrypted_language
1055
- except Exception as e:
1056
- logger.error(f"Language decryption failed: {str(e)}")
1057
- raise HTTPException(status_code=400, detail="Invalid encrypted language")
1058
-
1059
- # Validate language
1060
- allowed_languages = [lang.value for lang in SupportedLanguage]
1061
- if decrypted_language not in allowed_languages:
1062
- raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
1063
-
1064
- logger.debug("Processing speech-to-speech request", extra={
1065
- "endpoint": "/v1/speech_to_speech",
1066
- "audio_filename": file.filename,
1067
- "language": decrypted_language,
1068
- "client_ip": get_remote_address(request),
1069
- })
1070
-
1071
- try:
1072
- encrypted_content = await file.read()
1073
- file_content = encrypted_content
1074
- files = {"file": (file.filename, file_content, file.content_type)}
1075
- external_url = f"{settings.external_api_base_url}/v1/speech_to_speech?language={decrypted_language}"
1076
-
1077
- response = requests.post(
1078
- external_url,
1079
- files=files,
1080
- headers={"accept": "application/json"},
1081
- stream=True,
1082
- timeout=60
1083
- )
1084
- response.raise_for_status()
1085
-
1086
- headers = {
1087
- "Content-Disposition": f"inline; filename=\"speech.mp3\"",
1088
- "Cache-Control": "no-cache",
1089
- "Content-Type": "audio/mp3"
1090
- }
1091
-
1092
- return StreamingResponse(
1093
- response.iter_content(chunk_size=8192),
1094
- media_type="audio/mp3",
1095
- headers=headers
1096
- )
1097
-
1098
- except requests.Timeout:
1099
- logger.error("External speech-to-speech API timed out")
1100
- raise HTTPException(status_code=504, detail="External API timeout")
1101
- except requests.RequestException as e:
1102
- logger.error(f"External speech-to-speech API error: {str(e)}")
1103
- raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
1104
-
1105
 
1106
  if __name__ == "__main__":
1107
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")
 
444
  logger.error(f"Error processing request: {str(e)}")
445
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  @app.post("/v1/transcribe/",
448
  response_model=TranscriptionResponse,
449
  summary="Transcribe Audio File",
 
481
  try:
482
  encrypted_content = await file.read()
483
  file_content = decrypt_data(encrypted_content, session_key)
484
+
485
+
486
+ with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
487
+ # Write the decrypted content to the temp file
488
+ #decrypted_content = await file.read() # Assuming decrypted_content is the file content
489
+ temp_file.write(file_content)
490
+ temp_file_path = temp_file.name
491
+
492
+ '''
493
  files = {"file": (file.filename, file_content, file.content_type)}
494
 
495
  external_url = f"{settings.external_api_base_url}/v1/transcribe/?language={decrypted_language}"
 
502
  response.raise_for_status()
503
 
504
  transcription = response.json().get("text", "")
505
+ '''
506
+ response = dwani.ASR.transcribe(file_path=temp_file_path, language=decrypted_language)
507
+
508
+ transcription = response.get("text","")
509
+
510
  logger.debug(f"Transcription completed in {time() - start_time:.2f} seconds")
511
  return TranscriptionResponse(text=transcription)
512
 
 
519
  logger.error(f"Transcription request failed: {str(e)}")
520
  raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
521
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  @app.post("/v1/translate",
523
  response_model=TranslationResponse,
524
  summary="Translate Text",
 
939
  raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
940
 
941
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
942
 
943
  if __name__ == "__main__":
944
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")