Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
sachin
committed on
Commit
·
7220fec
1
Parent(s):
cf4b677
update-tansc
Browse files
- src/server/main.py +14 -177
src/server/main.py
CHANGED
@@ -444,73 +444,6 @@ async def chat(
|
|
444 |
logger.error(f"Error processing request: {str(e)}")
|
445 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
446 |
|
447 |
-
@app.post("/v1/process_audio/",
|
448 |
-
response_model=AudioProcessingResponse,
|
449 |
-
summary="Process Audio File",
|
450 |
-
description="Process an uploaded audio file in the specified language. Rate limited to 100 requests per minute per user. Requires authentication.",
|
451 |
-
tags=["Audio"],
|
452 |
-
responses={
|
453 |
-
200: {"description": "Processed result", "model": AudioProcessingResponse},
|
454 |
-
401: {"description": "Unauthorized - Token required"},
|
455 |
-
429: {"description": "Rate limit exceeded"},
|
456 |
-
504: {"description": "Audio processing timeout"}
|
457 |
-
})
|
458 |
-
@limiter.limit(settings.chat_rate_limit)
|
459 |
-
async def process_audio(
|
460 |
-
request: Request,
|
461 |
-
file: UploadFile = File(..., description="Audio file to process"),
|
462 |
-
language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
|
463 |
-
credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
|
464 |
-
x_session_key: str = Header(..., alias="X-Session-Key")
|
465 |
-
):
|
466 |
-
user_id = await get_current_user(credentials)
|
467 |
-
session_key = base64.b64decode(x_session_key)
|
468 |
-
|
469 |
-
# Decrypt the language
|
470 |
-
try:
|
471 |
-
encrypted_language = base64.b64decode(language)
|
472 |
-
decrypted_language = decrypt_data(encrypted_language, session_key).decode("utf-8")
|
473 |
-
except Exception as e:
|
474 |
-
logger.error(f"Language decryption failed: {str(e)}")
|
475 |
-
raise HTTPException(status_code=400, detail="Invalid encrypted language")
|
476 |
-
|
477 |
-
# Validate language
|
478 |
-
allowed_languages = ["kannada", "hindi", "tamil"]
|
479 |
-
if decrypted_language not in allowed_languages:
|
480 |
-
raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
|
481 |
-
|
482 |
-
logger.debug("Processing audio processing request", extra={
|
483 |
-
"endpoint": "/v1/process_audio",
|
484 |
-
"filename": file.filename,
|
485 |
-
"language": decrypted_language,
|
486 |
-
"client_ip": get_remote_address(request),
|
487 |
-
"user_id": user_id
|
488 |
-
})
|
489 |
-
|
490 |
-
start_time = time()
|
491 |
-
try:
|
492 |
-
file_content = await file.read()
|
493 |
-
files = {"file": (file.filename, file_content, file.content_type)}
|
494 |
-
|
495 |
-
external_url = f"{settings.external_api_base_url}/process_audio/?language={decrypted_language}"
|
496 |
-
response = requests.post(
|
497 |
-
external_url,
|
498 |
-
files=files,
|
499 |
-
headers={"accept": "application/json"},
|
500 |
-
timeout=60
|
501 |
-
)
|
502 |
-
response.raise_for_status()
|
503 |
-
|
504 |
-
processed_result = response.json().get("result", "")
|
505 |
-
logger.debug(f"Audio processing completed in {time() - start_time:.2f} seconds")
|
506 |
-
return AudioProcessingResponse(result=processed_result)
|
507 |
-
|
508 |
-
except requests.Timeout:
|
509 |
-
raise HTTPException(status_code=504, detail="Audio processing service timeout")
|
510 |
-
except requests.RequestException as e:
|
511 |
-
logger.error(f"Audio processing request failed: {str(e)}")
|
512 |
-
raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
|
513 |
-
|
514 |
@app.post("/v1/transcribe/",
|
515 |
response_model=TranscriptionResponse,
|
516 |
summary="Transcribe Audio File",
|
@@ -548,6 +481,15 @@ async def transcribe_audio(
|
|
548 |
try:
|
549 |
encrypted_content = await file.read()
|
550 |
file_content = decrypt_data(encrypted_content, session_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
551 |
files = {"file": (file.filename, file_content, file.content_type)}
|
552 |
|
553 |
external_url = f"{settings.external_api_base_url}/v1/transcribe/?language={decrypted_language}"
|
@@ -560,6 +502,11 @@ async def transcribe_audio(
|
|
560 |
response.raise_for_status()
|
561 |
|
562 |
transcription = response.json().get("text", "")
|
|
|
|
|
|
|
|
|
|
|
563 |
logger.debug(f"Transcription completed in {time() - start_time:.2f} seconds")
|
564 |
return TranscriptionResponse(text=transcription)
|
565 |
|
@@ -572,44 +519,6 @@ async def transcribe_audio(
|
|
572 |
logger.error(f"Transcription request failed: {str(e)}")
|
573 |
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
|
574 |
|
575 |
-
@app.post("/v1/chat_v2",
|
576 |
-
response_model=TranscriptionResponse,
|
577 |
-
summary="Chat with Image (V2)",
|
578 |
-
description="Generate a response from a text prompt and optional image. Rate limited to 100 requests per minute per user. Requires authentication.",
|
579 |
-
tags=["Chat"],
|
580 |
-
responses={
|
581 |
-
200: {"description": "Chat response", "model": TranscriptionResponse},
|
582 |
-
400: {"description": "Invalid prompt"},
|
583 |
-
401: {"description": "Unauthorized - Token required"},
|
584 |
-
429: {"description": "Rate limit exceeded"}
|
585 |
-
})
|
586 |
-
@limiter.limit(settings.chat_rate_limit)
|
587 |
-
async def chat_v2(
|
588 |
-
request: Request,
|
589 |
-
prompt: str = Form(..., description="Text prompt for chat"),
|
590 |
-
image: UploadFile = File(default=None, description="Optional image to accompany the prompt"),
|
591 |
-
credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)
|
592 |
-
):
|
593 |
-
user_id = await get_current_user(credentials)
|
594 |
-
if not prompt:
|
595 |
-
raise HTTPException(status_code=400, detail="Prompt cannot be empty")
|
596 |
-
|
597 |
-
logger.debug("Processing chat_v2 request", extra={
|
598 |
-
"endpoint": "/v1/chat_v2",
|
599 |
-
"prompt_length": len(prompt),
|
600 |
-
"has_image": bool(image),
|
601 |
-
"client_ip": get_remote_address(request),
|
602 |
-
"user_id": user_id
|
603 |
-
})
|
604 |
-
|
605 |
-
try:
|
606 |
-
image_data = Image.open(await image.read()) if image else None
|
607 |
-
response_text = f"Processed: {prompt}" + (" with image" if image_data else "")
|
608 |
-
return TranscriptionResponse(text=response_text)
|
609 |
-
except Exception as e:
|
610 |
-
logger.error(f"Chat_v2 processing failed: {str(e)}", exc_info=True)
|
611 |
-
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
612 |
-
|
613 |
@app.post("/v1/translate",
|
614 |
response_model=TranslationResponse,
|
615 |
summary="Translate Text",
|
@@ -1030,78 +939,6 @@ async def speech_to_speech(
|
|
1030 |
raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
|
1031 |
|
1032 |
|
1033 |
-
@app.post("/v1/speech_to_speech_v2",
|
1034 |
-
summary="Speech-to-Speech Conversion",
|
1035 |
-
description="Convert input encrypted speech to processed speech in the specified encrypted language by calling an external speech-to-speech API. Rate limited to 5 requests per minute per user. Requires authentication and X-Session-Key header.",
|
1036 |
-
tags=["Audio"],
|
1037 |
-
responses={
|
1038 |
-
200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
|
1039 |
-
400: {"description": "Invalid input, encrypted audio, or language"},
|
1040 |
-
401: {"description": "Unauthorized - Token required"},
|
1041 |
-
429: {"description": "Rate limit exceeded"},
|
1042 |
-
504: {"description": "External API timeout"},
|
1043 |
-
500: {"description": "External API error"}
|
1044 |
-
})
|
1045 |
-
async def speech_to_speech_v2(
|
1046 |
-
request: Request,
|
1047 |
-
file: UploadFile = File(..., description="Encrypted audio file to process"),
|
1048 |
-
language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
|
1049 |
-
) -> StreamingResponse:
|
1050 |
-
|
1051 |
-
# Decrypt the language
|
1052 |
-
try:
|
1053 |
-
encrypted_language = language
|
1054 |
-
decrypted_language = encrypted_language
|
1055 |
-
except Exception as e:
|
1056 |
-
logger.error(f"Language decryption failed: {str(e)}")
|
1057 |
-
raise HTTPException(status_code=400, detail="Invalid encrypted language")
|
1058 |
-
|
1059 |
-
# Validate language
|
1060 |
-
allowed_languages = [lang.value for lang in SupportedLanguage]
|
1061 |
-
if decrypted_language not in allowed_languages:
|
1062 |
-
raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
|
1063 |
-
|
1064 |
-
logger.debug("Processing speech-to-speech request", extra={
|
1065 |
-
"endpoint": "/v1/speech_to_speech",
|
1066 |
-
"audio_filename": file.filename,
|
1067 |
-
"language": decrypted_language,
|
1068 |
-
"client_ip": get_remote_address(request),
|
1069 |
-
})
|
1070 |
-
|
1071 |
-
try:
|
1072 |
-
encrypted_content = await file.read()
|
1073 |
-
file_content = encrypted_content
|
1074 |
-
files = {"file": (file.filename, file_content, file.content_type)}
|
1075 |
-
external_url = f"{settings.external_api_base_url}/v1/speech_to_speech?language={decrypted_language}"
|
1076 |
-
|
1077 |
-
response = requests.post(
|
1078 |
-
external_url,
|
1079 |
-
files=files,
|
1080 |
-
headers={"accept": "application/json"},
|
1081 |
-
stream=True,
|
1082 |
-
timeout=60
|
1083 |
-
)
|
1084 |
-
response.raise_for_status()
|
1085 |
-
|
1086 |
-
headers = {
|
1087 |
-
"Content-Disposition": f"inline; filename=\"speech.mp3\"",
|
1088 |
-
"Cache-Control": "no-cache",
|
1089 |
-
"Content-Type": "audio/mp3"
|
1090 |
-
}
|
1091 |
-
|
1092 |
-
return StreamingResponse(
|
1093 |
-
response.iter_content(chunk_size=8192),
|
1094 |
-
media_type="audio/mp3",
|
1095 |
-
headers=headers
|
1096 |
-
)
|
1097 |
-
|
1098 |
-
except requests.Timeout:
|
1099 |
-
logger.error("External speech-to-speech API timed out")
|
1100 |
-
raise HTTPException(status_code=504, detail="External API timeout")
|
1101 |
-
except requests.RequestException as e:
|
1102 |
-
logger.error(f"External speech-to-speech API error: {str(e)}")
|
1103 |
-
raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
|
1104 |
-
|
1105 |
|
1106 |
if __name__ == "__main__":
|
1107 |
parser = argparse.ArgumentParser(description="Run the FastAPI server.")
|
|
|
444 |
logger.error(f"Error processing request: {str(e)}")
|
445 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
446 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
@app.post("/v1/transcribe/",
|
448 |
response_model=TranscriptionResponse,
|
449 |
summary="Transcribe Audio File",
|
|
|
481 |
try:
|
482 |
encrypted_content = await file.read()
|
483 |
file_content = decrypt_data(encrypted_content, session_key)
|
484 |
+
|
485 |
+
|
486 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
|
487 |
+
# Write the decrypted content to the temp file
|
488 |
+
#decrypted_content = await file.read() # Assuming decrypted_content is the file content
|
489 |
+
temp_file.write(file_content)
|
490 |
+
temp_file_path = temp_file.name
|
491 |
+
|
492 |
+
'''
|
493 |
files = {"file": (file.filename, file_content, file.content_type)}
|
494 |
|
495 |
external_url = f"{settings.external_api_base_url}/v1/transcribe/?language={decrypted_language}"
|
|
|
502 |
response.raise_for_status()
|
503 |
|
504 |
transcription = response.json().get("text", "")
|
505 |
+
'''
|
506 |
+
response = dwani.ASR.transcribe(file_path=temp_file_path, language=decrypted_language)
|
507 |
+
|
508 |
+
transcription = response.get("text","")
|
509 |
+
|
510 |
logger.debug(f"Transcription completed in {time() - start_time:.2f} seconds")
|
511 |
return TranscriptionResponse(text=transcription)
|
512 |
|
|
|
519 |
logger.error(f"Transcription request failed: {str(e)}")
|
520 |
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
|
521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
@app.post("/v1/translate",
|
523 |
response_model=TranslationResponse,
|
524 |
summary="Translate Text",
|
|
|
939 |
raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
|
940 |
|
941 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
942 |
|
943 |
if __name__ == "__main__":
|
944 |
parser = argparse.ArgumentParser(description="Run the FastAPI server.")
|