Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
sachin
committed on
Commit
·
9777c25
1
Parent(s):
5964ead
update-tansc
Browse files- src/server/main.py +41 -26
src/server/main.py
CHANGED
@@ -816,39 +816,54 @@ async def speech_to_speech(
|
|
816 |
try:
|
817 |
encrypted_content = await file.read()
|
818 |
file_content = decrypt_data(encrypted_content, session_key)
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
|
823 |
-
# Write the decrypted content to the temp file
|
824 |
-
#decrypted_content = await file.read() # Assuming decrypted_content is the file content
|
825 |
temp_file.write(file_content)
|
826 |
temp_file_path = temp_file.name
|
827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
|
829 |
-
|
830 |
-
|
831 |
-
transcription = response.get("text","")
|
832 |
-
|
833 |
-
chat_response = dwani.Chat.create(prompt=transcription, src_lang=decrypted_language, tgt_lang=decrypted_language)
|
834 |
-
|
835 |
-
|
836 |
-
response_text = chat_response.get("response", "")
|
837 |
-
|
838 |
-
response = dwani.Audio.speech(input=response_text, response_format="mp3")
|
839 |
-
|
840 |
-
headers = {
|
841 |
-
"Content-Disposition": f"inline; filename=\"speech.mp3\"",
|
842 |
-
"Cache-Control": "no-cache",
|
843 |
-
"Content-Type": "audio/mp3"
|
844 |
-
}
|
845 |
|
846 |
-
|
847 |
-
|
848 |
-
|
849 |
-
|
850 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
851 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
852 |
except requests.Timeout:
|
853 |
logger.error("External speech-to-speech API timed out", extra={"user_id": user_id})
|
854 |
raise HTTPException(status_code=504, detail="External API timeout")
|
|
|
816 |
try:
|
817 |
encrypted_content = await file.read()
|
818 |
file_content = decrypt_data(encrypted_content, session_key)
|
|
|
|
|
|
|
819 |
with tempfile.NamedTemporaryFile(delete=False, suffix=file.filename) as temp_file:
|
|
|
|
|
820 |
temp_file.write(file_content)
|
821 |
temp_file_path = temp_file.name
|
822 |
|
823 |
+
try:
|
824 |
+
# Transcribe audio
|
825 |
+
response = dwani.ASR.transcribe(file_path=temp_file_path, language=decrypted_language)
|
826 |
+
transcription = response.get("text", "")
|
827 |
+
if not transcription:
|
828 |
+
logger.error("Transcription is empty")
|
829 |
+
raise HTTPException(status_code=400, detail="Transcription failed or returned empty text")
|
830 |
|
831 |
+
# Debug inputs
|
832 |
+
logger.debug(f"Transcription: {transcription}, Language: {decrypted_language}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
833 |
|
834 |
+
# Chat processing
|
835 |
+
try:
|
836 |
+
chat_response = dwani.Chat.create(
|
837 |
+
prompt=transcription,
|
838 |
+
src_lang=decrypted_language,
|
839 |
+
tgt_lang=decrypted_language
|
840 |
+
)
|
841 |
+
except dwani.exceptions.DhwaniAPIError as e:
|
842 |
+
logger.error(f"Chat API failed: {str(e)}")
|
843 |
+
raise HTTPException(status_code=502, detail=f"Chat service error: {str(e)}")
|
844 |
+
|
845 |
+
response_text = chat_response.get("response", "")
|
846 |
+
if not response_text:
|
847 |
+
logger.error("Chat response is empty")
|
848 |
+
raise HTTPException(status_code=500, detail="Chat service returned empty response")
|
849 |
+
|
850 |
+
# Generate audio
|
851 |
+
response = dwani.Audio.speech(input=response_text, response_format="mp3")
|
852 |
+
|
853 |
+
headers = {
|
854 |
+
"Content-Disposition": f"inline; filename=\"speech.mp3\"",
|
855 |
+
"Cache-Control": "no-cache",
|
856 |
+
"Content-Type": "audio/mp3"
|
857 |
+
}
|
858 |
|
859 |
+
return StreamingResponse(
|
860 |
+
bytes_iterator(response),
|
861 |
+
media_type="audio/mp3",
|
862 |
+
headers=headers
|
863 |
+
)
|
864 |
+
finally:
|
865 |
+
# Clean up temporary file
|
866 |
+
os.unlink(temp_file_path)
|
867 |
except requests.Timeout:
|
868 |
logger.error("External speech-to-speech API timed out", extra={"user_id": user_id})
|
869 |
raise HTTPException(status_code=504, detail="External API timeout")
|