Spaces:

jeongsoo
/

RAG_voice

Paused

App Files Community

jeongsoo committed on Mar 22

Commit

78df87c

1 Parent(s): 8370b61

Add application file

Browse files

Files changed (2) hide show

app.py +12 -6
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -495,7 +495,7 @@ class AutoRAGChatApp:
         음성 쿼리 처리
         Args:
-            audio: 녹음된 오디오 데이터
             chat_history: 대화 기록
         Returns:
@@ -505,10 +505,16 @@ class AutoRAGChatApp:
             return "", chat_history
         try:
-            # 임시 파일에 오디오 저장
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                 temp_path = temp_file.name
-                temp_file.write(audio)
             print(f"[STT] 임시 오디오 파일 생성: {temp_path}")
@@ -591,9 +597,9 @@ class AutoRAGChatApp:
                     # 채팅 인터페이스
                     chatbot = gr.Chatbot(
                         label="대화 내용",
-                        bubble_full_width=False,
                         height=500,
-                        show_copy_button=True
                     )
                     with gr.Tabs() as input_tabs:
@@ -614,7 +620,7 @@ class AutoRAGChatApp:
                             audio_input = gr.Audio(
                                 label="마이크 입력",
                                 sources=["microphone"],
-                                type="bytes",
                                 format="wav"
                             )
                             voice_submit_btn = gr.Button("음성 질문 전송", variant="primary")

         음성 쿼리 처리
         Args:
+            audio: 녹음된 오디오 데이터 (numpy 배열: (샘플, 채널))
             chat_history: 대화 기록
         Returns:
             return "", chat_history
         try:
+            import numpy as np
+            import scipy.io.wavfile as wav
+            # numpy 배열을 WAV 파일로 저장
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                 temp_path = temp_file.name
+                # 샘플링 레이트와 오디오 데이터를 WAV 파일로 저장
+                sr, data = audio
+                # 16비트 PCM 형식으로 변환
+                wav.write(temp_path, sr, data.astype(np.int16))
             print(f"[STT] 임시 오디오 파일 생성: {temp_path}")
                     # 채팅 인터페이스
                     chatbot = gr.Chatbot(
                         label="대화 내용",
                         height=500,
+                        show_copy_button=True,
+                        type="messages"
                     )
                     with gr.Tabs() as input_tabs:
                             audio_input = gr.Audio(
                                 label="마이크 입력",
                                 sources=["microphone"],
+                                type="numpy",
                                 format="wav"
                             )
                             voice_submit_btn = gr.Button("음성 질문 전송", variant="primary")

requirements.txt CHANGED Viewed

@@ -11,4 +11,6 @@ transformers>=4.34.0
 langchain-openai>=0.0.2
 openai>=1.0.0
 docling>=0.1.3
-requests>=2.28.0

 langchain-openai>=0.0.2
 openai>=1.0.0
 docling>=0.1.3
+requests>=2.28.0
+scipy>=1.10.0
+numpy>=1.23.0