jeongsoo committed on
Commit
78df87c
·
1 Parent(s): 8370b61

Add application file

Browse files
Files changed (2) hide show
  1. app.py +12 -6
  2. requirements.txt +3 -1
app.py CHANGED
@@ -495,7 +495,7 @@ class AutoRAGChatApp:
495
  μŒμ„± 쿼리 처리
496
 
497
  Args:
498
- audio: 녹음된 오디오 데이터
499
  chat_history: 대화 기록
500
 
501
  Returns:
@@ -505,10 +505,16 @@ class AutoRAGChatApp:
505
  return "", chat_history
506
 
507
  try:
508
- # 임시 파일에 오디오 저장
 
 
 
509
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
510
  temp_path = temp_file.name
511
- temp_file.write(audio)
 
 
 
512
 
513
  print(f"[STT] 임시 오디오 파일 생성: {temp_path}")
514
 
@@ -591,9 +597,9 @@ class AutoRAGChatApp:
591
  # 채팅 인터페이스
592
  chatbot = gr.Chatbot(
593
  label="대화 내용",
594
- bubble_full_width=False,
595
  height=500,
596
- show_copy_button=True
 
597
  )
598
 
599
  with gr.Tabs() as input_tabs:
@@ -614,7 +620,7 @@ class AutoRAGChatApp:
614
  audio_input = gr.Audio(
615
  label="마이크 입력",
616
  sources=["microphone"],
617
- type="bytes",
618
  format="wav"
619
  )
620
  voice_submit_btn = gr.Button("음성 질문 전송", variant="primary")
 
495
  μŒμ„± 쿼리 처리
496
 
497
  Args:
498
+ audio: 녹음된 오디오 데이터 (numpy 배열: (샘플, 채널))
499
  chat_history: 대화 기록
500
 
501
  Returns:
 
505
  return "", chat_history
506
 
507
  try:
508
+ import numpy as np
509
+ import scipy.io.wavfile as wav
510
+
511
+ # numpy 배열을 WAV 파일로 저장
512
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
513
  temp_path = temp_file.name
514
+ # 샘플링 레이트와 오디오 데이터를 WAV 파일로 저장
515
+ sr, data = audio
516
+ # 16비트 PCM 형식으로 변환
517
+ wav.write(temp_path, sr, data.astype(np.int16))
518
 
519
  print(f"[STT] 임시 오디오 파일 생성: {temp_path}")
520
 
 
597
  # 채팅 인터페이스
598
  chatbot = gr.Chatbot(
599
  label="대화 내용",
 
600
  height=500,
601
+ show_copy_button=True,
602
+ type="messages"
603
  )
604
 
605
  with gr.Tabs() as input_tabs:
 
620
  audio_input = gr.Audio(
621
  label="마이크 입력",
622
  sources=["microphone"],
623
+ type="numpy",
624
  format="wav"
625
  )
626
  voice_submit_btn = gr.Button("음성 질문 전송", variant="primary")
requirements.txt CHANGED
@@ -11,4 +11,6 @@ transformers>=4.34.0
11
  langchain-openai>=0.0.2
12
  openai>=1.0.0
13
  docling>=0.1.3
14
- requests>=2.28.0
 
 
 
11
  langchain-openai>=0.0.2
12
  openai>=1.0.0
13
  docling>=0.1.3
14
+ requests>=2.28.0
15
+ scipy>=1.10.0
16
+ numpy>=1.23.0