Update app.py
app.py CHANGED
@@ -1,2 +1,242 @@
import spaces
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os
import numpy as np
from pydub import AudioSegment
import tempfile
import math

# Hugging Face token setup
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("The HF_TOKEN environment variable is not set.")

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
CHUNK_LENGTH = 10 * 60  # split audio into 10-minute chunks (value in seconds)

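# FILE_LIMIT_MB is declared above but not enforced anywhere below; a guard
# like this sketch (hypothetical, not part of this commit), placed at the top
# of transcribe_audio(), could reject oversized uploads before chunking:
#
#     if os.path.getsize(audio_input) > FILE_LIMIT_MB * 1024 * 1024:
#         raise gr.Error(f"File exceeds the {FILE_LIMIT_MB} MB limit.")
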
device = 0 if torch.cuda.is_available() else "cpu"

# Initialize the Whisper ASR pipeline
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
    token=HF_TOKEN
)

# Hugging Face Inference API client (used for English-to-Korean translation)
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=HF_TOKEN
)

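# The pipeline's chunk_length_s=30 handles windowing within a chunk, while
# split_audio() below pre-splits the file into 10-minute pieces so progress
# can be reported per chunk and a single failure does not lose the whole run.
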
def split_audio(audio_path, chunk_length=CHUNK_LENGTH):
    """Split an audio file into chunks of at most chunk_length seconds."""
    audio = AudioSegment.from_file(audio_path)
    duration = len(audio) / 1000  # pydub reports length in milliseconds
    chunks = []

    # Number of chunks needed to cover the whole file
    num_chunks = math.ceil(duration / chunk_length)

    for i in range(num_chunks):
        start_time = i * chunk_length * 1000  # milliseconds
        end_time = min((i + 1) * chunk_length * 1000, len(audio))

        chunk = audio[start_time:end_time]

        # Save each chunk to a temporary WAV file
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
            chunk.export(temp_file.name, format='wav')
            chunks.append(temp_file.name)

    return chunks, num_chunks

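# Example of the assumed behavior: for a 25-minute input and the default
# CHUNK_LENGTH of 600 s, split_audio() returns three temp-file paths:
#
#     chunks, n = split_audio("talk.mp3")  # n == 3; chunks are .wav paths
#
# The temp files use delete=False, so process_chunk() below removes each one
# with os.unlink() once it has been transcribed.
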
def translate_to_korean(text):
    """Translate English text into Korean via the LLM client."""
    try:
        prompt = f"""Translate the following English text into natural Korean.
English: {text}
Korean:"""

        response = hf_client.text_generation(
            prompt=prompt,
            max_new_tokens=4000,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.2,
            stop=["English:", "Korean:", "\n"]
        )

        # Strip the prompt scaffolding if the model echoes it back
        translated_text = str(response)
        if "Korean:" in translated_text:
            translated_text = translated_text.split("Korean:")[1].strip()

        return translated_text
    except Exception as e:
        print(f"Error during translation: {str(e)}")
        return text

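# Because "\n" is in the stop list, generation halts at the first newline, so
# each chunk's translation comes back as a single line. On any API error the
# function falls back to returning the untranslated English text.
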
def process_chunk(chunk_path, task):
    """Transcribe a single chunk; for the translate task, also render it in Korean."""
    if task == "translate":
        generate_kwargs = {
            "task": "transcribe",
            "language": "en",
            "forced_decoder_ids": None
        }
    else:
        generate_kwargs = {
            "task": "transcribe",
            "language": "ko",
            "forced_decoder_ids": None
        }

    try:
        result = pipe(
            inputs=chunk_path,
            batch_size=BATCH_SIZE,
            generate_kwargs=generate_kwargs,
            return_timestamps=True
        )

        os.unlink(chunk_path)  # remove the temporary chunk file
        text = result["text"]

        if task == "translate":
            text = translate_to_korean(text)

        return text
    except Exception as e:
        print(f"Error while processing chunk: {str(e)}")
        raise e

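# Both branches run Whisper with task "transcribe" rather than its built-in
# "translate" mode: the translate path transcribes the English audio first and
# then hands the text to translate_to_korean() for the Korean rendering.
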
@spaces.GPU
def transcribe_audio(audio_input, task, progress=gr.Progress()):
    if audio_input is None:
        raise gr.Error("No audio file was submitted!")

    try:
        chunks, num_chunks = split_audio(audio_input)
        progress(0, desc="Audio file split complete")

        transcribed_texts = []
        for i, chunk in enumerate(chunks):
            try:
                chunk_text = process_chunk(chunk, task)
                transcribed_texts.append(chunk_text)
                progress((i + 1) / num_chunks, desc=f"Processing chunk {i+1}/{num_chunks}")
            except Exception as e:
                print(f"Chunk {i+1} failed: {str(e)}")
                continue

        if not transcribed_texts:
            raise Exception("All chunks failed to process.")

        transcribed_text = " ".join(transcribed_texts)
        progress(1.0, desc="Processing complete")
        return transcribed_text

    except Exception as e:
        error_msg = f"An error occurred during speech processing: {str(e)}"
        print(f"Detailed error: {str(e)}")
        return error_msg

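# @spaces.GPU requests a ZeroGPU slot for the duration of this call when the
# app runs on Hugging Face Spaces. Failed chunks are logged and skipped, so a
# partial transcript is still returned if at least one chunk succeeds.
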
# CSS styles
css = """
footer { visibility: hidden; }
.progress-bar { height: 15px; border-radius: 5px; }
.container { max-width: 1200px; margin: auto; padding: 20px; }
.output-text { font-size: 16px; line-height: 1.5; }
.status-display {
    background: #f0f0f0;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
"""

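# Only the footer rule and .output-text (via elem_classes) are referenced by
# the components below; .progress-bar, .container, and .status-display are
# defined but not attached to any element in this file.
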
# File upload interface
file_transcribe = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(
            sources="upload",
            type="filepath",
            label="Audio File"
        ),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="Task",
            value="transcribe",
            info="Transcribe: Korean speech → Korean text | Translate: English speech → Korean text"
        )
    ],
    outputs=gr.Textbox(
        label="Transcribed/Translated Text",
        lines=10,
        max_lines=30,
        placeholder="The speech will be converted to text and shown here...",
        elem_classes="output-text"
    ),
    title="🎤 Speech Transcription/Translation AI 'Badassgi'",
    description="""
    Transcribe Korean speech to text, or translate English speech into Korean text.
    - Transcribe: Korean speech → Korean text
    - Translate: English speech → Korean text
    """,
    examples=[],
    cache_examples=False,
    flagging_mode="never"
)

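# The Radio value is passed through unchanged as `task` to transcribe_audio()
# and on to process_chunk(), so these choice strings must stay in sync with
# the string comparisons there.
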
# Microphone recording interface
mic_transcribe = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(
            sources="microphone",
            type="filepath",
            label="Microphone Recording"
        ),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="Task",
            value="transcribe",
            info="Transcribe: Korean speech → Korean text | Translate: English speech → Korean text"
        )
    ],
    outputs=gr.Textbox(
        label="Transcribed/Translated Text",
        lines=10,
        max_lines=30,
        elem_classes="output-text"
    ),
    title="🎤 Speech Transcription/Translation AI 'Badassgi'",
    description="Record speech with the microphone and have it transcribed or translated.",
    flagging_mode="never"
)

# Main application
demo = gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["Audio File", "Microphone Recording"]
    )

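# Wrapping the TabbedInterface in a Blocks lets the theme and custom CSS be
# applied once at the top level while reusing the two Interface definitions.
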
# Run the application
demo.queue().launch(
    server_name="0.0.0.0",
    share=True,
    debug=True,
    ssr_mode=False,
    max_threads=3,
    show_error=True
)
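
# server_name="0.0.0.0" exposes the server on all interfaces, as Spaces
# expects; share=True only matters for local runs, where it requests a
# temporary public gradio.live URL (Spaces itself ignores the flag).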