badassgi

Running

App Files Community

openfree committed on Oct 22, 2024

Commit

d883298

verified ·

1 Parent(s): 629129d

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -118

app.py CHANGED Viewed

@@ -4,9 +4,7 @@ import gradio as gr
 from transformers import pipeline
 from huggingface_hub import InferenceClient
 import os
-import json
 from datetime import datetime
-import time
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8
@@ -14,10 +12,6 @@ FILE_LIMIT_MB = 1000
 device = 0 if torch.cuda.is_available() else "cpu"
-# 파일 저장 경로 설정
-HISTORY_DIR = "transcription_history"
-os.makedirs(HISTORY_DIR, exist_ok=True)
 # Whisper 파이프라인 초기화
 pipe = pipeline(
     task="automatic-speech-recognition",
@@ -32,151 +26,191 @@ hf_client = InferenceClient(
     token=os.getenv("HF_TOKEN")
 )
-def save_transcription(transcribed_text, summary_text):
-    """변환 결과를 JSON 파일로 저장"""
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename = f"{HISTORY_DIR}/transcription_{timestamp}.json"
-    data = {
-        "timestamp": timestamp,
-        "transcribed_text": transcribed_text,
-        "summary": summary_text
-    }
-    with open(filename, "w", encoding="utf-8") as f:
-        json.dump(data, f, ensure_ascii=False, indent=2)
-    return filename
-def process_long_audio(audio_input, chunk_duration=30):
-    """긴 오디오 파일을 청크로 나누어 처리"""
-    # 오디오 처리 로직 구현
-    pass
-def detect_language(text):
-    """텍스트의 언어 감지"""
-    # 언어 감지 로직 구현
-    pass
 def get_word_count(text):
     """텍스트의 단어 수 계산"""
     return len(text.split())
-def get_speaking_time(audio_duration):
-    """음성 길이를 시:분:초 형식으로 변환"""
-    return time.strftime("%H:%M:%S", time.gmtime(audio_duration))
 @spaces.GPU
-def transcribe_summarize(audio_input, task, save_result=False, enable_translation=False):
     if audio_input is None:
         raise gr.Error("오디오 파일이 제출되지 않았습니다!")
-    start_time = time.time()
-    # 음성을 텍스트로 변환
-    result = pipe(
-        audio_input,
-        batch_size=BATCH_SIZE,
-        generate_kwargs={"task": task},
-        return_timestamps=True
-    )
-    transcribed_text = result["text"]
-    # 분석 정보 수집
-    stats = {
-        "word_count": get_word_count(transcribed_text),
-        "processing_time": f"{time.time() - start_time:.2f}초",
-        "audio_duration": get_speaking_time(result.get("duration", 0)),
-        "language": detect_language(transcribed_text)
-    }
-    # 텍스트 요약
     try:
-        prompt = f"""아래 텍스트를 간단히 요약해주세요:
-텍스트: {transcribed_text}
-요약:"""
-        response = hf_client.text_generation(
-            model="CohereForAI/c4ai-command-r-plus-08-2024",
-            prompt=prompt,
-            max_new_tokens=150,
-            temperature=0.3,
-            top_p=0.9,
-            repetition_penalty=1.2,
-            stop_sequences=["\n", "텍스트:", "요약:"]
         )
-        if isinstance(response, str):
-            summary_text = response
-        else:
-            summary_text = response.generated_text if hasattr(response, 'generated_text') else str(response)
-        if "요약:" in summary_text:
-            summary_text = summary_text.split("요약:")[1].strip()
-        if not summary_text:
-            summary_text = "요약을 생성할 수 없습니다."
-    except Exception as e:
-        print(f"요약 생성 중 오류 발생: {str(e)}")
-        summary_text = "요약을 생성할 수 없습니다. 잠시 후 다시 시도해주세요."
-    # 결과 저장
-    if save_result:
-        saved_file = save_transcription(transcribed_text, summary_text)
-        print(f"결과가 저장되었습니다: {saved_file}")
-    # 번역 기능 (옵션)
-    translated_text = ""
-    if enable_translation and task != "translate":
-        try:
-            # 번역 로직 구현
-            pass
         except Exception as e:
-            translated_text = "번역 중 오류가 발생했습니다."
-    return [
-        transcribed_text,
-        summary_text,
-        gr.update(value=f"""
         📊 분석 정보:
-        - 단어 수: {stats['word_count']}개
-        - 처리 시간: {stats['processing_time']}
-        - 음성 길이: {stats['audio_duration']}
-        - 감지된 언어: {stats['language']}
-        """),
-        translated_text if enable_translation else None
-    ]
 # CSS 스타일
 css = """
 footer { visibility: hidden; }
-.gradio-container { max-width: 1200px; margin: auto; }
-.audio-stats { background-color: #f0f0f0; padding: 10px; border-radius: 5px; }
 """
 # 파일 업로드 인터페이스
 file_transcribe = gr.Interface(
     fn=transcribe_summarize,
     inputs=[
-        gr.Audio(sources="upload", type="filepath", label="오디오 파일"),
         gr.Radio(
             choices=["transcribe", "translate"],
-            label="작업",
             value="transcribe"
-        ),
-        gr.Checkbox(label="결과 저장하기", value=False),
-        gr.Checkbox(label="번역 활성화", value=False)
     ],
     outputs=[
-        gr.Textbox(label="변환된 텍스트", lines=5),
-        gr.Textbox(label="요약", lines=3),
-        gr.Textbox(label="분석 정보", lines=4),
-        gr.Textbox(label="번역 결과", lines=5, visible=False)
     ],
-    title="받아쓰기 AI: 음성을 텍스트로 변환하고 요약하기",
-    description="음성 파일을 업로드하거나 직접 녹음하여 텍스트로 변환하고 요약할 수 있습니다.",
     flagging_mode="never"
 )
-# 마이크 녹음 인터페이스와 메인 애플리케이션 코드는 동일하게 유지...

 from transformers import pipeline
 from huggingface_hub import InferenceClient
 import os
 from datetime import datetime
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8
 device = 0 if torch.cuda.is_available() else "cpu"
 # Whisper 파이프라인 초기화
 pipe = pipeline(
     task="automatic-speech-recognition",
     token=os.getenv("HF_TOKEN")
 )
 def get_word_count(text):
     """텍스트의 단어 수 계산"""
+    if not text:
+        return 0
     return len(text.split())
+def format_duration(seconds):
+    """초 단위 시간을 mm:ss 형식으로 변환"""
+    try:
+        minutes = int(seconds // 60)
+        seconds = int(seconds % 60)
+        return f"{minutes:02d}:{seconds:02d}"
+    except:
+        return "00:00"
 @spaces.GPU
+def transcribe_summarize(audio_input, task):
     if audio_input is None:
         raise gr.Error("오디오 파일이 제출되지 않았습니다!")
     try:
+        # 음성을 텍스트로 변환
+        result = pipe(
+            audio_input,
+            batch_size=BATCH_SIZE,
+            generate_kwargs={"task": task},
+            return_timestamps=True
         )
+        transcribed_text = result["text"]
+        # 기본 분석 정보
+        word_count = get_word_count(transcribed_text)
+        duration = format_duration(result.get("duration", 0))
+        # 텍스트 요약
+        try:
+            prompt = (
+                "다음 텍스트를 한국어로 간단히 요약해주세요:\n\n"
+                f"텍스트: {transcribed_text}\n"
+                "요약:"
+            )
+            response = hf_client.text_generation(
+                prompt=prompt,
+                max_new_tokens=150,
+                temperature=0.3,
+                top_p=0.9,
+                repetition_penalty=1.2
+            )
+            summary_text = str(response)
+            if "요약:" in summary_text:
+                summary_text = summary_text.split("요약:")[1].strip()
         except Exception as e:
+            print(f"요약 생성 중 오류: {str(e)}")
+            summary_text = "요약을 생성할 수 없습니다."
+        # 분석 정보 포맷팅
+        stats = f"""
         📊 분석 정보:
+        - 단어 수: {word_count}개
+        - 음성 길이: {duration}
+        - 생성 시간: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+        """
+        return [transcribed_text, summary_text, stats]
+    except Exception as e:
+        error_msg = f"처리 중 오류가 발생했습니다: {str(e)}"
+        return ["", error_msg, ""]
 # CSS 스타일
 css = """
 footer { visibility: hidden; }
+.gradio-container {
+    max-width: 1000px;
+    margin: auto;
+    padding: 20px;
+}
+.output-stats {
+    background-color: #f5f5f5;
+    padding: 10px;
+    border-radius: 5px;
+    font-family: monospace;
+}
 """
 # 파일 업로드 인터페이스
 file_transcribe = gr.Interface(
     fn=transcribe_summarize,
     inputs=[
+        gr.Audio(
+            sources="upload",
+            type="filepath",
+            label="오디오 파일"
+        ),
         gr.Radio(
             choices=["transcribe", "translate"],
+            label="작업 선택",
             value="transcribe"
+        )
     ],
     outputs=[
+        gr.Textbox(
+            label="변환된 텍스트",
+            lines=5,
+            placeholder="음성이 텍스트로 변환되어 여기에 표시됩니다..."
+        ),
+        gr.Textbox(
+            label="요약",
+            lines=3,
+            placeholder="텍스트 요약이 여기에 표시됩니다..."
+        ),
+        gr.Textbox(
+            label="분석 정보",
+            lines=4,
+            placeholder="분석 정보가 여기에 표시됩니다..."
+        )
     ],
+    title="🎤 받아쓰기 AI",
+    description="""
+    음성 파일을 업로드하거나 직접 녹음하여 텍스트로 변환하고 요약할 수 있습니다.
+    사용 방법:
+    1. 오디오 파일을 업로드하거나 마이크로 녹음하세요
+    2. 작업 유형을 선택하세요 (변환 또는 번역)
+    3. 변환 버튼을 클릭하세요
+    """,
+    article="developed by Claude",
+    examples=[],
+    cache_examples=False,
     flagging_mode="never"
 )
+# 마이크 녹음 인터페이스
+mic_transcribe = gr.Interface(
+    fn=transcribe_summarize,
+    inputs=[
+        gr.Audio(
+            sources="microphone",
+            type="filepath",
+            label="마이크 녹음"
+        ),
+        gr.Radio(
+            choices=["transcribe", "translate"],
+            label="작업 선택",
+            value="transcribe"
+        )
+    ],
+    outputs=[
+        gr.Textbox(
+            label="변환된 텍스트",
+            lines=5,
+            placeholder="음성이 텍스트로 변환되어 여기에 표시됩니다..."
+        ),
+        gr.Textbox(
+            label="요약",
+            lines=3,
+            placeholder="텍스트 요약이 여기에 표시됩니다..."
+        ),
+        gr.Textbox(
+            label="분석 정보",
+            lines=4,
+            placeholder="분석 정보가 여기에 표시됩니다..."
+        )
+    ],
+    title="🎤 받아쓰기 AI",
+    description="마이크로 음성을 녹음하여 텍스트로 변환하고 요약할 수 있습니다.",
+    flagging_mode="never",
+    css=css
+)
+# 메인 애플리케이션
+demo = gr.Blocks(theme="gradio/soft", css=css)
+with demo:
+    gr.TabbedInterface(
+        [file_transcribe, mic_transcribe],
+        ["오디오 파일", "마이크 녹음"]
+    )
+# 애플리케이션 실행
+demo.queue().launch(
+    share=False,
+    debug=True,
+    show_error=True,
+    ssr_mode=False
+)