Create app.py
app.py
ADDED
@@ -0,0 +1,182 @@
import spaces
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os
import json
from datetime import datetime
import time

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000

device = 0 if torch.cuda.is_available() else "cpu"

# Directory for saved transcription results
HISTORY_DIR = "transcription_history"
os.makedirs(HISTORY_DIR, exist_ok=True)

# Initialize the Whisper pipeline
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Set up the Hugging Face inference client
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.getenv("HF_TOKEN")
)

def save_transcription(transcribed_text, summary_text):
    """Save the transcription result as a JSON file"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{HISTORY_DIR}/transcription_{timestamp}.json"

    data = {
        "timestamp": timestamp,
        "transcribed_text": transcribed_text,
        "summary": summary_text
    }

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    return filename

def process_long_audio(audio_input, chunk_duration=30):
    """Process long audio files in chunks"""
    # Audio chunking logic to be implemented
    pass
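
# A minimal sketch of how the chunking above could be done by hand, assuming the
# audio has already been decoded into a sample array with a known sampling rate.
# The helper name and its (samples, sampling_rate) signature are illustrative only;
# the Whisper pipeline defined above already chunks internally via chunk_length_s=30.
def _split_audio_sketch(samples, sampling_rate, chunk_duration=30):
    chunk_size = int(chunk_duration * sampling_rate)
    # Yield consecutive slices of at most chunk_duration seconds each.
    for start in range(0, len(samples), chunk_size):
        yield samples[start:start + chunk_size]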

def detect_language(text):
    """Detect the language of the text"""
    # Language detection logic to be implemented
    pass
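
# A minimal sketch of how detect_language could be filled in without extra
# dependencies: treat the text as Korean when Hangul syllables make up a
# noticeable share of its letters, otherwise fall back to English. The helper
# name and the two-way ko/en heuristic are illustrative assumptions only.
def _detect_language_sketch(text):
    hangul = sum(1 for ch in text if "\uac00" <= ch <= "\ud7a3")
    letters = sum(1 for ch in text if ch.isalpha())
    return "ko" if letters and hangul / letters > 0.3 else "en"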

def get_word_count(text):
    """Count the number of words in the text"""
    return len(text.split())

def get_speaking_time(audio_duration):
    """Convert the audio duration to HH:MM:SS format"""
    return time.strftime("%H:%M:%S", time.gmtime(audio_duration))

@spaces.GPU
def transcribe_summarize(audio_input, task, save_result=False, enable_translation=False):
    if audio_input is None:
        raise gr.Error("No audio file was submitted!")

    start_time = time.time()

    # Convert speech to text
    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True
    )
    transcribed_text = result["text"]

    # Collect analysis information
    stats = {
        "word_count": get_word_count(transcribed_text),
        "processing_time": f"{time.time() - start_time:.2f}s",
        "audio_duration": get_speaking_time(result.get("duration", 0)),
        "language": detect_language(transcribed_text)
    }

    # Summarize the text
    try:
        prompt = f"""Please give a brief summary of the text below:
Text: {transcribed_text}
Summary:"""

        response = hf_client.text_generation(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            prompt=prompt,
            max_new_tokens=150,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.2,
            stop_sequences=["\n", "Text:", "Summary:"]
        )

        if isinstance(response, str):
            summary_text = response
        else:
            summary_text = response.generated_text if hasattr(response, 'generated_text') else str(response)

        if "Summary:" in summary_text:
            summary_text = summary_text.split("Summary:")[1].strip()

        if not summary_text:
            summary_text = "Could not generate a summary."

    except Exception as e:
        print(f"Error while generating summary: {str(e)}")
        summary_text = "Could not generate a summary. Please try again in a moment."

    # Save the results
    if save_result:
        saved_file = save_transcription(transcribed_text, summary_text)
        print(f"Results saved to: {saved_file}")

    # Translation feature (optional)
    translated_text = ""
    if enable_translation and task != "translate":
        try:
            # Translation logic to be implemented
            pass
        except Exception as e:
            translated_text = "An error occurred during translation."

    return [
        transcribed_text,
        summary_text,
        gr.update(value=f"""
Analysis information:
- Word count: {stats['word_count']}
- Processing time: {stats['processing_time']}
- Audio duration: {stats['audio_duration']}
- Detected language: {stats['language']}
"""),
        translated_text if enable_translation else None
    ]

# CSS styles
css = """
footer { visibility: hidden; }
.gradio-container { max-width: 1200px; margin: auto; }
.audio-stats { background-color: #f0f0f0; padding: 10px; border-radius: 5px; }
"""

# File upload interface
file_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="Task",
            value="transcribe"
        ),
        gr.Checkbox(label="Save results", value=False),
        gr.Checkbox(label="Enable translation", value=False)
    ],
    outputs=[
        gr.Textbox(label="Transcribed text", lines=5),
        gr.Textbox(label="Summary", lines=3),
        gr.Textbox(label="Analysis information", lines=4),
        gr.Textbox(label="Translation result", lines=5, visible=False)
    ],
    title="Dictation AI: Transcribe and Summarize Speech",
    description="Upload an audio file or record one directly to transcribe it to text and get a summary.",
    flagging_mode="never"
)

# The microphone recording interface and main application code remain the same...