# badassgi / app.py
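"""Gradio Space that transcribes audio with Whisper and summarizes the transcript with Command R+."""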
import spaces
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
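# pipeline() takes a CUDA device index (0 = first GPU) or the string "cpu"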
device = 0 if torch.cuda.is_available() else "cpu"
# Initialize the Whisper ASR pipeline
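# chunk_length_s=30 turns on chunked long-form decoding: audio longer than
# 30 s is split into windows that are transcribed in batches and re-joined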
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
# Set up the Hugging Face Inference client for summarization
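# Assumes an HF_TOKEN secret is configured for this Space; without it, calls
# to the hosted model may be rejected or heavily rate-limited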
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.getenv("HF_TOKEN")
)
@spaces.GPU
def transcribe_summarize(audio_input, task):
    if audio_input is None:
        raise gr.Error("μ˜€λ””μ˜€ 파일이 μ œμΆœλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€!")

    # Convert speech to text
    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True
    )
    transcribed_text = result["text"]

    # Summarize the transcript
    try:
        # Build the summarization prompt
        prompt = f"""μ•„λž˜ ν…μŠ€νŠΈλ₯Ό κ°„λ‹¨νžˆ μš”μ•½ν•΄μ£Όμ„Έμš”:
ν…μŠ€νŠΈ: {transcribed_text}
μš”μ•½:"""

        # Call the inference API (the model was pinned when the client was created)
        response = hf_client.text_generation(
            prompt=prompt,
            max_new_tokens=150,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.2,
            stop_sequences=["\n", "ν…μŠ€νŠΈ:", "μš”μ•½:"]
        )

        # text_generation returns a plain string unless details/streaming are requested
        if isinstance(response, str):
            summary_text = response
        else:
            summary_text = response.generated_text if hasattr(response, 'generated_text') else str(response)

        # Strip any echoed prompt prefix
        if "μš”μ•½:" in summary_text:
            summary_text = summary_text.split("μš”μ•½:")[1].strip()
        if not summary_text:
            summary_text = "μš”μ•½μ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€."
    except Exception as e:
        print(f"Error while generating summary: {str(e)}")  # debug log
        summary_text = "μš”μ•½μ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”."

    print(f"Transcribed text: {transcribed_text}")  # debug log
    print(f"Generated summary: {summary_text}")  # debug log
    return [transcribed_text, summary_text]
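# Minimal local check (assuming a sample.wav exists next to this file):
#   text, summary = transcribe_summarize("sample.wav", "transcribe")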
# CSS styling: hide the default Gradio footer
css = """
footer { visibility: hidden; }
"""
# File-upload interface
file_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="μ˜€λ””μ˜€ 파일"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="μž‘μ—…",
            value="transcribe"
        ),
    ],
    outputs=[
        gr.Textbox(label="λ³€ν™˜λœ ν…μŠ€νŠΈ", lines=5),
        gr.Textbox(label="μš”μ•½", lines=3)
    ],
    title="λ°›μ•„μ“°κΈ° AI: μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ³  μš”μ•½ν•˜κΈ°",
    flagging_mode="never"
)
# Microphone-recording interface
mic_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="μž‘μ—…",
            value="transcribe"
        ),
    ],
    outputs=[
        gr.Textbox(label="λ³€ν™˜λœ ν…μŠ€νŠΈ", lines=5),
        gr.Textbox(label="μš”μ•½", lines=3)
    ],
    title="λ°›μ•„μ“°κΈ° AI: μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ³  μš”μ•½ν•˜κΈ°",
    flagging_mode="never",
    css=css
)
# Main application: the two interfaces as tabs
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["μ˜€λ””μ˜€ 파일", "마이크"]
    )
# Run the application
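# queue() enables request queuing (commonly required for GPU-backed Spaces);
# ssr_mode=False disables Gradio's server-side rendering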
demo.queue().launch(ssr_mode=False)