import logging
import os

# NOTE(review): `spaces` is kept ahead of `torch`, as in the original order;
# ZeroGPU expects it to be imported before CUDA is initialised - confirm
# before reordering.
import spaces
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient


# Speech-recognition checkpoint loaded into the transformers pipeline.
MODEL_NAME = "openai/whisper-large-v3-turbo"
# Batch size handed to the ASR pipeline on every call.
BATCH_SIZE = 8
# Upload size limit in megabytes (not referenced elsewhere in the visible file).
FILE_LIMIT_MB = 1000

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = 0 if torch.cuda.is_available() else "cpu"


# Shared automatic-speech-recognition pipeline, created once at import time.
# chunk_length_s=30 asks the pipeline to process the audio in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


# Hugging Face inference client used to summarise transcriptions.
# The access token is read from the HF_TOKEN environment variable; when it is
# unset, os.getenv returns None and no token is passed.
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.getenv("HF_TOKEN"),
)


logger = logging.getLogger(__name__)

# NOTE(review): the Korean literals in this block were restored from
# mis-decoded UTF-8 (they had been rendered through a Greek code page, with
# stray line breaks inside the literals) - confirm against the original file.
_SUMMARY_MODEL = "CohereForAI/c4ai-command-r-plus-08-2024"
_TEXT_MARKER = "텍스트:"
_SUMMARY_MARKER = "요약:"
_STOP_SEQUENCES = ("\n", _TEXT_MARKER, _SUMMARY_MARKER)
_SUMMARY_EMPTY = "요약을 생성할 수 없습니다."
_SUMMARY_FAILED = "요약을 생성할 수 없습니다. 잠시 후 다시 시도해주세요."


def _build_summary_prompt(transcribed_text):
    """Return the Korean summarisation prompt wrapping *transcribed_text*."""
    return (
        "아래 텍스트를 간단히 요약해주세요:\n\n"
        f"{_TEXT_MARKER} {transcribed_text}\n\n"
        f"{_SUMMARY_MARKER}"
    )


def _clean_summary(raw_summary):
    """Strip whitespace and prompt markers from a raw model completion.

    Returns an empty string when nothing usable is left.
    """
    text = raw_summary.strip()

    # A matched stop sequence may be returned as the tail of the completion;
    # peel such markers off first so they cannot wipe out the summary below.
    trimmed = True
    while trimmed:
        trimmed = False
        for marker in (_TEXT_MARKER, _SUMMARY_MARKER):
            if text.endswith(marker):
                text = text[: -len(marker)].rstrip()
                trimmed = True

    # If the model echoed the "summary:" marker, keep the text that follows it.
    if _SUMMARY_MARKER in text:
        text = text.split(_SUMMARY_MARKER)[1].strip()
    return text


def _summarize(transcribed_text):
    """Ask the remote LLM for a short summary; may return an empty string."""
    response = hf_client.text_generation(
        model=_SUMMARY_MODEL,
        prompt=_build_summary_prompt(transcribed_text),
        max_new_tokens=150,
        temperature=0.3,
        top_p=0.9,
        repetition_penalty=1.2,
        stop_sequences=list(_STOP_SEQUENCES),
    )

    # Plain calls return a str; tolerate a details-style object defensively.
    if isinstance(response, str):
        raw_summary = response
    elif hasattr(response, "generated_text"):
        raw_summary = response.generated_text
    else:
        raw_summary = str(response)
    return _clean_summary(raw_summary)


@spaces.GPU
def transcribe_summarize(audio_input, task):
    """Transcribe an audio file and produce a short summary of the result.

    Args:
        audio_input: Audio to process as accepted by ``pipe`` (the Gradio
            inputs in this file pass a file path); ``None`` when nothing was
            submitted.
        task: Whisper generation task forwarded to the pipeline; the UI in
            this file offers ``"transcribe"`` and ``"translate"``.

    Returns:
        A two-item list ``[transcribed_text, summary_text]``.

    Raises:
        gr.Error: If ``audio_input`` is ``None``.
    """
    if audio_input is None:
        raise gr.Error("오디오 파일이 제출되지 않았습니다!")

    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    transcribed_text = result["text"]

    if not transcribed_text.strip():
        # Nothing was recognised, so there is nothing to summarise; skip the
        # remote call instead of asking the model to summarise an empty text.
        summary_text = _SUMMARY_EMPTY
    else:
        try:
            summary_text = _summarize(transcribed_text) or _SUMMARY_EMPTY
        except Exception:
            # Deliberately broad: summarisation is best-effort and must not
            # prevent the transcription from being returned to the user.
            logger.exception("요약 생성 중 오류 발생")
            summary_text = _SUMMARY_FAILED

    print(f"변환된 텍스트: {transcribed_text}")
    print(f"생성된 요약: {summary_text}")

    return [transcribed_text, summary_text]


# Custom stylesheet for the app: hides the page footer.
css = """
footer { visibility: hidden; }
"""


# Interface for uploaded audio files: an audio upload plus a task selector
# feed transcribe_summarize, whose two results fill the two text boxes.
# NOTE(review): the Korean labels were restored from mis-decoded UTF-8 -
# confirm against the original file.
file_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="오디오 파일"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="작업",
            value="transcribe",
        ),
    ],
    outputs=[
        gr.Textbox(label="변환된 텍스트", lines=5),
        gr.Textbox(label="요약", lines=3),
    ],
    title="받아쓰기 AI: 음성을 텍스트로 변환하고 요약하기",
    flagging_mode="never",
)


# Interface for microphone recordings: same task selector and outputs as the
# upload interface, but the audio comes from the microphone.
# NOTE(review): the Korean labels were restored from mis-decoded UTF-8 -
# confirm against the original file.
mic_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="작업",
            value="transcribe",
        ),
    ],
    outputs=[
        gr.Textbox(label="변환된 텍스트", lines=5),
        gr.Textbox(label="요약", lines=3),
    ],
    title="받아쓰기 AI: 음성을 텍스트로 변환하고 요약하기",
    flagging_mode="never",
    css=css,
)


# Top-level app: both interfaces are shown as tabs inside one themed Blocks
# container ("audio file" and "microphone").
# NOTE(review): the Korean tab names were restored from mis-decoded UTF-8 -
# confirm against the original file.
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["오디오 파일", "마이크"],
    )

# Enable request queuing and start the server with SSR mode turned off.
demo.queue().launch(ssr_mode=False)