import spaces
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
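# Note: FILE_LIMIT_MB is declared as an intended upload cap but is not enforced anywhere below.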
device = 0 if torch.cuda.is_available() else "cpu"
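# Whisper ASR pipeline. chunk_length_s=30 splits long recordings into 30-second
# windows (Whisper's native context), so audio longer than 30 seconds can still be transcribed.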
pipe = pipeline(
task="automatic-speech-recognition",
model=MODEL_NAME,
chunk_length_s=30,
device=device,
)
# Use the Hugging Face InferenceClient for summarization and blog-post generation
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
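# Assumes an HF_TOKEN secret is set in the environment (e.g. as a Space secret);
# without it, Inference API calls may fail or be rate-limited.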
@spaces.GPU
def transcribe_summarize_and_blog(inputs, task):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # Convert speech to text
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]

    # Summarize the transcribed text
    try:
        summary = hf_client.summarization(text)
    except Exception as e:
        raise gr.Error(f"An error occurred during summarization: {e}")

    # Generate a blog post from the transcript
    try:
        blog_post = hf_client.text_generation(
            prompt=f"Please write a blog post based on the following content:\n{text}",
            max_new_tokens=500,  # text_generation expects max_new_tokens, not max_length
            temperature=0.7,
        )
    except Exception as e:
        raise gr.Error(f"An error occurred while generating the blog post: {e}")

    # text_generation returns a plain string; return a tuple so each value
    # maps onto one of the three output components below
    return text, summary.summary_text, blog_post
css = """
footer {
visibility: hidden;
}
"""
file_transcribe = gr.Interface(
    fn=transcribe_summarize_and_blog,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Blog post"),
    ],
    title="Dictation AI: speech-to-text, summarization, and automatic blog-post generation",
    flagging_mode="never",
)
mf_transcribe = gr.Interface(
    fn=transcribe_summarize_and_blog,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Blog post"),
    ],
    title="Dictation AI: speech-to-text, summarization, and automatic blog-post generation",
    flagging_mode="never",
)
# Define demo as a Gradio Blocks container; css is applied here rather than on a
# nested Interface so the footer rule takes effect for the whole app
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)
with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
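# queue() enables Gradio's request queue (recommended with @spaces.GPU);
# ssr_mode=False disables server-side rendering.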
demo.queue().launch(ssr_mode=False)