Update app.py
app.py
CHANGED
@@ -1,10 +1,8 @@
 import spaces
-import torch
-
+import torch
 import gradio as gr
 from transformers import pipeline
 from huggingface_hub import InferenceClient
-import tempfile
 import os

 MODEL_NAME = "openai/whisper-large-v3-turbo"
@@ -13,6 +11,7 @@ FILE_LIMIT_MB = 1000

 device = 0 if torch.cuda.is_available() else "cpu"

+# Initialize the Whisper pipeline
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
@@ -20,64 +19,88 @@ pipe = pipeline(
     device=device,
 )

-# Hugging Face
-hf_client = InferenceClient(
+# Set up the Hugging Face inference client
+hf_client = InferenceClient(
+    "CohereForAI/c4ai-command-r-plus-08-2024",
+    token=os.getenv("HF_TOKEN")
+)

 @spaces.GPU
-def transcribe_summarize(
+def transcribe_summarize(audio_input, task):
+    # Validate the input
+    if audio_input is None:
         raise gr.Error("No audio file was submitted! Please upload or record an audio file before submitting your request.")
+
     # Convert speech to text
+    result = pipe(
+        audio_input,
+        batch_size=BATCH_SIZE,
+        generate_kwargs={"task": task},
+        return_timestamps=True
+    )
+    transcribed_text = result["text"]

+    # Summarize the text
     try:
-        summary = hf_client.summarization(
-        summary_text = summary
+        summary = hf_client.summarization(transcribed_text)
+        summary_text = summary.get("summary_text", "Unable to generate a summary.")
     except Exception as e:
-        summary_text = f"An error occurred during summarization: {e}"
+        summary_text = f"An error occurred during summarization: {str(e)}"

-        "summary": summary_text
-    }
+    # Return the results as a list
+    return [transcribed_text, summary_text]

+# CSS styling
 css = """
-footer {
-    visibility: hidden;
-}
+footer { visibility: hidden; }
 """

+# File upload interface
 file_transcribe = gr.Interface(
     fn=transcribe_summarize,
     inputs=[
         gr.Audio(sources="upload", type="filepath", label="Audio File"),
-        gr.Radio(
+        gr.Radio(
+            choices=["transcribe", "translate"],
+            label="Task",
+            value="transcribe"
+        ),
     ],
-    outputs=[
+    outputs=[
+        gr.Textbox(label="Transcribed Text"),
+        gr.Textbox(label="Summary")
+    ],
+    title="Dictation AI: Convert Speech to Text and Summarize",
+    flagging_mode="never"
 )

-mic_transcribe = gr.Interface(
+# Microphone recording interface
+mic_transcribe = gr.Interface(
     fn=transcribe_summarize,
     inputs=[
         gr.Audio(sources="microphone", type="filepath"),
-        gr.Radio(
+        gr.Radio(
+            choices=["transcribe", "translate"],
+            label="Task",
+            value="transcribe"
+        ),
+    ],
+    outputs=[
+        gr.Textbox(label="Transcribed Text"),
+        gr.Textbox(label="Summary")
     ],
-    title="Dictation AI: Speech-to-Text Conversion with Automatic Summaries",
+    title="Dictation AI: Convert Speech to Text and Summarize",
     flagging_mode="never",
+    css=css
 )

+# Main application
 demo = gr.Blocks(theme="Nymbo/Nymbo_Theme")

-# Put the "Audio File" tab first and the "Microphone" tab second
 with demo:
-    gr.TabbedInterface(
+    gr.TabbedInterface(
+        [file_transcribe, mic_transcribe],
+        ["Audio File", "Microphone"]
+    )

+# Run the application
+demo.queue().launch(ssr_mode=False)
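For quick verification after this change, a minimal client-side sketch using gradio_client can call the deployed Space. The Space id ("your-username/your-space") and the audio file name ("sample.wav") are placeholders, not part of this commit; "/predict" is the usual default endpoint name for the first gr.Interface, and view_api() confirms the actual one.

from gradio_client import Client, handle_file

# Placeholder Space id -- replace with the actual repo id of this Space.
client = Client("your-username/your-space")

# List the exposed endpoints and their parameters; with two gr.Interface apps
# sharing one function, the file-upload endpoint is normally "/predict".
client.view_api()

# Call the transcribe-and-summarize function with a local test recording
# ("sample.wav" is an illustrative file name, not part of the commit).
transcript, summary = client.predict(
    handle_file("sample.wav"),  # audio input
    "transcribe",               # task passed to the Whisper pipeline
    api_name="/predict",
)
print(transcript)
print(summary)

Because the summarization step calls hf_client.summarization(...) with the token read from os.getenv("HF_TOKEN") at startup, the Space needs an HF_TOKEN secret configured; otherwise the except branch returns the error message instead of a summary.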