openfree committed on
Commit
d8083da
·
verified ·
1 Parent(s): 3291a15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -35
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import spaces
2
- import torch
3
-
4
  import gradio as gr
5
  from transformers import pipeline
6
  from huggingface_hub import InferenceClient
7
- import tempfile
8
  import os
9
 
10
  MODEL_NAME = "openai/whisper-large-v3-turbo"
@@ -13,6 +11,7 @@ FILE_LIMIT_MB = 1000
13
 
14
  device = 0 if torch.cuda.is_available() else "cpu"
15
 
 
16
  pipe = pipeline(
17
  task="automatic-speech-recognition",
18
  model=MODEL_NAME,
@@ -20,64 +19,88 @@ pipe = pipeline(
20
  device=device,
21
  )
22
 
23
- # Hugging Face InferenceClient 사용
24
- hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
 
 
 
25
 
26
  @spaces.GPU
27
- def transcribe_summarize(inputs, task):
28
- if inputs is None:
 
29
  raise gr.Error("오디오 파일이 제출되지 않았습니다! 요청을 제출하기 전에 오디오 파일을 업로드하거나 녹음해 주세요.")
30
-
31
  # μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜
32
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
 
 
 
 
 
 
33
 
34
- # 변환된 텍스트 요약 요청
35
  try:
36
- summary = hf_client.summarization(text)
37
- summary_text = summary["summary_text"] if summary and "summary_text" in summary else "요약할 수 없습니다."
38
  except Exception as e:
39
- summary_text = f"요약 중 오류가 발생했습니다: {e}"
40
 
41
- return {
42
- "transcribed_text": text,
43
- "summary": summary_text
44
- }
45
-
46
 
 
47
  css = """
48
- footer {
49
- visibility: hidden;
50
- }
51
  """
52
 
 
53
  file_transcribe = gr.Interface(
54
  fn=transcribe_summarize,
55
  inputs=[
56
  gr.Audio(sources="upload", type="filepath", label="오디오 파일"),
57
- gr.Radio(["transcribe", "translate"], label="작업", value="transcribe"),
 
 
 
 
58
  ],
59
- outputs=["text", "text"], # 변환된 텍스트, 요약 출력
60
- title="받아쓰기 AI: 음성을 텍스트 변환, 요약 자동 생성",
61
- flagging_mode="never",
 
 
 
62
  )
63
 
64
- mf_transcribe = gr.Interface(css=css,
 
65
  fn=transcribe_summarize,
66
  inputs=[
67
  gr.Audio(sources="microphone", type="filepath"),
68
- gr.Radio(["transcribe", "translate"], label="작업", value="transcribe"),
 
 
 
 
 
 
 
 
69
  ],
70
- outputs=["text", "text"], # 변환된 텍스트, 요약 출력
71
- title="받아쓰기 AI: 음성을 텍스트 변환, 요약 자동 생성",
72
  flagging_mode="never",
 
73
  )
74
 
75
- # demo 변수를 Gradio Blocks 컨테이너로 정의
76
  demo = gr.Blocks(theme="Nymbo/Nymbo_Theme")
77
-
78
- # 탭 순서를 "오디오 파일"이 먼저, "마이크"가 뒤에 오도록 설정
79
  with demo:
80
- gr.TabbedInterface([file_transcribe, mf_transcribe], ["오디오 파일", "마이크"])
81
-
82
- demo.queue().launch(ssr_mode=False)
 
83
 
 
 
 
1
  import spaces
2
+ import torch
 
3
  import gradio as gr
4
  from transformers import pipeline
5
  from huggingface_hub import InferenceClient
 
6
  import os
7
 
8
  MODEL_NAME = "openai/whisper-large-v3-turbo"
 
11
 
12
  device = 0 if torch.cuda.is_available() else "cpu"
13
 
14
+ # Whisper 파이프라인 초기화
15
  pipe = pipeline(
16
  task="automatic-speech-recognition",
17
  model=MODEL_NAME,
 
19
  device=device,
20
  )
21
 
22
+ # Hugging Face 추론 클라이언트 설정
23
+ hf_client = InferenceClient(
24
+ "CohereForAI/c4ai-command-r-plus-08-2024",
25
+ token=os.getenv("HF_TOKEN")
26
+ )
27
 
28
  @spaces.GPU
29
+ def transcribe_summarize(audio_input, task):
30
+ # 입력 검증
31
+ if audio_input is None:
32
  raise gr.Error("오디오 파일이 제출되지 않았습니다! 요청을 제출하기 전에 오디오 파일을 업로드하거나 녹음해 주세요.")
33
+
34
  # μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜
35
+ result = pipe(
36
+ audio_input,
37
+ batch_size=BATCH_SIZE,
38
+ generate_kwargs={"task": task},
39
+ return_timestamps=True
40
+ )
41
+ transcribed_text = result["text"]
42
 
43
+ # 텍스트 요약
44
  try:
45
+ summary = hf_client.summarization(transcribed_text)
46
+ summary_text = summary.get("summary_text", "요약할 수 없습니다.")
47
  except Exception as e:
48
+ summary_text = f"요약 중 오류가 발생했습니다: {str(e)}"
49
 
50
+ # 결과를 리스트로 반환
51
+ return [transcribed_text, summary_text]
 
 
 
52
 
53
+ # CSS 스타일
54
  css = """
55
+ footer { visibility: hidden; }
 
 
56
  """
57
 
58
+ # 파일 업로드 인터페이스
59
  file_transcribe = gr.Interface(
60
  fn=transcribe_summarize,
61
  inputs=[
62
  gr.Audio(sources="upload", type="filepath", label="오디오 파일"),
63
+ gr.Radio(
64
+ choices=["transcribe", "translate"],
65
+ label="작업",
66
+ value="transcribe"
67
+ ),
68
  ],
69
+ outputs=[
70
+ gr.Textbox(label="변환된 텍스트"),
71
+ gr.Textbox(label="요약")
72
+ ],
73
+ title="받아쓰기 AI: 음성을 텍스트로 변환하고 요약하기",
74
+ flagging_mode="never"
75
  )
76
 
77
+ # 마이크 녹음 인터페이스
78
+ mic_transcribe = gr.Interface(
79
  fn=transcribe_summarize,
80
  inputs=[
81
  gr.Audio(sources="microphone", type="filepath"),
82
+ gr.Radio(
83
+ choices=["transcribe", "translate"],
84
+ label="작업",
85
+ value="transcribe"
86
+ ),
87
+ ],
88
+ outputs=[
89
+ gr.Textbox(label="변환된 텍스트"),
90
+ gr.Textbox(label="요약")
91
  ],
92
+ title="받아쓰기 AI: 음성을 텍스트로 변환하고 요약하기",
 
93
  flagging_mode="never",
94
+ css=css
95
  )
96
 
97
+ # 메인 애플리케이션
98
  demo = gr.Blocks(theme="Nymbo/Nymbo_Theme")
 
 
99
  with demo:
100
+ gr.TabbedInterface(
101
+ [file_transcribe, mic_transcribe],
102
+ ["오디오 파일", "마이크"]
103
+ )
104
 
105
+ # 애플리케이션 실행
106
+ demo.queue().launch(ssr_mode=False)