openfree commited on
Commit
81072da
Β·
verified Β·
1 Parent(s): de23405

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -1
app.py CHANGED
@@ -1,2 +1,242 @@
 
 
 
 
 
1
  import os
2
- exec(os.environ.get('APP'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ from huggingface_hub import InferenceClient
6
  import os
7
+ import numpy as np
8
+ from pydub import AudioSegment
9
+ import tempfile
10
+ import math
11
+
12
+ # Hugging Face 토큰 μ„€μ •
13
# Hugging Face access token, read from the environment. It is passed to both
# the Whisper pipeline and the inference client below, so fail fast without it.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000  # NOTE(review): not referenced in the visible part of this file
CHUNK_LENGTH = 10 * 60  # uploads are split into 10-minute pieces (value in seconds)

# First CUDA device when available, otherwise CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Whisper speech-recognition pipeline.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
    token=HF_TOKEN,
)

# Hugging Face inference client used by translate_to_korean().
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=HF_TOKEN,
)
38
+
39
def split_audio(audio_path, chunk_length=CHUNK_LENGTH):
    """Split an audio file into consecutive WAV chunks stored as temp files.

    Args:
        audio_path: Path of the audio file; loaded with
            ``AudioSegment.from_file``.
        chunk_length: Maximum length of one chunk, in seconds.

    Returns:
        A ``(chunk_paths, num_chunks)`` tuple: the paths of the temporary WAV
        files in playback order, and how many chunks were produced. The caller
        owns the files and is responsible for deleting them.

    Raises:
        Exception: Whatever loading or exporting the audio raises. Chunk files
            already written are removed before the error propagates.
    """
    audio = AudioSegment.from_file(audio_path)
    duration = len(audio) / 1000  # len(audio) is in milliseconds; convert to seconds
    num_chunks = math.ceil(duration / chunk_length)

    chunk_paths = []
    try:
        for index in range(num_chunks):
            start_ms = index * chunk_length * 1000
            end_ms = min((index + 1) * chunk_length * 1000, len(audio))

            # Reserve a unique path, then close our handle before the export
            # writes to it, so the file is never open twice at the same time.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
                chunk_path = handle.name
            chunk_paths.append(chunk_path)

            audio[start_ms:end_ms].export(chunk_path, format="wav")
    except Exception:
        # Do not leave partially produced chunks behind on failure.
        for path in chunk_paths:
            try:
                os.unlink(path)
            except OSError:
                pass
        raise

    return chunk_paths, num_chunks
60
+
61
def translate_to_korean(text):
    """Translate English text into Korean using the hosted text-generation model.

    Args:
        text: English text to translate.

    Returns:
        The translated text. If the request or post-processing fails, the
        error is printed and the original ``text`` is returned unchanged
        (best-effort behaviour). Empty or whitespace-only input is returned
        as-is without contacting the model.
    """
    # Nothing to translate (for example a silent chunk): skip the remote call.
    if not text or not text.strip():
        return text

    answer_marker = "ν•œκ΅­μ–΄:"
    try:
        prompt = (
            "λ‹€μŒ μ˜μ–΄ ν…μŠ€νŠΈλ₯Ό μžμ—°μŠ€λŸ¬μš΄ ν•œκ΅­μ–΄λ‘œ λ²ˆμ—­ν•΄μ£Όμ„Έμš”.\n"
            f"μ˜μ–΄: {text}\n"
            f"{answer_marker}"
        )

        response = hf_client.text_generation(
            prompt=prompt,
            max_new_tokens=4000,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.2,
            # Stop as soon as the model starts a new prompt section or line.
            stop=["μ˜μ–΄:", answer_marker, "\n"],
        )

        translated_text = str(response)
        # If the model echoed the answer marker, keep only what follows it.
        if answer_marker in translated_text:
            translated_text = translated_text.split(answer_marker)[1].strip()

        return translated_text
    except Exception as e:
        print(f"λ²ˆμ—­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
        return text
85
+
86
def process_chunk(chunk_path, task):
    """Transcribe one audio chunk and optionally translate it to Korean.

    Args:
        chunk_path: Path of the temporary chunk file. The file is deleted
            before this function returns, whether or not processing succeeds.
        task: ``"translate"`` transcribes the chunk as English and then
            translates the text with ``translate_to_korean``; any other value
            transcribes the chunk as Korean.

    Returns:
        The transcribed (and, for ``"translate"``, translated) text.

    Raises:
        Exception: Any error from the pipeline is printed and re-raised.
    """
    # Whisper always transcribes here; translation is a separate LLM step.
    generate_kwargs = {
        "task": "transcribe",
        "language": "en" if task == "translate" else "ko",
        "forced_decoder_ids": None,
    }

    try:
        result = pipe(
            inputs=chunk_path,
            batch_size=BATCH_SIZE,
            generate_kwargs=generate_kwargs,
            return_timestamps=True,
        )
        text = result["text"]

        if task == "translate":
            text = translate_to_korean(text)

        return text
    except Exception as e:
        print(f"청크 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
        raise
    finally:
        # Remove the temp file on failure too, so failed chunks do not pile up.
        try:
            os.unlink(chunk_path)
        except OSError:
            pass
119
+
120
@spaces.GPU
def transcribe_audio(audio_input, task, progress=gr.Progress()):
    """Transcribe (or transcribe and translate) an audio file chunk by chunk.

    Args:
        audio_input: Path of the uploaded or recorded audio file, or ``None``
            when nothing was submitted.
        task: ``"transcribe"`` or ``"translate"``; forwarded to
            ``process_chunk``.
        progress: Gradio progress tracker updated after each chunk.

    Returns:
        The texts of all successfully processed chunks joined by spaces. If
        processing fails as a whole, a Korean error message string is returned
        instead of raising.

    Raises:
        gr.Error: If ``audio_input`` is ``None``.
    """
    if audio_input is None:
        raise gr.Error("μ˜€λ””μ˜€ 파일이 μ œμΆœλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€!")

    chunks = []
    try:
        chunks, num_chunks = split_audio(audio_input)
        progress(0, desc="μ˜€λ””μ˜€ 파일 λΆ„ν•  μ™„λ£Œ")

        transcribed_texts = []
        for i, chunk in enumerate(chunks):
            # A failing chunk is logged and skipped so the rest still completes.
            try:
                chunk_text = process_chunk(chunk, task)
                transcribed_texts.append(chunk_text)
                progress((i + 1) / num_chunks, desc=f"청크 {i+1}/{num_chunks} 처리 쀑")
            except Exception as e:
                print(f"청크 {i+1} 처리 μ‹€νŒ¨: {str(e)}")
                continue

        if not transcribed_texts:
            raise RuntimeError("λͺ¨λ“  청크 μ²˜λ¦¬μ— μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")

        transcribed_text = " ".join(transcribed_texts)
        progress(1.0, desc="처리 μ™„λ£Œ")
        return transcribed_text

    except Exception as e:
        error_msg = f"μŒμ„± 처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
        print(f"상세 였λ₯˜: {str(e)}")
        return error_msg
    finally:
        # Remove any chunk file that is still on disk (e.g. after a failure).
        for chunk in chunks:
            try:
                if os.path.exists(chunk):
                    os.unlink(chunk)
            except OSError:
                pass
150
+
151
+ # CSS μŠ€νƒ€μΌ
152
# Custom CSS handed to gr.Blocks: hides the footer and styles the output area.
css = """
footer { visibility: hidden; }
.progress-bar { height: 15px; border-radius: 5px; }
.container { max-width: 1200px; margin: auto; padding: 20px; }
.output-text { font-size: 16px; line-height: 1.5; }
.status-display {
    background: #f0f0f0;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
"""
164
+
165
+ # 파일 μ—…λ‘œλ“œ μΈν„°νŽ˜μ΄μŠ€
166
# Texts shared by the upload tab and the microphone tab.
_APP_TITLE = "🎀 μŒμ„± λ³€ν™˜/λ²ˆμ—­ AI 'λ°›μ•„μ“°κΈ°'(Badassgi)"
_TASK_LABEL = "μž‘μ—… 선택"
_TASK_INFO = "λ³€ν™˜: ν•œκΈ€ μŒμ„± β†’ ν•œκΈ€ ν…μŠ€νŠΈ | λ²ˆμ—­: μ˜μ–΄ μŒμ„± β†’ ν•œκΈ€ ν…μŠ€νŠΈ"
_OUTPUT_LABEL = "λ³€ν™˜/λ²ˆμ—­λœ ν…μŠ€νŠΈ"
_FILE_DESCRIPTION = """
ν•œκΈ€ μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ±°λ‚˜ μ˜μ–΄ μŒμ„±μ„ ν•œκΈ€λ‘œ λ²ˆμ—­ν•  수 μžˆμŠ΅λ‹ˆλ‹€.
- λ³€ν™˜: ν•œκΈ€ μŒμ„± β†’ ν•œκΈ€ ν…μŠ€νŠΈ
- λ²ˆμ—­: μ˜μ–΄ μŒμ„± β†’ ν•œκΈ€ ν…μŠ€νŠΈ
"""


def _build_interface(audio_source, audio_label, description, placeholder=None,
                     **interface_kwargs):
    """Build one transcription tab around ``transcribe_audio``.

    Args:
        audio_source: Value for ``gr.Audio(sources=...)``.
        audio_label: Label shown on the audio input.
        description: Description text shown under the title.
        placeholder: Optional placeholder for the output textbox.
        **interface_kwargs: Extra keyword arguments passed to ``gr.Interface``.

    Returns:
        A new ``gr.Interface`` with its own input and output components.
    """
    return gr.Interface(
        fn=transcribe_audio,
        inputs=[
            gr.Audio(
                sources=audio_source,
                type="filepath",
                label=audio_label,
            ),
            gr.Radio(
                choices=["transcribe", "translate"],
                label=_TASK_LABEL,
                value="transcribe",
                info=_TASK_INFO,
            ),
        ],
        outputs=gr.Textbox(
            label=_OUTPUT_LABEL,
            lines=10,
            max_lines=30,
            placeholder=placeholder,
            elem_classes="output-text",
        ),
        title=_APP_TITLE,
        description=description,
        flagging_mode="never",
        **interface_kwargs,
    )


# File-upload interface.
file_transcribe = _build_interface(
    "upload",
    "μ˜€λ””μ˜€ 파일",
    _FILE_DESCRIPTION,
    placeholder="μŒμ„±μ΄ ν…μŠ€νŠΈλ‘œ λ³€ν™˜λ˜μ–΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€...",
    examples=[],
    cache_examples=False,
)

# Microphone-recording interface.
mic_transcribe = _build_interface(
    "microphone",
    "마이크 λ…ΉμŒ",
    "마이크둜 μŒμ„±μ„ λ…ΉμŒν•˜μ—¬ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ±°λ‚˜ λ²ˆμ—­ν•  수 μžˆμŠ΅λ‹ˆλ‹€.",
)
225
+
226
+ # 메인 μ• ν”Œλ¦¬μΌ€μ΄μ…˜
227
# Main application: both interfaces are shown as tabs inside one themed Blocks app.
demo = gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["μ˜€λ””μ˜€ 파일", "마이크 λ…ΉμŒ"],
    )

# Start the app with request queueing enabled.
# NOTE(review): share=True and debug=True are kept from the original; confirm
# both are intended for the deployment target.
demo.queue().launch(
    server_name="0.0.0.0",
    share=True,
    debug=True,
    ssr_mode=False,
    max_threads=3,
    show_error=True,
)