openfree commited on
Commit
de23405
·
verified ·
1 Parent(s): 72bffdd

Delete app-backup1.py

Browse files
Files changed (1) hide show
  1. app-backup1.py +0 -236
app-backup1.py DELETED
@@ -1,236 +0,0 @@
1
- import spaces
2
- import torch
3
- import gradio as gr
4
- from transformers import pipeline
5
- from huggingface_hub import InferenceClient
6
- import os
7
- import numpy as np
8
- from pydub import AudioSegment
9
- import tempfile
10
- import math
11
-
12
# --- Model / processing configuration ---
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
CHUNK_LENGTH = 10 * 60  # split long audio into 10-minute segments

# Use the first CUDA device when available, otherwise run on CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Whisper ASR pipeline (30-second internal chunking is Whisper's native window).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Hugging Face Inference client used for summarizing the transcript.
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.getenv("HF_TOKEN"),
)
32
-
33
def split_audio(audio_path, chunk_length=CHUNK_LENGTH):
    """Split an audio file into temporary WAV files of at most chunk_length seconds.

    Args:
        audio_path: Path of the source audio file (any format pydub can read).
        chunk_length: Maximum chunk duration in seconds.

    Returns:
        (paths, count): list of temp-file paths and the number of chunks.
        Callers are responsible for deleting the temp files.
    """
    audio = AudioSegment.from_file(audio_path)
    total_seconds = len(audio) / 1000  # pydub measures length in milliseconds
    num_chunks = math.ceil(total_seconds / chunk_length)

    step_ms = chunk_length * 1000
    chunk_paths = []
    for idx in range(num_chunks):
        begin = idx * step_ms
        end = min(begin + step_ms, len(audio))
        piece = audio[begin:end]

        # Persist each slice as its own WAV; delete=False so it survives the
        # context manager for downstream processing.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            piece.export(tmp.name, format='wav')
            chunk_paths.append(tmp.name)

    return chunk_paths, num_chunks
54
-
55
def process_chunk(chunk_path, task):
    """Run ASR on a single audio chunk and return its transcribed text.

    Args:
        chunk_path: Path to a temporary WAV file produced by split_audio();
            it is deleted before this function returns.
        task: Whisper task, "transcribe" or "translate".

    Returns:
        The recognized text for this chunk.
    """
    try:
        result = pipe(
            chunk_path,
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": task},
            return_timestamps=True,
        )
        return result["text"]
    finally:
        # Bug fix: the temp file previously leaked when pipe() raised;
        # always remove it, success or failure.
        os.unlink(chunk_path)
68
-
69
def update_progress(progress):
    """Return a human-readable status line for the given completion percentage."""
    return "์ฒ˜๋ฆฌ ์ค‘... {}% ์™„๋ฃŒ".format(progress)
72
-
73
@spaces.GPU
def transcribe_summarize(audio_input, task, progress=gr.Progress()):
    """Transcribe (or translate) an audio file chunk by chunk, then summarize it.

    Args:
        audio_input: Filepath of the uploaded/recorded audio, or None.
        task: Whisper task, "transcribe" or "translate".
        progress: Gradio progress tracker, updated per chunk.

    Returns:
        [transcribed_text, summary_text]; on processing failure returns
        ["", error_message] instead of raising.

    Raises:
        gr.Error: If no audio file was submitted.
    """
    if audio_input is None:
        raise gr.Error("์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ์ถœ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค!")

    chunks = []  # initialized early so the except-block cleanup always has it
    try:
        # Split the audio into manageable chunks
        chunks, num_chunks = split_audio(audio_input)
        progress(0, desc="์˜ค๋””์˜ค ํŒŒ์ผ ๋ถ„ํ•  ์™„๋ฃŒ")

        # Transcribe each chunk (process_chunk deletes its temp file when done)
        transcribed_texts = []
        for i, chunk in enumerate(chunks):
            chunk_text = process_chunk(chunk, task)
            transcribed_texts.append(chunk_text)
            progress((i + 1) / num_chunks, desc=f"์ฒญํฌ {i+1}/{num_chunks} ์ฒ˜๋ฆฌ ์ค‘")

        # Combine the chunk transcripts into the full text
        transcribed_text = " ".join(transcribed_texts)
        progress(0.9, desc="ํ…์ŠคํŠธ ๋ณ€ํ™˜ ์™„๋ฃŒ")

        # Summarize the transcript; summarization failure is non-fatal
        try:
            # Prompt for long text; only the first 3000 chars are sent
            prompt = f"""๋‹ค์Œ ํ…์ŠคํŠธ๋ฅผ ๊ฐ„๋‹จํžˆ ์š”์•ฝํ•ด์ฃผ์„ธ์š”:
ํ…์ŠคํŠธ: {transcribed_text[:3000]}...
์š”์•ฝ:"""

            response = hf_client.text_generation(
                model="CohereForAI/c4ai-command-r-plus-08-2024",
                prompt=prompt,
                max_new_tokens=250,
                temperature=0.3,
                top_p=0.9,
                repetition_penalty=1.2,
                stop_sequences=["\n", "ํ…์ŠคํŠธ:", "์š”์•ฝ:"]
            )

            # Strip everything up to the answer marker if the model echoed it
            summary_text = str(response)
            if "์š”์•ฝ:" in summary_text:
                summary_text = summary_text.split("์š”์•ฝ:")[1].strip()

        except Exception as e:
            print(f"์š”์•ฝ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
            summary_text = "์š”์•ฝ์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        progress(1.0, desc="์ฒ˜๋ฆฌ ์™„๋ฃŒ")
        return [transcribed_text, summary_text]

    except Exception as e:
        # Bug fix: chunk temp files leaked if transcription failed mid-loop.
        # Best-effort cleanup; already-deleted files raise OSError and are skipped.
        for leftover in chunks:
            try:
                os.unlink(leftover)
            except OSError:
                pass
        error_msg = f"์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
        return ["", error_msg]
125
-
126
# Custom CSS: hide the Gradio footer and style the progress bar,
# container, output text, and status display.
css = """
footer { visibility: hidden; }
.progress-bar { height: 15px; border-radius: 5px; }
.container { max-width: 1200px; margin: auto; padding: 20px; }
.output-text { font-size: 16px; line-height: 1.5; }
.status-display {
    background: #f0f0f0;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
"""
139
-
140
- # ํŒŒ์ผ ์—…๋กœ๋“œ ์ธํ„ฐํŽ˜์ด์Šค
141
- file_transcribe = gr.Interface(
142
- fn=transcribe_summarize,
143
- inputs=[
144
- gr.Audio(
145
- sources="upload",
146
- type="filepath",
147
- label="์˜ค๋””์˜ค ํŒŒ์ผ"
148
- ),
149
- gr.Radio(
150
- choices=["transcribe", "translate"],
151
- label="์ž‘์—… ์„ ํƒ",
152
- value="transcribe"
153
- )
154
- ],
155
- outputs=[
156
- gr.Textbox(
157
- label="๋ณ€ํ™˜๋œ ํ…์ŠคํŠธ",
158
- lines=10,
159
- max_lines=30,
160
- placeholder="์Œ์„ฑ์ด ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜๋˜์–ด ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...",
161
- elem_classes="output-text"
162
- ),
163
- gr.Textbox(
164
- label="์š”์•ฝ",
165
- lines=5,
166
- placeholder="ํ…์ŠคํŠธ ์š”์•ฝ์ด ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...",
167
- elem_classes="output-text"
168
- )
169
- ],
170
- title="๐ŸŽค ์Œ์„ฑ ๋ณ€ํ™˜ AI",
171
- description="""
172
- ๊ธด ์Œ์„ฑ ํŒŒ์ผ(1์‹œ๊ฐ„ ์ด์ƒ)๋„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
173
- ์ตœ๋Œ€ 3๊ฐœ์˜ ํŒŒ์ผ์„ ๋™์‹œ์— ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
174
- ์ฒ˜๋ฆฌ ์‹œ๊ฐ„์€ ํŒŒ์ผ ๊ธธ์ด์— ๋น„๋ก€ํ•˜์—ฌ ์ฆ๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.
175
- """,
176
- article="""
177
- ์‚ฌ์šฉ ๋ฐฉ๋ฒ•:
178
- 1. ์˜ค๋””์˜ค ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๊ฑฐ๋‚˜ ๋งˆ์ดํฌ๋กœ ๋…น์Œํ•˜์„ธ์š”
179
- 2. ์ž‘์—… ์œ ํ˜•์„ ์„ ํƒํ•˜์„ธ์š” (๋ณ€ํ™˜ ๋˜๋Š” ๋ฒˆ์—ญ)
180
- 3. ๋ณ€ํ™˜ ์‹œ์ž‘ ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜์„ธ์š”
181
- 4. ์ง„ํ–‰ ์ƒํ™ฉ์„ ํ™•์ธํ•˜์„ธ์š”
182
- """,
183
- examples=[],
184
- cache_examples=False,
185
- flagging_mode="never"
186
- )
187
-
188
# Microphone tab: record audio live, then transcribe and summarize it.
_mic_audio = gr.Audio(
    sources="microphone",
    type="filepath",
    label="๋งˆ์ดํฌ ๋…น์Œ",
)
_mic_task = gr.Radio(
    choices=["transcribe", "translate"],
    label="์ž‘์—… ์„ ํƒ",
    value="transcribe",
)
_mic_text_out = gr.Textbox(
    label="๋ณ€ํ™˜๋œ ํ…์ŠคํŠธ",
    lines=10,
    max_lines=30,
    elem_classes="output-text",
)
_mic_summary_out = gr.Textbox(
    label="์š”์•ฝ",
    lines=5,
    elem_classes="output-text",
)

mic_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[_mic_audio, _mic_task],
    outputs=[_mic_text_out, _mic_summary_out],
    title="๐ŸŽค ์Œ์„ฑ ๋ณ€ํ™˜ AI",
    description="๋งˆ์ดํฌ๋กœ ์Œ์„ฑ์„ ๋…น์Œํ•˜์—ฌ ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜ํ•˜๊ณ  ์š”์•ฝํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.",
    flagging_mode="never",
)
220
-
221
# Main application: both interfaces side by side in a tabbed layout.
demo = gr.Blocks(theme="gradio/soft", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["์˜ค๋””์˜ค ํŒŒ์ผ", "๋งˆ์ดํฌ ๋…น์Œ"],
    )

# Run with request queueing; max_threads caps concurrent jobs at 3.
demo.queue().launch(
    server_name="0.0.0.0",
    share=False,
    debug=True,
    ssr_mode=False,
    max_threads=3,
)