openfree committed on
Commit
629129d
·
verified ·
1 Parent(s): a957d39

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -0
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ from huggingface_hub import InferenceClient
6
+ import os
7
+ import json
8
+ from datetime import datetime
9
+ import time
10
+
11
# Speech-recognition checkpoint and inference settings.
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000

# Use CUDA device 0 when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Directory that receives saved transcription results; created at import time.
HISTORY_DIR = "transcription_history"
os.makedirs(HISTORY_DIR, exist_ok=True)

# Whisper speech-recognition pipeline, built once when the module is imported.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Hugging Face inference client used for summarisation; the access token is
# read from the HF_TOKEN environment variable (None when it is not set).
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.environ.get("HF_TOKEN"),
)
34
+
35
def save_transcription(transcribed_text, summary_text):
    """Save a transcription and its summary as a JSON file in HISTORY_DIR.

    The file is named ``transcription_<YYYYmmdd_HHMMSS>.json``. If a file with
    that name already exists (two saves within the same second), a numeric
    suffix is appended instead of overwriting the earlier result.

    Args:
        transcribed_text: Transcribed text to store.
        summary_text: Summary to store next to the transcription.

    Returns:
        Path of the JSON file that was written.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    data = {
        "timestamp": timestamp,
        "transcribed_text": transcribed_text,
        "summary": summary_text,
    }

    # Re-ensure the target directory in case it was removed after start-up.
    os.makedirs(HISTORY_DIR, exist_ok=True)

    filename = f"{HISTORY_DIR}/transcription_{timestamp}.json"
    attempt = 0
    while True:
        try:
            # Mode "x" raises FileExistsError instead of truncating an
            # existing file, so an earlier result is never overwritten.
            with open(filename, "x", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
        except FileExistsError:
            attempt += 1
            filename = f"{HISTORY_DIR}/transcription_{timestamp}_{attempt}.json"
        else:
            return filename
50
+
51
def process_long_audio(audio_input, chunk_duration=30):
    """Placeholder for processing a long audio file in chunks.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """
    # TODO: implement the audio processing logic.
    return None
55
+
56
def detect_language(text):
    """Placeholder for detecting the language of *text*.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    # TODO: implement the language detection logic.
    return None
60
+
61
def get_word_count(text):
    """Return the number of whitespace-separated words in *text*.

    Args:
        text: String to count words in; ``None`` and ``""`` are accepted.

    Returns:
        The word count, or 0 when *text* is ``None`` or empty.
    """
    if not text:
        return 0
    return len(text.split())
64
+
65
def get_speaking_time(audio_duration):
    """Format a duration given in seconds as ``HH:MM:SS``.

    Args:
        audio_duration: Length in seconds (int or float; fractions are
            truncated). ``None`` and negative values are treated as 0.

    Returns:
        The duration as a zero-padded ``HH:MM:SS`` string. Hours are not
        wrapped at 24, so 90000 seconds yields ``"25:00:00"``.
    """
    # time.gmtime(None) would format the current clock time, so handle a
    # missing duration explicitly.
    if audio_duration is None:
        return "00:00:00"

    total_seconds = max(0, int(audio_duration))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
68
+
69
def _estimate_audio_duration(result):
    """Return the audio length in seconds for a pipeline *result* dict."""
    duration = result.get("duration")
    if duration is not None:
        return duration

    # NOTE(review): with return_timestamps=True the result is expected to hold
    # a "chunks" list of {"timestamp": (start, end), ...} items; the largest
    # end time is used as an approximation of the audio length. Confirm
    # against the transformers ASR pipeline documentation.
    end_times = []
    for chunk in result.get("chunks") or []:
        timestamp = chunk.get("timestamp") or ()
        if len(timestamp) == 2 and timestamp[1] is not None:
            end_times.append(timestamp[1])
    return max(end_times, default=0)


def _summarize_text(transcribed_text):
    """Return a short summary of *transcribed_text*, or a fallback message."""
    # Nothing to summarise: skip the remote call.
    if not transcribed_text.strip():
        return "요약을 생성할 수 없습니다."

    prompt = (
        "아래 텍스트를 간단히 요약해주세요:\n"
        f"텍스트: {transcribed_text}\n"
        "요약:"
    )

    # Summarisation is best-effort: any failure is reported on stdout and
    # replaced by a fallback message instead of failing the whole request.
    try:
        response = hf_client.text_generation(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            prompt=prompt,
            max_new_tokens=150,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.2,
            stop_sequences=["\n", "텍스트:", "요약:"],
        )

        if isinstance(response, str):
            summary_text = response
        elif hasattr(response, "generated_text"):
            summary_text = response.generated_text
        else:
            summary_text = str(response)

        # Keep only what follows the "요약:" marker if the model echoed it.
        if "요약:" in summary_text:
            summary_text = summary_text.split("요약:")[1].strip()

        if not summary_text:
            summary_text = "요약을 생성할 수 없습니다."
    except Exception as e:
        print(f"요약 생성 중 오류 발생: {e}")
        summary_text = "요약을 생성할 수 없습니다. 잠시 후 다시 시도해주세요."

    return summary_text


@spaces.GPU
def transcribe_summarize(audio_input, task, save_result=False, enable_translation=False):
    """Transcribe an audio input, summarise the text and report statistics.

    Args:
        audio_input: Audio passed straight to the Whisper pipeline.
        task: Value forwarded as the ``task`` generation argument; the UI
            offers "transcribe" and "translate".
        save_result: When true, the transcription and summary are written to
            a JSON file through ``save_transcription``.
        enable_translation: When true, the fourth output is the translated
            text (currently always empty); otherwise it is ``None``.

    Returns:
        A four-item list: transcribed text, summary text, a ``gr.update``
        carrying the statistics text, and the translation output.

    Raises:
        gr.Error: If *audio_input* is ``None``.
    """
    if audio_input is None:
        raise gr.Error("오디오 파일이 제출되지 않았습니다!")

    start_time = time.time()

    # Speech to text.
    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    transcribed_text = result["text"]

    # Collect the analysis information shown to the user.
    stats = {
        "word_count": get_word_count(transcribed_text),
        "processing_time": f"{time.time() - start_time:.2f}초",
        "audio_duration": get_speaking_time(_estimate_audio_duration(result)),
        # Show a label instead of "None" when no language could be detected.
        "language": detect_language(transcribed_text) or "알 수 없음",
    }

    summary_text = _summarize_text(transcribed_text)

    if save_result:
        saved_file = save_transcription(transcribed_text, summary_text)
        print(f"결과가 저장되었습니다: {saved_file}")

    # TODO: translation is not implemented yet, so the translated text is
    # always empty regardless of enable_translation and task.
    translated_text = ""

    stats_text = (
        "\n"
        "📊 분석 정보:\n"
        f"- 단어 수: {stats['word_count']}개\n"
        f"- 처리 시간: {stats['processing_time']}\n"
        f"- 음성 길이: {stats['audio_duration']}\n"
        f"- 감지된 언어: {stats['language']}\n"
    )

    return [
        transcribed_text,
        summary_text,
        gr.update(value=stats_text),
        translated_text if enable_translation else None,
    ]
150
+
151
# Custom CSS for the Gradio UI.
css = """
footer { visibility: hidden; }
.gradio-container { max-width: 1200px; margin: auto; }
.audio-stats { background-color: #f0f0f0; padding: 10px; border-radius: 5px; }
"""

# File-upload interface: the four inputs map, in order, to the parameters of
# transcribe_summarize, and the four outputs to the items of its return list.
file_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="오디오 파일"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="작업",
            value="transcribe",
        ),
        gr.Checkbox(label="결과 저장하기", value=False),
        gr.Checkbox(label="번역 활성화", value=False),
    ],
    outputs=[
        gr.Textbox(label="변환된 텍스트", lines=5),
        gr.Textbox(label="요약", lines=3),
        gr.Textbox(label="분석 정보", lines=4),
        # Created hidden; nothing in this block makes it visible.
        gr.Textbox(label="번역 결과", lines=5, visible=False),
    ],
    title="받아쓰기 AI: 음성을 텍스트로 변환하고 요약하기",
    description="음성 파일을 업로드하거나 직접 녹음하여 텍스트로 변환하고 요약할 수 있습니다.",
    flagging_mode="never",
)
181
+
182
+ # 마이크 녹음 인터페이스와 메인 애플리케이션 코드는 동일하게 유지...