muhtasham committed on
Commit
2f8a4bc
·
1 Parent(s): 0802c30
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -5,6 +5,8 @@ from transformers import pipeline
5
  import subprocess
6
  from loguru import logger
7
  import datetime
 
 
8
 
9
  # Configure loguru
10
  logger.add("app.log", rotation="500 MB", level="DEBUG")
@@ -30,6 +32,17 @@ def generate_srt(chunks):
30
  srt_content.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
31
  return "".join(srt_content)
32
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Check if ffmpeg is installed
34
  def check_ffmpeg():
35
  try:
@@ -102,22 +115,20 @@ def transcribe(inputs, return_timestamps, generate_subs, batch_size, chunk_lengt
102
  logger.info(f"Successfully processed transcription with {len(chunks)} chunks")
103
 
104
  # Generate subtitles if requested
105
- srt_content = None
106
  if generate_subs and chunks:
107
  logger.info("Generating SRT subtitles")
108
  srt_content = generate_srt(chunks)
 
109
  logger.info("SRT subtitles generated successfully")
110
 
111
- return formatted_result, srt_content
112
  except Exception as e:
113
  logger.exception(f"Error during transcription: {str(e)}")
114
  raise gr.Error(f"Failed to transcribe audio: {str(e)}")
115
 
116
  demo = gr.Blocks(theme=gr.themes.Ocean())
117
 
118
- # Create flagging callback with custom options
119
- flagging_callback = gr.CSVLogger()
120
-
121
  # Define interfaces first
122
  mf_transcribe = gr.Interface(
123
  fn=transcribe,
@@ -139,12 +150,8 @@ mf_transcribe = gr.Interface(
139
  " of arbitrary length."
140
  ),
141
  flagging_mode="manual",
142
- flagging_options=[
143
- "Text Issue",
144
- "Timestamp Issue",
145
- "Missing Content",
146
- "Other Issue"
147
- ]
148
  )
149
 
150
  file_transcribe = gr.Interface(
@@ -167,12 +174,8 @@ file_transcribe = gr.Interface(
167
  " of arbitrary length."
168
  ),
169
  flagging_mode="manual",
170
- flagging_options=[
171
- "Text Issue",
172
- "Timestamp Issue",
173
- "Missing Content",
174
- "Other Issue"
175
- ]
176
  )
177
 
178
  # Then set up the demo with the interfaces
 
5
  import subprocess
6
  from loguru import logger
7
  import datetime
8
+ import tempfile
9
+ import os
10
 
11
  # Configure loguru
12
  logger.add("app.log", rotation="500 MB", level="DEBUG")
 
32
  srt_content.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
33
  return "".join(srt_content)
34
 
35
def save_srt_to_file(srt_content):
    """Save SRT content to a temporary file and return the file path.

    Args:
        srt_content: The subtitle text to write. Any falsy value
            (``None`` or an empty string) means there is nothing to save.

    Returns:
        The path of the newly created ``.srt`` file, or ``None`` when
        ``srt_content`` is falsy.

    The file is created with ``delete=False``, so it is NOT removed
    automatically; whoever consumes the returned path is responsible for
    cleaning it up.
    """
    if not srt_content:
        return None

    # The context manager guarantees the handle is closed even if encoding
    # or writing fails. The file is opened in binary mode and the text is
    # encoded explicitly so the bytes on disk are identical on every
    # platform (no newline translation).
    with tempfile.NamedTemporaryFile(suffix='.srt', delete=False) as temp_file:
        temp_file.write(srt_content.encode('utf-8'))
    return temp_file.name
45
+
46
  # Check if ffmpeg is installed
47
  def check_ffmpeg():
48
  try:
 
115
  logger.info(f"Successfully processed transcription with {len(chunks)} chunks")
116
 
117
  # Generate subtitles if requested
118
+ srt_file = None
119
  if generate_subs and chunks:
120
  logger.info("Generating SRT subtitles")
121
  srt_content = generate_srt(chunks)
122
+ srt_file = save_srt_to_file(srt_content)
123
  logger.info("SRT subtitles generated successfully")
124
 
125
+ return formatted_result, srt_file
126
  except Exception as e:
127
  logger.exception(f"Error during transcription: {str(e)}")
128
  raise gr.Error(f"Failed to transcribe audio: {str(e)}")
129
 
130
  demo = gr.Blocks(theme=gr.themes.Ocean())
131
 
 
 
 
132
  # Define interfaces first
133
  mf_transcribe = gr.Interface(
134
  fn=transcribe,
 
150
  " of arbitrary length."
151
  ),
152
  flagging_mode="manual",
153
+ flagging_options=["👍 Good", "👎 Bad"],
154
+ flagging_dir="flagged_data"
 
 
 
 
155
  )
156
 
157
  file_transcribe = gr.Interface(
 
174
  " of arbitrary length."
175
  ),
176
  flagging_mode="manual",
177
+ flagging_options=["👍 Good", "👎 Bad"],
178
+ flagging_dir="flagged_data"
 
 
 
 
179
  )
180
 
181
  # Then set up the demo with the interfaces