muhtasham committed on
Commit
0147bd5
·
1 Parent(s): 7c39690
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -58,8 +58,12 @@ def transcribe(inputs):
58
  logger.error("No transcription text in response")
59
  raise gr.Error("No transcription text in response")
60
 
61
- # Format timestamps as JSON if available
62
- timestamps = []
 
 
 
 
63
  if "chunks" in result:
64
  logger.info(f"Processing {len(result['chunks'])} chunks")
65
  for i, chunk in enumerate(result["chunks"]):
@@ -70,10 +74,9 @@ def transcribe(inputs):
70
  text = chunk.get("text", "").strip()
71
 
72
  if start_time is not None and end_time is not None:
73
- timestamps.append({
74
- "start": f"{start_time:.2f}s",
75
- "end": f"{end_time:.2f}s",
76
- "text": text
77
  })
78
  else:
79
  logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
@@ -81,15 +84,14 @@ def transcribe(inputs):
81
  logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
82
  continue
83
  else:
84
- logger.info("No chunks found, using single timestamp")
85
- timestamps.append({
86
- "start": "0.00s",
87
- "end": "N/A",
88
- "text": result["text"]
89
  })
90
 
91
- logger.info(f"Successfully processed transcription with {len(timestamps)} timestamps")
92
- return result["text"], timestamps
93
  except Exception as e:
94
  logger.exception(f"Error during transcription: {str(e)}")
95
  raise gr.Error(f"Failed to transcribe audio: {str(e)}")
@@ -102,8 +104,7 @@ mf_transcribe = gr.Interface(
102
  gr.Audio(sources="microphone", type="filepath"),
103
  ],
104
  outputs=[
105
- gr.Textbox(label="Transcription", lines=10),
106
- gr.JSON(label="Timestamps", open=True),
107
  ],
108
  title="Whisper Large V3 Turbo: Transcribe Audio",
109
  description=(
@@ -124,8 +125,7 @@ file_transcribe = gr.Interface(
124
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
125
  ],
126
  outputs=[
127
- gr.Textbox(label="Transcription", lines=10),
128
- gr.JSON(label="Timestamps", open=True),
129
  ],
130
  title="Whisper Large V3: Transcribe Audio",
131
  description=(
 
58
  logger.error("No transcription text in response")
59
  raise gr.Error("No transcription text in response")
60
 
61
+ # Format response as JSON
62
+ formatted_result = {
63
+ "text": result["text"],
64
+ "chunks": []
65
+ }
66
+
67
  if "chunks" in result:
68
  logger.info(f"Processing {len(result['chunks'])} chunks")
69
  for i, chunk in enumerate(result["chunks"]):
 
74
  text = chunk.get("text", "").strip()
75
 
76
  if start_time is not None and end_time is not None:
77
+ formatted_result["chunks"].append({
78
+ "text": text,
79
+ "timestamp": [start_time, end_time]
 
80
  })
81
  else:
82
  logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
 
84
  logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
85
  continue
86
  else:
87
+ logger.info("No chunks found, using single chunk")
88
+ formatted_result["chunks"].append({
89
+ "text": result["text"],
90
+ "timestamp": [0.0, None]
 
91
  })
92
 
93
+ logger.info(f"Successfully processed transcription with {len(formatted_result['chunks'])} chunks")
94
+ return formatted_result
95
  except Exception as e:
96
  logger.exception(f"Error during transcription: {str(e)}")
97
  raise gr.Error(f"Failed to transcribe audio: {str(e)}")
 
104
  gr.Audio(sources="microphone", type="filepath"),
105
  ],
106
  outputs=[
107
+ gr.JSON(label="Transcription", open=True),
 
108
  ],
109
  title="Whisper Large V3 Turbo: Transcribe Audio",
110
  description=(
 
125
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
126
  ],
127
  outputs=[
128
+ gr.JSON(label="Transcription", open=True),
 
129
  ],
130
  title="Whisper Large V3: Transcribe Audio",
131
  description=(