Spaces:
Sleeping
Sleeping
WIP
Browse files
app.py
CHANGED
@@ -58,8 +58,12 @@ def transcribe(inputs):
|
|
58 |
logger.error("No transcription text in response")
|
59 |
raise gr.Error("No transcription text in response")
|
60 |
|
61 |
-
# Format
|
62 |
-
|
|
|
|
|
|
|
|
|
63 |
if "chunks" in result:
|
64 |
logger.info(f"Processing {len(result['chunks'])} chunks")
|
65 |
for i, chunk in enumerate(result["chunks"]):
|
@@ -70,10 +74,9 @@ def transcribe(inputs):
|
|
70 |
text = chunk.get("text", "").strip()
|
71 |
|
72 |
if start_time is not None and end_time is not None:
|
73 |
-
|
74 |
-
"
|
75 |
-
"
|
76 |
-
"text": text
|
77 |
})
|
78 |
else:
|
79 |
logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
|
@@ -81,15 +84,14 @@ def transcribe(inputs):
|
|
81 |
logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
|
82 |
continue
|
83 |
else:
|
84 |
-
logger.info("No chunks found, using single
|
85 |
-
|
86 |
-
"
|
87 |
-
"
|
88 |
-
"text": result["text"]
|
89 |
})
|
90 |
|
91 |
-
logger.info(f"Successfully processed transcription with {len(
|
92 |
-
return
|
93 |
except Exception as e:
|
94 |
logger.exception(f"Error during transcription: {str(e)}")
|
95 |
raise gr.Error(f"Failed to transcribe audio: {str(e)}")
|
@@ -102,8 +104,7 @@ mf_transcribe = gr.Interface(
|
|
102 |
gr.Audio(sources="microphone", type="filepath"),
|
103 |
],
|
104 |
outputs=[
|
105 |
-
gr.
|
106 |
-
gr.JSON(label="Timestamps", open=True),
|
107 |
],
|
108 |
title="Whisper Large V3 Turbo: Transcribe Audio",
|
109 |
description=(
|
@@ -124,8 +125,7 @@ file_transcribe = gr.Interface(
|
|
124 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
125 |
],
|
126 |
outputs=[
|
127 |
-
gr.
|
128 |
-
gr.JSON(label="Timestamps", open=True),
|
129 |
],
|
130 |
title="Whisper Large V3: Transcribe Audio",
|
131 |
description=(
|
|
|
58 |
logger.error("No transcription text in response")
|
59 |
raise gr.Error("No transcription text in response")
|
60 |
|
61 |
+
# Format response as JSON
|
62 |
+
formatted_result = {
|
63 |
+
"text": result["text"],
|
64 |
+
"chunks": []
|
65 |
+
}
|
66 |
+
|
67 |
if "chunks" in result:
|
68 |
logger.info(f"Processing {len(result['chunks'])} chunks")
|
69 |
for i, chunk in enumerate(result["chunks"]):
|
|
|
74 |
text = chunk.get("text", "").strip()
|
75 |
|
76 |
if start_time is not None and end_time is not None:
|
77 |
+
formatted_result["chunks"].append({
|
78 |
+
"text": text,
|
79 |
+
"timestamp": [start_time, end_time]
|
|
|
80 |
})
|
81 |
else:
|
82 |
logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
|
|
|
84 |
logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
|
85 |
continue
|
86 |
else:
|
87 |
+
logger.info("No chunks found, using single chunk")
|
88 |
+
formatted_result["chunks"].append({
|
89 |
+
"text": result["text"],
|
90 |
+
"timestamp": [0.0, None]
|
|
|
91 |
})
|
92 |
|
93 |
+
logger.info(f"Successfully processed transcription with {len(formatted_result['chunks'])} chunks")
|
94 |
+
return formatted_result
|
95 |
except Exception as e:
|
96 |
logger.exception(f"Error during transcription: {str(e)}")
|
97 |
raise gr.Error(f"Failed to transcribe audio: {str(e)}")
|
|
|
104 |
gr.Audio(sources="microphone", type="filepath"),
|
105 |
],
|
106 |
outputs=[
|
107 |
+
gr.JSON(label="Transcription", open=True),
|
|
|
108 |
],
|
109 |
title="Whisper Large V3 Turbo: Transcribe Audio",
|
110 |
description=(
|
|
|
125 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
126 |
],
|
127 |
outputs=[
|
128 |
+
gr.JSON(label="Transcription", open=True),
|
|
|
129 |
],
|
130 |
title="Whisper Large V3: Transcribe Audio",
|
131 |
description=(
|