whisper-tg

Paused

App Files Community

muhtasham committed on Mar 21

Commit

0147bd5

1 Parent(s): 7c39690

WIP

Browse files

Files changed (1) hide show

app.py +17 -17

app.py CHANGED Viewed

@@ -58,8 +58,12 @@ def transcribe(inputs):
             logger.error("No transcription text in response")
             raise gr.Error("No transcription text in response")
-        # Format timestamps as JSON if available
-        timestamps = []
         if "chunks" in result:
             logger.info(f"Processing {len(result['chunks'])} chunks")
             for i, chunk in enumerate(result["chunks"]):
@@ -70,10 +74,9 @@ def transcribe(inputs):
                     text = chunk.get("text", "").strip()
                     if start_time is not None and end_time is not None:
-                        timestamps.append({
-                            "start": f"{start_time:.2f}s",
-                            "end": f"{end_time:.2f}s",
-                            "text": text
                         })
                     else:
                         logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
@@ -81,15 +84,14 @@ def transcribe(inputs):
                     logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
                     continue
         else:
-            logger.info("No chunks found, using single timestamp")
-            timestamps.append({
-                "start": "0.00s",
-                "end": "N/A",
-                "text": result["text"]
             })
-        logger.info(f"Successfully processed transcription with {len(timestamps)} timestamps")
-        return result["text"], timestamps
     except Exception as e:
         logger.exception(f"Error during transcription: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: {str(e)}")
@@ -102,8 +104,7 @@ mf_transcribe = gr.Interface(
         gr.Audio(sources="microphone", type="filepath"),
     ],
     outputs=[
-        gr.Textbox(label="Transcription", lines=10),
-        gr.JSON(label="Timestamps", open=True),
     ],
     title="Whisper Large V3 Turbo: Transcribe Audio",
     description=(
@@ -124,8 +125,7 @@ file_transcribe = gr.Interface(
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
     ],
     outputs=[
-        gr.Textbox(label="Transcription", lines=10),
-        gr.JSON(label="Timestamps", open=True),
     ],
     title="Whisper Large V3: Transcribe Audio",
     description=(

             logger.error("No transcription text in response")
             raise gr.Error("No transcription text in response")
+        # Format response as JSON
+        formatted_result = {
+            "text": result["text"],
+            "chunks": []
+        }
         if "chunks" in result:
             logger.info(f"Processing {len(result['chunks'])} chunks")
             for i, chunk in enumerate(result["chunks"]):
                     text = chunk.get("text", "").strip()
                     if start_time is not None and end_time is not None:
+                        formatted_result["chunks"].append({
+                            "text": text,
+                            "timestamp": [start_time, end_time]
                         })
                     else:
                         logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
                     logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
                     continue
         else:
+            logger.info("No chunks found, using single chunk")
+            formatted_result["chunks"].append({
+                "text": result["text"],
+                "timestamp": [0.0, None]
             })
+        logger.info(f"Successfully processed transcription with {len(formatted_result['chunks'])} chunks")
+        return formatted_result
     except Exception as e:
         logger.exception(f"Error during transcription: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: {str(e)}")
         gr.Audio(sources="microphone", type="filepath"),
     ],
     outputs=[
+        gr.JSON(label="Transcription", open=True),
     ],
     title="Whisper Large V3 Turbo: Transcribe Audio",
     description=(
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
     ],
     outputs=[
+        gr.JSON(label="Transcription", open=True),
     ],
     title="Whisper Large V3: Transcribe Audio",
     description=(