"""Gradio app that splits text into sentences with the SaT model.

Text can be typed directly or supplied as a ZIP archive of ``.txt`` files;
the segmentation result is returned as a JSON string.
"""

# Import order is preserved from the original file (``spaces`` first).
import spaces
import gradio as gr
from wtpsplit import SaT
import json
import zipfile
import io
import os

# Initialize the SaT model once at start-up, in half precision on the GPU.
sat = SaT("sat-3l-sm")
sat.half().to("cuda")


def _segment_zip(zip_file):
    """Segment every ``.txt`` member of an uploaded ZIP archive.

    Args:
        zip_file: Either a filesystem path or an object exposing the path
            through a ``name`` attribute (what ``gr.File`` hands over).

    Returns:
        A dict mapping each member name to ``{"segments": [...]}``, or to
        ``{"error": "..."}`` when that member could not be decoded as text.

    Raises:
        gr.Error: If the upload is not a readable ZIP archive.
    """
    results = {}
    # Accept both a plain path string and a file wrapper carrying ``.name``.
    archive_path = getattr(zip_file, "name", zip_file)
    try:
        with zipfile.ZipFile(archive_path, "r") as archive:
            for file_name in archive.namelist():
                if not file_name.lower().endswith(".txt"):
                    continue
                # Archives created on macOS carry binary metadata entries
                # under __MACOSX/ that mirror the real file names.
                if file_name.startswith("__MACOSX/"):
                    continue
                try:
                    # utf-8-sig also strips a leading BOM, which would
                    # otherwise end up inside the first segment.
                    content = archive.read(file_name).decode("utf-8-sig")
                except UnicodeDecodeError:
                    # One bad file must not abort the rest of the batch.
                    results[file_name] = {
                        "error": "File is not valid UTF-8 text."
                    }
                    continue
                results[file_name] = {"segments": sat.split(content)}
    except zipfile.BadZipFile as exc:
        raise gr.Error(
            "The uploaded file is not a valid ZIP archive."
        ) from exc
    return results


@spaces.GPU(duration=59)
def segment_text(input_text, zip_file):
    """Split the given text, or the text files in a ZIP, into sentences.

    Direct text input takes precedence; the ZIP file is only processed when
    no text was entered.

    Args:
        input_text: Text typed by the user; may be empty.
        zip_file: Uploaded ZIP archive (path or object with ``name``), or
            ``None`` when nothing was uploaded.

    Returns:
        A JSON string. For direct text the single key is ``"input_text"``;
        for a ZIP there is one key per ``.txt`` member. An empty JSON object
        is returned when neither input is provided.

    Raises:
        gr.Error: If the uploaded file is not a readable ZIP archive.
    """
    results = {}

    if input_text:
        # Process single text input
        results["input_text"] = {"segments": sat.split(input_text)}
    elif zip_file is not None:
        # Process zip file
        results = _segment_zip(zip_file)

    # Serialize the results; the gr.JSON output component receives this string.
    return json.dumps(results, indent=2)


# Create the Gradio interface
iface = gr.Interface(
    fn=segment_text,
    inputs=[
        gr.Textbox(lines=5, label="Input Text (Optional)"),
        gr.File(label="Upload ZIP file (Optional)", file_types=[".zip"]),
    ],
    outputs=gr.JSON(label="Segmented Text (JSON)"),
    title="Text Segmentation with SaT",
    description=(
        "This app uses the SaT (Segment any Text) model to split input text "
        "into sentences and return the result as JSON. You can input text "
        "directly or upload a ZIP file containing multiple .txt files. "
        "All credits to the respective author(s). "
        "Github: https://github.com/segment-any-text/wtpsplit/tree/main"
    ),
    examples=[
        ["This is a test This is another test.", None],
        [
            "Hello this is a test But this is different now "
            "Now the next one starts looool",
            None,
        ],
        [
            "The quick brown fox jumps over the lazy dog "
            "It was the best of times, it was the worst of times",
            None,
        ],
    ],
)

# Launch the app
iface.launch()