Spaces:

acecalisto3
/

urld

Running

App Files Files Community

acecalisto3 committed on Dec 6, 2024

Commit

a514380

verified ·

1 Parent(s): 287afed

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -51

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ import mimetypes
 from tqdm import tqdm
 import logging
 import gradio as gr
 # Setup logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
@@ -113,21 +113,6 @@ def process_input(text: str) -> List[str]:
     processed_text = preprocess_bulk_text(text)
     return [item.strip() for item in processed_text.split(',') if item.strip()]
-# Add to the interface
-with gr.Row():
-    text_input = gr.Textbox(
-        label="Bulk Input",
-        placeholder="Enter items separated by line breaks, slashes, or other separators"
-    )
-    process_btn = gr.Button("Process")
-    output_list = gr.JSON(label="Processed Items")
-process_btn.click(
-    process_input,
-    inputs=[text_input],
-    outputs=[output_list]
-)
 def process_file(file):
     dataset = []
     with tempfile.TemporaryDirectory() as temp_dir:
@@ -308,44 +293,86 @@ def deploy_model(model, tokenizer):
         logger.error(f"Error deploying model: {e}")
         raise
-# Gradio Interface
-def gradio_interface(urls, file, text_input, model_name, batch_size, epochs):
-    try:
-        dataset_file = create_dataset(urls, file, text_input)
-        with open(dataset_file, 'r') as f:
-            dataset = json.load(f)
-        if not dataset:
-            return "Error: The dataset is empty. Please check your inputs."
-        model, tokenizer = train_model(model_name, dataset, batch_size, epochs)
-        deploy_model(model, tokenizer)
-        return dataset_file
-    except Exception as e:
-        logger.error(f"Error in gradio_interface: {e}")
-        return f"An error occurred: {str(e)}"
-# Gradio Interface Setup
-iface = gr.Interface(
-    fn=gradio_interface,
-    inputs=[
-        gr.Textbox(lines=5, label="Enter comma-separated URLs", placeholder="http://example.com, https://example.org"),
-        gr.File(label="Upload file (including zip files)", type="filepath"),
-        gr.Textbox(lines=10, label="Enter or paste large text", placeholder="Your text here..."),
-        gr.Textbox(label="Model name", value="distilbert-base-uncased"),
-        gr.Number(label="Batch size", value=8, precision=0, step=1),
-        gr.Number(label="Epochs", value=3, precision=0, step=1),
-    ],
-    outputs=gr.File(label="Download Combined Dataset"),
-    title="Dataset Creation and Model Training",
-    description="Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.",
-    theme="default",
-)
 # Launch the interface
 if __name__ == "__main__":
-    iface.launch()

 from tqdm import tqdm
 import logging
 import gradio as gr
+from typing import List, Dict
 # Setup logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
     processed_text = preprocess_bulk_text(text)
     return [item.strip() for item in processed_text.split(',') if item.strip()]
 def process_file(file):
     dataset = []
     with tempfile.TemporaryDirectory() as temp_dir:
         logger.error(f"Error deploying model: {e}")
         raise
+def gradio_interface(urls, file, text_input, model_name, batch_size, epochs):
+    """Build the dataset, train and deploy a model, and return the dataset path.
+
+    This is the callback behind the "Process and Train" button. It was removed
+    together with the old ``gr.Interface`` setup, but ``create_interface``
+    still wires the button to it, so it is restored here.
+
+    Args:
+        urls: Raw text from the URL textbox.
+        file: Path of the uploaded file, or None when nothing was uploaded.
+        text_input: Raw text pasted by the user.
+        model_name: Model identifier forwarded to ``train_model``.
+        batch_size: Training batch size forwarded to ``train_model``.
+        epochs: Number of training epochs forwarded to ``train_model``.
+
+    Returns:
+        The path of the combined dataset file, offered for download.
+
+    Raises:
+        gr.Error: If the dataset is empty or any pipeline step fails. The
+            output component is a ``gr.File``, so a plain error string cannot
+            be returned there; ``gr.Error`` surfaces the message in the UI.
+    """
+    try:
+        dataset_file = create_dataset(urls, file, text_input)
+        with open(dataset_file, 'r') as f:
+            dataset = json.load(f)
+        if not dataset:
+            raise gr.Error("Error: The dataset is empty. Please check your inputs.")
+        model, tokenizer = train_model(model_name, dataset, batch_size, epochs)
+        deploy_model(model, tokenizer)
+        return dataset_file
+    except gr.Error:
+        # Already a user-facing error; do not wrap it a second time.
+        raise
+    except Exception as e:
+        logger.exception(f"Error in gradio_interface: {e}")
+        raise gr.Error(f"An error occurred: {str(e)}") from e
+def create_interface():
+    """Create and return the Gradio Blocks interface.
+
+    The layout has two columns (data inputs on the left, model configuration
+    on the right), a "Process and Train" row that runs ``gradio_interface``,
+    and a preview row that shows how ``process_input`` splits the URL text.
+
+    Returns:
+        The assembled ``gr.Blocks`` object; the caller is expected to launch it.
+    """
+    with gr.Blocks(title="Dataset Creation and Model Training") as interface:
+        gr.Markdown("# Dataset Creation and Model Training")
+        gr.Markdown("Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.")
+        with gr.Row():
+            with gr.Column():
+                # URL input with auto-separation
+                urls_input = gr.Textbox(
+                    lines=5,
+                    label="Enter URLs",
+                    placeholder="Enter URLs separated by line breaks, commas, or slashes"
+                )
+                # File upload
+                file_input = gr.File(
+                    label="Upload file (including zip files)",
+                    type="filepath"
+                )
+                # Large text input
+                text_input = gr.Textbox(
+                    lines=10,
+                    label="Enter or paste large text",
+                    placeholder="Your text here..."
+                )
+            with gr.Column():
+                # Model configuration
+                model_name = gr.Textbox(
+                    label="Model name",
+                    value="distilbert-base-uncased"
+                )
+                batch_size = gr.Number(
+                    label="Batch size",
+                    value=8,
+                    precision=0,
+                    step=1
+                )
+                epochs = gr.Number(
+                    label="Epochs",
+                    value=3,
+                    precision=0,
+                    step=1
+                )
+        # Process button and output
+        with gr.Row():
+            process_btn = gr.Button("Process and Train")
+            download_output = gr.File(label="Download Combined Dataset")
+        # Event handlers: the input order must match gradio_interface's parameters
+        process_btn.click(
+            fn=gradio_interface,
+            inputs=[
+                urls_input,
+                file_input,
+                text_input,
+                model_name,
+                batch_size,
+                epochs
+            ],
+            outputs=download_output
+        )
+        # Preview processed URLs
+        with gr.Row():
+            preview_btn = gr.Button("Preview Processed URLs")
+            preview_output = gr.JSON(label="Processed Items")
+        preview_btn.click(
+            fn=process_input,
+            inputs=[urls_input],
+            outputs=[preview_output]
+        )
+    return interface
 # Launch the interface
 if __name__ == "__main__":
+    # Build the UI only when run as a script, so importing this module
+    # does not construct or launch the interface.
+    demo = create_interface()
+    demo.launch()