acecalisto3 committed on
Commit
a514380
·
verified ·
1 Parent(s): 287afed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -51
app.py CHANGED
@@ -14,7 +14,7 @@ import mimetypes
14
  from tqdm import tqdm
15
  import logging
16
  import gradio as gr
17
-
18
  # Setup logging
19
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
  logger = logging.getLogger(__name__)
@@ -113,21 +113,6 @@ def process_input(text: str) -> List[str]:
113
  processed_text = preprocess_bulk_text(text)
114
  return [item.strip() for item in processed_text.split(',') if item.strip()]
115
 
116
- # Add to the interface
117
- with gr.Row():
118
- text_input = gr.Textbox(
119
- label="Bulk Input",
120
- placeholder="Enter items separated by line breaks, slashes, or other separators"
121
- )
122
- process_btn = gr.Button("Process")
123
- output_list = gr.JSON(label="Processed Items")
124
-
125
- process_btn.click(
126
- process_input,
127
- inputs=[text_input],
128
- outputs=[output_list]
129
- )
130
-
131
  def process_file(file):
132
  dataset = []
133
  with tempfile.TemporaryDirectory() as temp_dir:
@@ -308,44 +293,86 @@ def deploy_model(model, tokenizer):
308
  logger.error(f"Error deploying model: {e}")
309
  raise
310
 
311
- # Gradio Interface
312
- def gradio_interface(urls, file, text_input, model_name, batch_size, epochs):
313
- try:
314
- dataset_file = create_dataset(urls, file, text_input)
315
-
316
- with open(dataset_file, 'r') as f:
317
- dataset = json.load(f)
318
-
319
- if not dataset:
320
- return "Error: The dataset is empty. Please check your inputs."
321
-
322
- model, tokenizer = train_model(model_name, dataset, batch_size, epochs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
- deploy_model(model, tokenizer)
 
 
 
325
 
326
- return dataset_file
 
 
 
 
327
 
328
- except Exception as e:
329
- logger.error(f"Error in gradio_interface: {e}")
330
- return f"An error occurred: {str(e)}"
331
-
332
- # Gradio Interface Setup
333
- iface = gr.Interface(
334
- fn=gradio_interface,
335
- inputs=[
336
- gr.Textbox(lines=5, label="Enter comma-separated URLs", placeholder="http://example.com, https://example.org"),
337
- gr.File(label="Upload file (including zip files)", type="filepath"),
338
- gr.Textbox(lines=10, label="Enter or paste large text", placeholder="Your text here..."),
339
- gr.Textbox(label="Model name", value="distilbert-base-uncased"),
340
- gr.Number(label="Batch size", value=8, precision=0, step=1),
341
- gr.Number(label="Epochs", value=3, precision=0, step=1),
342
- ],
343
- outputs=gr.File(label="Download Combined Dataset"),
344
- title="Dataset Creation and Model Training",
345
- description="Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.",
346
- theme="default",
347
- )
348
 
349
  # Launch the interface
350
  if __name__ == "__main__":
351
- iface.launch()
 
 
14
  from tqdm import tqdm
15
  import logging
16
  import gradio as gr
17
from typing import List, Dict
18
  # Setup logging
19
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
  logger = logging.getLogger(__name__)
 
113
  processed_text = preprocess_bulk_text(text)
114
  return [item.strip() for item in processed_text.split(',') if item.strip()]
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  def process_file(file):
117
  dataset = []
118
  with tempfile.TemporaryDirectory() as temp_dir:
 
293
  logger.error(f"Error deploying model: {e}")
294
  raise
295
 
296
def create_interface():
    """Create and return the Gradio Blocks interface.

    The layout has two columns (data inputs on the left, model settings on
    the right), a "Process and Train" button wired to ``gradio_interface``,
    and a "Preview Processed URLs" button wired to ``process_input``.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for launching it.
    """
    with gr.Blocks(title="Dataset Creation and Model Training") as interface:
        gr.Markdown("# Dataset Creation and Model Training")
        gr.Markdown("Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.")

        with gr.Row():
            with gr.Column():
                # URL input with auto-separation
                urls_input = gr.Textbox(
                    lines=5,
                    label="Enter URLs",
                    placeholder="Enter URLs separated by line breaks, commas, or slashes",
                )

                # File upload
                file_input = gr.File(
                    label="Upload file (including zip files)",
                    type="filepath",
                )

                # Large text input
                text_input = gr.Textbox(
                    lines=10,
                    label="Enter or paste large text",
                    placeholder="Your text here...",
                )

            with gr.Column():
                # Model configuration
                model_name = gr.Textbox(
                    label="Model name",
                    value="distilbert-base-uncased",
                )
                batch_size = gr.Number(
                    label="Batch size",
                    value=8,
                    precision=0,
                    step=1,
                )
                epochs = gr.Number(
                    label="Epochs",
                    value=3,
                    precision=0,
                    step=1,
                )

        # Process button and output
        with gr.Row():
            process_btn = gr.Button("Process and Train")
            download_output = gr.File(label="Download Combined Dataset")

        # Event handlers. The handler name is resolved when this function
        # runs, so ``gradio_interface`` (defined below) only has to exist by
        # the time ``create_interface()`` is called.
        process_btn.click(
            fn=gradio_interface,
            inputs=[
                urls_input,
                file_input,
                text_input,
                model_name,
                batch_size,
                epochs,
            ],
            outputs=download_output,
        )

        # Preview processed URLs
        with gr.Row():
            preview_btn = gr.Button("Preview Processed URLs")
            preview_output = gr.JSON(label="Processed Items")

        preview_btn.click(
            fn=process_input,
            inputs=[urls_input],
            outputs=[preview_output],
        )

    return interface


def gradio_interface(urls, file, text_input, model_name, batch_size, epochs):
    """Build the dataset, train and deploy a model, and return the dataset path.

    This is the click handler for the "Process and Train" button. It was
    removed from the module while ``create_interface`` still referenced it,
    which made building the UI fail with ``NameError``.

    Args:
        urls: Raw text from the URL textbox.
        file: Path of the uploaded file, or ``None`` when nothing was uploaded.
        text_input: Raw text from the large-text textbox.
        model_name: Model identifier passed to ``train_model``.
        batch_size: Batch size passed to ``train_model``.
        epochs: Number of epochs passed to ``train_model``.

    Returns:
        The path returned by ``create_dataset`` (a JSON file), which is
        served through the ``gr.File`` download component.

    Raises:
        gr.Error: If the dataset is empty or any pipeline step fails. The
            output component is a file, so an error *string* would be
            treated as a file path; raising shows the message in the UI.
    """
    try:
        dataset_file = create_dataset(urls, file, text_input)

        with open(dataset_file, 'r') as f:
            dataset = json.load(f)

        if not dataset:
            raise gr.Error("Error: The dataset is empty. Please check your inputs.")

        model, tokenizer = train_model(model_name, dataset, batch_size, epochs)
        deploy_model(model, tokenizer)
        return dataset_file
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        logger.error(f"Error in gradio_interface: {e}")
        raise gr.Error(f"An error occurred: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  # Launch the interface
376
  if __name__ == "__main__":
377
+ demo = create_interface()
378
+ demo.launch()