Spaces:

acecalisto3
/

urld

Running

App Files Files Community

acecalisto3 committed on Dec 6, 2024

Commit

287afed

verified ·

1 Parent(s): 10bd6bb

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -0

app.py CHANGED Viewed

@@ -67,6 +67,67 @@ def process_urls(urls):
         time.sleep(1)
     return dataset
 def process_file(file):
     dataset = []
     with tempfile.TemporaryDirectory() as temp_dir:

         time.sleep(1)
     return dataset
def preprocess_bulk_text(text: str) -> str:
    """Normalize free-form bulk input into a ``", "``-separated string.

    Line breaks, slashes, semicolons, spaced dashes, vertical bars and double
    spaces are treated as item separators, as is whitespace that follows a
    domain-like ending such as ``.com``. Scheme-prefixed URLs (for example
    ``https://host/path``) are kept intact as single items instead of being
    split on their own slashes.

    Text that already contains a comma is assumed to be comma-separated and
    is returned with only its line endings normalized.

    Args:
        text: Raw bulk input.

    Returns:
        The items joined by ``", "`` with no empty items and no leading or
        trailing separators, or the line-ending-normalized input when it
        already contained a comma.
    """
    # Imported locally so the function does not rely on module-level imports.
    import re
    import string

    # First, normalize line endings.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Already comma-separated input is passed through untouched.
    if ',' in text:
        return text

    # Order matters: the spaced variants must be replaced before the bare ones.
    separators = (
        '\n',      # Line breaks
        ' / ',     # Forward slashes with spaces
        '/',       # Forward slashes
        ';',       # Semicolons
        ' - ',     # Dashes with spaces
        '|',       # Vertical bars
        '  ',      # Double spaces
    )
    # A URL is "<scheme>://" followed by everything up to whitespace or an
    # unambiguous separator character.
    url_pattern = re.compile(r'([A-Za-z][A-Za-z0-9+.\-]*://[^\s;|]+)')

    # re.split with one capturing group alternates plain text (even indexes)
    # and captured URLs (odd indexes); only plain text gets separator
    # replacement, so the slashes inside a URL survive.
    parts = []
    for index, piece in enumerate(url_pattern.split(text)):
        if index % 2:
            # Every URL is an item of its own.
            parts.append(',' + piece + ',')
        else:
            for separator in separators:
                piece = piece.replace(separator, ',')
            parts.append(piece)
    text = ''.join(parts)

    # Handle domain endings (e.g. ".com .org .net") separated by whitespace.
    text = re.sub(r'(\.[a-z]{2,})\s+', r'\1,', text)
    # Collapse runs of commas, including ones separated only by whitespace,
    # into a single ", " so no empty items remain.
    text = re.sub(r'\s*,[\s,]*', ', ', text)
    # Remove leading/trailing commas and whitespace.
    return text.strip(',' + string.whitespace)
# Callback used by the bulk-input UI: raw text in, list of items out.
def process_input(text: str) -> List[str]:
    """Return the non-empty, whitespace-trimmed items found in ``text``.

    The text is first normalized by ``preprocess_bulk_text`` and then split
    on commas; pieces that are empty after trimming are dropped.
    """
    normalized = preprocess_bulk_text(text)
    trimmed = (piece.strip() for piece in normalized.split(','))
    return [item for item in trimmed if item]
# Bulk-input widgets: the textbox, the trigger button and the JSON view of
# the parsed items all live in a single row.
# NOTE(review): these statements sit at module level in this view; confirm
# they run inside the app's gr.Blocks() context.
with gr.Row():
    text_input = gr.Textbox(
        placeholder="Enter items separated by line breaks, slashes, or other separators",
        label="Bulk Input",
    )
    process_btn = gr.Button("Process")
    output_list = gr.JSON(label="Processed Items")

# Clicking the button feeds the textbox content through process_input and
# shows the resulting list in the JSON component.
process_btn.click(fn=process_input, inputs=[text_input], outputs=[output_list])
 def process_file(file):
     dataset = []
     with tempfile.TemporaryDirectory() as temp_dir: