acecalisto3 committed on
Commit
287afed
·
verified ·
1 Parent(s): 10bd6bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py CHANGED
@@ -67,6 +67,67 @@ def process_urls(urls):
67
  time.sleep(1)
68
  return dataset
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def process_file(file):
71
  dataset = []
72
  with tempfile.TemporaryDirectory() as temp_dir:
 
67
  time.sleep(1)
68
  return dataset
69
 
70
def preprocess_bulk_text(text: str) -> str:
    """Normalize loosely separated bulk input into a ``", "``-joined string.

    Steps, in order:

    1. Windows and old-Mac line endings are normalized to a plain newline.
    2. If the text contains no comma at all, every known separator (newline,
       slash, semicolon, spaced dash, vertical bar, double space) is replaced
       by a comma. Text that already contains a comma is assumed to be
       comma-separated and is left alone by this step.
    3. Whitespace following a lowercase domain-like ending (``.com``,
       ``.org``, ...) is replaced by a comma.
    4. Runs of commas (including runs interrupted only by whitespace) are
       collapsed to one, leading/trailing commas and whitespace are removed,
       and every remaining comma is rendered as ``", "``.

    Args:
        text: Raw user input.

    Returns:
        The items joined by ``", "``; an empty string if nothing remains.
    """
    # Imported locally so the function does not depend on module-level
    # imports that may or may not exist in the surrounding file.
    import re
    import string

    # Order matters: the spaced slash must be tried before the bare slash.
    # NOTE(review): treating a bare '/' as a separator splits URLs that
    # carry a scheme or a path -- confirm this is intended for URL input.
    separators = (
        '\n',   # Line breaks
        ' / ',  # Forward slashes with spaces
        '/',    # Forward slashes
        ';',    # Semicolons
        ' - ',  # Dashes with spaces
        '|',    # Vertical bars
        '  ',   # Double spaces
    )

    # Normalize line endings first so a single '\n' separator covers all.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Only guess at separators when the input is not already comma-separated.
    if ',' not in text:
        for separator in separators:
            text = text.replace(separator, ',')

    # Handle domain endings (e.g. "a.com b.org" -> "a.com,b.org").
    text = re.sub(r'(\.[a-z]{2,})\s+', r'\1,', text)

    # Collapse each comma run, together with any whitespace around or inside
    # it, into a single comma so blank entries such as "a, , b" cannot survive.
    text = re.sub(r'\s*,[\s,]*', ',', text)

    # Drop leading/trailing commas and whitespace.
    text = text.strip(',' + string.whitespace)

    # Commas are now bare, so uniform spacing is a plain replacement.
    return text.replace(',', ', ')
109
+
110
+ # Example usage:
111
def process_input(text: str) -> List[str]:
    """Return the trimmed, non-empty items found in bulk text.

    The text is first normalized by ``preprocess_bulk_text`` and then split
    on commas; pieces that are empty after trimming are discarded.
    """
    normalized = preprocess_bulk_text(text)
    trimmed_pieces = (piece.strip() for piece in normalized.split(','))
    return [piece for piece in trimmed_pieces if piece]
115
+
116
+ # Add to the interface
117
# Bulk-input widgets: a textbox for the raw text, a button to trigger
# processing, and a JSON view that shows the resulting list.
# NOTE(review): Gradio event listeners normally have to be registered inside
# a gr.Blocks context -- confirm this snippet ends up within one.
with gr.Row():
    text_input = gr.Textbox(
        placeholder="Enter items separated by line breaks, slashes, or other separators",
        label="Bulk Input",
    )
    process_btn = gr.Button("Process")
    output_list = gr.JSON(label="Processed Items")

# Feed the textbox contents through process_input and show the parsed items.
process_btn.click(fn=process_input, inputs=[text_input], outputs=[output_list])
130
+
131
  def process_file(file):
132
  dataset = []
133
  with tempfile.TemporaryDirectory() as temp_dir: