Update app.py
app.py CHANGED
@@ -4,16 +4,22 @@ import re
 import time
 import logging
 import mimetypes
+import concurrent.futures
+import string
 import zipfile
+import tempfile
 from datetime import datetime
 from typing import List, Dict, Optional, Union
 from pathlib import Path
+from urllib.parse import urlparse
+
 import requests
 import validators
-
 import gradio as gr
+from diskcache import Cache
 from bs4 import BeautifulSoup
 from fake_useragent import UserAgent
+from ratelimit import limits, sleep_and_retry
 from cleantext import clean
 
 # Setup logging with detailed configuration
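Note: this hunk only adds the imports; the code that uses `concurrent.futures`, `string`, `urlparse`, `Cache`, and the `ratelimit` decorators falls outside the hunks shown below. As a rough sketch of how the two third-party additions are conventionally wired together (the `fetch_cached` helper and the `.url_cache` directory are illustrative, not part of this commit):

    import requests
    from diskcache import Cache
    from ratelimit import limits, sleep_and_retry

    cache = Cache('.url_cache')  # illustrative cache directory

    @sleep_and_retry               # sleep until the window frees up instead of raising
    @limits(calls=10, period=60)   # at most 10 calls per 60 seconds
    def fetch_cached(url: str, timeout: int = 10) -> str:
        """Fetch a URL, serving repeat requests from the disk cache."""
        cached = cache.get(url)
        if cached is not None:
            return cached
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        cache.set(url, response.text, expire=3600)  # keep for one hour
        return response.text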
@@ -80,10 +86,15 @@ class URLProcessor:
     def fetch_content(self, url: str) -> Optional[Dict]:
         """Universal content fetcher with special case handling"""
         try:
+            # Google Drive document handling
             if 'drive.google.com' in url:
                 return self._handle_google_drive(url)
+
+            # Google Calendar ICS handling
             if 'calendar.google.com' in url and 'ical' in url:
                 return self._handle_google_calendar(url)
+
+            # Standard HTML processing
             return self._fetch_html_content(url)
         except Exception as e:
             logger.error(f"Content fetch failed: {e}")
@@ -96,7 +107,7 @@ class URLProcessor:
         if not file_id:
             logger.error(f"Invalid Google Drive URL: {url}")
             return None
-
+
         direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
         response = self.session.get(direct_url, timeout=self.timeout)
         response.raise_for_status()
@@ -132,151 +143,108 @@ class URLProcessor:
 
             soup = BeautifulSoup(response.text, 'html.parser')
 
+            # Remove unwanted elements
             for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
                 element.decompose()
-
+
+            # Extract main content
             main_content = soup.find('main') or soup.find('article') or soup.body
 
-
-
-
-
-
-
-
-
-
-            else:
-                logger.warning(f"No main content found for URL: {url}")
-                return None
-
+            # Clean and structure content
+            text_content = main_content.get_text(separator='\n', strip=True)
+            cleaned_content = self.advanced_text_cleaning(text_content)
+
+            return {
+                'content': cleaned_content,
+                'content_type': response.headers.get('Content-Type', ''),
+                'timestamp': datetime.now().isoformat()
+            }
         except Exception as e:
             logger.error(f"HTML processing failed: {e}")
             return None
 
 class FileProcessor:
     """Class to handle file processing"""
-
-    def __init__(self, max_file_size: int =
+
+    def __init__(self, max_file_size: int = 10 * 1024 * 1024):  # 10MB default
         self.max_file_size = max_file_size
         self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
-        self.processed_zip_count = 0
-        self.max_zip_files = 5
 
-    def is_text_file(self,
-        """Check if
-        return any(file_path.lower().endswith(ext) for ext in self.supported_text_extensions)
-
-
-    def validate_filepath(path: Path) -> bool:
-        """Validate file exists and has supported extension"""
+    def is_text_file(self, filepath: str) -> bool:
+        """Check if file is a text file"""
         try:
-
-
-
+            mime_type, _ = mimetypes.guess_type(filepath)
+            return (mime_type and mime_type.startswith('text/')) or \
+                (os.path.splitext(filepath)[1].lower() in self.supported_text_extensions)
+        except Exception:
             return False
-
-    def
-        """Process
-
-
-        base_dir = Path(base_path)
-        if not base_dir.exists():
-            base_dir.mkdir(parents=True, exist_ok=True)
-            logger.info(f"Created data directory at {base_dir}")
-
-        for item in base_dir.glob('**/*'):
-            try:
-                # Skip directories immediately
-                if item.is_dir():
-                    logger.debug(f"Skipping directory: {item}")
-                    continue
-
-                # Validate file using shared function
-                if not validate_filepath(item):
-                    logger.warning(f"Invalid file skipped: {item}")
-                    continue
-
-                logger.info(f"Processing valid file: {item.name}")
-
-                # Add actual processing logic here
-                file_data = process_single_file(item)  # Your processing function
-                combined_data.append(file_data)
-
-            except Exception as e:
-                logger.error(f"Failed processing {item}: {str(e)}")
-                continue
-
-        return combined_data
-
-    def process_single_file(file_path: Path) -> dict:
-        """Example processing function"""
-        # Add your actual file processing logic here
-        return {
-            'filename': file_path.name,
-            'content': "processed content",  # Replace with real content
-            'metadata': {}  # Add actual metadata
-        }
-        except Exception as e:
-            logger.error(f"File processing error: {e}")
+
+    def process_file(self, file) -> List[Dict]:
+        """Process uploaded file with enhanced error handling"""
+        if not file:
             return []
-
-
-        """Process a ZIP file and extract data from text files within."""
-        extracted_data = []
+
+        dataset = []
         try:
-
-
-
-
-                file_info = zf.getinfo(name)
-                with zf.open(name) as f:
-                    content = f.read().decode('utf-8', errors='ignore')
-
-                    # Use file_info for file size and date/time
-                    extracted_data.append({
-                        'source': 'zip',
-                        'filename': name,
-                        'file_size': file_info.file_size,  # Get file size from ZipInfo
-                        'mime_type': mimetypes.guess_type(name)[0],
-                        'created': datetime(*file_info.date_time).isoformat(),  # Get date from ZipInfo
-                        'modified': datetime(*file_info.date_time).isoformat(),
-                        'content': content,
-                        'timestamp': datetime.now().isoformat()
-                    })
-            except Exception as e:
-                logger.error(f"Error processing file {name} from ZIP: {e}")
-        except zipfile.BadZipFile:
-            logger.error(f"Error: {zip_file_path} is not a valid ZIP file.")
-        except Exception as e:
-            logger.error(f"Error processing ZIP file {zip_file_path}: {e}")
-        return extracted_data
-
+            file_size = os.path.getsize(file.name)
+            if file_size > self.max_file_size:
+                logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
+                return []
 
-
-
-
-
-
+            with tempfile.TemporaryDirectory() as temp_dir:
+                if zipfile.is_zipfile(file.name):
+                    dataset.extend(self._process_zip_file(file.name, temp_dir))
+                else:
+                    dataset.extend(self._process_single_file(file))
 
-
-
-
-            self.data = json.loads(json_data)
-            return "Data loaded successfully!"
-        except json.JSONDecodeError:
-            return "Invalid JSON data. Please check your input."
+        except Exception as e:
+            logger.error(f"Error processing file: {str(e)}")
+            return []
 
-
-        """Generate a response based on user input and loaded data."""
-        if not self.data:
-            return "No data loaded. Please load your JSON data first."
+        return dataset
 
-
-
-
-
-
+    def _process_zip_file(self, zip_path: str, temp_dir: str) -> List[Dict]:
+        """Process ZIP file contents"""
+        results = []
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(temp_dir)
+            for root, _, files in os.walk(temp_dir):
+                for filename in files:
+                    filepath = os.path.join(root, filename)
+                    if self.is_text_file(filepath):
+                        try:
+                            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                                content = f.read()
+                            if content.strip():
+                                results.append({
+                                    "source": "file",
+                                    "filename": filename,
+                                    "content": content,
+                                    "timestamp": datetime.now().isoformat()
+                                })
+                        except Exception as e:
+                            logger.error(f"Error reading file {filename}: {str(e)}")
+        return results
+
+    def _process_single_file(self, file) -> List[Dict]:
+        try:
+            file_stat = os.stat(file.name)
+            with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+
+            return [{
+                'source': 'file',
+                'filename': os.path.basename(file.name),
+                'file_size': file_stat.st_size,
+                'mime_type': mimetypes.guess_type(file.name)[0],
+                'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                'content': content,
+                'timestamp': datetime.now().isoformat()
+            }]
+        except Exception as e:
+            logger.error(f"File processing error: {e}")
+            return []
 
 def create_interface():
     """Create a comprehensive Gradio interface with advanced features"""
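The rewritten `FileProcessor` checks the 10 MB cap before reading, sniffs text files by MIME type or extension, and extracts ZIP archives into a throwaway `tempfile.TemporaryDirectory()`. It expects a Gradio-style upload object whose `.name` attribute is a filesystem path, and it calls `os` functions, so `os` must already be imported near the top of the file (above the hunks shown). A minimal usage sketch, with `SimpleNamespace` standing in for the Gradio upload object (both the wrapper and the 'notes.txt' path are illustrative, not from this commit):

    from types import SimpleNamespace

    processor = FileProcessor()  # default max_file_size = 10 * 1024 * 1024

    # Gradio's file component hands process_file an object with a .name path;
    # SimpleNamespace stands in for it here.
    upload = SimpleNamespace(name='notes.txt')

    records = processor.process_file(upload)  # [] for empty or oversized input
    for record in records:
        print(record['filename'], len(record['content']))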
@@ -292,7 +260,7 @@ def create_interface():
 
         with gr.Tab("URL Processing"):
             url_input = gr.Textbox(
-                label="Enter URLs (comma or newline separated)",
+                label="Enter URLs (comma or newline separated)",
                 lines=5,
                 placeholder="https://example1.com\nhttps://example2.com"
             )
@@ -305,31 +273,16 @@ def create_interface():
 
         with gr.Tab("Text Input"):
             text_input = gr.Textbox(
-                label="Raw Text Input",
+                label="Raw Text Input",
                 lines=5,
                 placeholder="Paste your text here..."
             )
 
-        with gr.Tab("Chat"):
-            json_input = gr.Textbox(
-                label="Load JSON Data",
-                placeholder="Paste your JSON data here...",
-                lines=5
-            )
-            load_btn = gr.Button("Load Data", variant="primary")
-            chat_input = gr.Textbox(
-                label="Chat with your data",
-                placeholder="Type your question here..."
-            )
-            chat_output = gr.Textbox(label="Chatbot Response", interactive=False)
-
         process_btn = gr.Button("Process Input", variant="primary")
 
         output_text = gr.Textbox(label="Processing Results", interactive=False)
         output_file = gr.File(label="Processed Output")
 
-        chatbot = Chatbot()
-
         def process_all_inputs(urls, file, text):
             """Process all input types with progress tracking"""
             try:
@@ -337,6 +290,7 @@ def create_interface():
                 file_processor = FileProcessor()
                 results = []
 
+                # Process URLs
                 if urls:
                     url_list = re.split(r'[,\n]', urls)
                    url_list = [url.strip() for url in url_list if url.strip()]
@@ -352,10 +306,12 @@ def create_interface():
                             'content': content,
                             'timestamp': datetime.now().isoformat()
                         })
-
-                if file:
-                    results.extend(file_processor.process_files(file))
 
+                # Process files
+                if file:
+                    results.extend(file_processor.process_file(file))
+
+                # Process text input
                 if text:
                     cleaned_text = processor.advanced_text_cleaning(text)
                     results.append({
@@ -364,6 +320,7 @@ def create_interface():
                         'timestamp': datetime.now().isoformat()
                     })
 
+                # Generate output
                 if results:
                     output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
                     output_dir.mkdir(parents=True, exist_ok=True)
@@ -373,6 +330,7 @@ def create_interface():
                         json.dump(results, f, ensure_ascii=False, indent=2)
 
                     summary = f"Processed {len(results)} items successfully!"
+                    # Convert Path object to string here
                     return str(output_path), summary
                 else:
                     return None, "No valid content to process."
@@ -380,52 +338,23 @@ def create_interface():
             except Exception as e:
                 logger.error(f"Processing error: {e}")
                 return None, f"Error: {str(e)}"
-
-        def load_chat_data(json_data):
-            """Load JSON data into the chatbot."""
-            return chatbot.load_data(json_data)
-
-        def chat_with_data(user_input):
-            """Chat with the loaded data."""
-            return chatbot.chat(user_input)
-
+
         process_btn.click(
-            process_all_inputs,
-            inputs=[url_input, file_input, text_input],
+            process_all_inputs,
+            inputs=[url_input, file_input, text_input],
             outputs=[output_file, output_text]
         )
-
-        load_btn.click(
-            load_chat_data,
-            inputs=json_input,
-            outputs=chat_output
-        )
-
-        chat_input.submit(
-            chat_with_data,
-            inputs=chat_input,
-            outputs=chat_output
-        )
 
         gr.Markdown("""
         ### Usage Guidelines
         - **URL Processing**: Enter valid HTTP/HTTPS URLs
         - **File Input**: Upload text files or ZIP archives
         - **Text Input**: Direct text processing
-        - **Chat**: Load JSON data and ask questions about it
         - Advanced cleaning and validation included
         """)
 
         return interface
 
-def gradio_interface_handler(input_path: str):
-    """Example Gradio handler function"""
-    if not validate_filepath(Path(input_path)):
-        raise ValueError("Invalid file path provided")
-
-    processed_data = process_files(input_path)
-    return format_output(processed_data)
-
 def main():
     # Configure system settings
     mimetypes.init()
@@ -437,10 +366,8 @@ def main():
     interface.launch(
         server_name="0.0.0.0",
         server_port=7860,
+        show_error=True,
         share=False,
-        inbrowser=
-        debug=
+        inbrowser=True,
+        debug=True
     )
-
-if __name__ == "__main__":
-    main()
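Note that the last hunk also deletes the `if __name__ == "__main__":` guard; unless it is re-added further down the file (outside the hunks shown in this diff), running `python app.py` directly will define `main()` without ever calling it. The removed guard was the conventional two lines:

    if __name__ == "__main__":
        main()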