Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -73,7 +73,6 @@ class URLProcessor:
|
|
| 73 |
try:
|
| 74 |
if not validators.url(url):
|
| 75 |
return {'is_valid': False, 'message': 'Invalid URL format'}
|
| 76 |
-
|
| 77 |
response = self.session.head(url, timeout=self.timeout)
|
| 78 |
response.raise_for_status()
|
| 79 |
return {'is_valid': True, 'message': 'URL is valid and accessible'}
|
|
@@ -86,11 +85,9 @@ class URLProcessor:
|
|
| 86 |
# Google Drive document handling
|
| 87 |
if 'drive.google.com' in url:
|
| 88 |
return self._handle_google_drive(url)
|
| 89 |
-
|
| 90 |
# Google Calendar ICS handling
|
| 91 |
if 'calendar.google.com' in url and 'ical' in url:
|
| 92 |
return self._handle_google_calendar(url)
|
| 93 |
-
|
| 94 |
# Standard HTML processing
|
| 95 |
return self._fetch_html_content(url)
|
| 96 |
except Exception as e:
|
|
@@ -104,11 +101,9 @@ class URLProcessor:
|
|
| 104 |
if not file_id:
|
| 105 |
logger.error(f"Invalid Google Drive URL: {url}")
|
| 106 |
return None
|
| 107 |
-
|
| 108 |
direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
|
| 109 |
response = self.session.get(direct_url, timeout=self.timeout)
|
| 110 |
response.raise_for_status()
|
| 111 |
-
|
| 112 |
return {
|
| 113 |
'content': response.text,
|
| 114 |
'content_type': response.headers.get('Content-Type', ''),
|
|
@@ -137,20 +132,15 @@ class URLProcessor:
|
|
| 137 |
try:
|
| 138 |
response = self.session.get(url, timeout=self.timeout)
|
| 139 |
response.raise_for_status()
|
| 140 |
-
|
| 141 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 142 |
-
|
| 143 |
# Remove unwanted elements
|
| 144 |
for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
|
| 145 |
element.decompose()
|
| 146 |
-
|
| 147 |
# Extract main content
|
| 148 |
main_content = soup.find('main') or soup.find('article') or soup.body
|
| 149 |
-
|
| 150 |
# Clean and structure content
|
| 151 |
text_content = main_content.get_text(separator='\n', strip=True)
|
| 152 |
cleaned_content = self.advanced_text_cleaning(text_content)
|
| 153 |
-
|
| 154 |
return {
|
| 155 |
'content': cleaned_content,
|
| 156 |
'content_type': response.headers.get('Content-Type', ''),
|
|
@@ -217,17 +207,14 @@ class FileProcessor:
|
|
| 217 |
def _process_single_file(self, file) -> List[Dict]:
|
| 218 |
try:
|
| 219 |
file_stat = os.stat(file.name)
|
| 220 |
-
|
| 221 |
# For very large files, read in chunks and summarize
|
| 222 |
if file_stat.st_size > 100 * 1024 * 1024: # 100MB
|
| 223 |
logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
|
| 224 |
-
|
| 225 |
# Read first and last 1MB for extremely large files
|
| 226 |
content = ""
|
| 227 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
| 228 |
content = f.read(1 * 1024 * 1024) # First 1MB
|
| 229 |
content += "\n...[Content truncated due to large file size]...\n"
|
| 230 |
-
|
| 231 |
# Seek to the last 1MB
|
| 232 |
f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
|
| 233 |
content += f.read() # Last 1MB
|
|
@@ -235,7 +222,6 @@ class FileProcessor:
|
|
| 235 |
# Regular file processing
|
| 236 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
| 237 |
content = f.read()
|
| 238 |
-
|
| 239 |
return [{
|
| 240 |
'source': 'file',
|
| 241 |
'filename': os.path.basename(file.name),
|
|
@@ -250,6 +236,7 @@ class FileProcessor:
|
|
| 250 |
logger.error(f"File processing error: {e}")
|
| 251 |
return []
|
| 252 |
|
|
|
|
| 253 |
def generate_qr(json_data):
|
| 254 |
"""Generate QR code from JSON data and return the file path."""
|
| 255 |
try:
|
|
@@ -261,7 +248,7 @@ def generate_qr(json_data):
|
|
| 261 |
)
|
| 262 |
qr.add_data(json_data)
|
| 263 |
qr.make(fit=True)
|
| 264 |
-
|
| 265 |
img = qr.make_image(fill_color="black", back_color="white")
|
| 266 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
|
| 267 |
img.save(temp_file.name)
|
|
@@ -269,7 +256,6 @@ def generate_qr(json_data):
|
|
| 269 |
except Exception as e:
|
| 270 |
# If the data is too large for a QR code
|
| 271 |
logger.error(f"QR generation error: {e}")
|
| 272 |
-
|
| 273 |
# Create a simple QR with error message
|
| 274 |
qr = qrcode.QRCode(
|
| 275 |
version=1,
|
|
@@ -279,54 +265,211 @@ def generate_qr(json_data):
|
|
| 279 |
)
|
| 280 |
qr.add_data("Error: Data too large for QR code")
|
| 281 |
qr.make(fit=True)
|
| 282 |
-
|
| 283 |
img = qr.make_image(fill_color="black", back_color="white")
|
| 284 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
|
| 285 |
img.save(temp_file.name)
|
| 286 |
return temp_file.name
|
| 287 |
|
| 288 |
-
def create_interface():
|
| 289 |
-
"""Create a comprehensive Gradio interface with advanced features"""
|
| 290 |
|
|
|
|
|
|
|
| 291 |
css = """
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
"""
|
| 296 |
-
|
| 297 |
with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
|
| 298 |
gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
|
| 299 |
|
| 300 |
-
with gr.Tab("URL Processing"):
|
| 301 |
url_input = gr.Textbox(
|
| 302 |
label="Enter URLs (comma or newline separated)",
|
| 303 |
lines=5,
|
| 304 |
-
placeholder="https://example1.com\nhttps://example2.com"
|
|
|
|
|
|
|
| 305 |
)
|
| 306 |
|
| 307 |
-
with gr.Tab("File Input"):
|
| 308 |
file_input = gr.File(
|
| 309 |
label="Upload text file or ZIP archive",
|
| 310 |
file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
|
| 311 |
)
|
| 312 |
|
| 313 |
-
with gr.Tab("Text Input"):
|
| 314 |
text_input = gr.Textbox(
|
| 315 |
label="Raw Text Input",
|
| 316 |
lines=5,
|
| 317 |
-
placeholder="Paste your text here..."
|
|
|
|
| 318 |
)
|
| 319 |
|
| 320 |
-
with gr.Tab("JSON Editor"):
|
| 321 |
json_editor = gr.Textbox(
|
| 322 |
label="JSON Editor",
|
| 323 |
lines=20,
|
| 324 |
placeholder="View and edit your JSON data here...",
|
| 325 |
interactive=True,
|
| 326 |
-
elem_id="json-editor"
|
| 327 |
)
|
| 328 |
|
| 329 |
-
with gr.Tab("Scratchpad"):
|
| 330 |
scratchpad = gr.Textbox(
|
| 331 |
label="Scratchpad",
|
| 332 |
lines=10,
|
|
@@ -337,7 +480,7 @@ def create_interface():
|
|
| 337 |
process_btn = gr.Button("Process Input", variant="primary")
|
| 338 |
qr_btn = gr.Button("Generate QR Code", variant="secondary")
|
| 339 |
|
| 340 |
-
output_text = gr.Textbox(label="Processing Results", interactive=False)
|
| 341 |
output_file = gr.File(label="Processed Output")
|
| 342 |
qr_output = gr.Image(label="QR Code", type="filepath") # To display the generated QR code
|
| 343 |
|
|
@@ -347,12 +490,10 @@ def create_interface():
|
|
| 347 |
processor = URLProcessor()
|
| 348 |
file_processor = FileProcessor()
|
| 349 |
results = []
|
| 350 |
-
|
| 351 |
# Process URLs
|
| 352 |
if urls:
|
| 353 |
url_list = re.split(r'[,\n]', urls)
|
| 354 |
url_list = [url.strip() for url in url_list if url.strip()]
|
| 355 |
-
|
| 356 |
for url in url_list:
|
| 357 |
validation = processor.validate_url(url)
|
| 358 |
if validation.get('is_valid'):
|
|
@@ -364,11 +505,9 @@ def create_interface():
|
|
| 364 |
'content': content,
|
| 365 |
'timestamp': datetime.now().isoformat()
|
| 366 |
})
|
| 367 |
-
|
| 368 |
# Process files
|
| 369 |
if file:
|
| 370 |
results.extend(file_processor.process_file(file))
|
| 371 |
-
|
| 372 |
# Process text input
|
| 373 |
if text:
|
| 374 |
cleaned_text = processor.advanced_text_cleaning(text)
|
|
@@ -377,189 +516,54 @@ def create_interface():
|
|
| 377 |
'content': cleaned_text,
|
| 378 |
'timestamp': datetime.now().isoformat()
|
| 379 |
})
|
| 380 |
-
|
| 381 |
# Generate output
|
| 382 |
if results:
|
| 383 |
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
| 384 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 385 |
output_path = output_dir / f'processed_{int(time.time())}.json'
|
| 386 |
-
|
| 387 |
with open(output_path, 'w', encoding='utf-8') as f:
|
| 388 |
json.dump(results, f, ensure_ascii=False, indent=2)
|
| 389 |
-
|
| 390 |
summary = f"Processed {len(results)} items successfully!"
|
| 391 |
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
| 392 |
return str(output_path), summary, json_data # Return JSON for editor
|
| 393 |
else:
|
| 394 |
return None, "No valid content to process.", ""
|
| 395 |
-
|
| 396 |
except Exception as e:
|
| 397 |
logger.error(f"Processing error: {e}")
|
| 398 |
return None, f"Error: {str(e)}", ""
|
| 399 |
|
| 400 |
-
def
|
| 401 |
-
"""
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
border: 1px solid #00ff00;
|
| 427 |
-
border-radius: 5px;
|
| 428 |
-
margin-bottom: 10px;
|
| 429 |
-
padding: 10px;
|
| 430 |
-
}
|
| 431 |
-
.tab:hover {
|
| 432 |
-
background-color: rgba(0, 255, 0, 0.2);
|
| 433 |
-
}
|
| 434 |
-
.warning {
|
| 435 |
-
background-color: rgba(255, 255, 0, 0.2);
|
| 436 |
-
color: #856404;
|
| 437 |
-
}
|
| 438 |
-
.error {
|
| 439 |
-
background-color: rgba(255, 0, 0, 0.3);
|
| 440 |
-
color: #721c24;
|
| 441 |
-
}
|
| 442 |
-
input[type="text"], input[type="file"] {
|
| 443 |
-
border: 2px solid #00ff00;
|
| 444 |
-
border-radius: 5px;
|
| 445 |
-
padding: 10px;
|
| 446 |
-
background-color: #1d1d1d;
|
| 447 |
-
color: #ffffff;
|
| 448 |
-
}
|
| 449 |
-
input[type="text"]:focus, input[type="file"]:focus {
|
| 450 |
-
border-color: #ff7f50;
|
| 451 |
-
outline: none;
|
| 452 |
-
}
|
| 453 |
-
.btn-primary {
|
| 454 |
-
background-color: #00ff00;
|
| 455 |
-
border: none;
|
| 456 |
-
border-radius: 5px;
|
| 457 |
-
padding: 10px 20px;
|
| 458 |
-
cursor: pointer;
|
| 459 |
-
transition: background-color 0.3s;
|
| 460 |
-
}
|
| 461 |
-
.btn-secondary {
|
| 462 |
-
background-color: #ff7f50;
|
| 463 |
-
border: none;
|
| 464 |
-
border-radius: 5px;
|
| 465 |
-
padding: 10px 20px;
|
| 466 |
-
cursor: pointer;
|
| 467 |
-
transition: background-color 0.3s;
|
| 468 |
-
}
|
| 469 |
-
.btn-primary:hover, .btn-secondary:hover {
|
| 470 |
-
background-color: rgba(255, 165, 0, 0.8);
|
| 471 |
-
}
|
| 472 |
-
textarea {
|
| 473 |
-
border: 2px solid #00ff00;
|
| 474 |
-
border-radius: 5px;
|
| 475 |
-
padding: 10px;
|
| 476 |
-
background-color: #1d1d1d;
|
| 477 |
-
color: #ffffff;
|
| 478 |
-
}
|
| 479 |
-
textarea:focus {
|
| 480 |
-
border-color: #ff7f50;
|
| 481 |
-
outline: none;
|
| 482 |
-
}
|
| 483 |
-
.output-box {
|
| 484 |
-
background-color: #1d1d1d;
|
| 485 |
-
border: 1px solid #00ff00;
|
| 486 |
-
border-radius: 5px;
|
| 487 |
-
padding: 10px;
|
| 488 |
-
overflow-x: auto;
|
| 489 |
-
}
|
| 490 |
-
"""
|
| 491 |
-
|
| 492 |
-
with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
|
| 493 |
-
gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
|
| 494 |
-
|
| 495 |
-
with gr.Tab("URL Processing"):
|
| 496 |
-
url_input = gr.Textbox(
|
| 497 |
-
label="Enter URLs (comma or newline separated)",
|
| 498 |
-
lines=5,
|
| 499 |
-
placeholder="https://example1.com\nhttps://example2.com",
|
| 500 |
-
interactive=True,
|
| 501 |
-
elem_id="url-input"
|
| 502 |
-
)
|
| 503 |
-
|
| 504 |
-
with gr.Tab("File Input"):
|
| 505 |
-
file_input = gr.File(
|
| 506 |
-
label="Upload text file or ZIP archive",
|
| 507 |
-
file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
|
| 508 |
-
)
|
| 509 |
-
|
| 510 |
-
with gr.Tab("Text Input"):
|
| 511 |
-
text_input = gr.Textbox(
|
| 512 |
-
label="Raw Text Input",
|
| 513 |
-
lines=5,
|
| 514 |
-
placeholder="Paste your text here...",
|
| 515 |
-
interactive=True
|
| 516 |
-
)
|
| 517 |
-
|
| 518 |
-
with gr.Tab("JSON Editor"):
|
| 519 |
-
json_editor = gr.Textbox(
|
| 520 |
-
label="JSON Editor",
|
| 521 |
-
lines=20,
|
| 522 |
-
placeholder="View and edit your JSON data here...",
|
| 523 |
-
interactive=True,
|
| 524 |
-
elem_id="json-editor"
|
| 525 |
-
)
|
| 526 |
-
|
| 527 |
-
with gr.Tab("Scratchpad"):
|
| 528 |
-
scratchpad = gr.Textbox(
|
| 529 |
-
label="Scratchpad",
|
| 530 |
-
lines=10,
|
| 531 |
-
placeholder="Quick notes or text collections...",
|
| 532 |
-
interactive=True
|
| 533 |
-
)
|
| 534 |
-
|
| 535 |
-
process_btn = gr.Button("Process Input", variant="primary")
|
| 536 |
-
qr_btn = gr.Button("Generate QR Code", variant="secondary")
|
| 537 |
-
|
| 538 |
-
output_text = gr.Textbox(label="Processing Results", interactive=False, elem_id="output-text")
|
| 539 |
-
output_file = gr.File(label="Processed Output")
|
| 540 |
-
qr_output = gr.Image(label="QR Code", type="filepath") # To display the generated QR code
|
| 541 |
-
|
| 542 |
-
# The rest of the internal logic remains unchanged
|
| 543 |
-
|
| 544 |
-
gr.Markdown("""
|
| 545 |
-
### Usage Guidelines
|
| 546 |
-
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
| 547 |
-
- **File Input**: Upload text files or ZIP archives
|
| 548 |
-
- **Text Input**: Direct text processing
|
| 549 |
-
- **JSON Editor**: View and edit your JSON data
|
| 550 |
-
- **Scratchpad**: Quick notes or text collections
|
| 551 |
-
- Advanced cleaning and validation included
|
| 552 |
-
""")
|
| 553 |
-
|
| 554 |
-
return interface
|
| 555 |
|
| 556 |
def main():
|
| 557 |
# Configure system settings
|
| 558 |
mimetypes.init()
|
| 559 |
-
|
| 560 |
# Create and launch interface
|
| 561 |
interface = create_interface()
|
| 562 |
-
|
| 563 |
# Launch with proper configuration
|
| 564 |
interface.launch(
|
| 565 |
server_name="0.0.0.0",
|
|
@@ -570,5 +574,6 @@ def main():
|
|
| 570 |
debug=True
|
| 571 |
)
|
| 572 |
|
|
|
|
| 573 |
if __name__ == "__main__":
|
| 574 |
main()
|
|
|
|
| 73 |
try:
|
| 74 |
if not validators.url(url):
|
| 75 |
return {'is_valid': False, 'message': 'Invalid URL format'}
|
|
|
|
| 76 |
response = self.session.head(url, timeout=self.timeout)
|
| 77 |
response.raise_for_status()
|
| 78 |
return {'is_valid': True, 'message': 'URL is valid and accessible'}
|
|
|
|
| 85 |
# Google Drive document handling
|
| 86 |
if 'drive.google.com' in url:
|
| 87 |
return self._handle_google_drive(url)
|
|
|
|
| 88 |
# Google Calendar ICS handling
|
| 89 |
if 'calendar.google.com' in url and 'ical' in url:
|
| 90 |
return self._handle_google_calendar(url)
|
|
|
|
| 91 |
# Standard HTML processing
|
| 92 |
return self._fetch_html_content(url)
|
| 93 |
except Exception as e:
|
|
|
|
| 101 |
if not file_id:
|
| 102 |
logger.error(f"Invalid Google Drive URL: {url}")
|
| 103 |
return None
|
|
|
|
| 104 |
direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
|
| 105 |
response = self.session.get(direct_url, timeout=self.timeout)
|
| 106 |
response.raise_for_status()
|
|
|
|
| 107 |
return {
|
| 108 |
'content': response.text,
|
| 109 |
'content_type': response.headers.get('Content-Type', ''),
|
|
|
|
| 132 |
try:
|
| 133 |
response = self.session.get(url, timeout=self.timeout)
|
| 134 |
response.raise_for_status()
|
|
|
|
| 135 |
soup = BeautifulSoup(response.text, 'html.parser')
|
|
|
|
| 136 |
# Remove unwanted elements
|
| 137 |
for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
|
| 138 |
element.decompose()
|
|
|
|
| 139 |
# Extract main content
|
| 140 |
main_content = soup.find('main') or soup.find('article') or soup.body
|
|
|
|
| 141 |
# Clean and structure content
|
| 142 |
text_content = main_content.get_text(separator='\n', strip=True)
|
| 143 |
cleaned_content = self.advanced_text_cleaning(text_content)
|
|
|
|
| 144 |
return {
|
| 145 |
'content': cleaned_content,
|
| 146 |
'content_type': response.headers.get('Content-Type', ''),
|
|
|
|
| 207 |
def _process_single_file(self, file) -> List[Dict]:
|
| 208 |
try:
|
| 209 |
file_stat = os.stat(file.name)
|
|
|
|
| 210 |
# For very large files, read in chunks and summarize
|
| 211 |
if file_stat.st_size > 100 * 1024 * 1024: # 100MB
|
| 212 |
logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
|
|
|
|
| 213 |
# Read first and last 1MB for extremely large files
|
| 214 |
content = ""
|
| 215 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
| 216 |
content = f.read(1 * 1024 * 1024) # First 1MB
|
| 217 |
content += "\n...[Content truncated due to large file size]...\n"
|
|
|
|
| 218 |
# Seek to the last 1MB
|
| 219 |
f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
|
| 220 |
content += f.read() # Last 1MB
|
|
|
|
| 222 |
# Regular file processing
|
| 223 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
| 224 |
content = f.read()
|
|
|
|
| 225 |
return [{
|
| 226 |
'source': 'file',
|
| 227 |
'filename': os.path.basename(file.name),
|
|
|
|
| 236 |
logger.error(f"File processing error: {e}")
|
| 237 |
return []
|
| 238 |
|
| 239 |
+
|
| 240 |
def generate_qr(json_data):
|
| 241 |
"""Generate QR code from JSON data and return the file path."""
|
| 242 |
try:
|
|
|
|
| 248 |
)
|
| 249 |
qr.add_data(json_data)
|
| 250 |
qr.make(fit=True)
|
| 251 |
+
|
| 252 |
img = qr.make_image(fill_color="black", back_color="white")
|
| 253 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
|
| 254 |
img.save(temp_file.name)
|
|
|
|
| 256 |
except Exception as e:
|
| 257 |
# If the data is too large for a QR code
|
| 258 |
logger.error(f"QR generation error: {e}")
|
|
|
|
| 259 |
# Create a simple QR with error message
|
| 260 |
qr = qrcode.QRCode(
|
| 261 |
version=1,
|
|
|
|
| 265 |
)
|
| 266 |
qr.add_data("Error: Data too large for QR code")
|
| 267 |
qr.make(fit=True)
|
| 268 |
+
|
| 269 |
img = qr.make_image(fill_color="black", back_color="white")
|
| 270 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
|
| 271 |
img.save(temp_file.name)
|
| 272 |
return temp_file.name
|
| 273 |
|
|
|
|
|
|
|
| 274 |
|
| 275 |
+
def create_interface():
|
| 276 |
+
"""Create a comprehensive Gradio interface with advanced features and styling"""
|
| 277 |
css = """
|
| 278 |
+
body {
|
| 279 |
+
font-family: 'Inter', sans-serif;
|
| 280 |
+
background: linear-gradient(to bottom, #08041C, #030712); /* Dark cosmic background */
|
| 281 |
+
color: #ffffff;
|
| 282 |
+
}
|
| 283 |
+
.container {
|
| 284 |
+
max-width: 1200px;
|
| 285 |
+
margin: auto;
|
| 286 |
+
background-color: rgba(255, 255, 255, 0.06);
|
| 287 |
+
backdrop-filter: blur(12px);
|
| 288 |
+
border: 1px solid rgba(255, 255, 255, 0.1);
|
| 289 |
+
border-radius: 1rem;
|
| 290 |
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.25);
|
| 291 |
+
padding: 2rem;
|
| 292 |
+
}
|
| 293 |
+
h1 {
|
| 294 |
+
color: #00FF00;
|
| 295 |
+
text-align: center;
|
| 296 |
+
text-shadow: 0 0 10px rgba(0, 255, 0, 0.8);
|
| 297 |
+
}
|
| 298 |
+
h2, h3, h4 {
|
| 299 |
+
color: #FF9900;
|
| 300 |
+
text-shadow: 0 0 10px rgba(255, 153, 0, 0.8);
|
| 301 |
+
}
|
| 302 |
+
.tab {
|
| 303 |
+
background-color: rgba(255, 255, 255, 0.06);
|
| 304 |
+
backdrop-filter: blur(12px);
|
| 305 |
+
border: 1px solid rgba(255, 255, 255, 0.1);
|
| 306 |
+
border-radius: 0.75rem;
|
| 307 |
+
margin-bottom: 1rem;
|
| 308 |
+
padding: 1.5rem;
|
| 309 |
+
}
|
| 310 |
+
.tab:hover {
|
| 311 |
+
background-color: rgba(255, 255, 255, 0.1);
|
| 312 |
+
}
|
| 313 |
+
.warning {
|
| 314 |
+
background-color: #fff3cd;
|
| 315 |
+
color: #856404;
|
| 316 |
+
border-radius: 0.5rem;
|
| 317 |
+
padding: 1rem;
|
| 318 |
+
margin-bottom: 1rem;
|
| 319 |
+
}
|
| 320 |
+
.error {
|
| 321 |
+
background-color: #f8d7da;
|
| 322 |
+
color: #721c24;
|
| 323 |
+
border-radius: 0.5rem;
|
| 324 |
+
padding: 1rem;
|
| 325 |
+
margin-bottom: 1rem;
|
| 326 |
+
}
|
| 327 |
+
input[type="text"], input[type="file"] {
|
| 328 |
+
width: 100%;
|
| 329 |
+
padding: 0.75rem;
|
| 330 |
+
border-radius: 0.5rem;
|
| 331 |
+
background-color: rgba(0, 0, 0, 0.2);
|
| 332 |
+
color: #ffffff;
|
| 333 |
+
border: 1px solid #4a5568;
|
| 334 |
+
font-size: 1rem;
|
| 335 |
+
transition: border-color 0.3s ease;
|
| 336 |
+
}
|
| 337 |
+
input[type="text"]:focus, input[type="file"]:focus {
|
| 338 |
+
outline: none;
|
| 339 |
+
border-color: #00FF00;
|
| 340 |
+
box-shadow: 0 0 5px rgba(0, 255, 0, 0.7);
|
| 341 |
+
}
|
| 342 |
+
.btn-primary {
|
| 343 |
+
padding: 0.75rem 1.5rem;
|
| 344 |
+
border-radius: 1.5rem;
|
| 345 |
+
font-weight: 600;
|
| 346 |
+
cursor: pointer;
|
| 347 |
+
transition: transform 0.2s ease, box-shadow 0.2s ease, background-image 0.3s;
|
| 348 |
+
background-image: linear-gradient(to right, #00FF00, #00A300);
|
| 349 |
+
color: #000000;
|
| 350 |
+
border: none;
|
| 351 |
+
box-shadow: 0 0 8px rgba(0, 255, 0, 0.5);
|
| 352 |
+
}
|
| 353 |
+
.btn-primary:hover {
|
| 354 |
+
transform: scale(1.05);
|
| 355 |
+
box-shadow: 0 0 12px rgba(0, 255, 0, 0.7);
|
| 356 |
+
background-image: linear-gradient(to right, #00A300, #007D00);
|
| 357 |
+
}
|
| 358 |
+
.btn-secondary {
|
| 359 |
+
padding: 0.75rem 1.5rem;
|
| 360 |
+
border-radius: 1.5rem;
|
| 361 |
+
font-weight: 600;
|
| 362 |
+
cursor: pointer;
|
| 363 |
+
transition: transform 0.2s ease, box-shadow 0.2s ease, background-image 0.3s;
|
| 364 |
+
background-image: linear-gradient(to right, #FF9900, #FF6600);
|
| 365 |
+
color: #000000;
|
| 366 |
+
border: none;
|
| 367 |
+
box-shadow: 0 0 8px rgba(255, 153, 0, 0.5);
|
| 368 |
+
}
|
| 369 |
+
.btn-secondary:hover {
|
| 370 |
+
transform: scale(1.05);
|
| 371 |
+
box-shadow: 0 0 12px rgba(255, 153, 0, 0.7);
|
| 372 |
+
background-image: linear-gradient(to right, #FF6600, #CC4700);
|
| 373 |
+
}
|
| 374 |
+
textarea {
|
| 375 |
+
width: 100%;
|
| 376 |
+
padding: 0.75rem;
|
| 377 |
+
border-radius: 0.5rem;
|
| 378 |
+
background-color: rgba(0, 0, 0, 0.2);
|
| 379 |
+
color: #ffffff;
|
| 380 |
+
border: 1px solid #4a5568;
|
| 381 |
+
font-size: 1rem;
|
| 382 |
+
transition: border-color 0.3s ease;
|
| 383 |
+
min-height: 8rem;
|
| 384 |
+
}
|
| 385 |
+
textarea:focus {
|
| 386 |
+
outline: none;
|
| 387 |
+
border-color: #00FF00;
|
| 388 |
+
box-shadow: 0 0 5px rgba(0, 255, 0, 0.7);
|
| 389 |
+
}
|
| 390 |
+
.output-box {
|
| 391 |
+
background-color: rgba(0, 0, 0, 0.2);
|
| 392 |
+
border: 1px solid #4a5568;
|
| 393 |
+
border-radius: 0.5rem;
|
| 394 |
+
padding: 1rem;
|
| 395 |
+
overflow-x: auto;
|
| 396 |
+
color: #ffffff;
|
| 397 |
+
font-size: 1rem;
|
| 398 |
+
white-space: pre-wrap;
|
| 399 |
+
}
|
| 400 |
+
#json-editor {
|
| 401 |
+
background-color: rgba(0, 0, 0, 0.2);
|
| 402 |
+
color: #ffffff;
|
| 403 |
+
border: 1px solid #4a5568;
|
| 404 |
+
border-radius: 0.5rem;
|
| 405 |
+
padding: 1rem;
|
| 406 |
+
font-size: 1rem;
|
| 407 |
+
min-height: 20rem;
|
| 408 |
+
}
|
| 409 |
+
#json-editor:focus {
|
| 410 |
+
outline: none;
|
| 411 |
+
border-color: #00FF00;
|
| 412 |
+
box-shadow: 0 0 5px rgba(0, 255, 0, 0.7);
|
| 413 |
+
}
|
| 414 |
+
#url-input {
|
| 415 |
+
background-color: rgba(0, 0, 0, 0.2);
|
| 416 |
+
color: #ffffff;
|
| 417 |
+
border: 1px solid #4a5568;
|
| 418 |
+
border-radius: 0.5rem;
|
| 419 |
+
padding: 1rem;
|
| 420 |
+
font-size: 1rem;
|
| 421 |
+
min-height: 8rem;
|
| 422 |
+
}
|
| 423 |
+
#url-input:focus {
|
| 424 |
+
outline: none;
|
| 425 |
+
border-color: #00FF00;
|
| 426 |
+
box-shadow: 0 0 5px rgba(0, 255, 0, 0.7);
|
| 427 |
+
}
|
| 428 |
+
#output-text{
|
| 429 |
+
background-color: rgba(0, 0, 0, 0.2);
|
| 430 |
+
color: #ffffff;
|
| 431 |
+
border: 1px solid #4a5568;
|
| 432 |
+
border-radius: 0.5rem;
|
| 433 |
+
padding: 1rem;
|
| 434 |
+
font-size: 1rem;
|
| 435 |
+
}
|
| 436 |
"""
|
|
|
|
| 437 |
with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
|
| 438 |
gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
|
| 439 |
|
| 440 |
+
with gr.Tab("URL Processing") as url_tab:
|
| 441 |
url_input = gr.Textbox(
|
| 442 |
label="Enter URLs (comma or newline separated)",
|
| 443 |
lines=5,
|
| 444 |
+
placeholder="https://example1.com\nhttps://example2.com",
|
| 445 |
+
interactive=True,
|
| 446 |
+
elem_id="url-input"
|
| 447 |
)
|
| 448 |
|
| 449 |
+
with gr.Tab("File Input") as file_tab:
|
| 450 |
file_input = gr.File(
|
| 451 |
label="Upload text file or ZIP archive",
|
| 452 |
file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
|
| 453 |
)
|
| 454 |
|
| 455 |
+
with gr.Tab("Text Input") as text_tab:
|
| 456 |
text_input = gr.Textbox(
|
| 457 |
label="Raw Text Input",
|
| 458 |
lines=5,
|
| 459 |
+
placeholder="Paste your text here...",
|
| 460 |
+
interactive=True
|
| 461 |
)
|
| 462 |
|
| 463 |
+
with gr.Tab("JSON Editor") as json_tab:
|
| 464 |
json_editor = gr.Textbox(
|
| 465 |
label="JSON Editor",
|
| 466 |
lines=20,
|
| 467 |
placeholder="View and edit your JSON data here...",
|
| 468 |
interactive=True,
|
| 469 |
+
elem_id="json-editor"
|
| 470 |
)
|
| 471 |
|
| 472 |
+
with gr.Tab("Scratchpad") as scratchpad_tab:
|
| 473 |
scratchpad = gr.Textbox(
|
| 474 |
label="Scratchpad",
|
| 475 |
lines=10,
|
|
|
|
| 480 |
process_btn = gr.Button("Process Input", variant="primary")
|
| 481 |
qr_btn = gr.Button("Generate QR Code", variant="secondary")
|
| 482 |
|
| 483 |
+
output_text = gr.Textbox(label="Processing Results", interactive=False, elem_id="output-text")
|
| 484 |
output_file = gr.File(label="Processed Output")
|
| 485 |
qr_output = gr.Image(label="QR Code", type="filepath") # To display the generated QR code
|
| 486 |
|
|
|
|
| 490 |
processor = URLProcessor()
|
| 491 |
file_processor = FileProcessor()
|
| 492 |
results = []
|
|
|
|
| 493 |
# Process URLs
|
| 494 |
if urls:
|
| 495 |
url_list = re.split(r'[,\n]', urls)
|
| 496 |
url_list = [url.strip() for url in url_list if url.strip()]
|
|
|
|
| 497 |
for url in url_list:
|
| 498 |
validation = processor.validate_url(url)
|
| 499 |
if validation.get('is_valid'):
|
|
|
|
| 505 |
'content': content,
|
| 506 |
'timestamp': datetime.now().isoformat()
|
| 507 |
})
|
|
|
|
| 508 |
# Process files
|
| 509 |
if file:
|
| 510 |
results.extend(file_processor.process_file(file))
|
|
|
|
| 511 |
# Process text input
|
| 512 |
if text:
|
| 513 |
cleaned_text = processor.advanced_text_cleaning(text)
|
|
|
|
| 516 |
'content': cleaned_text,
|
| 517 |
'timestamp': datetime.now().isoformat()
|
| 518 |
})
|
|
|
|
| 519 |
# Generate output
|
| 520 |
if results:
|
| 521 |
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
| 522 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 523 |
output_path = output_dir / f'processed_{int(time.time())}.json'
|
|
|
|
| 524 |
with open(output_path, 'w', encoding='utf-8') as f:
|
| 525 |
json.dump(results, f, ensure_ascii=False, indent=2)
|
|
|
|
| 526 |
summary = f"Processed {len(results)} items successfully!"
|
| 527 |
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
| 528 |
return str(output_path), summary, json_data # Return JSON for editor
|
| 529 |
else:
|
| 530 |
return None, "No valid content to process.", ""
|
|
|
|
| 531 |
except Exception as e:
|
| 532 |
logger.error(f"Processing error: {e}")
|
| 533 |
return None, f"Error: {str(e)}", ""
|
| 534 |
|
| 535 |
+
def generate_qr_code(json_data):
|
| 536 |
+
"""Generate QR code from JSON data."""
|
| 537 |
+
if not json_data:
|
| 538 |
+
return "No data to encode."
|
| 539 |
+
qr_file = generate_qr(json_data)
|
| 540 |
+
return qr_file
|
| 541 |
+
|
| 542 |
+
process_btn.click(
|
| 543 |
+
process_all_inputs,
|
| 544 |
+
inputs=[url_input, file_input, text_input, scratchpad],
|
| 545 |
+
outputs=[output_file, output_text, json_editor]
|
| 546 |
+
)
|
| 547 |
+
qr_btn.click(generate_qr_code, inputs=[json_editor], outputs=[qr_output])
|
| 548 |
+
|
| 549 |
+
gr.Markdown("""
|
| 550 |
+
### Usage Guidelines
|
| 551 |
+
- **URL Processing**: Enter valid HTTP/HTTPS URLs, separated by commas or newlines.
|
| 552 |
+
- **File Input**: Upload text files or ZIP archives containing text files.
|
| 553 |
+
- **Text Input**: Paste text directly for processing.
|
| 554 |
+
- **JSON Editor**: View the processed data in JSON format. This is automatically updated after processing.
|
| 555 |
+
- **Scratchpad**: Use this area for temporary notes or text snippets.
|
| 556 |
+
- Click "Process Input" to analyze the data. The results will be available for download and in the JSON Editor.
|
| 557 |
+
- Click "Generate QR Code" to create a QR code from the JSON data.
|
| 558 |
+
""")
|
| 559 |
+
return interface
|
| 560 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
def main():
|
| 563 |
# Configure system settings
|
| 564 |
mimetypes.init()
|
|
|
|
| 565 |
# Create and launch interface
|
| 566 |
interface = create_interface()
|
|
|
|
| 567 |
# Launch with proper configuration
|
| 568 |
interface.launch(
|
| 569 |
server_name="0.0.0.0",
|
|
|
|
| 574 |
debug=True
|
| 575 |
)
|
| 576 |
|
| 577 |
+
|
| 578 |
if __name__ == "__main__":
|
| 579 |
main()
|