Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -287,6 +287,144 @@ def generate_qr(json_data):
|
|
287 |
def create_interface():
|
288 |
"""Create a comprehensive Gradio interface with advanced features"""
|
289 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
css = """
|
291 |
.container { max-width: 1200px; margin: auto; }
|
292 |
.warning { background-color: #fff3cd; color: #856404; }
|
@@ -346,10 +484,12 @@ def create_interface():
|
|
346 |
processor = URLProcessor()
|
347 |
file_processor = FileProcessor()
|
348 |
results = []
|
|
|
349 |
# Process URLs
|
350 |
if urls:
|
351 |
url_list = re.split(r'[,\n]', urls)
|
352 |
url_list = [url.strip() for url in url_list if url.strip()]
|
|
|
353 |
for url in url_list:
|
354 |
validation = processor.validate_url(url)
|
355 |
if validation.get('is_valid'):
|
@@ -361,9 +501,11 @@ def create_interface():
|
|
361 |
'content': content,
|
362 |
'timestamp': datetime.now().isoformat()
|
363 |
})
|
|
|
364 |
# Process files
|
365 |
if file:
|
366 |
results.extend(file_processor.process_file(file))
|
|
|
367 |
# Process text input
|
368 |
if text:
|
369 |
cleaned_text = processor.advanced_text_cleaning(text)
|
@@ -372,18 +514,22 @@ def create_interface():
|
|
372 |
'content': cleaned_text,
|
373 |
'timestamp': datetime.now().isoformat()
|
374 |
})
|
|
|
375 |
# Generate output
|
376 |
if results:
|
377 |
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
378 |
output_dir.mkdir(parents=True, exist_ok=True)
|
379 |
output_path = output_dir / f'processed_{int(time.time())}.json'
|
|
|
380 |
with open(output_path, 'w', encoding='utf-8') as f:
|
381 |
json.dump(results, f, ensure_ascii=False, indent=2)
|
|
|
382 |
summary = f"Processed {len(results)} items successfully!"
|
383 |
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
384 |
return str(output_path), summary, json_data # Return JSON for editor
|
385 |
else:
|
386 |
return None, "No valid content to process.", ""
|
|
|
387 |
except Exception as e:
|
388 |
logger.error(f"Processing error: {e}")
|
389 |
return None, f"Error: {str(e)}", ""
|
@@ -410,7 +556,7 @@ def create_interface():
|
|
410 |
### Usage Guidelines
|
411 |
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
412 |
- **File Input**: Upload text files or ZIP archives
|
413 |
-
- **Text Input**: Direct text processing
|
414 |
- **JSON Editor**: View and edit your JSON data
|
415 |
- **Scratchpad**: Quick notes or text collections
|
416 |
- Advanced cleaning and validation included
|
|
|
287 |
def create_interface():
|
288 |
"""Create a comprehensive Gradio interface with advanced features"""
|
289 |
|
290 |
+
css = """
|
291 |
+
.container { max-width: 1200px; margin: auto; }
|
292 |
+
.warning { background-color: #fff3cd; color: #856404; }
|
293 |
+
.error { background-color: #f8d7da; color: #721c24; }
|
294 |
+
"""
|
295 |
+
|
296 |
+
with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
|
297 |
+
gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
|
298 |
+
|
299 |
+
with gr.Tab("URL Processing"):
|
300 |
+
url_input = gr.Textbox(
|
301 |
+
label="Enter URLs (comma or newline separated)",
|
302 |
+
lines=5,
|
303 |
+
placeholder="https://example1.com\nhttps://example2.com"
|
304 |
+
)
|
305 |
+
|
306 |
+
with gr.Tab("File Input"):
|
307 |
+
file_input = gr.File(
|
308 |
+
label="Upload text file or ZIP archive",
|
309 |
+
file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
|
310 |
+
)
|
311 |
+
|
312 |
+
with gr.Tab("Text Input"):
|
313 |
+
text_input = gr.Textbox(
|
314 |
+
label="Raw Text Input",
|
315 |
+
lines=5,
|
316 |
+
placeholder="Paste your text here..."
|
317 |
+
)
|
318 |
+
|
319 |
+
with gr.Tab("JSON Editor"):
|
320 |
+
json_editor = gr.Textbox(
|
321 |
+
label="JSON Editor",
|
322 |
+
lines=20,
|
323 |
+
placeholder="View and edit your JSON data here...",
|
324 |
+
interactive=True,
|
325 |
+
elem_id="json-editor" # Optional: for custom styling
|
326 |
+
)
|
327 |
+
|
328 |
+
with gr.Tab("Scratchpad"):
|
329 |
+
scratchpad = gr.Textbox(
|
330 |
+
label="Scratchpad",
|
331 |
+
lines=10,
|
332 |
+
placeholder="Quick notes or text collections...",
|
333 |
+
interactive=True
|
334 |
+
)
|
335 |
+
|
336 |
+
process_btn = gr.Button("Process Input", variant="primary")
|
337 |
+
qr_btn = gr.Button("Generate QR Code", variant="secondary")
|
338 |
+
|
339 |
+
output_text = gr.Textbox(label="Processing Results", interactive=False)
|
340 |
+
output_file = gr.File(label="Processed Output")
|
341 |
+
qr_output = gr.Image(label="QR Code", type="filepath") # To display the generated QR code
|
342 |
+
|
343 |
+
def process_all_inputs(urls, file, text, notes):
|
344 |
+
"""Process all input types with progress tracking"""
|
345 |
+
try:
|
346 |
+
processor = URLProcessor()
|
347 |
+
file_processor = FileProcessor()
|
348 |
+
results = []
|
349 |
+
|
350 |
+
# Process URLs
if urls:
|
351 |
+
url_list = re.split(r'[,\n]', urls)
|
352 |
+
url_list = [url.strip() for url in url_list if url.strip()]
|
353 |
+
for url in url_list:
|
354 |
+
validation = processor.validate_url(url)
|
355 |
+
if validation.get('is_valid'):
|
356 |
+
content = processor.fetch_content(url)
|
357 |
+
if content:
|
358 |
+
results.append({
|
359 |
+
'source': 'url',
|
360 |
+
'url': url,
|
361 |
+
'content': content,
|
362 |
+
'timestamp': datetime.now().isoformat()
|
363 |
+
})
|
364 |
+
|
365 |
+
# Process files
|
366 |
+
if file:
|
367 |
+
results.extend(file_processor.process_file(file))
|
368 |
+
|
369 |
+
# Process text input
|
370 |
+
if text:
|
371 |
+
cleaned_text = processor.advanced_text_cleaning(text)
|
372 |
+
results.append({
|
373 |
+
'source': 'direct_input',
|
374 |
+
'content': cleaned_text,
|
375 |
+
'timestamp': datetime.now().isoformat()
|
376 |
+
})
|
377 |
+
|
378 |
+
# Generate output
|
379 |
+
if results:
|
380 |
+
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
381 |
+
output_dir.mkdir(parents=True, exist_ok=True)
|
382 |
+
output_path = output_dir / f'processed_{int(time.time())}.json'
|
383 |
+
|
384 |
+
with open(output_path, 'w', encoding='utf-8') as f:
|
385 |
+
json.dump(results, f, ensure_ascii=False, indent=2)
|
386 |
+
|
387 |
+
summary = f"Processed {len(results)} items successfully!"
|
388 |
+
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
389 |
+
return str(output_path), summary, json_data # Return JSON for editor
|
390 |
+
else:
|
391 |
+
return None, "No valid content to process.", ""
|
392 |
+
|
393 |
+
except Exception as e:
|
394 |
+
logger.error(f"Processing error: {e}")
|
395 |
+
return None, f"Error: {str(e)}", ""
|
396 |
+
|
397 |
+
def generate_qr_code(json_data):
|
398 |
+
"""Generate QR code from JSON data and return the file path."""
|
399 |
+
if json_data:
|
400 |
+
return generate_qr(json_data)
|
401 |
+
return None
|
402 |
+
|
403 |
+
process_btn.click(
|
404 |
+
process_all_inputs,
|
405 |
+
inputs=[url_input, file_input, text_input, scratchpad],
|
406 |
+
outputs=[output_file, output_text, json_editor] # Update outputs to include JSON editor
|
407 |
+
)
|
408 |
+
|
409 |
+
qr_btn.click(
|
410 |
+
generate_qr_code,
|
411 |
+
inputs=json_editor,
|
412 |
+
outputs=qr_output
|
413 |
+
)
|
414 |
+
|
415 |
+
gr.Markdown("""
|
416 |
+
### Usage Guidelines
|
417 |
+
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
418 |
+
- **File Input**: Upload text files or ZIP archives
|
419 |
+
- **Text Input**: Direct text processing
|
420 |
+
- **JSON Editor**: View and edit your JSON data
|
421 |
+
- **Scratchpad**: Quick notes or text collections
|
422 |
+
- Advanced cleaning and validation included
|
423 |
+
""")
|
424 |
+
return interface
|
425 |
+
|
426 |
+
def create_interface():
|
427 |
+
"""Create a comprehensive Gradio interface with advanced features"""
|
428 |
css = """
|
429 |
.container { max-width: 1200px; margin: auto; }
|
430 |
.warning { background-color: #fff3cd; color: #856404; }
|
|
|
484 |
processor = URLProcessor()
|
485 |
file_processor = FileProcessor()
|
486 |
results = []
|
487 |
+
|
488 |
# Process URLs
|
489 |
if urls:
|
490 |
url_list = re.split(r'[,\n]', urls)
|
491 |
url_list = [url.strip() for url in url_list if url.strip()]
|
492 |
+
|
493 |
for url in url_list:
|
494 |
validation = processor.validate_url(url)
|
495 |
if validation.get('is_valid'):
|
|
|
501 |
'content': content,
|
502 |
'timestamp': datetime.now().isoformat()
|
503 |
})
|
504 |
+
|
505 |
# Process files
|
506 |
if file:
|
507 |
results.extend(file_processor.process_file(file))
|
508 |
+
|
509 |
# Process text input
|
510 |
if text:
|
511 |
cleaned_text = processor.advanced_text_cleaning(text)
|
|
|
514 |
'content': cleaned_text,
|
515 |
'timestamp': datetime.now().isoformat()
|
516 |
})
|
517 |
+
|
518 |
# Generate output
|
519 |
if results:
|
520 |
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
521 |
output_dir.mkdir(parents=True, exist_ok=True)
|
522 |
output_path = output_dir / f'processed_{int(time.time())}.json'
|
523 |
+
|
524 |
with open(output_path, 'w', encoding='utf-8') as f:
|
525 |
json.dump(results, f, ensure_ascii=False, indent=2)
|
526 |
+
|
527 |
summary = f"Processed {len(results)} items successfully!"
|
528 |
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
529 |
return str(output_path), summary, json_data # Return JSON for editor
|
530 |
else:
|
531 |
return None, "No valid content to process.", ""
|
532 |
+
|
533 |
except Exception as e:
|
534 |
logger.error(f"Processing error: {e}")
|
535 |
return None, f"Error: {str(e)}", ""
|
|
|
556 |
### Usage Guidelines
|
557 |
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
558 |
- **File Input**: Upload text files or ZIP archives
|
559 |
+
- **Text Input**: Direct text processing
|
560 |
- **JSON Editor**: View and edit your JSON data
|
561 |
- **Scratchpad**: Quick notes or text collections
|
562 |
- Advanced cleaning and validation included
|