Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -308,8 +308,11 @@ class EnhancedFileProcessor:
|
|
308 |
if file_info.file_size > 0 and not file_info.filename.endswith('/'):
|
309 |
extracted_path = extract_to / file_info.filename
|
310 |
if extracted_path.suffix.lower() in self.supported_extensions:
|
311 |
-
|
312 |
-
|
|
|
|
|
|
|
313 |
# Handle TAR archives
|
314 |
elif archive_path.lower().endswith(('.tar', '.tar.gz', '.tgz')):
|
315 |
try:
|
@@ -317,10 +320,13 @@ class EnhancedFileProcessor:
|
|
317 |
for member in tar_ref.getmembers():
|
318 |
if member.isfile():
|
319 |
extracted_path = extract_to / member.name
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
|
|
|
|
|
|
324 |
except tarfile.TarError as e:
|
325 |
logger.error(f"Error processing TAR archive: {e}")
|
326 |
# Handle GZIP archives (single file)
|
@@ -606,7 +612,7 @@ def create_qr_sequence_visualizer(output_gallery):
|
|
606 |
qr_visualization.update(value=composite_image)
|
607 |
|
608 |
# Event handlers
|
609 |
-
visualize_btn.click(process_qr_codes, inputs=qr_input, outputs=[visualization_status,
|
610 |
reset_btn.click(lambda: (None, None, None, "⚠️ Visualization reset."), outputs=[visualization_status, qr_visualization, qr_preview])
|
611 |
|
612 |
# Integrate the visualizer into the main application
|
@@ -779,120 +785,120 @@ def create_modern_interface():
|
|
779 |
return ""
|
780 |
|
781 |
def process_inputs(urls, files, text, combine):
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
|
788 |
-
# Process JSON input
|
789 |
-
if text and text.strip():
|
790 |
-
try:
|
791 |
-
json_data = json.loads(text)
|
792 |
-
if isinstance(json_data, list):
|
793 |
-
results.extend(json_data)
|
794 |
-
else:
|
795 |
-
results.append(json_data)
|
796 |
-
except json.JSONDecodeError as e:
|
797 |
-
return None, [], f"❌ Invalid JSON format: {str(e)}"
|
798 |
-
|
799 |
-
# Process URLs
|
800 |
-
if urls and urls.strip():
|
801 |
-
url_list = re.split(r'[,\n]', urls)
|
802 |
-
url_list = [url.strip() for url in url_list if url.strip()]
|
803 |
-
for url in url_list:
|
804 |
-
validation = url_processor.validate_url(url)
|
805 |
-
if validation['is_valid']:
|
806 |
-
content = url_processor.fetch_content(url)
|
807 |
-
if content:
|
808 |
-
results.append({
|
809 |
-
'source': 'url',
|
810 |
-
'url': url,
|
811 |
-
'content': content,
|
812 |
-
'timestamp': datetime.now().isoformat()
|
813 |
-
})
|
814 |
-
|
815 |
-
# Process files
|
816 |
-
if files:
|
817 |
-
for file in files:
|
818 |
-
file_results = file_processor.process_file(file)
|
819 |
-
if file_results:
|
820 |
-
results.extend(file_results)
|
821 |
-
|
822 |
-
# Generate QR codes
|
823 |
-
if results:
|
824 |
-
qr_paths = generate_qr_codes(results, combine)
|
825 |
-
if qr_paths:
|
826 |
-
return (
|
827 |
-
results,
|
828 |
-
[str(path) for path in qr_paths],
|
829 |
-
f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR codes!"
|
830 |
-
)
|
831 |
-
else:
|
832 |
-
return None, [], "❌ Failed to generate QR codes"
|
833 |
-
else:
|
834 |
-
return None, [], "⚠️ No valid content to process"
|
835 |
-
except Exception as e:
|
836 |
-
logger.error(f"Processing error: {e}")
|
837 |
-
return None, [], f"❌ Error: {str(e)}"
|
838 |
-
|
839 |
-
# Set up event handlers
|
840 |
-
example_btn.click(load_example, outputs=[text_input])
|
841 |
-
clear_btn.click(clear_input, outputs=[text_input])
|
842 |
-
process_btn.click(
|
843 |
-
process_inputs,
|
844 |
-
inputs=[url_input, file_input, text_input, combine_data],
|
845 |
-
outputs=[output_json, output_gallery, output_text]
|
846 |
-
)
|
847 |
|
848 |
-
# Add the visualization button and its click event within the interface scope
|
849 |
-
visualize_btn = gr.Button("π Visualize QR Codes")
|
850 |
-
visualize_btn.click(visualize_qr_codes, inputs=output_gallery, outputs=None)
|
851 |
|
852 |
-
|
853 |
-
|
854 |
-
|
855 |
-
|
856 |
-
|
857 |
-
|
858 |
-
|
859 |
-
|
860 |
-
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
865 |
-
|
866 |
-
|
867 |
-
|
868 |
-
|
869 |
-
|
870 |
-
|
871 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
872 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
873 |
def main():
|
874 |
-
|
875 |
-
|
876 |
-
|
877 |
-
|
878 |
-
|
879 |
-
|
880 |
-
|
881 |
-
|
882 |
-
|
883 |
-
|
884 |
-
|
885 |
-
|
886 |
-
|
887 |
-
|
888 |
-
|
889 |
-
|
890 |
-
|
891 |
-
|
892 |
-
|
893 |
-
|
894 |
-
|
895 |
-
|
896 |
-
|
897 |
-
|
898 |
-
main()
|
|
|
308 |
if file_info.file_size > 0 and not file_info.filename.endswith('/'):
|
309 |
extracted_path = extract_to / file_info.filename
|
310 |
if extracted_path.suffix.lower() in self.supported_extensions:
|
311 |
+
try:
|
312 |
+
with open(extracted_path, 'rb') as f:
|
313 |
+
dataset.extend(self._process_single_file(f))
|
314 |
+
except Exception as e:
|
315 |
+
logger.error(f"Error processing extracted file {extracted_path}: {e}")
|
316 |
# Handle TAR archives
|
317 |
elif archive_path.lower().endswith(('.tar', '.tar.gz', '.tgz')):
|
318 |
try:
|
|
|
320 |
for member in tar_ref.getmembers():
|
321 |
if member.isfile():
|
322 |
extracted_path = extract_to / member.name
|
323 |
+
try:
|
324 |
+
tar_ref.extract(member, path=extract_to)
|
325 |
+
if extracted_path.suffix.lower() in self.supported_extensions:
|
326 |
+
with open(extracted_path, 'rb') as f:
|
327 |
+
dataset.extend(self._process_single_file(f))
|
328 |
+
except Exception as e:
|
329 |
+
logger.error(f"Error extracting or processing TAR member {member.name}: {e}")
|
330 |
except tarfile.TarError as e:
|
331 |
logger.error(f"Error processing TAR archive: {e}")
|
332 |
# Handle GZIP archives (single file)
|
|
|
612 |
qr_visualization.update(value=composite_image)
|
613 |
|
614 |
# Event handlers
|
615 |
+
visualize_btn.click(process_qr_codes, inputs=qr_input, outputs=[visualization_status, qr_paths, qr_preview])
|
616 |
reset_btn.click(lambda: (None, None, None, "⚠️ Visualization reset."), outputs=[visualization_status, qr_visualization, qr_preview])
|
617 |
|
618 |
# Integrate the visualizer into the main application
|
|
|
785 |
return ""
|
786 |
|
787 |
def process_inputs(urls, files, text, combine):
|
788 |
+
"""Process all inputs and generate QR codes"""
|
789 |
+
try:
|
790 |
+
results = []
|
791 |
+
url_processor = EnhancedURLProcessor()
|
792 |
+
file_processor = EnhancedFileProcessor()
|
793 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
794 |
|
|
|
|
|
|
|
795 |
|
796 |
+
# Process JSON input
|
797 |
+
if text and text.strip():
|
798 |
+
try:
|
799 |
+
json_data = json.loads(text)
|
800 |
+
if isinstance(json_data, list):
|
801 |
+
results.extend(json_data)
|
802 |
+
else:
|
803 |
+
results.append(json_data)
|
804 |
+
except json.JSONDecodeError as e:
|
805 |
+
return None, [], f"❌ Invalid JSON format: {str(e)}"
|
806 |
+
|
807 |
+
# Process URLs
|
808 |
+
if urls and urls.strip():
|
809 |
+
url_list = re.split(r'[,\n]', urls)
|
810 |
+
url_list = [url.strip() for url in url_list if url.strip()]
|
811 |
+
for url in url_list:
|
812 |
+
validation = url_processor.validate_url(url)
|
813 |
+
if validation['is_valid']:
|
814 |
+
content = url_processor.fetch_content(url)
|
815 |
+
if content:
|
816 |
+
results.append({
|
817 |
+
'source': 'url',
|
818 |
+
'url': url,
|
819 |
+
'content': content,
|
820 |
+
'timestamp': datetime.now().isoformat()
|
821 |
+
})
|
822 |
|
823 |
+
# Process files
|
824 |
+
if files:
|
825 |
+
for file in files:
|
826 |
+
file_results = file_processor.process_file(file)
|
827 |
+
if file_results:
|
828 |
+
results.extend(file_results)
|
829 |
+
|
830 |
+
# Generate QR codes
|
831 |
+
if results:
|
832 |
+
qr_paths = generate_qr_codes(results, combine)
|
833 |
+
if qr_paths:
|
834 |
+
return (
|
835 |
+
results,
|
836 |
+
[str(path) for path in qr_paths],
|
837 |
+
f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR codes!"
|
838 |
+
)
|
839 |
+
else:
|
840 |
+
return None, [], "❌ Failed to generate QR codes"
|
841 |
+
else:
|
842 |
+
return None, [], "⚠️ No valid content to process"
|
843 |
+
except Exception as e:
|
844 |
+
logger.error(f"Processing error: {e}")
|
845 |
+
return None, [], f"❌ Error: {str(e)}"
|
846 |
+
|
847 |
+
# Set up event handlers
|
848 |
+
example_btn.click(load_example, outputs=[text_input])
|
849 |
+
clear_btn.click(clear_input, outputs=[text_input])
|
850 |
+
process_btn.click(
|
851 |
+
process_inputs,
|
852 |
+
inputs=[url_input, file_input, text_input, combine_data],
|
853 |
+
outputs=[output_json, output_gallery, output_text]
|
854 |
+
)
|
855 |
+
|
856 |
+
# Add the visualization button and its click event within the interface scope
|
857 |
+
#visualize_btn = gr.Button("π Visualize QR Codes")
|
858 |
+
#visualize_btn.click(visualize_qr_codes, inputs=output_gallery, outputs=None)
|
859 |
+
|
860 |
+
# Add helpful documentation
|
861 |
+
gr.Markdown("""
|
862 |
+
### π Features
|
863 |
+
- **Complete URL Scraping**: Extracts every character from web pages
|
864 |
+
- **Advanced File Processing**: Full content extraction from various text-based files and common archives. Supports flexible JSON handling.
|
865 |
+
- **Smart JSON Handling**: Processes any size JSON with automatic chunking, either via direct input or file upload.
|
866 |
+
- **Sequential QR Codes**: Maintains data integrity across multiple codes
|
867 |
+
- **Modern Design**: Clean, responsive interface with visual feedback
|
868 |
+
### 💡 Tips
|
869 |
+
1. **URLs**: Enter multiple URLs separated by commas or newlines
|
870 |
+
2. **Files**: Upload any type of file. The processor will attempt to handle supported text-based files, archives (.zip, .tar, .gz), and JSON files.
|
871 |
+
3. **JSON**: Use the example button to see the expected format or upload a .json file. The system will also try to detect JSON content in other file types.
|
872 |
+
4. **QR Codes**: Choose whether to combine data into sequential codes
|
873 |
+
5. **Processing**: Monitor the status for real-time feedback
|
874 |
+
### 🎨 Output
|
875 |
+
- Generated QR codes are saved in the `output/qr_codes` directory
|
876 |
+
- Each QR code contains metadata for proper sequencing
|
877 |
+
- Hover over QR codes in the gallery to see details
|
878 |
+
""")
|
879 |
+
return interface
|
880 |
def main():
|
881 |
+
"""Initialize and launch the application"""
|
882 |
+
try:
|
883 |
+
# Configure system settings
|
884 |
+
mimetypes.init()
|
885 |
+
|
886 |
+
# Create and launch interface
|
887 |
+
interface = create_modern_interface()
|
888 |
+
|
889 |
+
# Add the QR sequence visualizer tab
|
890 |
+
#with interface:
|
891 |
+
create_qr_sequence_visualizer(None) # output_gallery might not be relevant here
|
892 |
+
|
893 |
+
# Launch with configuration
|
894 |
+
interface.launch(
|
895 |
+
share=False,
|
896 |
+
debug=False,
|
897 |
+
show_error=True,
|
898 |
+
show_api=False
|
899 |
+
)
|
900 |
+
except Exception as e:
|
901 |
+
logger.error(f"Application startup error: {e}")
|
902 |
+
raise
|
903 |
+
if __name__ == "__main__":
|
904 |
+
main()
|
|