Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -61,8 +61,16 @@ class URLProcessor:
|
|
| 61 |
return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
|
| 62 |
|
| 63 |
def fetch_content(self, url: str) -> Optional[Dict]:
|
| 64 |
-
"""
|
| 65 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
logger.info(f"Fetching content from URL: {url}")
|
| 67 |
response = self.session.get(url, timeout=self.timeout)
|
| 68 |
response.raise_for_status()
|
|
@@ -101,30 +109,61 @@ class URLProcessor:
|
|
| 101 |
url_list = [url.strip() for url in url_list if url.strip()]
|
| 102 |
|
| 103 |
for url in url_list:
|
| 104 |
-
logger.info(f"Processing URL: {url}")
|
| 105 |
validation = processor.validate_url(url)
|
| 106 |
-
|
| 107 |
if validation.get('is_valid'):
|
| 108 |
-
logger.info(f"URL {url} is valid, fetching content...")
|
| 109 |
content = processor.fetch_content(url)
|
| 110 |
-
|
| 111 |
if content:
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
else:
|
| 117 |
-
logger.
|
| 118 |
|
| 119 |
-
#
|
| 120 |
-
if
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
-
return results, [], None
|
| 125 |
except Exception as e:
|
| 126 |
-
logger.error(f"
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
def _handle_google_drive(self, url: str) -> Optional[Dict]:
|
| 130 |
"""Process Google Drive file links"""
|
|
|
|
| 61 |
return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
|
| 62 |
|
| 63 |
def fetch_content(self, url: str) -> Optional[Dict]:
|
| 64 |
+
"""Universal content fetcher with special case handling"""
|
| 65 |
try:
|
| 66 |
+
# Google Drive document handling
|
| 67 |
+
if 'drive.google.com' in url:
|
| 68 |
+
return self._handle_google_drive(url)
|
| 69 |
+
|
| 70 |
+
# Google Calendar ICS handling
|
| 71 |
+
if 'calendar.google.com' in url and ' ics' in url:
|
| 72 |
+
return self._handle_google_calendar(url)
|
| 73 |
+
|
| 74 |
logger.info(f"Fetching content from URL: {url}")
|
| 75 |
response = self.session.get(url, timeout=self.timeout)
|
| 76 |
response.raise_for_status()
|
|
|
|
| 109 |
url_list = [url.strip() for url in url_list if url.strip()]
|
| 110 |
|
| 111 |
for url in url_list:
|
|
|
|
| 112 |
validation = processor.validate_url(url)
|
|
|
|
| 113 |
if validation.get('is_valid'):
|
|
|
|
| 114 |
content = processor.fetch_content(url)
|
|
|
|
| 115 |
if content:
|
| 116 |
+
# Convert HTML content to a proper JSON object
|
| 117 |
+
url_data = {
|
| 118 |
+
'source': 'url',
|
| 119 |
+
'url': url,
|
| 120 |
+
'content': content.get('content', ''),
|
| 121 |
+
'content_type': content.get('content_type', ''),
|
| 122 |
+
'timestamp': datetime.now().isoformat()
|
| 123 |
+
}
|
| 124 |
+
results.append(url_data)
|
| 125 |
else:
|
| 126 |
+
logger.warning(f"Invalid URL: {url} - {validation.get('message')}")
|
| 127 |
|
| 128 |
+
# Process files if provided
|
| 129 |
+
if file:
|
| 130 |
+
file_results = file_processor.process_file(file)
|
| 131 |
+
if file_results:
|
| 132 |
+
results.extend(file_results)
|
| 133 |
+
|
| 134 |
+
# Generate QR codes
|
| 135 |
+
if results:
|
| 136 |
+
if combine:
|
| 137 |
+
combined_data = {
|
| 138 |
+
'type': 'combined_data',
|
| 139 |
+
'items': results,
|
| 140 |
+
'timestamp': datetime.now().isoformat()
|
| 141 |
+
}
|
| 142 |
+
qr_paths = generate_qr_code(combined_data, combined=True)
|
| 143 |
+
else:
|
| 144 |
+
qr_paths = []
|
| 145 |
+
for item in results:
|
| 146 |
+
item_paths = generate_qr_code(item, combined=True)
|
| 147 |
+
if item_paths:
|
| 148 |
+
qr_paths.extend(item_paths)
|
| 149 |
+
|
| 150 |
+
if qr_paths:
|
| 151 |
+
return (
|
| 152 |
+
results,
|
| 153 |
+
[str(path) for path in qr_paths],
|
| 154 |
+
f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR code(s)!"
|
| 155 |
+
)
|
| 156 |
+
else:
|
| 157 |
+
return None, [], "❌ Failed to generate QR codes. Please check the input data."
|
| 158 |
+
else:
|
| 159 |
+
return None, [], "⚠️ No valid content to process. Please provide some input data."
|
| 160 |
|
|
|
|
| 161 |
except Exception as e:
|
| 162 |
+
logger.error(f"Processing error: {e}")
|
| 163 |
+
import traceback
|
| 164 |
+
logger.error(traceback.format_exc()) # Print the full stack trace
|
| 165 |
+
return None, [], f"❌ Error: {str(e)}"
|
| 166 |
+
|
| 167 |
|
| 168 |
def _handle_google_drive(self, url: str) -> Optional[Dict]:
|
| 169 |
"""Process Google Drive file links"""
|