Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -61,8 +61,16 @@ class URLProcessor:
|
|
61 |
return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
|
62 |
|
63 |
def fetch_content(self, url: str) -> Optional[Dict]:
|
64 |
-
"""
|
65 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
logger.info(f"Fetching content from URL: {url}")
|
67 |
response = self.session.get(url, timeout=self.timeout)
|
68 |
response.raise_for_status()
|
@@ -101,30 +109,61 @@ class URLProcessor:
|
|
101 |
url_list = [url.strip() for url in url_list if url.strip()]
|
102 |
|
103 |
for url in url_list:
|
104 |
-
logger.info(f"Processing URL: {url}")
|
105 |
validation = processor.validate_url(url)
|
106 |
-
|
107 |
if validation.get('is_valid'):
|
108 |
-
logger.info(f"URL {url} is valid, fetching content...")
|
109 |
content = processor.fetch_content(url)
|
110 |
-
|
111 |
if content:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
116 |
else:
|
117 |
-
logger.
|
118 |
|
119 |
-
#
|
120 |
-
if
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
-
return results, [], None
|
125 |
except Exception as e:
|
126 |
-
logger.error(f"
|
127 |
-
|
|
|
|
|
|
|
128 |
|
129 |
def _handle_google_drive(self, url: str) -> Optional[Dict]:
|
130 |
"""Process Google Drive file links"""
|
|
|
61 |
return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
|
62 |
|
63 |
def fetch_content(self, url: str) -> Optional[Dict]:
|
64 |
+
"""Universal content fetcher with special case handling"""
|
65 |
try:
|
66 |
+
# Google Drive document handling
|
67 |
+
if 'drive.google.com' in url:
|
68 |
+
return self._handle_google_drive(url)
|
69 |
+
|
70 |
+
# Google Calendar ICS handling
|
71 |
+
if 'calendar.google.com' in url and ' ics' in url:
|
72 |
+
return self._handle_google_calendar(url)
|
73 |
+
|
74 |
logger.info(f"Fetching content from URL: {url}")
|
75 |
response = self.session.get(url, timeout=self.timeout)
|
76 |
response.raise_for_status()
|
|
|
109 |
url_list = [url.strip() for url in url_list if url.strip()]
|
110 |
|
111 |
for url in url_list:
|
|
|
112 |
validation = processor.validate_url(url)
|
|
|
113 |
if validation.get('is_valid'):
|
|
|
114 |
content = processor.fetch_content(url)
|
|
|
115 |
if content:
|
116 |
+
# Convert HTML content to a proper JSON object
|
117 |
+
url_data = {
|
118 |
+
'source': 'url',
|
119 |
+
'url': url,
|
120 |
+
'content': content.get('content', ''),
|
121 |
+
'content_type': content.get('content_type', ''),
|
122 |
+
'timestamp': datetime.now().isoformat()
|
123 |
+
}
|
124 |
+
results.append(url_data)
|
125 |
else:
|
126 |
+
logger.warning(f"Invalid URL: {url} - {validation.get('message')}")
|
127 |
|
128 |
+
# Process files if provided
|
129 |
+
if file:
|
130 |
+
file_results = file_processor.process_file(file)
|
131 |
+
if file_results:
|
132 |
+
results.extend(file_results)
|
133 |
+
|
134 |
+
# Generate QR codes
|
135 |
+
if results:
|
136 |
+
if combine:
|
137 |
+
combined_data = {
|
138 |
+
'type': 'combined_data',
|
139 |
+
'items': results,
|
140 |
+
'timestamp': datetime.now().isoformat()
|
141 |
+
}
|
142 |
+
qr_paths = generate_qr_code(combined_data, combined=True)
|
143 |
+
else:
|
144 |
+
qr_paths = []
|
145 |
+
for item in results:
|
146 |
+
item_paths = generate_qr_code(item, combined=True)
|
147 |
+
if item_paths:
|
148 |
+
qr_paths.extend(item_paths)
|
149 |
+
|
150 |
+
if qr_paths:
|
151 |
+
return (
|
152 |
+
results,
|
153 |
+
[str(path) for path in qr_paths],
|
154 |
+
f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR code(s)!"
|
155 |
+
)
|
156 |
+
else:
|
157 |
+
return None, [], "❌ Failed to generate QR codes. Please check the input data."
|
158 |
+
else:
|
159 |
+
return None, [], "⚠️ No valid content to process. Please provide some input data."
|
160 |
|
|
|
161 |
except Exception as e:
|
162 |
+
logger.error(f"Processing error: {e}")
|
163 |
+
import traceback
|
164 |
+
logger.error(traceback.format_exc()) # Print the full stack trace
|
165 |
+
return None, [], f"❌ Error: {str(e)}"
|
166 |
+
|
167 |
|
168 |
def _handle_google_drive(self, url: str) -> Optional[Dict]:
|
169 |
"""Process Google Drive file links"""
|