Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,6 @@ from fake_useragent import UserAgent
|
|
19 |
from cleantext import clean
|
20 |
import qrcode
|
21 |
import zipfile
|
22 |
-
import zipfile36 as zipfile
|
23 |
|
24 |
# Setup logging with detailed configuration
|
25 |
logging.basicConfig(
|
@@ -28,10 +27,10 @@ logging.basicConfig(
|
|
28 |
handlers=[
|
29 |
logging.StreamHandler(),
|
30 |
logging.FileHandler('app.log', encoding='utf-8')
|
31 |
-
]
|
32 |
-
)
|
33 |
logger = logging.getLogger(__name__)
|
34 |
|
|
|
35 |
class URLProcessor:
|
36 |
def __init__(self):
|
37 |
self.session = requests.Session()
|
@@ -44,7 +43,7 @@ class URLProcessor:
|
|
44 |
'Connection': 'keep-alive',
|
45 |
'Upgrade-Insecure-Requests': '1'
|
46 |
})
|
47 |
-
|
48 |
def advanced_text_cleaning(self, text: str) -> str:
|
49 |
"""Robust text cleaning with version compatibility"""
|
50 |
try:
|
@@ -74,7 +73,7 @@ class URLProcessor:
|
|
74 |
try:
|
75 |
if not validators.url(url):
|
76 |
return {'is_valid': False, 'message': 'Invalid URL format'}
|
77 |
-
|
78 |
response = self.session.head(url, timeout=self.timeout)
|
79 |
response.raise_for_status()
|
80 |
return {'is_valid': True, 'message': 'URL is valid and accessible'}
|
@@ -87,11 +86,11 @@ class URLProcessor:
|
|
87 |
# Google Drive document handling
|
88 |
if 'drive.google.com' in url:
|
89 |
return self._handle_google_drive(url)
|
90 |
-
|
91 |
# Google Calendar ICS handling
|
92 |
if 'calendar.google.com' in url and 'ical' in url:
|
93 |
return self._handle_google_calendar(url)
|
94 |
-
|
95 |
# Standard HTML processing
|
96 |
return self._fetch_html_content(url)
|
97 |
except Exception as e:
|
@@ -105,11 +104,11 @@ class URLProcessor:
|
|
105 |
if not file_id:
|
106 |
logger.error(f"Invalid Google Drive URL: {url}")
|
107 |
return None
|
108 |
-
|
109 |
direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
|
110 |
response = self.session.get(direct_url, timeout=self.timeout)
|
111 |
response.raise_for_status()
|
112 |
-
|
113 |
return {
|
114 |
'content': response.text,
|
115 |
'content_type': response.headers.get('Content-Type', ''),
|
@@ -138,20 +137,20 @@ class URLProcessor:
|
|
138 |
try:
|
139 |
response = self.session.get(url, timeout=self.timeout)
|
140 |
response.raise_for_status()
|
141 |
-
|
142 |
soup = BeautifulSoup(response.text, 'html.parser')
|
143 |
-
|
144 |
# Remove unwanted elements
|
145 |
for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
|
146 |
element.decompose()
|
147 |
-
|
148 |
# Extract main content
|
149 |
main_content = soup.find('main') or soup.find('article') or soup.body
|
150 |
-
|
151 |
# Clean and structure content
|
152 |
text_content = main_content.get_text(separator='\n', strip=True)
|
153 |
cleaned_content = self.advanced_text_cleaning(text_content)
|
154 |
-
|
155 |
return {
|
156 |
'content': cleaned_content,
|
157 |
'content_type': response.headers.get('Content-Type', ''),
|
@@ -160,14 +159,15 @@ class URLProcessor:
|
|
160 |
except Exception as e:
|
161 |
logger.error(f"HTML processing failed: {e}")
|
162 |
return None
|
163 |
-
|
|
|
164 |
class FileProcessor:
|
165 |
"""Class to handle file processing"""
|
166 |
-
|
167 |
def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
|
168 |
self.max_file_size = max_file_size
|
169 |
self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
|
170 |
-
|
171 |
def is_text_file(self, filepath: str) -> bool:
|
172 |
"""Check if file is a text file"""
|
173 |
try:
|
@@ -181,24 +181,20 @@ class FileProcessor:
|
|
181 |
"""Process uploaded file with enhanced error handling"""
|
182 |
if not file:
|
183 |
return []
|
184 |
-
|
185 |
dataset = []
|
186 |
try:
|
187 |
file_size = os.path.getsize(file.name)
|
188 |
if file_size > self.max_file_size:
|
189 |
logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
|
190 |
return []
|
191 |
-
|
192 |
with tempfile.TemporaryDirectory() as temp_dir:
|
193 |
if zipfile.is_zipfile(file.name):
|
194 |
dataset.extend(self._process_zip_file(file.name, temp_dir))
|
195 |
else:
|
196 |
dataset.extend(self._process_single_file(file))
|
197 |
-
|
198 |
except Exception as e:
|
199 |
logger.error(f"Error processing file: {str(e)}")
|
200 |
return []
|
201 |
-
|
202 |
return dataset
|
203 |
|
204 |
def _process_zip_file(self, zip_path, temp_dir):
|
@@ -217,29 +213,29 @@ class FileProcessor:
|
|
217 |
'timestamp': datetime.now().isoformat()
|
218 |
})
|
219 |
return result
|
220 |
-
|
221 |
def _process_single_file(self, file) -> List[Dict]:
|
222 |
try:
|
223 |
file_stat = os.stat(file.name)
|
224 |
-
|
225 |
# For very large files, read in chunks and summarize
|
226 |
if file_stat.st_size > 100 * 1024 * 1024: # 100MB
|
227 |
logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
|
228 |
-
|
229 |
# Read first and last 1MB for extremely large files
|
230 |
content = ""
|
231 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
232 |
content = f.read(1 * 1024 * 1024) # First 1MB
|
233 |
content += "\n...[Content truncated due to large file size]...\n"
|
234 |
-
|
235 |
# Seek to the last 1MB
|
236 |
f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
|
237 |
content += f.read() # Last 1MB
|
238 |
else:
|
239 |
# Regular file processing
|
240 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
241 |
-
content =f.read()
|
242 |
-
|
243 |
return [{
|
244 |
'source': 'file',
|
245 |
'filename': os.path.basename(file.name),
|
@@ -253,38 +249,41 @@ class FileProcessor:
|
|
253 |
except Exception as e:
|
254 |
logger.error(f"File processing error: {e}")
|
255 |
return []
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
|
|
|
|
268 |
|
269 |
def create_interface():
|
270 |
"""Create a comprehensive Gradio interface with advanced features"""
|
271 |
-
|
272 |
css = """
|
273 |
.container { max-width: 1200px; margin: auto; }
|
274 |
.warning { background-color: #fff3cd; color: #856404; }
|
275 |
.error { background-color: #f8d7da; color: #721c24; }
|
276 |
"""
|
277 |
-
|
278 |
with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
|
279 |
gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
|
280 |
-
|
281 |
with gr.Tab("URL Processing"):
|
282 |
url_input = gr.Textbox(
|
283 |
-
label="Enter URLs (comma or newline separated)",
|
284 |
lines=5,
|
285 |
placeholder="https://example1.com\nhttps://example2.com"
|
286 |
)
|
287 |
-
|
288 |
with gr.Tab("File Input"):
|
289 |
file_input = gr.File(
|
290 |
label="Upload text file or ZIP archive",
|
@@ -293,11 +292,11 @@ def create_interface():
|
|
293 |
|
294 |
with gr.Tab("Text Input"):
|
295 |
text_input = gr.Textbox(
|
296 |
-
label="Raw Text Input",
|
297 |
lines=5,
|
298 |
placeholder="Paste your text here..."
|
299 |
)
|
300 |
-
|
301 |
with gr.Tab("JSON Editor"):
|
302 |
json_editor = gr.Textbox(
|
303 |
label="JSON Editor",
|
@@ -306,7 +305,7 @@ def create_interface():
|
|
306 |
interactive=True,
|
307 |
elem_id="json-editor" # Optional: for custom styling
|
308 |
)
|
309 |
-
|
310 |
with gr.Tab("Scratchpad"):
|
311 |
scratchpad = gr.Textbox(
|
312 |
label="Scratchpad",
|
@@ -314,26 +313,26 @@ def create_interface():
|
|
314 |
placeholder="Quick notes or text collections...",
|
315 |
interactive=True
|
316 |
)
|
317 |
-
|
318 |
process_btn = gr.Button("Process Input", variant="primary")
|
319 |
qr_btn = gr.Button("Generate QR Code", variant="secondary")
|
320 |
-
|
321 |
output_text = gr.Textbox(label="Processing Results", interactive=False)
|
322 |
output_file = gr.File(label="Processed Output")
|
323 |
qr_output = gr.Image(label="QR Code", type="filepath") # To display the generated QR code
|
324 |
-
|
325 |
-
def process_all_inputs(urls, file, text, notes):
|
326 |
"""Process all input types with progress tracking"""
|
327 |
try:
|
328 |
processor = URLProcessor()
|
329 |
file_processor = FileProcessor()
|
330 |
results = []
|
331 |
-
|
332 |
# Process URLs
|
333 |
if urls:
|
334 |
url_list = re.split(r'[,\n]', urls)
|
335 |
url_list = [url.strip() for url in url_list if url.strip()]
|
336 |
-
|
337 |
for url in url_list:
|
338 |
validation = processor.validate_url(url)
|
339 |
if validation.get('is_valid'):
|
@@ -345,11 +344,11 @@ def process_all_inputs(urls, file, text, notes):
|
|
345 |
'content': content,
|
346 |
'timestamp': datetime.now().isoformat()
|
347 |
})
|
348 |
-
|
349 |
# Process files
|
350 |
if file:
|
351 |
results.extend(file_processor.process_file(file))
|
352 |
-
|
353 |
# Process text input
|
354 |
if text:
|
355 |
cleaned_text = processor.advanced_text_cleaning(text)
|
@@ -358,56 +357,45 @@ def process_all_inputs(urls, file, text, notes):
|
|
358 |
'content': cleaned_text,
|
359 |
'timestamp': datetime.now().isoformat()
|
360 |
})
|
361 |
-
|
362 |
# Generate output
|
363 |
if results:
|
364 |
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
365 |
output_dir.mkdir(parents=True, exist_ok=True)
|
366 |
output_path = output_dir / f'processed_{int(time.time())}.json'
|
367 |
-
|
368 |
with open(output_path, 'w', encoding='utf-8') as f:
|
369 |
json.dump(results, f, ensure_ascii=False, indent=2)
|
370 |
-
|
371 |
summary = f"Processed {len(results)} items successfully!"
|
372 |
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
373 |
return str(output_path), summary, json_data # Return JSON for editor
|
374 |
else:
|
375 |
return None, "No valid content to process.", ""
|
376 |
-
|
377 |
except Exception as e:
|
378 |
logger.error(f"Processing error: {e}")
|
379 |
return None, f"Error: {str(e)}", ""
|
380 |
-
|
381 |
-
def
|
382 |
-
|
383 |
-
qr = qrcode.QRCode(
|
384 |
-
version=40, # Force maximum version
|
385 |
-
error_correction=qrcode.constants.ERROR_CORRECT_L, # Use lower error correction
|
386 |
-
box_size=10,
|
387 |
-
border=4,
|
388 |
-
)
|
389 |
-
qr.add_data(json_data)
|
390 |
-
qr.make(fit=True)
|
391 |
-
return qr.make_image(fill_color="black", back_color="white")
|
392 |
-
|
393 |
if json_data:
|
394 |
-
return
|
395 |
return None
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
gr.Markdown("""
|
411 |
### Usage Guidelines
|
412 |
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
413 |
- **File Input**: Upload text files or ZIP archives
|
@@ -416,16 +404,15 @@ def generate_qr(json_data):
|
|
416 |
- **Scratchpad**: Quick notes or text collections
|
417 |
- Advanced cleaning and validation included
|
418 |
""")
|
419 |
-
|
420 |
-
return interface
|
421 |
|
422 |
def main():
|
423 |
# Configure system settings
|
424 |
mimetypes.init()
|
425 |
-
|
426 |
# Create and launch interface
|
427 |
interface = create_interface()
|
428 |
-
|
429 |
# Launch with proper configuration
|
430 |
interface.launch(
|
431 |
server_name="0.0.0.0",
|
@@ -437,4 +424,4 @@ def main():
|
|
437 |
)
|
438 |
|
439 |
if __name__ == "__main__":
|
440 |
-
main()
|
|
|
19 |
from cleantext import clean
|
20 |
import qrcode
|
21 |
import zipfile
|
|
|
22 |
|
23 |
# Setup logging with detailed configuration
|
24 |
logging.basicConfig(
|
|
|
27 |
handlers=[
|
28 |
logging.StreamHandler(),
|
29 |
logging.FileHandler('app.log', encoding='utf-8')
|
30 |
+
])
|
|
|
31 |
logger = logging.getLogger(__name__)
|
32 |
|
33 |
+
|
34 |
class URLProcessor:
|
35 |
def __init__(self):
|
36 |
self.session = requests.Session()
|
|
|
43 |
'Connection': 'keep-alive',
|
44 |
'Upgrade-Insecure-Requests': '1'
|
45 |
})
|
46 |
+
|
47 |
def advanced_text_cleaning(self, text: str) -> str:
|
48 |
"""Robust text cleaning with version compatibility"""
|
49 |
try:
|
|
|
73 |
try:
|
74 |
if not validators.url(url):
|
75 |
return {'is_valid': False, 'message': 'Invalid URL format'}
|
76 |
+
|
77 |
response = self.session.head(url, timeout=self.timeout)
|
78 |
response.raise_for_status()
|
79 |
return {'is_valid': True, 'message': 'URL is valid and accessible'}
|
|
|
86 |
# Google Drive document handling
|
87 |
if 'drive.google.com' in url:
|
88 |
return self._handle_google_drive(url)
|
89 |
+
|
90 |
# Google Calendar ICS handling
|
91 |
if 'calendar.google.com' in url and 'ical' in url:
|
92 |
return self._handle_google_calendar(url)
|
93 |
+
|
94 |
# Standard HTML processing
|
95 |
return self._fetch_html_content(url)
|
96 |
except Exception as e:
|
|
|
104 |
if not file_id:
|
105 |
logger.error(f"Invalid Google Drive URL: {url}")
|
106 |
return None
|
107 |
+
|
108 |
direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
|
109 |
response = self.session.get(direct_url, timeout=self.timeout)
|
110 |
response.raise_for_status()
|
111 |
+
|
112 |
return {
|
113 |
'content': response.text,
|
114 |
'content_type': response.headers.get('Content-Type', ''),
|
|
|
137 |
try:
|
138 |
response = self.session.get(url, timeout=self.timeout)
|
139 |
response.raise_for_status()
|
140 |
+
|
141 |
soup = BeautifulSoup(response.text, 'html.parser')
|
142 |
+
|
143 |
# Remove unwanted elements
|
144 |
for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
|
145 |
element.decompose()
|
146 |
+
|
147 |
# Extract main content
|
148 |
main_content = soup.find('main') or soup.find('article') or soup.body
|
149 |
+
|
150 |
# Clean and structure content
|
151 |
text_content = main_content.get_text(separator='\n', strip=True)
|
152 |
cleaned_content = self.advanced_text_cleaning(text_content)
|
153 |
+
|
154 |
return {
|
155 |
'content': cleaned_content,
|
156 |
'content_type': response.headers.get('Content-Type', ''),
|
|
|
159 |
except Exception as e:
|
160 |
logger.error(f"HTML processing failed: {e}")
|
161 |
return None
|
162 |
+
|
163 |
+
|
164 |
class FileProcessor:
|
165 |
"""Class to handle file processing"""
|
166 |
+
|
167 |
def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
|
168 |
self.max_file_size = max_file_size
|
169 |
self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
|
170 |
+
|
171 |
def is_text_file(self, filepath: str) -> bool:
|
172 |
"""Check if file is a text file"""
|
173 |
try:
|
|
|
181 |
"""Process uploaded file with enhanced error handling"""
|
182 |
if not file:
|
183 |
return []
|
|
|
184 |
dataset = []
|
185 |
try:
|
186 |
file_size = os.path.getsize(file.name)
|
187 |
if file_size > self.max_file_size:
|
188 |
logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
|
189 |
return []
|
|
|
190 |
with tempfile.TemporaryDirectory() as temp_dir:
|
191 |
if zipfile.is_zipfile(file.name):
|
192 |
dataset.extend(self._process_zip_file(file.name, temp_dir))
|
193 |
else:
|
194 |
dataset.extend(self._process_single_file(file))
|
|
|
195 |
except Exception as e:
|
196 |
logger.error(f"Error processing file: {str(e)}")
|
197 |
return []
|
|
|
198 |
return dataset
|
199 |
|
200 |
def _process_zip_file(self, zip_path, temp_dir):
|
|
|
213 |
'timestamp': datetime.now().isoformat()
|
214 |
})
|
215 |
return result
|
216 |
+
|
217 |
def _process_single_file(self, file) -> List[Dict]:
|
218 |
try:
|
219 |
file_stat = os.stat(file.name)
|
220 |
+
|
221 |
# For very large files, read in chunks and summarize
|
222 |
if file_stat.st_size > 100 * 1024 * 1024: # 100MB
|
223 |
logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
|
224 |
+
|
225 |
# Read first and last 1MB for extremely large files
|
226 |
content = ""
|
227 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
228 |
content = f.read(1 * 1024 * 1024) # First 1MB
|
229 |
content += "\n...[Content truncated due to large file size]...\n"
|
230 |
+
|
231 |
# Seek to the last 1MB
|
232 |
f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
|
233 |
content += f.read() # Last 1MB
|
234 |
else:
|
235 |
# Regular file processing
|
236 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
237 |
+
content = f.read()
|
238 |
+
|
239 |
return [{
|
240 |
'source': 'file',
|
241 |
'filename': os.path.basename(file.name),
|
|
|
249 |
except Exception as e:
|
250 |
logger.error(f"File processing error: {e}")
|
251 |
return []
|
252 |
+
|
253 |
+
def generate_qr(json_data):
    """Render ``json_data`` as a QR code PNG and return the image's file path.

    Args:
        json_data: Text to encode in the QR code.

    Returns:
        Path of a newly created temporary ``.png`` file. The file is created
        with ``delete=False`` and is not removed by this function, so the
        caller owns its cleanup.

    Raises:
        Any exception raised by ``qrcode`` while encoding or by the image
        writer while saving is propagated unchanged.
        NOTE(review): payloads beyond the version-40 capacity presumably
        raise a data-overflow error from ``qrcode`` -- confirm.
    """
    qr = qrcode.QRCode(
        version=40,  # Force maximum version
        error_correction=qrcode.constants.ERROR_CORRECT_L,  # Use lower error correction
        box_size=10,
        border=4,
    )
    qr.add_data(json_data)
    qr.make(fit=True)
    img = qr.make_image(fill_color="black", back_color="white")

    # Reserve a unique path and close the handle *before* writing the image.
    # Leaving the handle open leaked a file descriptor per call and prevents
    # the save on platforms that do not allow reopening an open temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
        image_path = temp_file.name
    img.save(image_path)
    return image_path
|
267 |
|
268 |
def create_interface():
|
269 |
"""Create a comprehensive Gradio interface with advanced features"""
|
270 |
+
|
271 |
css = """
|
272 |
.container { max-width: 1200px; margin: auto; }
|
273 |
.warning { background-color: #fff3cd; color: #856404; }
|
274 |
.error { background-color: #f8d7da; color: #721c24; }
|
275 |
"""
|
276 |
+
|
277 |
with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
|
278 |
gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
|
279 |
+
|
280 |
with gr.Tab("URL Processing"):
|
281 |
url_input = gr.Textbox(
|
282 |
+
label="Enter URLs (comma or newline separated)",
|
283 |
lines=5,
|
284 |
placeholder="https://example1.com\nhttps://example2.com"
|
285 |
)
|
286 |
+
|
287 |
with gr.Tab("File Input"):
|
288 |
file_input = gr.File(
|
289 |
label="Upload text file or ZIP archive",
|
|
|
292 |
|
293 |
with gr.Tab("Text Input"):
|
294 |
text_input = gr.Textbox(
|
295 |
+
label="Raw Text Input",
|
296 |
lines=5,
|
297 |
placeholder="Paste your text here..."
|
298 |
)
|
299 |
+
|
300 |
with gr.Tab("JSON Editor"):
|
301 |
json_editor = gr.Textbox(
|
302 |
label="JSON Editor",
|
|
|
305 |
interactive=True,
|
306 |
elem_id="json-editor" # Optional: for custom styling
|
307 |
)
|
308 |
+
|
309 |
with gr.Tab("Scratchpad"):
|
310 |
scratchpad = gr.Textbox(
|
311 |
label="Scratchpad",
|
|
|
313 |
placeholder="Quick notes or text collections...",
|
314 |
interactive=True
|
315 |
)
|
316 |
+
|
317 |
process_btn = gr.Button("Process Input", variant="primary")
|
318 |
qr_btn = gr.Button("Generate QR Code", variant="secondary")
|
319 |
+
|
320 |
output_text = gr.Textbox(label="Processing Results", interactive=False)
|
321 |
output_file = gr.File(label="Processed Output")
|
322 |
qr_output = gr.Image(label="QR Code", type="filepath") # To display the generated QR code
|
323 |
+
|
324 |
+
def process_all_inputs(urls, file, text, notes):
|
325 |
"""Process all input types with progress tracking"""
|
326 |
try:
|
327 |
processor = URLProcessor()
|
328 |
file_processor = FileProcessor()
|
329 |
results = []
|
330 |
+
|
331 |
# Process URLs
|
332 |
if urls:
|
333 |
url_list = re.split(r'[,\n]', urls)
|
334 |
url_list = [url.strip() for url in url_list if url.strip()]
|
335 |
+
|
336 |
for url in url_list:
|
337 |
validation = processor.validate_url(url)
|
338 |
if validation.get('is_valid'):
|
|
|
344 |
'content': content,
|
345 |
'timestamp': datetime.now().isoformat()
|
346 |
})
|
347 |
+
|
348 |
# Process files
|
349 |
if file:
|
350 |
results.extend(file_processor.process_file(file))
|
351 |
+
|
352 |
# Process text input
|
353 |
if text:
|
354 |
cleaned_text = processor.advanced_text_cleaning(text)
|
|
|
357 |
'content': cleaned_text,
|
358 |
'timestamp': datetime.now().isoformat()
|
359 |
})
|
360 |
+
|
361 |
# Generate output
|
362 |
if results:
|
363 |
output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
|
364 |
output_dir.mkdir(parents=True, exist_ok=True)
|
365 |
output_path = output_dir / f'processed_{int(time.time())}.json'
|
366 |
+
|
367 |
with open(output_path, 'w', encoding='utf-8') as f:
|
368 |
json.dump(results, f, ensure_ascii=False, indent=2)
|
369 |
+
|
370 |
summary = f"Processed {len(results)} items successfully!"
|
371 |
json_data = json.dumps(results, indent=2) # Prepare JSON for QR code
|
372 |
return str(output_path), summary, json_data # Return JSON for editor
|
373 |
else:
|
374 |
return None, "No valid content to process.", ""
|
375 |
+
|
376 |
except Exception as e:
|
377 |
logger.error(f"Processing error: {e}")
|
378 |
return None, f"Error: {str(e)}", ""
|
379 |
+
|
380 |
+
def generate_qr_code(json_data):
    """Return the QR image path for ``json_data``, or ``None`` when it is empty.

    Falsy input (empty string or ``None``) short-circuits to ``None``;
    otherwise the work is delegated to ``generate_qr``.
    """
    # Guard clause: nothing to encode, so there is no image to show.
    if not json_data:
        return None
    return generate_qr(json_data)
|
385 |
+
|
386 |
+
process_btn.click(
|
387 |
+
process_all_inputs,
|
388 |
+
inputs=[url_input, file_input, text_input, scratchpad],
|
389 |
+
outputs=[output_file, output_text, json_editor] # Update outputs to include JSON editor
|
390 |
+
)
|
391 |
+
|
392 |
+
qr_btn.click(
|
393 |
+
generate_qr_code,
|
394 |
+
inputs=json_editor,
|
395 |
+
outputs=qr_output
|
396 |
+
)
|
397 |
+
|
398 |
+
gr.Markdown("""
|
|
|
399 |
### Usage Guidelines
|
400 |
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
401 |
- **File Input**: Upload text files or ZIP archives
|
|
|
404 |
- **Scratchpad**: Quick notes or text collections
|
405 |
- Advanced cleaning and validation included
|
406 |
""")
|
407 |
+
return interface
|
|
|
408 |
|
409 |
def main():
|
410 |
# Configure system settings
|
411 |
mimetypes.init()
|
412 |
+
|
413 |
# Create and launch interface
|
414 |
interface = create_interface()
|
415 |
+
|
416 |
# Launch with proper configuration
|
417 |
interface.launch(
|
418 |
server_name="0.0.0.0",
|
|
|
424 |
)
|
425 |
|
426 |
if __name__ == "__main__":
|
427 |
+
main()
|