acecalisto3 committed
Commit 7d538d5 · verified · 1 Parent(s): faa89e2

Update app.py

Files changed (1):
  1. app.py +373 -224
app.py CHANGED
@@ -1,279 +1,428 @@
- """
- Advanced URL & Text Processing Suite - Main Application
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- A sophisticated Gradio interface with URL processing, file manipulation, QR operations,
- and advanced data chat capabilities.
- """
-
- import gradio as gr
- import logging
  import json
  import os
- import sys
- import zipfile
- import pandas as pd
- import numpy as np
  from datetime import datetime
  from pathlib import Path
- from typing import Dict, List, Optional, Union, Any, Tuple

- # Configure logging
  logging.basicConfig(
      level=logging.INFO,
-     format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s - %(message)s',
-     datefmt='%Y-%m-%d %H:%M:%S'
  )
  logger = logging.getLogger(__name__)

- # Modern UI Configuration
- THEME = gr.themes.Soft(
-     primary_hue="indigo",
-     secondary_hue="blue",
-     neutral_hue="slate",
-     spacing_size=gr.themes.sizes.spacing_md,
-     radius_size=gr.themes.sizes.radius_md,
-     text_size=gr.themes.sizes.text_md,
- )
-
- class DataChatProcessor:
      def __init__(self):
-         self.trained_data = {}
-         self.current_dataset = None
-
-     def process_zip_file(self, file_obj, mode):
          try:
-             if not file_obj:
-                 return "Please upload a ZIP file", []

-             # Extract ZIP contents
-             with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
-                 temp_dir = Path('temp_data')
-                 temp_dir.mkdir(exist_ok=True)
-                 zip_ref.extractall(temp_dir)

-             # Process based on mode
-             if mode == "TrainedOnData":
-                 return self._train_on_data(temp_dir)
-             else:  # TalkAboutData
-                 return self._analyze_data(temp_dir)
          except Exception as e:
-             logger.error(f"Error processing ZIP file: {e}")
-             return f"Error: {str(e)}", []
-
-     def _train_on_data(self, data_dir):
          try:
-             datasets = []
-             for file in data_dir.glob('**/*.csv'):
-                 df = pd.read_csv(file)
-                 datasets.append({
-                     'name': file.name,
-                     'data': df,
-                     'summary': {
-                         'rows': len(df),
-                         'columns': len(df.columns),
-                         'dtypes': df.dtypes.astype(str).to_dict()
-                     }
-                 })

-             self.trained_data = {
-                 'datasets': datasets,
                  'timestamp': datetime.now().isoformat()
              }
-
-             summary = f"Trained on {len(datasets)} datasets"
-             messages = [
-                 {"role": "assistant", "content": "Training completed successfully."},
-                 {"role": "assistant", "content": summary}
-             ]
-
-             return summary, messages
-
          except Exception as e:
-             logger.error(f"Error training on data: {e}")
-             return f"Error during training: {str(e)}", []
-
-     def _analyze_data(self, data_dir):
          try:
-             analyses = []
-             for file in data_dir.glob('**/*.csv'):
-                 df = pd.read_csv(file)
-                 analyses.append({
-                     'file': file.name,
-                     'shape': df.shape,
-                     'dtypes': df.dtypes.astype(str).to_dict()
-                 })
-
-             self.current_dataset = {
-                 'analyses': analyses,
                  'timestamp': datetime.now().isoformat()
              }

-             summary = f"Analyzed {len(analyses)} files"
-             messages = [
-                 {"role": "assistant", "content": "Analysis completed successfully."},
-                 {"role": "assistant", "content": summary}
-             ]

-             return summary, messages

          except Exception as e:
-             logger.error(f"Error analyzing data: {e}")
-             return f"Error during analysis: {str(e)}", []

-     def chat(self, message, history, mode):
-         if not message:
-             return "", history
-
-         history.append({"role": "user", "content": message})

          try:
-             if mode == "TrainedOnData":
-                 if not self.trained_data:
-                     response = "Please upload and train on data first."
-                 else:
-                     response = self._generate_trained_response(message)
-             else:
-                 if not self.current_dataset:
-                     response = "Please upload data for analysis first."
                  else:
-                     response = self._generate_analysis_response(message)
-
-             history.append({"role": "assistant", "content": response})
-             return "", history
-
          except Exception as e:
-             logger.error(f"Error in chat: {e}")
-             history.append({"role": "assistant", "content": f"Error: {str(e)}"})
-             return "", history

-     def _generate_trained_response(self, message):
-         datasets = self.trained_data['datasets']

-         if "how many" in message.lower():
-             return f"There are {len(datasets)} datasets."

-         if "summary" in message.lower():
-             summaries = []
-             for ds in datasets:
-                 summaries.append(
-                     f"Dataset '{ds['name']}': {ds['summary']['rows']} rows, "
-                     f"{ds['summary']['columns']} columns"
-                 )
-             return "\n".join(summaries)

-         return "I can help you analyze the trained datasets. Ask about number of datasets or summaries."
-
-     def _generate_analysis_response(self, message):
-         analyses = self.current_dataset['analyses']

-         if "how many" in message.lower():
-             return f"There are {len(analyses)} files."

-         if "summary" in message.lower():
-             summaries = []
-             for analysis in analyses:
-                 summaries.append(
-                     f"File '{analysis['file']}': {analysis['shape'][0]} rows, "
-                     f"{analysis['shape'][1]} columns"
-                 )
-             return "\n".join(summaries)

-         return "I can help you explore the current dataset. Ask about file count or summaries."
-
- def create_interface():
-     data_chat = DataChatProcessor()
-
-     with gr.Blocks(theme=THEME) as interface:
-         gr.Markdown(
-             """
-             # 🌐 Advanced Data Processing & Analysis Suite
-             Enterprise-grade toolkit for data processing, analysis, and interactive chat capabilities.
-             """
-         )

-         with gr.Tab("💬 DataChat"):
-             with gr.Row():
-                 # Left column for file upload and mode selection
-                 with gr.Column(scale=1):
-                     data_file = gr.File(
-                         label="Upload ZIP File",
-                         file_types=[".zip"]
-                     )
-
-                     mode = gr.Radio(
-                         choices=["TrainedOnData", "TalkAboutData"],
-                         value="TrainedOnData",
-                         label="Chat Mode"
-                     )
-
-                     process_btn = gr.Button("Process Data", variant="primary")

-                     status_output = gr.Textbox(
-                         label="Status",
-                         interactive=False
-                     )

-                 # Right column for chat interface
-                 with gr.Column(scale=2):
-                     chatbot = gr.Chatbot(
-                         label="Chat History",
-                         height=400,
-                         show_label=True,
-                         type="messages"  # Specify OpenAI-style message format
-                     )

-                     msg = gr.Textbox(
-                         label="Your Message",
-                         placeholder="Ask questions about your data...",
-                         lines=2
-                     )

-                     with gr.Row():
-                         submit_btn = gr.Button("Send", variant="primary")
-                         clear_btn = gr.Button("Clear Chat", variant="secondary")
-
-         # Event handlers
-         process_btn.click(
-             fn=data_chat.process_zip_file,
-             inputs=[data_file, mode],
-             outputs=[status_output, chatbot]
-         )

-         submit_btn.click(
-             fn=data_chat.chat,
-             inputs=[msg, chatbot, mode],
-             outputs=[msg, chatbot]
-         )

-         msg.submit(
-             fn=data_chat.chat,
-             inputs=[msg, chatbot, mode],
-             outputs=[msg, chatbot]
          )

-         clear_btn.click(
-             fn=lambda: ([], "Chat cleared"),
-             outputs=[chatbot, status_output]
          )

-     return interface

  def main():
-     try:
-         interface = create_interface()
-         if interface:
-             interface.launch(
-                 server_name="0.0.0.0",
-                 server_port=8000
-             )
-         else:
-             logger.error("Failed to create interface")
-             sys.exit(1)
-     except Exception as e:
-         logger.error(f"Application startup error: {e}", exc_info=True)
-         sys.exit(1)

  if __name__ == "__main__":
-     main()
  import json
  import os
+ import re
+ import time
+ import logging
+ import zipfile  # needed for ZIP archive handling in FileProcessor
+ import mimetypes
+ import tempfile
  from datetime import datetime
  from pathlib import Path
+ from urllib.parse import urlparse
+ from typing import List, Dict, Tuple, Union, Optional
+ import requests
+ import validators
+ import gradio as gr
+ from diskcache import Cache
+ from bs4 import BeautifulSoup
+ from fake_useragent import UserAgent
+ from cleantext import clean
+ import qrcode

+ # Setup logging with detailed configuration
  logging.basicConfig(
      level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
+     handlers=[
+         logging.StreamHandler(),
+         logging.FileHandler('app.log', encoding='utf-8')
+     ]
  )
  logger = logging.getLogger(__name__)

+ class URLProcessor:
      def __init__(self):
+         self.session = requests.Session()
+         self.timeout = 10  # seconds
+         self.session.headers.update({
+             'User-Agent': UserAgent().random,
+             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+             'Accept-Language': 'en-US,en;q=0.5',
+             'Accept-Encoding': 'gzip, deflate, br',
+             'Connection': 'keep-alive',
+             'Upgrade-Insecure-Requests': '1'
+         })
+
+     def advanced_text_cleaning(self, text: str) -> str:
+         """Robust text cleaning with version compatibility"""
+         try:
+             cleaned_text = clean(
+                 text,
+                 fix_unicode=True,
+                 to_ascii=True,
+                 lower=True,
+                 no_line_breaks=True,
+                 no_urls=True,
+                 no_emails=True,
+                 no_phone_numbers=True,
+                 no_numbers=False,
+                 no_digits=False,
+                 no_currency_symbols=True,
+                 no_punct=False
+             ).strip()
+             return cleaned_text
+         except Exception as e:
+             logger.warning(f"Text cleaning error: {e}. Using fallback method.")
+             text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)  # Remove control characters
+             text = text.encode('ascii', 'ignore').decode('ascii')  # Remove non-ASCII characters
+             text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
+             return text.strip()
+
+     def validate_url(self, url: str) -> Dict:
+         """Validate URL format and accessibility"""
          try:
+             if not validators.url(url):
+                 return {'is_valid': False, 'message': 'Invalid URL format'}

+             response = self.session.head(url, timeout=self.timeout)
+             response.raise_for_status()
+             return {'is_valid': True, 'message': 'URL is valid and accessible'}
+         except Exception as e:
+             return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
+
+     def fetch_content(self, url: str) -> Optional[Dict]:
+         """Universal content fetcher with special case handling"""
+         try:
+             # Google Drive document handling
+             if 'drive.google.com' in url:
+                 return self._handle_google_drive(url)

+             # Google Calendar ICS handling
+             if 'calendar.google.com' in url and 'ical' in url:
+                 return self._handle_google_calendar(url)

+             # Standard HTML processing
+             return self._fetch_html_content(url)
          except Exception as e:
+             logger.error(f"Content fetch failed: {e}")
+             return None
+
+     def _handle_google_drive(self, url: str) -> Optional[Dict]:
+         """Process Google Drive file links"""
          try:
+             file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
+             if not file_id:
+                 logger.error(f"Invalid Google Drive URL: {url}")
+                 return None
+
+             direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
+             response = self.session.get(direct_url, timeout=self.timeout)
+             response.raise_for_status()

+             return {
+                 'content': response.text,
+                 'content_type': response.headers.get('Content-Type', ''),
                  'timestamp': datetime.now().isoformat()
              }
          except Exception as e:
+             logger.error(f"Google Drive processing failed: {e}")
+             return None
+
+     def _handle_google_calendar(self, url: str) -> Optional[Dict]:
+         """Process Google Calendar ICS feeds"""
          try:
+             response = self.session.get(url, timeout=self.timeout)
+             response.raise_for_status()
+             return {
+                 'content': response.text,
+                 'content_type': 'text/calendar',
                  'timestamp': datetime.now().isoformat()
              }
+         except Exception as e:
+             logger.error(f"Calendar fetch failed: {e}")
+             return None
+
+     def _fetch_html_content(self, url: str) -> Optional[Dict]:
+         """Standard HTML content processing"""
+         try:
+             response = self.session.get(url, timeout=self.timeout)
+             response.raise_for_status()

+             soup = BeautifulSoup(response.text, 'html.parser')

+             # Remove unwanted elements
+             for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
+                 element.decompose()
+
+             # Extract main content; guard against pages with no usable body
+             main_content = soup.find('main') or soup.find('article') or soup.body
+             if main_content is None:
+                 logger.warning(f"No main content found in {url}")
+                 return None
+
+             # Clean and structure content
+             text_content = main_content.get_text(separator='\n', strip=True)
+             cleaned_content = self.advanced_text_cleaning(text_content)

+             return {
+                 'content': cleaned_content,
+                 'content_type': response.headers.get('Content-Type', ''),
+                 'timestamp': datetime.now().isoformat()
+             }
          except Exception as e:
+             logger.error(f"HTML processing failed: {e}")
+             return None
+
+ class FileProcessor:
+     """Class to handle file processing"""

+     def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):  # 2GB default
+         self.max_file_size = max_file_size
+         self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}

+     def is_text_file(self, filepath: str) -> bool:
+         """Check if file is a text file"""
          try:
+             mime_type, _ = mimetypes.guess_type(filepath)
+             return (mime_type and mime_type.startswith('text/')) or \
+                    (os.path.splitext(filepath)[1].lower() in self.supported_text_extensions)
+         except Exception:
+             return False
+
+     def process_file(self, file) -> List[Dict]:
+         """Process uploaded file with enhanced error handling"""
+         if not file:
+             return []
+
+         dataset = []
+         try:
+             file_size = os.path.getsize(file.name)
+             if file_size > self.max_file_size:
+                 logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
+                 return []
+
+             with tempfile.TemporaryDirectory() as temp_dir:
+                 if zipfile.is_zipfile(file.name):
+                     dataset.extend(self._process_zip_file(file.name, temp_dir))
                  else:
+                     dataset.extend(self._process_single_file(file))
+
          except Exception as e:
+             logger.error(f"Error processing file: {str(e)}")
+             return []
+
+         return dataset
+
+     def _process_zip_file(self, zip_path: str, temp_dir: str) -> List[Dict]:
+         """Process ZIP file contents"""
+         results = []
+         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+             zip_ref.extractall(temp_dir)
+             for root, _, files in os.walk(temp_dir):
+                 for filename in files:
+                     filepath = os.path.join(root, filename)
+                     if self.is_text_file(filepath):
+                         try:
+                             with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                                 content = f.read()
+                                 if content.strip():
+                                     results.append({
+                                         "source": "file",
+                                         "filename": filename,
+                                         "content": content,
+                                         "timestamp": datetime.now().isoformat()
+                                     })
+                         except Exception as e:
+                             logger.error(f"Error reading file {filename}: {str(e)}")
+         return results
+
+     def _process_single_file(self, file) -> List[Dict]:
+         try:
+             file_stat = os.stat(file.name)
+
+             # For very large files, read in chunks and summarize
+             if file_stat.st_size > 100 * 1024 * 1024:  # 100MB
+                 logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
+
+                 # Read first and last 1MB for extremely large files
+                 content = ""
+                 with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read(1 * 1024 * 1024)  # First 1MB
+                     content += "\n...[Content truncated due to large file size]...\n"
+
+                     # Seek to the last 1MB
+                     f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
+                     content += f.read()  # Last 1MB
+             else:
+                 # Regular file processing
+                 with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+             return [{
+                 'source': 'file',
+                 'filename': os.path.basename(file.name),
+                 'file_size': file_stat.st_size,
+                 'mime_type': mimetypes.guess_type(file.name)[0],
+                 'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                 'content': content,
+                 'timestamp': datetime.now().isoformat()
+             }]
+         except Exception as e:
+             logger.error(f"File processing error: {e}")
+             return []
+
+ def generate_qr_code(json_data):
+     """Generate a QR code from JSON data."""
+     os.makedirs("output", exist_ok=True)  # ensure the output directory exists before saving
+     qr = qrcode.make(json_data)
+     qr_path = "output/qr_code.png"
+     qr.save(qr_path)
+     return qr_path
+
+ def create_interface():
+     """Create a comprehensive Gradio interface with advanced features"""
+
+     css = """
+     .container { max-width: 1200px; margin: auto; }
+     .warning { background-color: #fff3cd; color: #856404; }
+     .error { background-color: #f8d7da; color: #721c24; }
+     """

+     with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
+         gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")

+         with gr.Tab("URL Processing"):
+             url_input = gr.Textbox(
+                 label="Enter URLs (comma or newline separated)",
+                 lines=5,
+                 placeholder="https://example1.com\nhttps://example2.com"
+             )

+         with gr.Tab("File Input"):
+             file_input = gr.File(
+                 label="Upload text file or ZIP archive",
+                 file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
+             )
+
+         with gr.Tab("Text Input"):
+             text_input = gr.Textbox(
+                 label="Raw Text Input",
+                 lines=5,
+                 placeholder="Paste your text here..."
+             )

+         with gr.Tab("JSON Editor"):
+             json_editor = gr.Textbox(
+                 label="JSON Editor",
+                 lines=20,
+                 placeholder="View and edit your JSON data here...",
+                 interactive=True,
+                 elem_id="json-editor"  # Optional: for custom styling
+             )

+         with gr.Tab("Scratchpad"):
+             scratchpad = gr.Textbox(
+                 label="Scratchpad",
+                 lines=10,
+                 placeholder="Quick notes or text collections...",
+                 interactive=True
+             )

+         process_btn = gr.Button("Process Input", variant="primary")
+         qr_btn = gr.Button("Generate QR Code", variant="secondary")

+         output_text = gr.Textbox(label="Processing Results", interactive=False)
+         output_file = gr.File(label="Processed Output")
+         qr_output = gr.Image(label="QR Code", type="filepath")  # To display the generated QR code

+         def process_all_inputs(urls, file, text, notes):
+             """Process all input types with progress tracking"""
+             try:
+                 processor = URLProcessor()
+                 file_processor = FileProcessor()
+                 results = []
+
+                 # Process URLs
+                 if urls:
+                     url_list = re.split(r'[,\n]', urls)
+                     url_list = [url.strip() for url in url_list if url.strip()]

+                     for url in url_list:
+                         validation = processor.validate_url(url)
+                         if validation.get('is_valid'):
+                             content = processor.fetch_content(url)
+                             if content:
+                                 results.append({
+                                     'source': 'url',
+                                     'url': url,
+                                     'content': content,
+                                     'timestamp': datetime.now().isoformat()
+                                 })

+                 # Process files
+                 if file:
+                     results.extend(file_processor.process_file(file))
+
+                 # Process text input
+                 if text:
+                     cleaned_text = processor.advanced_text_cleaning(text)
+                     results.append({
+                         'source': 'direct_input',
+                         'content': cleaned_text,
+                         'timestamp': datetime.now().isoformat()
+                     })
+
+                 # Generate output
+                 if results:
+                     output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
+                     output_dir.mkdir(parents=True, exist_ok=True)
+                     output_path = output_dir / f'processed_{int(time.time())}.json'

+                     with open(output_path, 'w', encoding='utf-8') as f:
+                         json.dump(results, f, ensure_ascii=False, indent=2)

+                     summary = f"Processed {len(results)} items successfully!"
+                     json_data = json.dumps(results, indent=2)  # Prepare JSON for QR code
+                     return str(output_path), summary, json_data  # Return JSON for editor
+                 else:
+                     return None, "No valid content to process.", ""
+
+             except Exception as e:
+                 logger.error(f"Processing error: {e}")
+                 return None, f"Error: {str(e)}", ""

+         def generate_qr(json_data):
+             """Generate QR code from JSON data and return the file path."""
+             if json_data:
+                 return generate_qr_code(json_data)
+             return None

+         process_btn.click(
+             process_all_inputs,
+             inputs=[url_input, file_input, text_input, scratchpad],
+             outputs=[output_file, output_text, json_editor]  # Update outputs to include JSON editor
          )

+         qr_btn.click(
+             generate_qr,
+             inputs=json_editor,
+             outputs=qr_output
          )

+         gr.Markdown("""
+         ### Usage Guidelines
+         - **URL Processing**: Enter valid HTTP/HTTPS URLs
+         - **File Input**: Upload text files or ZIP archives
+         - **Text Input**: Direct text processing
+         - **JSON Editor**: View and edit your JSON data
+         - **Scratchpad**: Quick notes or text collections
+         - Advanced cleaning and validation included
+         """)
+
+     return interface

  def main():
+     # Configure system settings
+     mimetypes.init()
+
+     # Create and launch interface
+     interface = create_interface()
+
+     # Launch with proper configuration
+     interface.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         show_error=True,
+         share=False,
+         inbrowser=True,
+         debug=True
+     )

  if __name__ == "__main__":
+     main()
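
For reviewers who want to exercise the new URLProcessor and FileProcessor classes without launching the Gradio UI, a minimal smoke-test sketch follows. It is not part of the commit: it assumes the revised app.py is on the import path, that its added dependencies are installed (requests, validators, beautifulsoup4, fake-useragent, clean-text, qrcode, diskcache, gradio), and that network access is available; the URL, the _Upload shim, and the notes.txt path are hypothetical placeholders.

# smoke_test.py -- illustrative sketch, not shipped with the commit
from app import URLProcessor, FileProcessor

processor = URLProcessor()

# validate_url returns a dict of the form {'is_valid': bool, 'message': str}
result = processor.validate_url("https://example.com")
print(result['message'])

# fetch_content returns {'content', 'content_type', 'timestamp'} or None on failure
if result['is_valid']:
    page = processor.fetch_content("https://example.com")
    if page:
        print(page['content'][:200])  # first 200 characters of the cleaned text

# process_file only reads file.name, so any object with a .name attribute
# pointing at a real file stands in for a Gradio upload here
class _Upload:
    def __init__(self, name):
        self.name = name

records = FileProcessor().process_file(_Upload("notes.txt"))  # placeholder path
print(f"{len(records)} record(s) extracted")

Because errors inside process_file are caught and logged rather than raised, a missing placeholder file simply yields an empty list, which makes this safe to run as a quick sanity check.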