acecalisto3 committed on
Commit
14baf76
·
verified ·
1 Parent(s): 6098474

Create app2.py

Browse files
Files changed (1) hide show
  1. app2.py +288 -0
app2.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ return []
2
+ with tempfile.TemporaryDirectory() as temp_dir:
3
+ if zipfile.is_zipfile(file.name):
4
+ dataset.extend(self._process_zip_file(file.name, temp_dir))
5
+ else:
6
+ dataset.extend(self._process_single_file(file))
7
+ except Exception as e:
8
+ logger.error(f"Error processing file: {str(e)}")
9
+ return []
10
+ return dataset
11
+
12
def _process_zip_file(self, zip_path, temp_dir):
    """Extract a ZIP archive and return one record per regular file inside it.

    The archive is unpacked into ``temp_dir`` and the whole extracted tree is
    walked, so members stored inside folders are returned too (a flat
    directory listing would silently skip them).

    Args:
        zip_path: Path of the ZIP archive to read.
        temp_dir: Existing directory the archive is extracted into.

    Returns:
        A list of dicts with the keys ``'source'`` (always
        ``'file_from_zip'``), ``'filename'`` (path relative to ``temp_dir``,
        using ``/`` separators), ``'content'`` (text decoded as UTF-8 with
        undecodable bytes dropped) and ``'timestamp'`` (ISO-8601 string).
    """
    result = []
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(temp_dir)

    for dirpath, dirnames, filenames in os.walk(temp_dir):
        # Sorting in place makes the traversal (and the output) deterministic.
        dirnames.sort()
        for name in sorted(filenames):
            extracted_file_path = os.path.join(dirpath, name)
            if not os.path.isfile(extracted_file_path):
                continue
            with open(extracted_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            # Top-level members keep their bare name; nested members keep
            # their folder prefix so equally named files stay distinguishable.
            relative_name = os.path.relpath(extracted_file_path, temp_dir)
            result.append({
                'source': 'file_from_zip',
                'filename': relative_name.replace(os.sep, '/'),
                'content': content,
                'timestamp': datetime.now().isoformat()
            })
    return result
28
+
29
def _process_single_file(self, file) -> List[Dict]:
    """Read one uploaded file and return it as a single-record list.

    Files larger than 100 MB are not loaded whole: only the first and the
    last 1 MB are kept, joined by a truncation marker.

    Args:
        file: Object exposing the on-disk path of the upload as ``file.name``.

    Returns:
        A one-element list holding a dict with the keys ``'source'``,
        ``'filename'``, ``'file_size'``, ``'mime_type'``, ``'created'``,
        ``'modified'``, ``'content'`` and ``'timestamp'``; an empty list if
        anything goes wrong (the error is logged).
    """
    large_file_threshold = 100 * 1024 * 1024  # 100MB
    excerpt_bytes = 1 * 1024 * 1024  # 1MB kept from each end of a large file

    def decode_excerpt(raw):
        # Mirror what text-mode reading does in the regular branch: drop
        # undecodable bytes and normalise line endings to "\n".
        text = raw.decode('utf-8', errors='ignore')
        return text.replace('\r\n', '\n').replace('\r', '\n')

    try:
        file_stat = os.stat(file.name)
        if file_stat.st_size > large_file_threshold:
            logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
            # Binary mode is required here: seeking a text-mode handle to an
            # arbitrary byte offset is undefined, and text-mode read(n)
            # counts characters rather than bytes.
            with open(file.name, 'rb') as f:
                head = f.read(excerpt_bytes)
                f.seek(max(0, file_stat.st_size - excerpt_bytes))
                tail = f.read()
            content = (
                decode_excerpt(head)
                + "\n...[Content truncated due to large file size]...\n"
                + decode_excerpt(tail)
            )
        else:
            # Regular file processing
            with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
        return [{
            'source': 'file',
            'filename': os.path.basename(file.name),
            'file_size': file_stat.st_size,
            'mime_type': mimetypes.guess_type(file.name)[0],
            # NOTE(review): st_ctime is the metadata-change time on Unix and
            # the creation time only on Windows.
            'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
            'content': content,
            'timestamp': datetime.now().isoformat()
        }]
    except Exception as e:
        logger.error(f"File processing error: {e}")
        return []
60
+
61
+
62
+ # process_all_inputs is a module-level function, not a FileProcessor method
63
def process_all_inputs(urls, file, text, notes):
    """Collect content from URLs, an uploaded file and raw text into one JSON file.

    Args:
        urls: String of URLs separated by commas and/or newlines (may be empty).
        file: Uploaded file object handed to ``FileProcessor.process_file``,
            or a falsy value when nothing was uploaded.
        text: Raw text; cleaned with ``URLProcessor.advanced_text_cleaning``.
        notes: Accepted for the UI wiring but not used by this function.

    Returns:
        A ``(output_path, summary, json_data)`` tuple. On success
        ``output_path`` is the written JSON file as a string and ``json_data``
        the same records serialised for the editor. When nothing was collected
        or an error occurred, ``output_path`` is ``None`` and ``json_data`` is
        an empty string.
    """
    try:
        processor = URLProcessor()
        file_processor = FileProcessor()
        results = []

        # URLs: split on commas/newlines and drop blank fragments.
        if urls:
            stripped = (piece.strip() for piece in re.split(r'[,\n]', urls))
            for url in [candidate for candidate in stripped if candidate]:
                if not processor.validate_url(url).get('is_valid'):
                    continue
                content = processor.fetch_content(url)
                if not content:
                    continue
                results.append({
                    'source': 'url',
                    'url': url,
                    'content': content,
                    'timestamp': datetime.now().isoformat()
                })

        # Uploaded file (single file or ZIP archive).
        if file:
            results.extend(file_processor.process_file(file))

        # Direct text input.
        if text:
            results.append({
                'source': 'direct_input',
                'content': processor.advanced_text_cleaning(text),
                'timestamp': datetime.now().isoformat()
            })

        if not results:
            return None, "No valid content to process.", ""

        # One dated folder per day, one timestamped file per run.
        output_dir = Path('output') / datetime.now().strftime('%Y-%m-%d')
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f'processed_{int(time.time())}.json'
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

        summary = f"Processed {len(results)} items successfully!"
        json_data = json.dumps(results, indent=2)  # shown in the JSON editor / used for the QR code
        return str(output_path), summary, json_data
    except Exception as e:
        logger.error(f"Processing error: {e}")
        return None, f"Error: {str(e)}", ""
113
+
114
+
115
+ # generate_qr_code is a module-level function, not a FileProcessor method
116
def generate_qr_code(json_data):
    """Return the path of a QR image for ``json_data``, or ``None`` if it is empty."""
    if not json_data:
        return None
    return generate_qr(json_data)
120
+
121
+
122
+ # generate_qr is a module-level function, not a FileProcessor method
123
def _render_qr_png(payload, version=None):
    """Encode ``payload`` as a QR code, save it to a temporary PNG and return its path."""
    qr = qrcode.QRCode(
        version=version,  # None lets the library pick the smallest fitting version
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )
    qr.add_data(payload)
    qr.make(fit=True)

    # make_image is a method of the QRCode instance; the qrcode module itself
    # has no such function.
    img = qr.make_image(fill_color="black", back_color="white")
    # Reserve a unique path, then close the handle before writing so no file
    # descriptor is leaked and the save also works where open files are locked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
        target_path = temp_file.name
    img.save(target_path)
    return target_path


def generate_qr(json_data):
    """Generate a QR code from JSON data and return the image file path.

    Args:
        json_data: Text to encode (the serialised JSON shown in the editor).

    Returns:
        Path of a temporary ``.png`` file. If encoding ``json_data`` fails
        (for example because it is too large for a QR code), the error is
        logged and the image encodes a short error message instead. The
        temporary file is not deleted by this function.
    """
    try:
        # Try first with automatic version selection
        return _render_qr_png(json_data)
    except Exception as e:
        logger.error(f"QR generation error: {e}")
        # Fall back to a small QR code that carries an error message.
        return _render_qr_png("Error: Data too large for QR code", version=1)
157
+
158
+
159
def create_interface():
    """Build the Gradio Blocks UI and wire its buttons to the processing callbacks.

    The interface has five tabs (URL, file, text, JSON editor, scratchpad),
    a "Process Input" button bound to ``process_all_inputs`` and a
    "Generate QR Code" button bound to ``generate_qr_code``.

    Returns:
        The assembled ``gr.Blocks`` object; launching it is left to the caller.
    """
    css = """
    .container { max-width: 1200px; margin: auto; }
    .warning { background-color: #fff3cd; color: #856404; }
    .error { background-color: #f8d7da; color: #721c24; }
    """
    with gr.Blocks(css=css, title="Advanced Text & URL Processing") as interface:
        gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")

        with gr.Tab("URL Processing"):
            url_input = gr.Textbox(
                label="Enter URLs (comma or newline separated)",
                lines=5,
                placeholder="https://example1.com\nhttps://example2.com"
            )

        with gr.Tab("File Input"):
            file_input = gr.File(
                label="Upload text file or ZIP archive",
                file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
            )

        with gr.Tab("Text Input"):
            text_input = gr.Textbox(
                label="Raw Text Input",
                lines=5,
                placeholder="Paste your text here..."
            )

        with gr.Tab("JSON Editor"):
            json_editor = gr.Textbox(
                label="JSON Editor",
                lines=20,
                placeholder="View and edit your JSON data here...",
                interactive=True,
                elem_id="json-editor"  # Optional: for custom styling
            )

        with gr.Tab("Scratchpad"):
            scratchpad = gr.Textbox(
                label="Scratchpad",
                lines=10,
                placeholder="Quick notes or text collections...",
                interactive=True
            )

        process_btn = gr.Button("Process Input", variant="primary")
        qr_btn = gr.Button("Generate QR Code", variant="secondary")

        output_text = gr.Textbox(label="Processing Results", interactive=False)
        output_file = gr.File(label="Processed Output")
        qr_output = gr.Image(label="QR Code", type="filepath")  # displays the generated QR code

        # The callback returns (file path, summary, JSON text); the JSON text
        # is placed in the editor so it can be reviewed before QR generation.
        process_btn.click(
            process_all_inputs,
            inputs=[url_input, file_input, text_input, scratchpad],
            outputs=[output_file, output_text, json_editor]
        )
        # The QR code is built from whatever is currently in the JSON editor.
        qr_btn.click(
            generate_qr_code,
            inputs=json_editor,
            outputs=qr_output
        )
        # "**Text Input**" must not have a space after the opening "**",
        # otherwise Markdown does not render it as bold.
        gr.Markdown("""
        ### Usage Guidelines
        - **URL Processing**: Enter valid HTTP/HTTPS URLs
        - **File Input**: Upload text files or ZIP archives
        - **Text Input**: Direct text processing
        - **JSON Editor**: View and edit your JSON data
        - **Scratchpad**: Quick notes or text collections
        - Advanced cleaning and validation included
        """)
    return interface
233
+
234
+
235
def check_network_connectivity():
    """Probe a few well-known sites and report whether any of them answered with HTTP 200.

    Returns:
        A ``(ok, results)`` tuple. ``results`` holds one dict per probed site
        with the keys ``"site"``, ``"status"`` (``"OK"`` or an ``"Error: ..."``
        string) and ``"response_time"`` (seconds, or ``None`` when the request
        raised). ``ok`` is ``False`` only when every probe ended in an error
        status, in which case the failure is also logged.
    """
    probe_targets = ["https://www.google.com", "https://www.cloudflare.com", "https://www.amazon.com"]
    results = []

    for site in probe_targets:
        try:
            response = requests.get(site, timeout=5)
            if response.status_code == 200:
                status = "OK"
            else:
                status = f"Error: {response.status_code}"
            entry = {
                "site": site,
                "status": status,
                "response_time": response.elapsed.total_seconds()
            }
        except Exception as e:
            entry = {
                "site": site,
                "status": f"Error: {str(e)}",
                "response_time": None
            }
        results.append(entry)

    # A single non-error probe is enough to consider the network usable.
    any_reachable = any(not entry["status"].startswith("Error") for entry in results)
    if any_reachable:
        return True, results

    logger.error("Network connectivity issue detected. All test sites failed.")
    return False, results
260
+
261
+
262
+ # Application entry point
263
def main():
    """Initialise MIME types, run the connectivity check and launch the Gradio app.

    Connectivity problems are only logged as warnings; the interface is
    launched either way.
    """
    # Configure system settings
    mimetypes.init()

    # Warn, but do not abort, when none of the probe sites could be reached.
    connected, probes = check_network_connectivity()
    if not connected:
        logger.warning("Network connectivity issues detected. Some features may not work properly.")
        for result in probes:
            logger.warning(f"Test site {result['site']}: {result['status']}")

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "share": False,
        "inbrowser": True,
        "debug": True,
    }
    create_interface().launch(**launch_options)


if __name__ == "__main__":
    main()