acecalisto3 commited on
Commit
8433748
Β·
verified Β·
1 Parent(s): e784699

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +680 -680
app2.py CHANGED
@@ -1,710 +1,710 @@
1
- """
2
- Advanced URL & Text Processing Suite - Professional Edition
3
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
-
5
- Enterprise-grade application with advanced features, real-time analytics,
6
- parallel processing, and sophisticated UI components.
7
- """
8
-
9
  import gradio as gr
10
- import logging
11
- import json
 
 
 
 
12
  import os
13
- import sys
14
- import threading
15
- import queue
16
- import time
17
- from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
18
- from datetime import datetime
19
- from pathlib import Path
20
- from typing import Dict, List, Optional, Union, Any, Tuple
21
- from dataclasses import dataclass, asdict
22
- import numpy as np
23
- import pandas as pd
24
- # Configure advanced logging with rotation
25
- from logging.handlers import RotatingFileHandler
26
- log_handler = RotatingFileHandler(
27
- 'urld_pro.log',
28
- maxBytes=10*1024*1024, # 10MB
29
- backupCount=5
30
- )
31
- log_handler.setFormatter(
32
- logging.Formatter('%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s - %(message)s')
33
  )
34
- logger = logging.getLogger(__name__)
35
- logger.addHandler(log_handler)
36
- logger.setLevel(logging.INFO)
37
-
38
- # Advanced Theme Configuration
39
- THEME = gr.themes.Soft(
40
- primary_hue=gr.themes.colors.indigo,
41
- secondary_hue=gr.themes.colors.blue,
42
- neutral_hue=gr.themes.colors.slate,
43
- spacing_size=gr.themes.sizes.spacing_lg,
44
- radius_size=gr.themes.sizes.radius_lg,
45
- text_size=gr.themes.sizes.text_lg,
46
- ).set(
47
- body_background_fill="*background_fill_secondary",
48
- button_primary_background_fill="*primary_500",
49
- button_primary_background_fill_hover="*primary_600",
50
- button_primary_text_color="white",
51
- button_primary_border_color="*primary_500",
52
- button_secondary_background_fill="*secondary_500",
53
- button_secondary_background_fill_hover="*secondary_600",
54
- button_secondary_text_color="white",
55
- button_secondary_border_color="*secondary_500",
56
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- # Enhanced CSS with advanced animations and modern design
59
- CUSTOM_CSS = """
60
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
61
-
62
- :root {
63
- --primary-color: #4f46e5;
64
- --secondary-color: #2563eb;
65
- --success-color: #059669;
66
- --error-color: #dc2626;
67
- --warning-color: #d97706;
68
- --info-color: #2563eb;
69
- }
70
-
71
- body {
72
- font-family: 'Inter', sans-serif;
73
- }
74
-
75
- .container {
76
- max-width: 1400px !important;
77
- margin: auto !important;
78
- padding: 2rem !important;
79
- }
80
-
81
- .pro-header {
82
- text-align: center;
83
- margin-bottom: 2rem;
84
- padding: 2rem;
85
- background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
86
- border-radius: 1rem;
87
- color: white;
88
- box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
89
- }
90
-
91
- .pro-header h1 {
92
- font-size: 2.5rem;
93
- font-weight: 700;
94
- margin-bottom: 1rem;
95
- }
96
-
97
- .pro-header p {
98
- font-size: 1.25rem;
99
- opacity: 0.9;
100
- }
101
-
102
- .dashboard {
103
- display: grid;
104
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
105
- gap: 1rem;
106
- margin-bottom: 2rem;
107
- }
108
-
109
- .stat-card {
110
- background: white;
111
- padding: 1.5rem;
112
- border-radius: 1rem;
113
- box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
114
- transition: transform 0.2s ease;
115
- }
116
-
117
- .stat-card:hover {
118
- transform: translateY(-2px);
119
- }
120
-
121
- .chart-container {
122
- background: white;
123
- padding: 1rem;
124
- border-radius: 1rem;
125
- box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
126
- margin-bottom: 1rem;
127
- }
128
-
129
- .pro-tab {
130
- transition: all 0.3s ease;
131
- }
132
-
133
- .pro-tab:hover {
134
- transform: translateY(-1px);
135
- }
136
-
137
- .pro-button {
138
- transition: all 0.2s ease;
139
- }
140
-
141
- .pro-button:hover {
142
- transform: translateY(-1px);
143
- box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
144
- }
145
-
146
- .pro-button:active {
147
- transform: translateY(0);
148
- }
149
-
150
- .status-message {
151
- padding: 1rem;
152
- border-radius: 0.5rem;
153
- margin: 1rem 0;
154
- animation: slideIn 0.3s ease;
155
- }
156
-
157
- .status-message.success {
158
- background: #ecfdf5;
159
- border: 1px solid var(--success-color);
160
- color: var(--success-color);
161
- }
162
-
163
- .status-message.error {
164
- background: #fef2f2;
165
- border: 1px solid var(--error-color);
166
- color: var(--error-color);
167
- }
168
-
169
- .status-message.warning {
170
- background: #fffbeb;
171
- border: 1px solid var(--warning-color);
172
- color: var(--warning-color);
173
- }
174
-
175
- @keyframes slideIn {
176
- from {
177
- opacity: 0;
178
- transform: translateY(-10px);
179
- }
180
- to {
181
- opacity: 1;
182
- transform: translateY(0);
183
- }
184
- }
185
-
186
- .loading {
187
- position: relative;
188
- }
189
-
190
- .loading::after {
191
- content: '';
192
- position: absolute;
193
- top: 0;
194
- left: 0;
195
- width: 100%;
196
- height: 100%;
197
- background: linear-gradient(
198
- 90deg,
199
- rgba(255,255,255,0) 0%,
200
- rgba(255,255,255,0.2) 50%,
201
- rgba(255,255,255,0) 100%
202
- );
203
- animation: shimmer 1.5s infinite;
204
- }
205
-
206
- @keyframes shimmer {
207
- 0% {
208
- transform: translateX(-100%);
209
- }
210
- 100% {
211
- transform: translateX(100%);
212
- }
213
- }
214
-
215
- .pro-footer {
216
- text-align: center;
217
- margin-top: 3rem;
218
- padding: 2rem;
219
- background: #f8fafc;
220
- border-radius: 1rem;
221
- box-shadow: 0 -1px 3px 0 rgb(0 0 0 / 0.1);
222
- }
223
- """
224
 
225
- @dataclass
226
- class ProcessingStats:
227
- """Data class for tracking processing statistics"""
228
- total_urls: int = 0
229
- successful_urls: int = 0
230
- failed_urls: int = 0
231
- total_files: int = 0
232
- successful_files: int = 0
233
- failed_files: int = 0
234
- total_qr_codes: int = 0
235
- successful_qr_codes: int = 0
236
- failed_qr_codes: int = 0
237
- processing_time: float = 0.0
238
- last_updated: str = datetime.now().isoformat()
239
-
240
- class AdvancedProInterface:
241
- """Professional interface with advanced features and real-time analytics"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
- def __init__(self):
244
- """Initialize the professional interface with advanced components"""
245
- self.url_processor = URLProcessor()
246
- self.file_processor = FileProcessor()
247
- self.qr_processor = QRProcessor()
248
- self.stats = ProcessingStats()
249
- self.processing_queue = queue.Queue()
250
- self.thread_pool = ThreadPoolExecutor(max_workers=10)
251
- self.process_pool = ProcessPoolExecutor(max_workers=4)
252
- self.processing_history: List[Dict] = []
 
 
 
 
 
 
 
 
253
 
254
- # Initialize real-time monitoring
255
- self._start_monitoring()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- def _start_monitoring(self):
258
- """Start background monitoring thread"""
259
- def monitor():
260
- while True:
261
- try:
262
- # Update statistics
263
- self.stats.last_updated = datetime.now().isoformat()
264
-
265
- # Process queued items
266
- while not self.processing_queue.empty():
267
- item = self.processing_queue.get_nowait()
268
- self._process_queued_item(item)
269
-
270
- time.sleep(1)
271
- except Exception as e:
272
- logger.error(f"Monitoring error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
- threading.Thread(target=monitor, daemon=True).start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
- def _process_queued_item(self, item: Dict):
277
- """Process queued items with advanced error handling"""
 
 
 
 
 
 
278
  try:
279
- item_type = item.get('type')
280
- if item_type == 'url':
281
- self._process_url_item(item)
282
- elif item_type == 'file':
283
- self._process_file_item(item)
284
- elif item_type == 'qr':
285
- self._process_qr_item(item)
286
  except Exception as e:
287
- logger.error(f"Queue processing error: {e}")
288
-
289
- def _process_url_item(self, item: Dict):
290
- """Process URL items with advanced features"""
291
- try:
292
- start_time = time.time()
293
- results = self.url_processor.process_urls([item['url']], mode=item['mode'])
294
- processing_time = time.time() - start_time
295
-
296
- self.stats.total_urls += 1
297
- if any('error' in r for r in results):
298
- self.stats.failed_urls += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  else:
300
- self.stats.successful_urls += 1
301
-
302
- self.stats.processing_time += processing_time
303
-
304
- # Update history
305
- self.processing_history.append({
306
- 'timestamp': datetime.now().isoformat(),
307
- 'type': 'url',
308
- 'url': item['url'],
309
- 'results': results,
310
- 'processing_time': processing_time
311
- })
312
- except Exception as e:
313
- logger.error(f"URL processing error: {e}")
314
- self.stats.failed_urls += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
- async def process_urls_parallel(self, urls: str, mode: str) -> Tuple[str, str, str, Dict]:
317
- """Process URLs in parallel with advanced features"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  try:
319
- url_list = [url.strip() for url in urls.split('\n') if url.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
- if not url_list:
322
- return "", "⚠️ No valid URLs provided", "", {}
 
 
 
 
 
 
 
 
 
 
 
323
 
324
- start_time = time.time()
 
 
 
 
 
 
 
 
 
325
 
326
- # Process URLs in parallel
327
- futures = []
328
- for url in url_list:
329
- future = self.thread_pool.submit(
330
- self.url_processor.process_urls,
331
- [url],
332
- mode=mode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  )
334
- futures.append(future)
335
-
336
- # Collect results
337
- results = []
338
- for future in futures:
339
- try:
340
- result = future.result(timeout=30)
341
- results.extend(result)
342
- except Exception as e:
343
- logger.error(f"URL processing error: {e}")
344
- results.append({
345
- 'error': str(e),
346
- 'timestamp': datetime.now().isoformat()
347
- })
348
-
349
- processing_time = time.time() - start_time
350
-
351
- # Update statistics
352
- self.stats.total_urls += len(url_list)
353
- self.stats.successful_urls += len([r for r in results if 'error' not in r])
354
- self.stats.failed_urls += len([r for r in results if 'error' in r])
355
- self.stats.processing_time += processing_time
356
-
357
- # Generate analytics
358
- analytics = self._generate_analytics(results, processing_time)
359
-
360
- # Format output
361
- formatted_results = json.dumps(results, indent=2)
362
- summary = self._generate_summary(results)
363
-
364
- return (
365
- formatted_results,
366
- f"βœ… Processed {len(url_list)} URLs in {processing_time:.2f}s",
367
- summary,
368
- analytics
 
 
 
 
 
 
369
  )
370
-
371
- except Exception as e:
372
- logger.error(f"Parallel processing error: {e}")
373
- return "", f"❌ Error: {str(e)}", "", {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
- def _generate_analytics(self, results: List[Dict], processing_time: float) -> Dict:
376
- """Generate detailed analytics from processing results"""
377
- analytics = {
378
- 'processing_time': processing_time,
379
- 'total_items': len(results),
380
- 'success_rate': len([r for r in results if 'error' not in r]) / len(results) if results else 0,
381
- 'error_rate': len([r for r in results if 'error' in r]) / len(results) if results else 0,
382
- 'average_time_per_item': processing_time / len(results) if results else 0,
383
- 'timestamp': datetime.now().isoformat()
384
- }
385
-
386
- # Add historical trends
387
- if self.processing_history:
388
- historical_success_rates = [
389
- len([r for r in h['results'] if 'error' not in r]) / len(h['results'])
390
- for h in self.processing_history[-10:] # Last 10 operations
391
- ]
392
- analytics['historical_success_rate'] = sum(historical_success_rates) / len(historical_success_rates)
393
-
394
- return analytics
395
 
396
- def create_interface(self) -> gr.Blocks:
397
- """Create an advanced professional interface with real-time analytics"""
398
-
399
- with gr.Blocks(theme=THEME, css=CUSTOM_CSS) as interface:
400
- # Professional Header
401
- gr.Markdown(
402
- """
403
- <div class="pro-header">
404
- <h1>🌐 Advanced URL & Text Processing Suite - Pro</h1>
405
- <p>Enterprise-grade toolkit with advanced features and real-time analytics</p>
406
- </div>
407
- """
408
  )
409
-
410
- # Real-time Dashboard
411
- with gr.Row(elem_classes="dashboard"):
412
- with gr.Column(elem_classes="stat-card"):
413
- url_stats = gr.JSON(
414
- value={
415
- "Total URLs": 0,
416
- "Success Rate": "0%",
417
- "Avg. Processing Time": "0ms"
418
- },
419
- label="URL Processing Stats"
420
- )
421
-
422
- with gr.Column(elem_classes="stat-card"):
423
- file_stats = gr.JSON(
424
- value={
425
- "Total Files": 0,
426
- "Success Rate": "0%",
427
- "Avg. Processing Time": "0ms"
428
- },
429
- label="File Processing Stats"
430
- )
431
-
432
- with gr.Column(elem_classes="stat-card"):
433
- qr_stats = gr.JSON(
434
- value={
435
- "Total QR Codes": 0,
436
- "Success Rate": "0%",
437
- "Avg. Processing Time": "0ms"
438
- },
439
- label="QR Code Stats"
440
- )
441
-
442
- # Main Interface Tabs
443
- with gr.Tabs() as tabs:
444
- # Advanced URL Processing Tab
445
- with gr.Tab("πŸ”— URL Processing", elem_classes="pro-tab"):
446
- with gr.Row():
447
- with gr.Column(scale=2):
448
- url_input = gr.Textbox(
449
- label="URLs",
450
- placeholder="Enter URLs (one per line)",
451
- lines=5
452
- )
453
-
454
- with gr.Row():
455
- mode = gr.Radio(
456
- choices=["basic", "interactive", "deep"],
457
- value="basic",
458
- label="Processing Mode"
459
- )
460
- parallel = gr.Checkbox(
461
- label="Enable Parallel Processing",
462
- value=True
463
- )
464
-
465
- with gr.Row():
466
- process_btn = gr.Button(
467
- "πŸš€ Process URLs",
468
- elem_classes="pro-button"
469
- )
470
- clear_btn = gr.Button(
471
- "πŸ—‘οΈ Clear",
472
- elem_classes="pro-button"
473
- )
474
-
475
- with gr.Column(scale=1):
476
- gr.Markdown(
477
- """
478
- ### πŸ“Š Processing Modes
479
-
480
- #### Basic Mode
481
- - Fast content retrieval
482
- - Basic metadata extraction
483
- - Suitable for simple URLs
484
-
485
- #### Interactive Mode
486
- - Handles JavaScript-rendered content
487
- - Social media support
488
- - Advanced rate limiting
489
-
490
- #### Deep Mode
491
- - Full content analysis
492
- - Link following
493
- - Comprehensive metadata
494
- """
495
- )
496
-
497
- with gr.Row():
498
- status_output = gr.Textbox(
499
- label="Status",
500
- interactive=False
501
- )
502
-
503
- with gr.Tabs():
504
- with gr.Tab("Results"):
505
- json_output = gr.JSON(
506
- label="Detailed Results"
507
- )
508
-
509
- with gr.Tab("Summary"):
510
- summary_output = gr.Textbox(
511
- label="Processing Summary",
512
- interactive=False,
513
- lines=10
514
- )
515
-
516
- with gr.Tab("Analytics"):
517
- analytics_output = gr.Plot(
518
- label="Processing Analytics"
519
- )
520
-
521
- # Advanced File Processing Tab
522
- with gr.Tab("πŸ“ File Processing", elem_classes="pro-tab"):
523
- with gr.Row():
524
- with gr.Column(scale=2):
525
- file_input = gr.File(
526
- label="Upload Files",
527
- file_types=[
528
- ".txt", ".pdf", ".doc", ".docx",
529
- ".zip", ".tar.gz", ".jpg", ".png"
530
- ],
531
- multiple=True
532
- )
533
-
534
- with gr.Row():
535
- process_file_btn = gr.Button(
536
- "πŸ“₯ Process Files",
537
- elem_classes="pro-button"
538
- )
539
- batch_size = gr.Slider(
540
- minimum=1,
541
- maximum=10,
542
- value=3,
543
- step=1,
544
- label="Batch Size"
545
- )
546
-
547
- with gr.Column(scale=1):
548
- gr.Markdown(
549
- """
550
- ### πŸ“‘ Supported Formats
551
-
552
- #### Documents
553
- - PDF files (.pdf)
554
- - Word documents (.doc, .docx)
555
- - Text files (.txt)
556
-
557
- #### Archives
558
- - ZIP archives (.zip)
559
- - TAR archives (.tar.gz)
560
-
561
- #### Media
562
- - Images (.jpg, .png)
563
- - And more...
564
- """
565
- )
566
-
567
- with gr.Row():
568
- file_status = gr.Textbox(
569
- label="Status",
570
- interactive=False
571
- )
572
-
573
- with gr.Tabs():
574
- with gr.Tab("Results"):
575
- file_output = gr.File(
576
- label="Processed Files"
577
- )
578
-
579
- with gr.Tab("Details"):
580
- file_json_output = gr.JSON(
581
- label="Processing Details"
582
- )
583
-
584
- with gr.Tab("Analytics"):
585
- file_analytics = gr.Plot(
586
- label="File Processing Analytics"
587
- )
588
-
589
- # Advanced QR Code Tab
590
- with gr.Tab("πŸ“± QR Code", elem_classes="pro-tab"):
591
- with gr.Row():
592
- with gr.Column(scale=2):
593
- qr_input = gr.Textbox(
594
- label="Data",
595
- placeholder="Enter data to encode",
596
- lines=3
597
- )
598
-
599
- with gr.Row():
600
- qr_size = gr.Slider(
601
- minimum=5,
602
- maximum=50,
603
- value=10,
604
- step=1,
605
- label="QR Code Size"
606
- )
607
- qr_correction = gr.Dropdown(
608
- choices=["L", "M", "Q", "H"],
609
- value="M",
610
- label="Error Correction"
611
- )
612
-
613
- with gr.Row():
614
- generate_qr_btn = gr.Button(
615
- "✨ Generate QR",
616
- elem_classes="pro-button"
617
- )
618
- customize_btn = gr.Button(
619
- "🎨 Customize",
620
- elem_classes="pro-button"
621
- )
622
-
623
- with gr.Column(scale=1):
624
- qr_output = gr.Image(
625
- label="Generated QR Code"
626
- )
627
- qr_status = gr.Textbox(
628
- label="Status",
629
- interactive=False
630
- )
631
-
632
- # Professional Footer
633
- gr.Markdown(
634
- """
635
- <div class="pro-footer">
636
- <p>Advanced URL & Text Processing Suite - Professional Edition</p>
637
- <p style="font-size: 0.9rem;">Version 1.0.0 Pro | Β© 2024 Advanced URL Processing Team</p>
638
- </div>
639
- """
640
  )
641
-
642
- # Event Handlers
643
- process_btn.click(
644
- fn=self.process_urls_parallel,
645
- inputs=[url_input, mode],
646
- outputs=[
647
- json_output,
648
- status_output,
649
- summary_output,
650
- analytics_output
651
- ]
652
  )
653
-
654
- clear_btn.click(
655
- fn=lambda: ("", "", "", None),
656
- inputs=[],
657
- outputs=[
658
- url_input,
659
- status_output,
660
- summary_output,
661
- analytics_output
662
- ]
663
  )
664
-
665
- process_file_btn.click(
666
- fn=self.process_file,
667
- inputs=[file_input],
668
- outputs=[
669
- file_json_output,
670
- file_status,
671
- file_output
672
- ]
673
  )
674
-
675
- generate_qr_btn.click(
676
- fn=self.generate_qr,
677
- inputs=[qr_input, qr_size],
678
- outputs=[qr_output, qr_status]
 
 
 
 
 
 
 
679
  )
680
-
681
- # Update statistics periodically
682
- gr.Markdown.update(every=5)
683
-
684
- return interface
685
-
686
- def main():
687
- """Main entry point with advanced error handling"""
688
- try:
689
- # Initialize interface
690
- app = AdvancedProInterface()
691
- interface = app.create_interface()
692
 
693
- # Launch with professional configuration
694
- interface.launch(
695
- server_name="0.0.0.0",
696
- server_port=8000,
697
- share=False,
698
- debug=True,
699
- enable_queue=True,
700
- max_threads=40,
701
- auth=None, # Add authentication if needed
702
- ssl_keyfile=None, # Add SSL if needed
703
- ssl_certfile=None
704
- )
705
- except Exception as e:
706
- logger.error(f"Application startup error: {e}", exc_info=True)
707
- sys.exit(1)
708
-
709
- if __name__ == "__main__":
710
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ #import urllib.request
3
+ import requests
4
+ import zipfile
5
+ import uuid
6
+ import bs4
7
+ import lxml
8
  import os
9
+ #import subprocess
10
+ from huggingface_hub import InferenceClient,HfApi
11
+ import random
12
+ import json
13
+ import datetime
14
+ from pypdf import PdfReader
15
+ import uuid
16
+ #from query import tasks
17
+ from agent import (
18
+ PREFIX,
19
+ COMPRESS_DATA_PROMPT,
20
+ COMPRESS_DATA_PROMPT_SMALL,
21
+ LOG_PROMPT,
22
+ LOG_RESPONSE,
 
 
 
 
 
 
23
  )
24
+ client = InferenceClient(
25
+ "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  )
27
+ reponame="acecalisto3/tmp"
28
+ save_data=f'https://huggingface.co/datasets/{reponame}/raw/main/'
29
+ token_self = os.environ['HF_TOKEN']
30
+ api=HfApi(token=token_self)
31
+
32
def find_all(purpose, task, history, url, result, steps=1):
    """Breadth-first crawl starting at *url*, collecting raw page text.

    Visits links up to *steps* levels deep (following only absolute
    http(s) anchors), appending each fetched page's text to the result
    list. `purpose`, `task`, `history` and `result` are accepted for
    interface parity with the caller but are not used here.

    Returns:
        (True, list_of_raw_text_strings) — the boolean is always True;
        per-URL fetch failures are printed and skipped.

    BUG FIX: `steps` now defaults to 1 — the caller in `summarize`
    invokes this with only five arguments, which previously raised
    TypeError. A timeout was also added so a dead host cannot hang
    the crawl forever.
    """
    return_list = []
    visited_links = set()
    links_to_visit = [(url, 0)]

    while links_to_visit:
        current_url, current_depth = links_to_visit.pop(0)
        if current_depth < steps:
            try:
                if current_url not in visited_links:
                    visited_links.add(current_url)
                    source = requests.get(current_url, timeout=30)
                    if source.status_code == 200:
                        soup = bs4.BeautifulSoup(source.content, 'lxml')
                        rawp = f'RAW TEXT RETURNED: {soup.text}'
                        return_list.append(rawp)

                        for link in soup.find_all("a"):
                            href = link.get('href')
                            if href and href.startswith('http'):
                                links_to_visit.append((href, current_depth + 1))
            except Exception as e:
                print(f"Error fetching {current_url}: {e}")

    return True, return_list
57
+
58
def read_txt(txt_path):
    """Read the whole text file at *txt_path*, echo it to stdout, return it.

    FIX: the original called `f.close()` inside the `with` block — the
    context manager already closes the file — and pre-initialized `text`
    to an empty string that was immediately overwritten. Both removed.
    """
    with open(txt_path, "r") as f:
        text = f.read()
    print(text)
    return text
65
+
66
def read_pdf(pdf_path):
    """Extract the text of every page of the PDF at *pdf_path*.

    Pages are concatenated in order, each preceded by a newline; the
    accumulated text is printed before being returned.
    """
    reader = PdfReader(f'{pdf_path}')
    number_of_pages = len(reader.pages)
    text = ""
    for page_index in range(number_of_pages):
        current_page = reader.pages[page_index]
        text = f'{text}\n{current_page.extract_text()}'
    print(text)
    return text
75
+
76
# URLs that failed to download; inspected by the UI via `summarize`.
error_box = []

def read_pdf_online(url):
    """Download the PDF at *url* and return its extracted text.

    On a non-200 response the status code is returned (and the URL is
    recorded in `error_box`); on any exception the exception object is
    returned — callers interpolate the result into f-strings, so any
    return type renders safely.

    FIXES: the original generated a `uid` but never used it, writing
    every download to a fixed "test.pdf" so concurrent calls clobbered
    each other — the uid now makes the scratch file unique. A request
    timeout was also added so a dead host cannot hang the call.
    """
    uid = uuid.uuid4()
    print(f"reading {url}")
    response = requests.get(url, stream=True, timeout=60)
    print(response.status_code)
    text = ""
    try:
        if response.status_code == 200:
            pdf_name = f"test-{uid}.pdf"
            with open(pdf_name, "wb") as f:
                f.write(response.content)
            reader = PdfReader(pdf_name)
            number_of_pages = len(reader.pages)
            print(number_of_pages)
            for i in range(number_of_pages):
                page = reader.pages[i]
                text = f'{text}\n{page.extract_text()}'
            print(f"PDF_TEXT:: {text}")
            return text
        else:
            text = response.status_code
            error_box.append(url)
            print(text)
            return text

    except Exception as e:
        print(e)
        return e
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
VERBOSE = True
MAX_HISTORY = 100
MAX_DATA = 20000


def format_prompt(message, history):
    """Render *history* plus the new *message* in Mixtral [INST] format.

    Each (user, bot) turn becomes "[INST] user [/INST] bot</s> ", the
    whole prompt opens with "<s>" and ends with the new message wrapped
    in its own [INST] block.
    """
    pieces = ["<s>"]
    for user_turn, bot_turn in history:
        pieces.append(f"[INST] {user_turn} [/INST]")
        pieces.append(f" {bot_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
124
+
125
+
126
+
127
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    **prompt_kwargs,
):
    """Run one streamed text-generation call against the Mixtral endpoint.

    prompt_template -- template string formatted with **prompt_kwargs and
                       appended to the global PREFIX framing
    stop_tokens     -- accepted for call-site parity; NOTE(review): not
                       forwarded to the endpoint — generation is bounded
                       only by max_tokens. Confirm whether this is intended.
    max_tokens      -- forwarded as max_new_tokens
    seed            -- sampling seed (printed for traceability)

    Returns the full generated text accumulated from the token stream.
    """
    print(seed)
    timestamp=datetime.datetime.now()

    # Sampling configuration forwarded to client.text_generation.
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    # PREFIX (from agent.py) supplies the timestamp/purpose framing; the
    # task-specific template follows it in a single prompt string.
    content = PREFIX.format(
        timestamp=timestamp,
        purpose="Compile the provided data and complete the users task"
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    # Stream the generation and accumulate token texts into one string.
    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
    resp = ""
    for response in stream:
        resp += response.token.text

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
166
+
167
+
168
def compress_data(c, instruct, history):
    """Chunk *history* (a string) and summarize each chunk independently.

    c        -- delimiter count of the data, used as a size proxy
    instruct -- user instruction, passed to the prompt as `direction`
    history  -- raw text to compress (sliced by character index)

    Returns a list with one LLM-summarized string per chunk.

    NOTE(review): raises ZeroDivisionError when c == 0 (divr is 0 in the
    chunk computation); callers appear to guarantee c >= 1 — confirm.
    """
    seed=random.randint(1,1000000000)

    print (c)
    # When the data is smaller than MAX_DATA, divr < 1, so the computed
    # chunk covers the whole input and the loop below runs exactly once.
    divr=int(c)/MAX_DATA
    divi=int(divr)+1 if divr != int(divr) else int(divr)
    chunk = int(int(c)/divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print (f'divi:: {divi}')
    out = []
    s=0
    e=chunk
    print(f'e:: {e}')
    new_history=""
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        # Each chunk is compressed in isolation (knowledge="") with the
        # small compression prompt; results are collected, not merged —
        # contrast with compress_data_og, which threads prior output.
        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=8192,
            seed=seed,
            direction=instruct,
            knowledge="",
            history=hist,
        )
        out.append(resp)
        print (resp)
        # Advance the character window to the next chunk.
        e=e+chunk
        s=s+chunk
    return out
208
+
209
 
210
def compress_data_og(c, instruct, history):
    """Iteratively compress *history*, threading each summary into the next.

    Unlike `compress_data`, every chunk is summarized with the previous
    chunk's summary supplied as `knowledge`, so the final response folds
    in the whole input. Returns the last summary string.

    c        -- delimiter count of the data, used as a size proxy
    instruct -- user instruction, passed to the prompt as `direction`
    history  -- raw text to compress (sliced by character index)

    FIX: the original accumulated results with `out += resp` on a list,
    which extends a list with the *individual characters* of the string;
    the accumulator was never used (the function returns `resp`), so the
    dead, buggy accumulation and an unused triple-quoted block were
    removed.

    NOTE(review): like compress_data, this divides by `divr` and will
    raise ZeroDivisionError when c == 0 — confirm callers pass c >= 1.
    """
    seed=random.randint(1,1000000000)

    print (c)
    # divr < 1 for inputs smaller than MAX_DATA: one chunk, one pass.
    divr=int(c)/MAX_DATA
    divi=int(divr)+1 if divr != int(divr) else int(divr)
    chunk = int(int(c)/divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print (f'divi:: {divi}')
    s=0
    e=chunk
    print(f'e:: {e}')
    new_history=""
    resp = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        # Feed the previous summary back in as `knowledge` so context
        # carries across chunks.
        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=8192,
            seed=seed,
            direction=instruct,
            knowledge=new_history,
            history=hist,
        )

        new_history = resp
        print (resp)
        # Advance the character window to the next chunk.
        e=e+chunk
        s=s+chunk
    print ("final" + resp)
    return resp
262
+
263
+
264
+
265
def summarize(
    inp: str,
    history: list,
    report_check: bool,
    sum_mem_check: str,
    data: str = None,
    files: list = None,
    url: str = None,
    pdf_url: str = None,
    pdf_batch: str = None
):
    """Collect data from URLs, PDFs and files, then summarize or index it.

    Generator driven by the Gradio UI; yields progress then the final
    result as ("", history, error_box, json_out).

    Parameters:
    - inp: the user task; defaults to "Process this data" when empty.
    - history: chat history list (cleared and rebuilt here).
    - report_check: when True, additionally compress the result into a
      single report with compress_data_og.
    - sum_mem_check: "Memory" saves an index via save_memory;
      "Summarize" compresses via compress_data.
    - data / files / url / pdf_url / pdf_batch: optional data sources;
      pdf_batch is a comma-separated list of PDF URLs.

    FIXES vs. original:
    - find_all takes six arguments but was called with five (TypeError
      at runtime); an explicit crawl depth of 1 is now passed.
    - the validity gate compared `data != ""`, letting data=None slip
      through and be summarized as the literal string "None"; the check
      now also rejects None.
    - the `-> str` annotation was dropped: this is a generator that
      yields 4-tuples, not a function returning str.
    """
    json_box = []
    rawp = ""
    json_out = None

    if inp == "":
        inp = "Process this data"

    # Show immediate feedback in the chat while processing runs.
    history.clear()
    history = [(inp, "Working on it...")]
    yield "", history, error_box, json_box

    # Batch of PDF URLs (comma-separated).
    if pdf_batch and pdf_batch.startswith("http"):
        c = pdf_batch.count(",") + 1  # number of URLs in the batch
        data = ""
        try:
            for i in range(c):
                batch_url = pdf_batch.split(",", c)[i]
                bb = read_pdf_online(batch_url)
                data = f'{data}\nFile Name URL ({batch_url}):\n{bb}'
        except Exception as e:
            print(e)

    # Single PDF URL.
    if pdf_url and pdf_url.startswith("http"):
        print("PDF_URL")
        out = read_pdf_online(pdf_url)
        data = out

    # Regular web URL: shallow crawl for raw page text.
    if url and url.startswith("http"):
        # BUG FIX: supply the crawl depth the original call omitted.
        val, out = find_all(inp, "", history, url, "", 1)
        if not val:
            data = "Error"
            rawp = str(out)
        else:
            data = out

    # Uploaded files: only .pdf and .txt are read; other types are
    # silently skipped (presumably filtered by the UI — confirm).
    if files:
        for i, file in enumerate(files):
            try:
                print(file)
                if file.endswith(".pdf"):
                    zz = read_pdf(file)
                    print(zz)
                    data = f'{data}\nFile Name ({file}):\n{zz}'
                elif file.endswith(".txt"):
                    zz = read_txt(file)
                    print(zz)
                    data = f'{data}\nFile Name ({file}):\n{zz}'
            except Exception as e:
                data = f'{data}\nError opening File Name ({file})'
                print(e)

    # Process whatever was collected.
    # BUG FIX: also reject None, which the original `data != ""` let through.
    if data and data != "Error":
        print(inp)
        out = str(data)
        rl = len(out)
        print(f'rl:: {rl}')
        c = sum(1 for i in str(out) if i in [" ", ",", "\n"])  # delimiter count as size proxy
        print(f'c:: {c}')

        if sum_mem_check == "Memory":
            json_out = save_memory(inp, out)
            rawp = "Complete"

        if sum_mem_check == "Summarize":
            json_out = compress_data(c, inp, out)
            out = str(json_out)

            if report_check:
                rl = len(out)
                print(f'rl:: {rl}')
                c = sum(1 for i in str(out) if i in [" ", ",", "\n"])  # recount for the compressed text
                print(f'c2:: {c}')
                rawp = compress_data_og(c, inp, out)
            else:
                rawp = out
    else:
        rawp = "Provide a valid data source"

    history.clear()
    history.append((inp, rawp))
    yield "", history, error_box, json_out
385
# Prompt template used by save_memory() for each run_gpt() call.
# Placeholders filled via run_gpt's formatting: {task} — short task label
# ("Index this Data"); {history} — the raw data chunk to compile.
# The model is instructed to answer with ONLY a JSON string; save_memory()
# later extracts it by slicing from the first '[{' marker.
SAVE_MEMORY = """
You are attempting to complete the task
task: {task}
Data:
{history}
Instructions:
Compile and categorize the data above into a JSON dictionary string
Include ALL text, datapoints, titles, descriptions, and source urls indexed into an easy to search JSON format
Your final response should be only the final formatted JSON string enclosed in brackets, and nothing else.
Required keys:
"keywords":["short", "list", "of", "important", "keywords", "found", "in", "this", "entry"]
"title":"title of entry"
"description":"A sentence summarizing the topic of this entry"
"content":"A brief paragraph summarizing the important datapoints found in this entry"
"url":"https://url.source"
"""
401
+
402
def save_memory(purpose, history):
    """Chunk *history*, have the LLM index each chunk as JSON, and upload
    both the per-chunk entry and an updated main.json keyword index to the
    Hugging Face dataset repo.

    Relies on module globals defined elsewhere in the file: run_gpt,
    MAX_DATA, api, reponame, token_self, save_data.
    Returns out_box: the list of raw per-chunk model responses.
    """
    uid=uuid.uuid4()  # unique suffix for the temp files written below
    history=str(history)
    c=1
    inp = str(history)
    rl = len(inp)
    print(f'rl:: {rl}')
    # Rough "token" count: number of delimiter characters in the input.
    for i in str(inp):
        if i == " " or i=="," or i=="\n" or i=="/" or i=="\\" or i=="." or i=="<":
            c +=1
    print (f'c:: {c}')

    seed=random.randint(1,1000000000)  # same seed reused for every chunk's run_gpt call

    print (c)
    #tot=len(purpose)
    #print(tot)
    # Split the input into `divi` slices of roughly `chunk` characters each,
    # sized so each slice's delimiter count stays near MAX_DATA.
    divr=int(c)/MAX_DATA
    divi=int(divr)+1 if divr != int(divr) else int(divr)
    chunk = int(int(c)/divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print (f'divi:: {divi}')
    out_box = []
    #out=""
    s=0         # start index of the current slice
    ee=chunk    # end index of the current slice
    print(f'e:: {ee}')
    new_history=""
    task = f'Index this Data\n'
    for z in range(divi):
        print(f's:e :: {s}:{ee}')

        hist = inp[s:ee]  # current character slice of the input

        # Ask the model to compile this slice into the JSON entry format
        # described by the SAVE_MEMORY prompt template.
        resp = run_gpt(
            SAVE_MEMORY,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=4096,
            seed=seed,
            purpose=purpose,
            task=task,
            history=hist,
        ).strip('\n')
        #new_history = resp
        #print (resp)
        #out+=resp

        #print ("final1" + resp)
        try:
            # Trim any preamble/epilogue: keep from the first '[{' up to a
            # '</s>' end-of-sequence marker, re-prepending the '[{'.
            resp='[{'+resp.split('[{')[1].split('</s>')[0]
            #print ("final2\n" + resp)
            #print(f"keywords:: {resp['keywords']}")
        except Exception as e:
            # Extraction failed (no '[{' in the reply) — keep resp as-is.
            resp = resp
            print(e)
        # Timestamp doubles as a filesystem-safe file name component.
        timestamp=str(datetime.datetime.now())
        timename=timestamp.replace(" ","--").replace(":","-").replace(".","-")
        json_object=resp
        #json_object = json.dumps(out_box)
        #json_object = json.dumps(out_box,indent=4)
        # Write the per-chunk entry locally, then push it to the dataset repo.
        with open(f"tmp-{uid}.json", "w") as outfile:
            outfile.write(json_object)

        outfile.close()  # NOTE(review): redundant — the `with` block already closed it
        api.upload_file(
            path_or_fileobj=f"tmp-{uid}.json",
            path_in_repo=f"/mem-test2/{timename}---{s}-{ee}.json",
            repo_id=reponame,
            #repo_id=save_data.split('datasets/',1)[1].split('/raw',1)[0],
            token=token_self,
            repo_type="dataset",
        )
        # Scan the model reply line-by-line for the "keywords" entry and fold
        # it into the repo-wide main.json index.
        lines = resp.strip().strip("\n").split("\n")
        r = requests.get(f'{save_data}mem-test2/main.json')
        print(f'status code main:: {r.status_code}')
        if r.status_code==200:

            lod = json.loads(r.text)
            #lod = eval(lod)
            print (f'lod:: {lod}')
        if not r.status_code==200:
            lod = []
        for i,line in enumerate(lines):
            key_box=[]
            print(f'LINE:: {line}')
            if ":" in line:
                print(f'line:: {line}')

                if "keywords" in line:
                    print(f'trying:: {line}')
                    # Pull the bracketed list out of: "keywords":["a","b",...]
                    keyw=line.split(":")[1]
                    print (keyw)
                    print (keyw.split("[")[1].split("]")[0])
                    keyw=keyw.split("[")[1].split("]")[0]
                    for ea in keyw.split(","):
                        # Strip each keyword down to alphanumerics and spaces.
                        s1=""
                        ea=ea.strip().strip("\n")
                        for ev in ea:
                            if ev.isalnum():
                                s1+=ev
                            if ev == " ":
                                s1+=ev
                        #ea=s1
                        print(s1)
                        key_box.append(s1)
                    lod.append({"file_name":f"{timename}---{s}-{ee}","keywords":key_box,"index":f"{s}:{ee}"})
        # Re-upload the merged index.  NOTE(review): main.json is fetched and
        # re-uploaded once per chunk; racing writers could lose entries.
        json_object = json.dumps(lod, indent=4)
        with open(f"tmp2-{uid}.json", "w") as outfile2:
            outfile2.write(json_object)
        outfile2.close()  # NOTE(review): redundant — `with` already closed it
        api.upload_file(
            path_or_fileobj=f"tmp2-{uid}.json",
            path_in_repo=f"/mem-test2/main.json",
            repo_id=reponame,
            #repo_id=save_data.split('datasets/',1)[1].split('/raw',1)[0],
            token=token_self,
            repo_type="dataset",
        )
        # Advance to the next slice.
        ee=ee+chunk
        s=s+chunk
        out_box.append(resp)
    return out_box
525
+
526
def create_zip_file(output_data, zip_name):
    """Pack every element of *output_data* into the archive *zip_name*.

    Each element is stored as a text member named ``data_<index>.txt``,
    in iteration order.  Returns *zip_name* so callers can chain on it.
    """
    archive = zipfile.ZipFile(zip_name, 'w')
    try:
        for idx, payload in enumerate(output_data):
            archive.writestr(f'data_{idx}.txt', payload)
    finally:
        archive.close()
    return zip_name
531
+
532
+
533
+
534
def clear_fn():
    """Reset the UI: blank the prompt box and empty the chat history.

    Returns:
        tuple[str, list]: ("", []) — an empty prompt and an empty history.

    The previous version returned ``[(None, None)]``, the legacy tuple
    history format, which renders a blank placeholder row in a
    ``type='messages'`` Chatbot and disagreed with the second clear_fn
    definition later in the file (which returns ``[]``).  An empty list is
    the correct "cleared" value for both history formats.
    """
    return "", []
536
+
537
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3></center>""")

    # Main chat interface.
    # BUG FIX: the previous version passed component assignments
    # (purpose_input=gr.Textbox(...), steps_input=gr.Number(...),
    # output_component=gr.Textbox(...), button=gr.Button("Search")) as
    # *keyword arguments inside the gr.Chatbot(...) call*.  gr.Chatbot
    # rejects unknown kwargs (TypeError at startup), and those names were
    # never bound as variables, so the button.click wiring below would
    # raise NameError.  They are now created as real components.
    chatbot = gr.Chatbot(
        label="Mixtral 8x7B Chatbot",
        show_copy_button=True,
        type='messages',
        height=400,
    )

    # Inputs consumed by the `find_all` click handler wired further down.
    with gr.Accordion("Search Inputs", open=False):
        purpose_input = gr.Textbox(label="Purpose")
        task_input = gr.Textbox(label="Task")
        history_input = gr.Textbox(label="History")
        url_input = gr.Textbox(label="URL")
        result_input = gr.Textbox(label="Result")
        steps_input = gr.Number(label="Steps", value=3)  # Default value of 3 steps
        output_component = gr.Textbox(label="Output")

    # Control Panel
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Instructions (optional)",
                placeholder="Enter processing instructions here..."
            )
            steps = gr.Slider(
                label="Crawl Steps",
                minimum=1,
                maximum=5,
                value=1,
                info="Number of levels to crawl for web content"
            )
        with gr.Column(scale=1):
            report_check = gr.Checkbox(
                label="Return Report",
                value=True,
                info="Generate detailed analysis report"
            )
            sum_mem_check = gr.Radio(
                label="Output Type",
                choices=["Summary", "Memory"],
                value="Summary",
                info="Choose between summarized or memory-based output"
            )
            button = gr.Button("Process", variant="primary")

    # Clear button
    with gr.Row():
        clear_btn = gr.Button("Clear", variant="secondary")

    # Input Tabs: raw text, file upload, single web URL, single PDF, PDF batch.
    with gr.Tabs() as input_tabs:
        with gr.Tab("πŸ“ Text"):
            data = gr.Textbox(
                label="Input Data",
                lines=6,
                placeholder="Paste your text here..."
            )
        with gr.Tab("πŸ“ File"):
            files = gr.File(
                label="Upload Files",
                file_types=[".pdf", ".txt"],
                file_count="multiple"
            )
        with gr.Tab("🌐 Web URL"):
            url = gr.Textbox(
                label="Website URL",
                placeholder="https://example.com"
            )
        with gr.Tab("πŸ“„ PDF URL"):
            pdf_url = gr.Textbox(
                label="PDF URL",
                placeholder="https://example.com/document.pdf"
            )
        with gr.Tab("πŸ“š PDF Batch"):
            pdf_batch = gr.Textbox(
                label="PDF URLs (comma separated)",
                placeholder="url1.pdf, url2.pdf, url3.pdf"
            )

    # Output Section: structured JSON payload plus a status/error readout.
    with gr.Row():
        with gr.Column():
            json_out = gr.JSON(
                label="Structured Output",
                show_label=True
            )
        with gr.Column():
            e_box = gr.Textbox(
                label="Status & Errors",
                interactive=False
            )
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
def process_and_format_response(instructions, chat_history, report, summary_memory,
                                input_data, uploaded_files, input_url, pdf_input_url):
    """Drive summarize() to completion and shape its last yield for the UI.

    Consumes the summarize() generator, keeping only the final yielded
    tuple, then converts the returned history into gr.ChatMessage pairs.

    Returns:
        ("", messages, status_text, json_payload) on success, or None when
        summarize() produces no truthy result.  On any exception the error
        text appears both in the transcript and in the status field.
    """
    try:
        history_arg = chat_history if chat_history else []
        last = None
        # Exhaust the generator; only the final yield matters here.
        for step in summarize(
            instructions,
            history_arg,
            report,
            summary_memory,
            input_data,
            uploaded_files,
            input_url,
            pdf_input_url,
        ):
            last = step

        if last:
            _, history, errors, json_data = last

            # Normalize history into the messages format the Chatbot expects.
            messages = []
            if isinstance(history, list):
                for entry in history:
                    if isinstance(entry, tuple) and len(entry) == 2:
                        user_text, bot_text = entry
                        messages.append(gr.ChatMessage(content=str(user_text), role="user"))
                        messages.append(gr.ChatMessage(content=str(bot_text), role="assistant"))
            else:
                messages.append(gr.ChatMessage(content=str(instructions), role="user"))
                messages.append(gr.ChatMessage(content=str(history), role="assistant"))

            status = "\n".join(errors) if errors else "Processing completed successfully"
            return "", messages, status, json_data
    except Exception as e:
        failure = f"Error: {str(e)}"
        transcript = [
            gr.ChatMessage(content=str(instructions), role="user"),
            gr.ChatMessage(content=failure, role="assistant"),
        ]
        return "", transcript, failure, None
686
+
687
def clear_fn():
    """Reset handler for the Clear button: blank prompt, empty chat history."""
    cleared_prompt = ""
    cleared_history = []
    return cleared_prompt, cleared_history
689
+
690
# Update the button click event to match parameters
# NOTE(review): the Process button is wired to `find_all` (the crawl entry
# point defined earlier in the file), NOT to process_and_format_response
# above, which is defined but never attached to any event; likewise
# clear_btn is never wired to clear_fn.  Confirm this routing is intended.
# NOTE(review): purpose_input, task_input, history_input, url_input,
# result_input, steps_input and output_component must exist as real
# component variables at this point — in the layout above they were passed
# as keyword arguments to gr.Chatbot(), which never binds them, so this
# call would raise NameError.  Verify the layout creates them properly.
button.click(
    find_all,
    inputs=[
        purpose_input,  # Add these input components to your Gradio interface
        task_input,
        history_input,
        url_input,
        result_input,
        steps_input
    ],
    outputs=[output_component]
)

# Launch the app
# queue() enables request queuing with up to 20 concurrent workers;
# share=True additionally opens a public gradio.live tunnel alongside the
# local server bound to 0.0.0.0:7860.
app.queue(default_concurrency_limit=20).launch(
    show_api=False,
    share=True,
    server_name="0.0.0.0",
    server_port=7860
)