limitedonly41 committed
Commit 29f7300 · verified · 1 Parent(s): e5f0f5b

Update app.py

Files changed (1):
  1. app.py +48 -1136

app.py CHANGED
@@ -1,1142 +1,54 @@
- #!/usr/bin/env python3
- """
- Advanced CSV Manipulation Tool with Gradio Interface
- Commercial-ready application for powerful CSV data processing
-
- Features:
- - File upload with 1GB limit
- - Data preview with selectable rows
- - Value replacement based on conditions
- - CSV concatenation with column selection
- - Advanced statistical analysis and visualization
- - Data validation and quality checks
- - Export to CSV, Excel, JSON
- - Batch operations and operation recipes
- - Undo/Redo functionality
- - Memory-efficient large file processing
- """
-
- import gradio as gr
- import pandas as pd
- import numpy as np
- import json
- import io
- import zipfile
- from datetime import datetime, timedelta
- import re
- import matplotlib.pyplot as plt
- import seaborn as sns
- import plotly.express as px
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
- import warnings
  import os
- from typing import Dict, List, Tuple, Optional, Any
- import hashlib
- import pickle
- from pathlib import Path
-
- warnings.filterwarnings('ignore')
- plt.style.use('seaborn-v0_8')
- sns.set_palette("husl")
-
- class CSVProcessor:
-     """Advanced CSV processing class with state management and history"""
-
-     def __init__(self):
-         self.original_df = None
-         self.current_df = None
-         self.history = []
-         self.recipes = {}
-         self.batch_files = []
-
-     def load_data(self, file, preview_rows=100, encoding='utf-8'):
-         """Load data file with error handling and memory optimization"""
-         try:
-             if file is None:
-                 return None, "No file provided", {}
-
-             file_path = file.name if hasattr(file, 'name') else str(file)
-             file_extension = Path(file_path).suffix.lower()
-
-             # Chunked reading for large files
-             if file_extension == '.csv':
-                 # Try different encodings
-                 encodings = [encoding, 'utf-8', 'latin-1', 'cp1252']
-                 df = None
-                 for enc in encodings:
-                     try:
-                         df = pd.read_csv(file_path, encoding=enc, low_memory=False)
-                         break
-                     except UnicodeDecodeError:
-                         continue
-                 if df is None:
-                     return None, "Failed to decode file with supported encodings", {}
-
-             elif file_extension in ['.xlsx', '.xls']:
-                 df = pd.read_excel(file_path)
-             elif file_extension == '.json':
-                 df = pd.read_json(file_path)
-             elif file_extension == '.parquet':
-                 df = pd.read_parquet(file_path)
-             else:
-                 return None, f"Unsupported file format: {file_extension}", {}
-
-             self.original_df = df.copy()
-             self.current_df = df.copy()
-             self.history = []
-
-             # Create preview
-             if preview_rows > 0:
-                 preview = df.head(preview_rows)
-             else:
-                 preview = df
-
-             # Memory and performance info
-             memory_mb = df.memory_usage(deep=True).sum() / 1024**2
-             info = {
-                 'rows': len(df),
-                 'columns': len(df.columns),
-                 'memory_usage': f"{memory_mb:.2f} MB",
-                 'dtypes': dict(df.dtypes.astype(str)),
-                 'null_counts': dict(df.isnull().sum()),
-                 'duplicates': df.duplicated().sum()
-             }
-
-             success_msg = f"✅ File loaded successfully!\n"
-             success_msg += f"📊 {info['rows']:,} rows × {info['columns']} columns\n"
-             success_msg += f"💾 Memory usage: {info['memory_usage']}\n"
-             success_msg += f"🔄 Duplicates: {info['duplicates']:,}\n"
-             success_msg += f"❌ Missing values: {sum(info['null_counts'].values()):,}"
-
-             return preview, success_msg, info
-
-         except Exception as e:
-             return None, f"❌ Error loading file: {str(e)}", {}
-
-     def save_state(self, operation_name: str):
-         """Save current state to history with memory management"""
-         if len(self.history) > 50:  # Limit history to prevent memory issues
-             self.history = self.history[-25:]  # Keep last 25 operations
-
-         self.history.append({
-             'operation': operation_name,
-             'timestamp': datetime.now(),
-             'df': self.current_df.copy() if self.current_df is not None else None
-         })
-
-     def undo_operation(self):
-         """Undo last operation"""
-         if len(self.history) > 1:
-             self.history.pop()
-             self.current_df = self.history[-1]['df'].copy()
-             return self.current_df, f"✅ Undone: {self.history[-1]['operation']}"
-         elif len(self.history) == 1:
-             self.current_df = self.original_df.copy()
-             self.history = []
-             return self.current_df, "✅ Reset to original data"
-         else:
-             return self.current_df, "❌ No operations to undo"
-
-     def reset_to_original(self):
-         """Reset to original data"""
-         if self.original_df is not None:
-             self.current_df = self.original_df.copy()
-             self.history = []
-             return self.current_df, "✅ Reset to original data"
-         return None, "❌ No original data available"
-
- # Global processor instance
- processor = CSVProcessor()
-
- def create_download_file(df: pd.DataFrame, format_type: str, filename: str = "processed_data"):
-     """Create downloadable file in specified format"""
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     filename_with_timestamp = f"{filename}_{timestamp}"
-
-     try:
-         if format_type == "csv":
-             csv_data = df.to_csv(index=False)
-             return csv_data, f"{filename_with_timestamp}.csv"
-         elif format_type == "excel":
-             buffer = io.BytesIO()
-             with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
-                 df.to_excel(writer, index=False, sheet_name='Data')
-             buffer.seek(0)
-             return buffer.getvalue(), f"{filename_with_timestamp}.xlsx"
-         elif format_type == "json":
-             json_data = df.to_json(orient='records', indent=2, date_format='iso')
-             return json_data, f"{filename_with_timestamp}.json"
-     except Exception as e:
-         return None, f"Error creating {format_type} file: {str(e)}"
-
- def get_data_info(df: pd.DataFrame) -> str:
-     """Get comprehensive data information"""
-     if df is None or df.empty:
-         return "No data loaded"
-
-     info_dict = {
-         '📊 Shape': f"{df.shape[0]:,} rows × {df.shape[1]} columns",
-         '💾 Memory': f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB",
-         '🔄 Duplicates': f"{df.duplicated().sum():,}",
-         '❌ Missing Values': f"{df.isnull().sum().sum():,}",
-         '📈 Numeric Columns': f"{len(df.select_dtypes(include=[np.number]).columns)}",
-         '📝 Text Columns': f"{len(df.select_dtypes(include=['object']).columns)}",
-         '📅 Date Columns': f"{len(df.select_dtypes(include=['datetime64']).columns)}"
-     }
-
-     return "\n".join([f"{k}: {v}" for k, v in info_dict.items()])
-
- def get_column_options(df: pd.DataFrame) -> List[str]:
-     """Get list of column names for dropdowns"""
-     return list(df.columns) if df is not None else []
-
- # ===========================================
- # CORE DATA MANIPULATION FUNCTIONS
- # ===========================================
-
- def rename_values_conditional(df: pd.DataFrame, target_col: str, condition_col: str,
-                               condition_value: str, new_value: str, match_type: str = "exact") -> Tuple[pd.DataFrame, str]:
-     """Rename values in target column based on condition in another column"""
-     try:
-         if df is None or df.empty:
-             return df, "❌ No data available"
-
-         if target_col not in df.columns or condition_col not in df.columns:
-             return df, "❌ One or more columns not found"
-
-         df_result = df.copy()
-
-         if match_type == "exact":
-             mask = df_result[condition_col] == condition_value
-         elif match_type == "contains":
-             mask = df_result[condition_col].astype(str).str.contains(condition_value, na=False)
-         elif match_type == "regex":
-             mask = df_result[condition_col].astype(str).str.match(condition_value, na=False)
-         elif match_type == "starts_with":
-             mask = df_result[condition_col].astype(str).str.startswith(condition_value, na=False)
-         elif match_type == "ends_with":
-             mask = df_result[condition_col].astype(str).str.endswith(condition_value, na=False)
-
-         affected_rows = mask.sum()
-         df_result.loc[mask, target_col] = new_value
-
-         processor.current_df = df_result
-         processor.save_state(f"Renamed values in '{target_col}' based on '{condition_col}'")
-
-         return df_result, f"✅ Updated {affected_rows:,} rows in column '{target_col}'"
-
-     except Exception as e:
-         return df, f"❌ Error: {str(e)}"
-
- def concatenate_csvs(files: List, selected_columns: str, join_type: str = "outer") -> Tuple[pd.DataFrame, str]:
-     """Concatenate multiple CSV files with column selection"""
-     try:
-         if not files:
-             return None, "❌ No files provided"
-
-         dfs = []
-         columns_to_use = [col.strip() for col in selected_columns.split(",") if col.strip()] if selected_columns else None
-
-         for file in files:
-             if hasattr(file, 'name'):
-                 file_path = file.name
-                 if file_path.endswith('.csv'):
-                     df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
-                 elif file_path.endswith(('.xlsx', '.xls')):
-                     df = pd.read_excel(file_path)
-                 else:
-                     continue
-
-                 # Select specific columns if specified
-                 if columns_to_use:
-                     available_cols = [col for col in columns_to_use if col in df.columns]
-                     if available_cols:
-                         df = df[available_cols]
-                     else:
-                         continue
-
-                 # Add source file identifier
-                 df['_source_file'] = Path(file_path).stem
-                 dfs.append(df)
-
-         if not dfs:
-             return None, "❌ No valid files found or columns don't exist"
-
-         # Concatenate with specified join type
-         if join_type == "inner":
-             result_df = pd.concat(dfs, ignore_index=True, join='inner')
-         else:
-             result_df = pd.concat(dfs, ignore_index=True, join='outer')
-
-         processor.current_df = result_df
-         processor.save_state(f"Concatenated {len(dfs)} files")
-
-         return result_df, f"✅ Successfully concatenated {len(dfs)} files with {len(result_df):,} total rows"
-
-     except Exception as e:
-         return None, f"❌ Error concatenating files: {str(e)}"
-
- def get_value_counts(df: pd.DataFrame, column: str, top_n: int = 20, normalize: bool = False) -> Tuple[pd.DataFrame, str]:
-     """Get value counts for specified column"""
-     try:
-         if df is None or df.empty:
-             return None, "❌ No data available"
-
-         if column not in df.columns:
-             return None, f"❌ Column '{column}' not found"
-
-         value_counts = df[column].value_counts(normalize=normalize, dropna=False).head(top_n)
-
-         # Convert to DataFrame for better display
-         result_df = pd.DataFrame({
-             'Value': value_counts.index,
-             'Count' if not normalize else 'Percentage': value_counts.values
-         })
-
-         if normalize:
-             result_df['Percentage'] = result_df['Percentage'].map(lambda x: f"{x:.2%}")
-
-         return result_df, f"✅ Value counts for '{column}' (Top {min(top_n, len(result_df))})"
-
-     except Exception as e:
-         return None, f"❌ Error: {str(e)}"
-
- def filter_data(df: pd.DataFrame, column: str, condition: str, value: str) -> Tuple[pd.DataFrame, str]:
-     """Filter data based on conditions"""
-     try:
-         if df is None or df.empty:
-             return df, "❌ No data available"
-
-         if column not in df.columns:
-             return df, f"❌ Column '{column}' not found"
-
-         df_result = df.copy()
-
-         if condition == "equals":
-             mask = df_result[column] == value
-         elif condition == "not_equals":
-             mask = df_result[column] != value
-         elif condition == "contains":
-             mask = df_result[column].astype(str).str.contains(value, na=False)
-         elif condition == "not_contains":
-             mask = ~df_result[column].astype(str).str.contains(value, na=False)
-         elif condition == "starts_with":
-             mask = df_result[column].astype(str).str.startswith(value, na=False)
-         elif condition == "ends_with":
-             mask = df_result[column].astype(str).str.endswith(value, na=False)
-         elif condition == "greater_than":
-             mask = pd.to_numeric(df_result[column], errors='coerce') > float(value)
-         elif condition == "less_than":
-             mask = pd.to_numeric(df_result[column], errors='coerce') < float(value)
-         elif condition == "is_null":
-             mask = df_result[column].isnull()
-         elif condition == "is_not_null":
-             mask = df_result[column].notnull()
-         else:
-             return df, f"❌ Unknown condition: {condition}"
-
-         filtered_df = df_result[mask]
-
-         processor.current_df = filtered_df
-         processor.save_state(f"Filtered data: {column} {condition} {value}")
-
-         return filtered_df, f"✅ Filtered to {len(filtered_df):,} rows (removed {len(df) - len(filtered_df):,} rows)"
-
-     except Exception as e:
-         return df, f"❌ Error: {str(e)}"
-
- def handle_missing_values(df: pd.DataFrame, column: str, method: str, fill_value: str = "") -> Tuple[pd.DataFrame, str]:
-     """Handle missing values in specified column"""
-     try:
-         if df is None or df.empty:
-             return df, "❌ No data available"
-
-         if column != "ALL" and column not in df.columns:
-             return df, f"❌ Column '{column}' not found"
-
-         df_result = df.copy()
-         columns_to_process = [column] if column != "ALL" else df_result.columns.tolist()
-
-         total_missing_before = df_result.isnull().sum().sum()
-
-         for col in columns_to_process:
-             if method == "drop_rows":
-                 df_result = df_result.dropna(subset=[col])
-             elif method == "fill_value":
-                 df_result[col] = df_result[col].fillna(fill_value)
-             elif method == "fill_mean":
-                 if df_result[col].dtype in ['int64', 'float64']:
-                     df_result[col] = df_result[col].fillna(df_result[col].mean())
-             elif method == "fill_median":
-                 if df_result[col].dtype in ['int64', 'float64']:
-                     df_result[col] = df_result[col].fillna(df_result[col].median())
-             elif method == "fill_mode":
-                 mode_val = df_result[col].mode()
-                 if len(mode_val) > 0:
-                     df_result[col] = df_result[col].fillna(mode_val[0])
-             elif method == "forward_fill":
-                 df_result[col] = df_result[col].fillna(method='ffill')
-             elif method == "backward_fill":
-                 df_result[col] = df_result[col].fillna(method='bfill')
-
-         total_missing_after = df_result.isnull().sum().sum()
-
-         processor.current_df = df_result
-         processor.save_state(f"Handle missing values: {method}")
-
-         return df_result, f"✅ Processed missing values. Before: {total_missing_before:,}, After: {total_missing_after:,}"
-
-     except Exception as e:
-         return df, f"❌ Error: {str(e)}"
-
- def detect_and_remove_duplicates(df: pd.DataFrame, columns: str = "", keep: str = "first") -> Tuple[pd.DataFrame, str]:
-     """Detect and remove duplicate rows"""
-     try:
-         if df is None or df.empty:
-             return df, "❌ No data available"
-
-         df_result = df.copy()
-
-         # Parse columns
-         if columns.strip():
-             cols_list = [col.strip() for col in columns.split(",") if col.strip() in df.columns]
-             subset = cols_list if cols_list else None
          else:
-             subset = None
-
-         duplicates_before = df_result.duplicated(subset=subset).sum()
-
-         if duplicates_before == 0:
-             return df_result, "✅ No duplicate rows found"
-
-         df_result = df_result.drop_duplicates(subset=subset, keep=keep)
-
-         processor.current_df = df_result
-         processor.save_state(f"Removed {duplicates_before:,} duplicate rows")
-
-         return df_result, f"✅ Removed {duplicates_before:,} duplicate rows. Remaining: {len(df_result):,} rows"
-
-     except Exception as e:
-         return df, f"❌ Error: {str(e)}"
-
- def perform_column_operations(df: pd.DataFrame, operation: str, col1: str, col2: str = "",
-                               new_col_name: str = "", constant: str = "") -> Tuple[pd.DataFrame, str]:
-     """Perform mathematical and string operations on columns"""
-     try:
-         if df is None or df.empty:
-             return df, "❌ No data available"
-
-         if col1 not in df.columns:
-             return df, f"❌ Column '{col1}' not found"
-
-         df_result = df.copy()
-
-         if not new_col_name:
-             new_col_name = f"{col1}_{operation}"
-
-         if operation == "add":
-             if col2 and col2 in df.columns:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') + pd.to_numeric(df_result[col2], errors='coerce')
-             elif constant:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') + float(constant)
-
-         elif operation == "subtract":
-             if col2 and col2 in df.columns:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') - pd.to_numeric(df_result[col2], errors='coerce')
-             elif constant:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') - float(constant)
-
-         elif operation == "multiply":
-             if col2 and col2 in df.columns:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') * pd.to_numeric(df_result[col2], errors='coerce')
-             elif constant:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') * float(constant)
-
-         elif operation == "divide":
-             if col2 and col2 in df.columns:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') / pd.to_numeric(df_result[col2], errors='coerce')
-             elif constant:
-                 df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') / float(constant)
-
-         elif operation == "concatenate":
-             if col2 and col2 in df.columns:
-                 df_result[new_col_name] = df_result[col1].astype(str) + " " + df_result[col2].astype(str)
-             elif constant:
-                 df_result[new_col_name] = df_result[col1].astype(str) + constant
-
-         elif operation == "extract_numbers":
-             df_result[new_col_name] = df_result[col1].astype(str).str.extract(r'(\d+)')[0]
-
-         elif operation == "upper":
-             df_result[new_col_name] = df_result[col1].astype(str).str.upper()
-
-         elif operation == "lower":
-             df_result[new_col_name] = df_result[col1].astype(str).str.lower()

-         elif operation == "title":
-             df_result[new_col_name] = df_result[col1].astype(str).str.title()
-
-         elif operation == "length":
-             df_result[new_col_name] = df_result[col1].astype(str).str.len()
-
-         else:
-             return df, f"❌ Unknown operation: {operation}"
-
-         processor.current_df = df_result
-         processor.save_state(f"Column operation: {operation} on {col1}")
-
-         return df_result, f"✅ Created new column '{new_col_name}' using {operation} operation"
-
      except Exception as e:
-         return df, f"❌ Error: {str(e)}"
-
- def convert_data_types(df: pd.DataFrame, column: str, target_type: str) -> Tuple[pd.DataFrame, str]:
-     """Convert column data types"""
-     try:
-         if df is None or df.empty:
-             return df, "❌ No data available"
-
-         if column not in df.columns:
-             return df, f"❌ Column '{column}' not found"
-
-         df_result = df.copy()
-
-         if target_type == "string":
-             df_result[column] = df_result[column].astype(str)
-         elif target_type == "integer":
-             df_result[column] = pd.to_numeric(df_result[column], errors='coerce').astype('Int64')
-         elif target_type == "float":
-             df_result[column] = pd.to_numeric(df_result[column], errors='coerce')
-         elif target_type == "datetime":
-             df_result[column] = pd.to_datetime(df_result[column], errors='coerce')
-         elif target_type == "boolean":
-             df_result[column] = df_result[column].astype(bool)
-         elif target_type == "category":
-             df_result[column] = df_result[column].astype('category')
-         else:
-             return df, f"❌ Unknown data type: {target_type}"
-
-         processor.current_df = df_result
-         processor.save_state(f"Converted '{column}' to {target_type}")
-
-         return df_result, f"✅ Converted column '{column}' to {target_type}"
-
-     except Exception as e:
-         return df, f"❌ Error: {str(e)}"
-
- # ===========================================
- # ANALYSIS AND VISUALIZATION FUNCTIONS
- # ===========================================
-
- def generate_statistical_summary(df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
-     """Generate comprehensive statistical summary"""
-     try:
-         if df is None or df.empty:
-             return None, "❌ No data available"
-
-         numeric_cols = df.select_dtypes(include=[np.number]).columns
-
-         if len(numeric_cols) == 0:
-             return None, "❌ No numeric columns found"
-
-         stats_df = df[numeric_cols].describe()
-
-         # Add additional statistics
-         stats_df.loc['variance'] = df[numeric_cols].var()
-         stats_df.loc['skewness'] = df[numeric_cols].skew()
-         stats_df.loc['kurtosis'] = df[numeric_cols].kurtosis()
-         stats_df.loc['missing'] = df[numeric_cols].isnull().sum()
-
-         return stats_df.round(4), "✅ Statistical summary generated"
-
-     except Exception as e:
-         return None, f"❌ Error: {str(e)}"
-
- def create_correlation_matrix(df: pd.DataFrame) -> Tuple[str, str]:
-     """Create correlation matrix visualization"""
-     try:
-         if df is None or df.empty:
-             return None, "❌ No data available"
-
-         numeric_cols = df.select_dtypes(include=[np.number]).columns
-
-         if len(numeric_cols) < 2:
-             return None, "❌ Need at least 2 numeric columns for correlation"
-
-         # Calculate correlation matrix
-         corr_matrix = df[numeric_cols].corr()
-
-         # Create heatmap
-         plt.figure(figsize=(12, 8))
-         mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
-         sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='coolwarm', center=0,
-                     square=True, linewidths=0.5, cbar_kws={"shrink": 0.8})
-         plt.title('Correlation Matrix Heatmap', fontsize=16, fontweight='bold')
-         plt.tight_layout()
-
-         # Save plot
-         plt.savefig('correlation_matrix.png', dpi=300, bbox_inches='tight')
-         plt.close()
-
-         return 'correlation_matrix.png', "✅ Correlation matrix created"
-
-     except Exception as e:
-         return None, f"❌ Error: {str(e)}"
-
- def create_distribution_plots(df: pd.DataFrame, column: str, plot_type: str = "histogram") -> Tuple[str, str]:
-     """Create distribution plots"""
-     try:
-         if df is None or df.empty:
-             return None, "❌ No data available"
-
-         if column not in df.columns:
-             return None, f"❌ Column '{column}' not found"
-
-         plt.figure(figsize=(12, 6))
-
-         if plot_type == "histogram":
-             plt.subplot(1, 2, 1)
-             df[column].hist(bins=30, edgecolor='black', alpha=0.7)
-             plt.title(f'Histogram of {column}')
-             plt.xlabel(column)
-             plt.ylabel('Frequency')
-
-             plt.subplot(1, 2, 2)
-             df.boxplot(column=column)
-             plt.title(f'Box Plot of {column}')
-
-         elif plot_type == "density":
-             plt.subplot(1, 2, 1)
-             df[column].plot(kind='density')
-             plt.title(f'Density Plot of {column}')
-             plt.xlabel(column)
-
-             plt.subplot(1, 2, 2)
-             df[column].plot(kind='box')
-             plt.title(f'Box Plot of {column}')
-
-         plt.tight_layout()
-         plt.savefig(f'distribution_{column}_{plot_type}.png', dpi=300, bbox_inches='tight')
-         plt.close()
-
-         return f'distribution_{column}_{plot_type}.png', f"✅ Distribution plot created for {column}"
-
-     except Exception as e:
-         return None, f"❌ Error: {str(e)}"
-
- # ===========================================
- # GRADIO INTERFACE SETUP
- # ===========================================
-
- def create_interface():
-     """Create the main Gradio interface"""
-
-     with gr.Blocks(title="Advanced CSV Manipulation Tool", theme=gr.themes.Soft()) as demo:
-
-         gr.HTML("""
-         <div style="text-align: center; padding: 20px;">
-             <h1 style="color: #2e7d32; margin-bottom: 10px;">🔥 Advanced CSV Manipulation Tool</h1>
-             <p style="font-size: 18px; color: #666;">Commercial-ready data processing with advanced analytics</p>
-             <hr style="margin: 20px 0;">
-         </div>
-         """)
-
-         # Global state variables
-         current_data = gr.State(None)
-         data_info = gr.State({})
-
-         with gr.Tabs():
-
-             # ===== FILE UPLOAD TAB =====
-             with gr.TabItem("📁 File Upload & Preview"):
-                 with gr.Row():
-                     with gr.Column(scale=1):
-                         file_upload = gr.File(
-                             label="Upload CSV/Excel/JSON file (Max 1GB)",
-                             file_types=[".csv", ".xlsx", ".xls", ".json"],
-                             file_count="single"
-                         )
-                         preview_rows = gr.Slider(
-                             minimum=0,
-                             maximum=1000,
-                             value=100,
-                             step=50,
-                             label="Preview Rows (0 = All)",
-                             info="Number of rows to display in preview"
-                         )
-                         upload_btn = gr.Button("📊 Load & Analyze Data", variant="primary", size="lg")
-
-                     with gr.Column(scale=2):
-                         upload_status = gr.Textbox(label="Status", lines=5, interactive=False)
-                         data_info_display = gr.Textbox(label="Data Information", lines=8, interactive=False)
-
-                 data_preview = gr.DataFrame(label="Data Preview", interactive=False)
-
-                 def load_file_handler(file, rows):
-                     if file is None:
-                         return None, "Please upload a file first", "", None, {}
-
-                     preview, status, info = processor.load_data(file, rows)
-                     info_text = get_data_info(processor.current_df) if processor.current_df is not None else ""
-
-                     return preview, status, info_text, processor.current_df, info
-
-                 upload_btn.click(
-                     load_file_handler,
-                     inputs=[file_upload, preview_rows],
-                     outputs=[data_preview, upload_status, data_info_display, current_data, data_info]
-                 )
-
-             # ===== VALUE REPLACEMENT TAB =====
-             with gr.TabItem("🔄 Value Replacement"):
-                 gr.HTML("<h3>Replace values in one column based on conditions in another column</h3>")
-
-                 with gr.Row():
-                     with gr.Column():
-                         target_col = gr.Dropdown(label="Target Column (to modify)", choices=[], interactive=True)
-                         condition_col = gr.Dropdown(label="Condition Column (to check)", choices=[], interactive=True)
-                         condition_value = gr.Textbox(label="Condition Value", placeholder="Value to match in condition column")
-                         new_value = gr.Textbox(label="New Value", placeholder="Replacement value for target column")
-                         match_type = gr.Radio(
-                             choices=["exact", "contains", "starts_with", "ends_with", "regex"],
-                             value="exact",
-                             label="Match Type"
-                         )
-                         replace_btn = gr.Button("🔄 Replace Values", variant="primary")
-
-                     with gr.Column():
-                         replace_status = gr.Textbox(label="Status", lines=3, interactive=False)
-
-                 # Update column choices when data changes
-                 def update_columns(df):
-                     if df is not None:
-                         cols = list(df.columns)
-                         return gr.Dropdown(choices=cols), gr.Dropdown(choices=cols)
-                     return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
-
-                 current_data.change(
-                     update_columns,
-                     inputs=[current_data],
-                     outputs=[target_col, condition_col]
-                 )
-
-                 def replace_values_handler(df, tcol, ccol, cval, nval, mtype):
-                     if df is None:
-                         return None, "❌ No data loaded", ""
-
-                     result_df, status = rename_values_conditional(df, tcol, ccol, cval, nval, mtype)
-                     info_text = get_data_info(result_df) if result_df is not None else ""
-
-                     return result_df, status, info_text
-
-                 replace_btn.click(
-                     replace_values_handler,
-                     inputs=[current_data, target_col, condition_col, condition_value, new_value, match_type],
-                     outputs=[current_data, replace_status, data_info_display]
-                 )
-
-             # ===== CSV CONCATENATION TAB =====
-             with gr.TabItem("📋 CSV Concatenation"):
-                 gr.HTML("<h3>Combine multiple CSV files with column selection</h3>")
-
-                 with gr.Row():
-                     with gr.Column():
-                         multi_files = gr.File(
-                             label="Upload Multiple Files",
-                             file_types=[".csv", ".xlsx", ".xls"],
-                             file_count="multiple"
-                         )
-                         selected_columns = gr.Textbox(
-                             label="Columns to Include",
-                             placeholder="column1, column2, column3 (leave empty for all)",
-                             info="Comma-separated list of column names"
-                         )
-                         join_type = gr.Radio(
-                             choices=["outer", "inner"],
-                             value="outer",
-                             label="Join Type",
-                             info="Outer: keep all columns, Inner: only common columns"
-                         )
-                         concat_btn = gr.Button("📋 Concatenate Files", variant="primary")
-
-                     with gr.Column():
-                         concat_status = gr.Textbox(label="Status", lines=5, interactive=False)
-
-                 def concat_handler(files, cols, jtype):
-                     if not files:
-                         return None, "❌ Please upload files first", ""
-
-                     result_df, status = concatenate_csvs(files, cols, jtype)
-                     info_text = get_data_info(result_df) if result_df is not None else ""
-
-                     return result_df, status, info_text
-
-                 concat_btn.click(
-                     concat_handler,
-                     inputs=[multi_files, selected_columns, join_type],
-                     outputs=[current_data, concat_status, data_info_display]
-                 )
-
-             # ===== VALUE COUNTS TAB =====
-             with gr.TabItem("📊 Value Analysis"):
-                 gr.HTML("<h3>Analyze value frequencies and distributions</h3>")
-
-                 with gr.Row():
-                     with gr.Column():
-                         analysis_col = gr.Dropdown(label="Column to Analyze", choices=[], interactive=True)
-                         top_n = gr.Slider(minimum=5, maximum=100, value=20, step=5, label="Top N Values")
-                         normalize_counts = gr.Checkbox(label="Show Percentages", value=False)
-                         analyze_btn = gr.Button("📊 Analyze Values", variant="primary")
-
-                     with gr.Column():
-                         analysis_status = gr.Textbox(label="Status", lines=3, interactive=False)
-
-                 analysis_results = gr.DataFrame(label="Value Counts")
-
-                 # Update analysis column choices
-                 current_data.change(
-                     lambda df: gr.Dropdown(choices=list(df.columns) if df is not None else []),
-                     inputs=[current_data],
-                     outputs=[analysis_col]
-                 )
-
-                 def analysis_handler(df, col, n, norm):
-                     if df is None:
-                         return None, "❌ No data loaded"
-
-                     return get_value_counts(df, col, n, norm)
-
-                 analyze_btn.click(
-                     analysis_handler,
-                     inputs=[current_data, analysis_col, top_n, normalize_counts],
-                     outputs=[analysis_results, analysis_status]
-                 )
-
-             # ===== DATA CLEANING TAB =====
-             with gr.TabItem("🧹 Data Cleaning"):
-                 gr.HTML("<h3>Clean and preprocess your data</h3>")
-
-                 with gr.Tabs():
-                     # Missing Values
-                     with gr.TabItem("Missing Values"):
-                         with gr.Row():
-                             with gr.Column():
-                                 missing_col = gr.Dropdown(label="Column", choices=["ALL"], value="ALL", interactive=True)
-                                 missing_method = gr.Radio(
-                                     choices=["drop_rows", "fill_value", "fill_mean", "fill_median", "fill_mode", "forward_fill", "backward_fill"],
-                                     value="drop_rows",
-                                     label="Method"
-                                 )
-                                 fill_value_input = gr.Textbox(label="Fill Value", placeholder="For fill_value method")
-                                 missing_btn = gr.Button("🧹 Handle Missing Values", variant="primary")
-
-                             with gr.Column():
-                                 missing_status = gr.Textbox(label="Status", lines=4, interactive=False)
-
-                     # Duplicates
-                     with gr.TabItem("Duplicates"):
-                         with gr.Row():
-                             with gr.Column():
-                                 duplicate_cols = gr.Textbox(
-                                     label="Columns to Check",
-                                     placeholder="column1, column2 (empty = all columns)"
-                                 )
-                                 keep_method = gr.Radio(
-                                     choices=["first", "last", "false"],
-                                     value="first",
-                                     label="Keep Method"
-                                 )
-                                 duplicate_btn = gr.Button("🗑️ Remove Duplicates", variant="primary")
-
-                             with gr.Column():
-                                 duplicate_status = gr.Textbox(label="Status", lines=4, interactive=False)
-
-                     # Data Filtering
-                     with gr.TabItem("Filtering"):
-                         with gr.Row():
-                             with gr.Column():
-                                 filter_col = gr.Dropdown(label="Column", choices=[], interactive=True)
-                                 filter_condition = gr.Dropdown(
-                                     choices=["equals", "not_equals", "contains", "not_contains", "starts_with", "ends_with",
-                                              "greater_than", "less_than", "is_null", "is_not_null"],
-                                     value="equals",
-                                     label="Condition"
-                                 )
-                                 filter_value = gr.Textbox(label="Value")
-                                 filter_btn = gr.Button("🔍 Filter Data", variant="primary")
-
-                             with gr.Column():
-                                 filter_status = gr.Textbox(label="Status", lines=4, interactive=False)
-
-                 # Update dropdown choices
-                 current_data.change(
-                     lambda df: (
-                         gr.Dropdown(choices=["ALL"] + list(df.columns) if df is not None else ["ALL"]),
-                         gr.Dropdown(choices=list(df.columns) if df is not None else [])
-                     ),
-                     inputs=[current_data],
-                     outputs=[missing_col, filter_col]
-                 )
-
-                 # Event handlers
-                 missing_btn.click(
-                     lambda df, col, method, val: handle_missing_values(df, col, method, val)[1] if df is not None else "❌ No data",
-                     inputs=[current_data, missing_col, missing_method, fill_value_input],
-                     outputs=[missing_status]
-                 ).then(
-                     lambda: processor.current_df,
-                     outputs=[current_data]
-                 ).then(
-                     lambda df: get_data_info(df),
-                     inputs=[current_data],
-                     outputs=[data_info_display]
-                 )
-
-                 duplicate_btn.click(
-                     lambda df, cols, keep: detect_and_remove_duplicates(df, cols, keep)[1] if df is not None else "❌ No data",
-                     inputs=[current_data, duplicate_cols, keep_method],
-                     outputs=[duplicate_status]
-                 ).then(
-                     lambda: processor.current_df,
-                     outputs=[current_data]
-                 ).then(
-                     lambda df: get_data_info(df),
-                     inputs=[current_data],
-                     outputs=[data_info_display]
-                 )
-
-                 filter_btn.click(
-                     lambda df, col, cond, val: filter_data(df, col, cond, val)[1] if df is not None else "❌ No data",
-                     inputs=[current_data, filter_col, filter_condition, filter_value],
-                     outputs=[filter_status]
-                 ).then(
-                     lambda: processor.current_df,
-                     outputs=[current_data]
-                 ).then(
-                     lambda df: get_data_info(df),
-                     inputs=[current_data],
-                     outputs=[data_info_display]
-                 )
-
-             # ===== COLUMN OPERATIONS TAB =====
-             with gr.TabItem("⚙️ Column Operations"):
-                 gr.HTML("<h3>Perform operations on columns</h3>")
-
-                 with gr.Row():
-                     with gr.Column():
-                         op_type = gr.Dropdown(
-                             choices=["add", "subtract", "multiply", "divide", "concatenate",
-                                      "extract_numbers", "upper", "lower", "title", "length"],
-                             value="add",
-                             label="Operation"
-                         )
-                         op_col1 = gr.Dropdown(label="Primary Column", choices=[], interactive=True)
-                         op_col2 = gr.Dropdown(label="Second Column (optional)", choices=[], interactive=True)
-                         op_constant = gr.Textbox(label="Constant Value (optional)")
-                         op_new_name = gr.Textbox(label="New Column Name")
-                         op_btn = gr.Button("⚙️ Execute Operation", variant="primary")
-
-                     with gr.Column():
-                         op_status = gr.Textbox(label="Status", lines=5, interactive=False)
-
-                         # Data type conversion
-                         gr.HTML("<hr><h4>Data Type Conversion</h4>")
-                         convert_col = gr.Dropdown(label="Column", choices=[], interactive=True)
-                         convert_type = gr.Dropdown(
-                             choices=["string", "integer", "float", "datetime", "boolean", "category"],
-                             value="string",
-                             label="Target Type"
-                         )
-                         convert_btn = gr.Button("🔄 Convert Type", variant="secondary")
-                         convert_status = gr.Textbox(label="Conversion Status", lines=2, interactive=False)
-
-                 # Update column choices
-                 current_data.change(
-                     lambda df: (
-                         gr.Dropdown(choices=list(df.columns) if df is not None else []),
-                         gr.Dropdown(choices=list(df.columns) if df is not None else []),
-                         gr.Dropdown(choices=list(df.columns) if df is not None else [])
-                     ),
-                     inputs=[current_data],
-                     outputs=[op_col1, op_col2, convert_col]
-                 )
-
-                 # Event handlers
-                 def operation_handler(df, op, col1, col2, const, new_name):
-                     if df is None:
-                         return None, "❌ No data loaded", ""
-
-                     result_df, status = perform_column_operations(df, op, col1, col2, new_name, const)
-                     info_text = get_data_info(result_df) if result_df is not None else ""
-
-                     return result_df, status, info_text
-
-                 op_btn.click(
-                     operation_handler,
-                     inputs=[current_data, op_type, op_col1, op_col2, op_constant, op_new_name],
-                     outputs=[current_data, op_status, data_info_display]
-                 )
-
-                 def convert_handler(df, col, target_type):
-                     if df is None:
-                         return None, "❌ No data loaded", ""
-
-                     result_df, status = convert_data_types(df, col, target_type)
-                     info_text = get_data_info(result_df) if result_df is not None else ""
-
-                     return result_df, status, info_text
-
-                 convert_btn.click(
-                     convert_handler,
-                     inputs=[current_data, convert_col, convert_type],
-                     outputs=[current_data, convert_status, data_info_display]
-                 )
-
-             # ===== STATISTICS TAB =====
-             with gr.TabItem("📈 Statistics & Analysis"):
-                 gr.HTML("<h3>Statistical analysis and insights</h3>")
-
-                 with gr.Row():
-                     with gr.Column():
-                         stats_btn = gr.Button("📊 Generate Statistical Summary", variant="primary")
-                         corr_btn = gr.Button("🔗 Create Correlation Matrix", variant="secondary")
-
-                         # Distribution plots
-                         gr.HTML("<hr><h4>Distribution Analysis</h4>")
-                         dist_col = gr.Dropdown(label="Column", choices=[], interactive=True)
-                         plot_type = gr.Radio(choices=["histogram", "density"], value="histogram", label="Plot Type")
-                         dist_btn = gr.Button("📈 Create Distribution Plot", variant="secondary")
-
-                     with gr.Column():
-                         stats_status = gr.Textbox(label="Status", lines=3, interactive=False)
-                         plot_output = gr.Image(label="Visualization")
-
-                 stats_results = gr.DataFrame(label="Statistical Summary")
-
-                 # Update column choices
-                 current_data.change(
-                     lambda df: gr.Dropdown(choices=list(df.select_dtypes(include=[np.number]).columns) if df is not None else []),
-                     inputs=[current_data],
-                     outputs=[dist_col]
-                 )
-
-                 # Event handlers
-                 stats_btn.click(
-                     lambda df: generate_statistical_summary(df) if df is not None else (None, "❌ No data"),
-                     inputs=[current_data],
-                     outputs=[stats_results, stats_status]
-                 )
-
-                 corr_btn.click(
-                     lambda df: create_correlation_matrix(df) if df is not None else (None, "❌ No data"),
-                     inputs=[current_data],
-                     outputs=[plot_output, stats_status]
-                 )
-
-                 dist_btn.click(
-                     lambda df, col, ptype: create_distribution_plots(df, col, ptype) if df is not None else (None, "❌ No data"),
-                     inputs=[current_data, dist_col, plot_type],
-                     outputs=[plot_output, stats_status]
-                 )
-
-             # ===== EXPORT TAB =====
-             with gr.TabItem("💾 Export & Download"):
-                 gr.HTML("<h3>Export your processed data</h3>")
-
-                 with gr.Row():
-                     with gr.Column():
-                         export_format = gr.Radio(
-                             choices=["csv", "excel", "json"],
-                             value="csv",
-                             label="Export Format"
-                         )
-                         export_filename = gr.Textbox(
-                             label="Filename (without extension)",
-                             value="processed_data",
-                             placeholder="Enter filename"
-                         )
-                         export_btn = gr.Button("💾 Create Download File", variant="primary", size="lg")
-
-                     with gr.Column():
-                         export_status = gr.Textbox(label="Status", lines=3, interactive=False)
-                         download_file = gr.File(label="Download", visible=False)
-
-                 # History and Undo/Redo
-                 with gr.Row():
-                     with gr.Column():
-                         gr.HTML("<hr><h4>History & Undo Operations</h4>")
-                         undo_btn = gr.Button("↶ Undo Last Operation", variant="secondary")
-                         reset_btn = gr.Button("🔄 Reset to Original", variant="secondary")
-
-                     with gr.Column():
-                         history_status = gr.Textbox(label="History Status", lines=3, interactive=False)
-
-                 def export_handler(df, fmt, filename):
-                     if df is None:
-                         return None, "❌ No data to export", gr.File(visible=False)
-
-                     try:
-                         file_data, file_name = create_download_file(df, fmt, filename)
-
-                         # Save file temporarily (binary mode for Excel, text for CSV/JSON)
-                         with open(file_name, 'wb' if fmt == 'excel' else 'w', encoding=None if fmt == 'excel' else 'utf-8') as f:
-                             f.write(file_data)
-
-                         return file_name, f"✅ File created successfully: {file_name}", gr.File(value=file_name, visible=True)
-
-                     except Exception as e:
-                         return None, f"❌ Export error: {str(e)}", gr.File(visible=False)
-
-                 export_btn.click(
-                     export_handler,
-                     inputs=[current_data, export_format, export_filename],
-                     outputs=[download_file, export_status, download_file]
-                 )
-
-                 def undo_handler():
-                     result_df, status = processor.undo_operation()
-                     info_text = get_data_info(result_df) if result_df is not None else ""
-                     return result_df, status, info_text
-
-                 def reset_handler():
-                     result_df, status = processor.reset_to_original()
-                     info_text = get_data_info(result_df) if result_df is not None else ""
-                     return result_df, status, info_text
-
-                 undo_btn.click(
-                     undo_handler,
-                     outputs=[current_data, history_status, data_info_display]
-                 )
-
-                 reset_btn.click(
-                     reset_handler,
-                     outputs=[current_data, history_status, data_info_display]
-                 )
-
-         # Footer
-         gr.HTML("""
-         <div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;">
-             <p style="color: #666; font-size: 14px;">
-                 🚀 <strong>Advanced CSV Manipulation Tool</strong> |
-                 Commercial-ready data processing with enterprise features |
-                 Built with Gradio & Python
-             </p>
-         </div>
-         """)
-
-     return demo

  if __name__ == "__main__":
-     # Create and launch the interface
-     demo = create_interface()
-     demo.launch(
-         share=True,
-         inbrowser=True,
-         server_name="0.0.0.0",
-         server_port=7860,
-         max_file_size="1gb"
-     )
  import os
+ from huggingface_hub import hf_hub_download
+ import importlib.util
+ import sys
+
+ # Get HF token from environment (set in Space secrets)
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ # Your private model/repo details
+ REPO_ID = "limitedonly41/cv_all_src"
+ FILENAME = "csv_manipulations_ui.py"
+
+ def load_and_run():
+     try:
+         # Download the code file from the private repo
+         file_path = hf_hub_download(
+             repo_id=REPO_ID,
+             filename=FILENAME,
+             token=HF_TOKEN,
+             repo_type="model"
+         )
+
+         # Load the module dynamically
+         spec = importlib.util.spec_from_file_location("csv_manipulations_module", file_path)
+         module = importlib.util.module_from_spec(spec)
+         sys.modules["csv_manipulations_module"] = module
+         spec.loader.exec_module(module)
+
+         # Try to find and launch the interface
+         if hasattr(module, 'interface'):
+             print("Found 'interface' object, launching...")
+             module.interface.launch()
+         elif hasattr(module, 'demo'):
+             print("Found 'demo' object, launching...")
+             module.demo.launch()
+         elif hasattr(module, 'create_interface'):
+             print("Found 'create_interface' function, creating and launching...")
+             interface = module.create_interface()
+             interface.launch()
          else:
+             print("Error: Could not find 'interface', 'demo', or 'create_interface' in the loaded module")
+             print("Available attributes:", dir(module))
      except Exception as e:
+         print(f"Error loading code: {e}")
+         import traceback
+         traceback.print_exc()
+         print("\nMake sure:")
+         print("1. HF_TOKEN is set in Space secrets")
+         print("2. REPO_ID points to your private repository")
+         print("3. The code file exists in the repository")

  if __name__ == "__main__":
+     load_and_run()