mrradix committed on
Commit
5abe59f
·
verified ·
1 Parent(s): 013c3e4

Update utils/storage.py

Browse files
Files changed (1) hide show
  1. utils/storage.py +260 -117
utils/storage.py CHANGED
@@ -1,157 +1,300 @@
 
 
 
1
  import json
2
  import pickle
3
- import pandas as pd
4
  from pathlib import Path
5
  from typing import Any, Dict, Optional, Union
 
6
  import streamlit as st
7
-
8
  from utils.error_handling import handle_data_exceptions, DataError, ValidationError
9
- from utils.logging import get_logger
10
 
11
# Logger scoped to this module.
logger = get_logger(__name__)

# Key/value store backing the "memory" storage type; kept in memory because
# file operations may be limited under Streamlit.
_memory_store = dict()
 
 
 
 
 
15
 
16
@handle_data_exceptions
def save_data(key: str, data: Any, storage_type: str = "memory") -> bool:
    """
    Store ``data`` under ``key`` in the selected backend.

    Args:
        key: Name the value is stored under.
        data: Value to store.
        storage_type: ``"memory"`` for the module-level store or
            ``"session"`` for ``st.session_state.storage``.

    Returns:
        True when the value was stored; False when the storage type is not
        supported or any other error occurs (the error is logged).
    """
    try:
        # Reject unknown backends first; the handler below turns this into
        # a logged failure and a False return.
        if storage_type not in ("memory", "session"):
            raise ValidationError(f"Unsupported storage type: {storage_type}")

        if storage_type == "session":
            # Create the session-level container lazily on first use.
            if 'storage' not in st.session_state:
                st.session_state.storage = {}
            st.session_state.storage[key] = data
            logger.info(f"Data saved to session state with key: {key}")
        else:
            _memory_store[key] = data
            logger.info(f"Data saved to memory with key: {key}")
        return True
    except Exception as exc:
        logger.error(f"Failed to save data with key {key}: {str(exc)}")
        return False
35
 
36
@handle_data_exceptions
def load_data(key: str, storage_type: str = "memory", default_value: Any = None) -> Any:
    """
    Fetch the value stored under ``key`` from the selected backend.

    Args:
        key: Name the value was stored under.
        storage_type: ``"memory"`` or ``"session"``.
        default_value: Returned when the key is absent or an error occurs.

    Returns:
        The stored value, or ``default_value`` when the key is missing, the
        storage type is not supported, or any other error occurs.
    """
    try:
        if storage_type not in ("memory", "session"):
            raise ValidationError(f"Unsupported storage type: {storage_type}")

        if storage_type == "memory":
            found = _memory_store.get(key, default_value)
            # Only log when something other than None is returned.
            if found is not None:
                logger.info(f"Data loaded from memory with key: {key}")
            return found

        # Session backend: create the container lazily before reading.
        if 'storage' not in st.session_state:
            st.session_state.storage = {}
        found = st.session_state.storage.get(key, default_value)
        if found is not None:
            logger.info(f"Data loaded from session state with key: {key}")
        return found
    except Exception as exc:
        logger.error(f"Failed to load data with key {key}: {str(exc)}")
        return default_value
57
 
58
@handle_data_exceptions
def delete_data(key: str, storage_type: str = "memory") -> bool:
    """
    Remove the value stored under ``key`` from the selected backend.

    Args:
        key: Name of the entry to remove.
        storage_type: ``"memory"`` or ``"session"``.

    Returns:
        True when an entry was removed; False when the key was not present,
        the storage type is not supported, or any other error occurs.
    """
    try:
        if storage_type == "memory":
            if key in _memory_store:
                del _memory_store[key]
                logger.info(f"Data deleted from memory with key: {key}")
                return True
            return False

        if storage_type == "session":
            if 'storage' in st.session_state and key in st.session_state.storage:
                del st.session_state.storage[key]
                logger.info(f"Data deleted from session state with key: {key}")
                return True
            return False

        # An unknown backend is reported the same way save_data/load_data
        # report it (logged below), instead of silently returning False.
        raise ValidationError(f"Unsupported storage type: {storage_type}")
    except Exception as e:
        logger.error(f"Failed to delete data with key {key}: {str(e)}")
        return False
76
 
77
@handle_data_exceptions
def list_keys(storage_type: str = "memory") -> list:
    """
    Return the keys currently held by the selected backend.

    Args:
        storage_type: ``"memory"`` or ``"session"``.

    Returns:
        A list of keys, or an empty list when the storage type is not
        supported or any other error occurs (the error is logged).
    """
    try:
        if storage_type == "session":
            # Create the session-level container lazily so listing never
            # fails just because nothing has been stored yet.
            if 'storage' not in st.session_state:
                st.session_state.storage = {}
            session_keys = st.session_state.storage.keys()
            return list(session_keys)

        if storage_type == "memory":
            memory_keys = _memory_store.keys()
            return list(memory_keys)

        raise ValidationError(f"Unsupported storage type: {storage_type}")
    except Exception as exc:
        logger.error(f"Failed to list keys for storage type {storage_type}: {str(exc)}")
        return []
92
 
93
@handle_data_exceptions
def clear_storage(storage_type: str = "memory") -> bool:
    """
    Remove every entry from the selected backend.

    Args:
        storage_type: ``"memory"`` or ``"session"``.

    Returns:
        True when the backend was cleared; False when the storage type is
        not supported or any other error occurs (the error is logged).
    """
    try:
        if storage_type not in ("memory", "session"):
            raise ValidationError(f"Unsupported storage type: {storage_type}")

        if storage_type == "memory":
            _memory_store.clear()
            logger.info("Memory storage cleared")
            return True

        # Session backend: nothing to clear if the container was never made.
        if 'storage' in st.session_state:
            st.session_state.storage.clear()
        logger.info("Session storage cleared")
        return True
    except Exception as exc:
        logger.error(f"Failed to clear storage type {storage_type}: {str(exc)}")
        return False
111
 
112
@handle_data_exceptions
def save_dataframe(df: pd.DataFrame, key: str, storage_type: str = "memory") -> bool:
    """
    Store a pandas DataFrame under ``key`` as a plain dictionary.

    Args:
        df: DataFrame to store.
        key: Name the frame is stored under.
        storage_type: Backend name forwarded to ``save_data``.

    Returns:
        Whatever ``save_data`` returns for the converted frame.

    Raises:
        ValidationError: If ``df`` is not a pandas DataFrame.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValidationError("Data must be a pandas DataFrame")

    # Plain dict representation: row records plus the column and index
    # labels needed to rebuild the frame later.
    payload = dict(
        data=df.to_dict('records'),
        columns=df.columns.tolist(),
        index=df.index.tolist(),
    )
    return save_data(key, payload, storage_type)
 
 
 
 
 
 
126
 
127
@handle_data_exceptions
def load_dataframe(key: str, storage_type: str = "memory") -> Optional[pd.DataFrame]:
    """
    Rebuild a pandas DataFrame previously stored under ``key``.

    Args:
        key: Name the frame was stored under.
        storage_type: Backend name forwarded to ``load_data``.

    Returns:
        The reconstructed DataFrame, or None when nothing is stored under
        ``key`` or the stored dictionary cannot be turned back into a frame.
    """
    df_dict = load_data(key, storage_type)

    if df_dict is None:
        return None

    try:
        records = df_dict['data']
        if 'columns' in df_dict and len(records) == 0:
            # A frame with columns but no rows has no records to infer the
            # columns from; building it from the stored column list avoids
            # the length-mismatch error that assigning columns would raise.
            df = pd.DataFrame(columns=df_dict['columns'])
        else:
            df = pd.DataFrame(records)
            if 'columns' in df_dict:
                df.columns = df_dict['columns']
        if 'index' in df_dict:
            df.index = df_dict['index']
        return df
    except Exception as e:
        logger.error(f"Failed to reconstruct DataFrame from stored data: {str(e)}")
        return None
145
 
146
def get_storage_info(storage_type: str = "memory") -> Dict[str, Any]:
    """
    Summarise what is currently held by the selected backend.

    Args:
        storage_type: Backend name forwarded to ``list_keys``.

    Returns:
        A dict with ``storage_type``, ``total_keys`` and ``keys``. When the
        keys cannot be obtained or counted, the error is logged and the
        summary reports zero keys.
    """
    try:
        found_keys = list_keys(storage_type)
        key_count = len(found_keys)
    except Exception as exc:
        logger.error(f"Failed to get storage info: {str(exc)}")
        return {'storage_type': storage_type, 'total_keys': 0, 'keys': []}

    return {
        'storage_type': storage_type,
        'total_keys': key_count,
        'keys': found_keys
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Storage utilities for data persistence
3
+ """
4
  import json
5
  import pickle
6
+ import os
7
  from pathlib import Path
8
  from typing import Any, Dict, Optional, Union
9
+ import pandas as pd
10
  import streamlit as st
 
11
  from utils.error_handling import handle_data_exceptions, DataError, ValidationError
 
12
 
 
13
 
14
# Default storage locations, given as relative paths.
DATA_DIR = Path("data")
CACHE_DIR = Path("cache")

# Ensure both directories are present as soon as the module is imported.
for _storage_dir in (DATA_DIR, CACHE_DIR):
    _storage_dir.mkdir(exist_ok=True)
del _storage_dir
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
@handle_data_exceptions
def load_data(filename: str, data_dir: Optional[Path] = None) -> Optional[Dict[str, Any]]:
    """
    Load JSON data from a file.

    Args:
        filename: Name of the JSON file, relative to ``data_dir``.
        data_dir: Directory to read from; ``DATA_DIR`` is used when None.

    Returns:
        The value decoded by ``json.load``.

    Raises:
        DataError: If the file does not exist, contains invalid JSON, or
            cannot be read for any other reason.
            NOTE(review): ``handle_data_exceptions`` is defined elsewhere and
            may intercept these errors - confirm what callers actually see.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    filepath = data_dir / filename

    if not filepath.exists():
        raise DataError(f"File {filepath} does not exist")

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        # Chain explicitly so the original parse error stays attached as the
        # cause of the DataError.
        raise DataError(f"Invalid JSON in file {filepath}: {str(e)}") from e
    except Exception as e:
        raise DataError(f"Error loading data from {filepath}: {str(e)}") from e
44
+
45
 
46
@handle_data_exceptions
def save_data(data: Dict[str, Any], filename: str, data_dir: Optional[Path] = None) -> bool:
    """
    Save a dictionary to a JSON file.

    Args:
        data: Dictionary to serialise.
        filename: Target file name, relative to ``data_dir``.
        data_dir: Directory to write to; ``DATA_DIR`` is used when None.

    Returns:
        True when the file was written.

    Raises:
        ValidationError: If ``data`` is not a dictionary.
        DataError: If serialising or writing fails.
            NOTE(review): ``handle_data_exceptions`` is defined elsewhere and
            may intercept these errors - confirm what callers actually see.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    if not isinstance(data, dict):
        raise ValidationError("Data must be a dictionary")

    filepath = data_dir / filename

    # Create directory if it doesn't exist
    filepath.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Serialise before opening the file: opening with 'w' truncates it,
        # so a value that cannot be serialised must fail first or the
        # previously saved contents would be lost.
        payload = json.dumps(data, indent=2, ensure_ascii=False)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        raise DataError(f"Error saving data to {filepath}: {str(e)}") from e
68
+
69
 
70
@handle_data_exceptions
def load_dataframe(filename: str, data_dir: Optional[Path] = None) -> Optional[pd.DataFrame]:
    """
    Load a DataFrame from a CSV, Excel (.xlsx), JSON or pickle (.pkl) file.

    The format is chosen from the file extension, compared in lower case.

    Args:
        filename: Name of the file, relative to ``data_dir``.
        data_dir: Directory to read from; ``DATA_DIR`` is used when None.

    Returns:
        The DataFrame produced by the matching pandas reader.

    Raises:
        DataError: If the file does not exist, the extension is not
            supported, or the pandas reader fails.
            NOTE(review): ``handle_data_exceptions`` is defined elsewhere and
            may intercept these errors - confirm what callers actually see.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    filepath = data_dir / filename

    if not filepath.exists():
        raise DataError(f"File {filepath} does not exist")

    readers = {
        '.csv': pd.read_csv,
        '.xlsx': pd.read_excel,
        '.json': pd.read_json,
        # NOTE(security): unpickling can execute arbitrary code; only load
        # .pkl files that come from a trusted source.
        '.pkl': pd.read_pickle,
    }

    file_extension = filepath.suffix.lower()
    reader = readers.get(file_extension)
    if reader is None:
        # Raised outside the try block so the message is not wrapped a
        # second time by the generic handler below.
        raise DataError(f"Unsupported file format: {file_extension}")

    try:
        return reader(filepath)
    except Exception as e:
        raise DataError(f"Error loading dataframe from {filepath}: {str(e)}") from e
100
+
101
 
102
@handle_data_exceptions
def save_dataframe(df: pd.DataFrame, filename: str, data_dir: Optional[Path] = None,
                   file_format: str = 'csv') -> bool:
    """
    Save a DataFrame to a CSV, Excel (.xlsx), JSON or pickle (.pkl) file.

    Args:
        df: DataFrame to write; must not be empty.
        filename: Target file name, relative to ``data_dir``. The extension
            ``.{file_format}`` is appended when the name does not end with it.
        data_dir: Directory to write to; ``DATA_DIR`` is used when None.
        file_format: One of ``'csv'``, ``'xlsx'``, ``'json'`` or ``'pkl'``.

    Returns:
        True when the file was written.

    Raises:
        ValidationError: If ``df`` is not a DataFrame or is empty.
        DataError: If ``file_format`` is not supported or writing fails.
            NOTE(review): ``handle_data_exceptions`` is defined elsewhere and
            may intercept these errors - confirm what callers actually see.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    if not isinstance(df, pd.DataFrame):
        raise ValidationError("Data must be a pandas DataFrame")

    if df.empty:
        raise ValidationError("DataFrame cannot be empty")

    # One writer per supported format; the CSV and Excel writers drop the
    # index, JSON is written as a list of row records.
    writers = {
        'csv': lambda path: df.to_csv(path, index=False),
        'xlsx': lambda path: df.to_excel(path, index=False),
        'json': lambda path: df.to_json(path, orient='records', indent=2),
        'pkl': lambda path: df.to_pickle(path),
    }

    writer = writers.get(file_format)
    if writer is None:
        # Checked before anything touches the filesystem, and outside the
        # try block so the message is not wrapped a second time.
        raise DataError(f"Unsupported file format: {file_format}")

    # Ensure filename has correct extension
    if not filename.endswith(f'.{file_format}'):
        filename = f"{filename}.{file_format}"

    filepath = data_dir / filename

    # Create directory if it doesn't exist
    filepath.parent.mkdir(parents=True, exist_ok=True)

    try:
        writer(filepath)
        return True
    except Exception as e:
        raise DataError(f"Error saving dataframe to {filepath}: {str(e)}") from e
141
+
142
 
143
@handle_data_exceptions
def load_pickle(filename: str, data_dir: Optional[Path] = None) -> Optional[Any]:
    """
    Load pickled data from a file.

    Args:
        filename: Name of the pickle file, relative to ``data_dir``.
        data_dir: Directory to read from; ``DATA_DIR`` is used when None.

    Returns:
        The object returned by ``pickle.load``.

    Raises:
        DataError: If the file does not exist or cannot be unpickled.
            NOTE(review): ``handle_data_exceptions`` is defined elsewhere and
            may intercept these errors - confirm what callers actually see.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    filepath = data_dir / filename

    if not filepath.exists():
        raise DataError(f"File {filepath} does not exist")

    try:
        with open(filepath, 'rb') as f:
            # NOTE(security): pickle.load can execute arbitrary code from the
            # file; only use this on files written by a trusted source.
            return pickle.load(f)
    except Exception as e:
        raise DataError(f"Error loading pickle from {filepath}: {str(e)}") from e
162
+
163
 
164
@handle_data_exceptions
def save_pickle(data: Any, filename: str, data_dir: Optional[Path] = None) -> bool:
    """
    Save data to a pickle file.

    Args:
        data: Object to pickle.
        filename: Target file name, relative to ``data_dir``; ``.pkl`` is
            appended when the name does not already end with it.
        data_dir: Directory to write to; ``DATA_DIR`` is used when None.

    Returns:
        True when the file was written.

    Raises:
        DataError: If pickling or writing fails.
            NOTE(review): ``handle_data_exceptions`` is defined elsewhere and
            may intercept these errors - confirm what callers actually see.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    if not filename.endswith('.pkl'):
        filename = f"{filename}.pkl"

    filepath = data_dir / filename

    # Create directory if it doesn't exist
    filepath.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Pickle before opening the file: opening with 'wb' truncates it, so
        # an object that cannot be pickled must fail first or the previously
        # saved contents would be lost.
        payload = pickle.dumps(data)
        with open(filepath, 'wb') as f:
            f.write(payload)
        return True
    except Exception as e:
        raise DataError(f"Error saving pickle to {filepath}: {str(e)}") from e
 
186
 
187
+
188
def list_files(data_dir: Optional[Path] = None, pattern: str = "*") -> list[Path]:
    """
    List the files in a data directory that match a glob pattern.

    Args:
        data_dir: Directory to search; ``DATA_DIR`` is used when None.
        pattern: Glob pattern passed to ``Path.glob``.

    Returns:
        Matching paths that are regular files, in sorted order. An empty
        list is returned when the directory does not exist.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    if not data_dir.exists():
        return []

    # glob also yields directories and gives no ordering guarantee; keep
    # files only and sort so the result is stable.
    return sorted(path for path in data_dir.glob(pattern) if path.is_file())
199
+
200
+
201
def file_exists(filename: str, data_dir: Optional[Path] = None) -> bool:
    """
    Report whether ``filename`` exists inside the data directory.

    Args:
        filename: Name to look for, relative to ``data_dir``.
        data_dir: Directory to look in; ``DATA_DIR`` is used when None.

    Returns:
        The result of ``Path.exists`` for the combined path.
    """
    base_dir = DATA_DIR if data_dir is None else data_dir
    return (base_dir / filename).exists()
210
+
211
+
212
def delete_file(filename: str, data_dir: Optional[Path] = None) -> bool:
    """
    Delete a file from the data directory.

    Args:
        filename: Name of the file, relative to ``data_dir``.
        data_dir: Directory containing the file; ``DATA_DIR`` is used when
            None.

    Returns:
        True when the file was removed; False when it did not exist or the
        removal failed (a failure is reported through ``st.error``).
    """
    if data_dir is None:
        data_dir = DATA_DIR

    filepath = data_dir / filename

    try:
        filepath.unlink()
    except FileNotFoundError:
        # Nothing to delete. Attempting the unlink directly, rather than
        # checking existence first, means a file that disappears in between
        # is treated as "not there" instead of being reported as an error.
        return False
    except Exception as e:
        st.error(f"Error deleting file {filepath}: {str(e)}")
        return False
    return True
229
+
230
+
231
def get_file_size(filename: str, data_dir: Optional[Path] = None) -> Optional[int]:
    """
    Return the size of a file in bytes.

    Args:
        filename: Name of the file, relative to ``data_dir``.
        data_dir: Directory containing the file; ``DATA_DIR`` is used when
            None.

    Returns:
        ``st_size`` from the path's stat result, or None when the path does
        not exist.
    """
    target = (DATA_DIR if data_dir is None else data_dir) / filename

    if not target.exists():
        return None
    return target.stat().st_size
243
+
244
+
245
def create_backup(filename: str, data_dir: Optional[Path] = None) -> bool:
    """
    Create a ``<stem>_backup<suffix>`` copy of a file next to the original.

    Args:
        filename: Name of the file, relative to ``data_dir``.
        data_dir: Directory containing the file; ``DATA_DIR`` is used when
            None.

    Returns:
        True when the copy was made; False when the source does not exist
        or copying failed (a failure is reported through ``st.error``).
    """
    # Kept as a function-local import, as in the original implementation.
    import shutil

    if data_dir is None:
        data_dir = DATA_DIR

    filepath = data_dir / filename

    if not filepath.exists():
        return False

    # with_name keeps the backup in the same directory as the source, so a
    # file addressed as "sub/name.ext" is not backed up into data_dir itself
    # (where it could overwrite the backup of a same-named top-level file).
    backup_filepath = filepath.with_name(f"{filepath.stem}_backup{filepath.suffix}")

    try:
        shutil.copy2(filepath, backup_filepath)
        return True
    except Exception as e:
        st.error(f"Error creating backup: {str(e)}")
        return False
267
+
268
+
269
+ # Session state management
270
def init_session_state(key: str, default_value: Any = None):
    """
    Give a session state variable an initial value if it has none yet.

    Args:
        key: Session state key to initialise.
        default_value: Value stored when ``key`` is not already present.
    """
    # Leave an existing entry untouched.
    if key in st.session_state:
        return
    st.session_state[key] = default_value
276
+
277
+
278
def get_session_state(key: str, default_value: Any = None) -> Any:
    """
    Read a value from session state.

    Args:
        key: Session state key to read.
        default_value: Fallback passed to ``st.session_state.get``.

    Returns:
        Whatever ``st.session_state.get(key, default_value)`` returns.
    """
    session = st.session_state
    return session.get(key, default_value)
283
+
284
+
285
def set_session_state(key: str, value: Any):
    """
    Store a value in session state.

    Args:
        key: Session state key to write.
        value: Value assigned to ``key``.
    """
    session = st.session_state
    session[key] = value
290
+
291
+
292
def clear_session_state(key: Optional[str] = None):
    """
    Clear one session state entry, or all of them.

    Args:
        key: Entry to remove. When None (the default), the whole session
            state is cleared. A key that is not present is ignored.
    """
    # Compare against None explicitly: with a plain truthiness test an
    # empty-string key would be treated as "no key" and wipe everything.
    if key is not None:
        if key in st.session_state:
            del st.session_state[key]
    else:
        st.session_state.clear()