Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -167,68 +167,56 @@ class FileProcessor:
|
|
167 |
"""Check if the file is a text file based on its extension."""
|
168 |
return any(file_path.lower().endswith(ext) for ext in self.supported_text_extensions)
|
169 |
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
combined_data = []
|
176 |
-
self.processed_zip_count = 0
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
|
|
|
|
186 |
continue
|
187 |
-
|
188 |
-
|
189 |
-
|
|
|
190 |
continue
|
|
|
|
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
continue
|
201 |
-
self.processed_zip_count += 1
|
202 |
-
zip_results = self._process_zip_file(file_path)
|
203 |
-
combined_data.extend(zip_results)
|
204 |
-
elif self.is_text_file(file_path):
|
205 |
-
file_results = self._process_single_file(file) # Changed file_path to file
|
206 |
-
combined_data.extend(file_results)
|
207 |
-
else:
|
208 |
-
logger.warning(f"Unsupported file type: {file_path}")
|
209 |
-
|
210 |
-
except Exception as e:
|
211 |
-
logger.error(f"Error processing files: {str(e)}")
|
212 |
-
|
213 |
return combined_data
|
214 |
-
|
215 |
|
216 |
-
def
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
'filename': os.path.basename(file.name),
|
225 |
-
'file_size': file_stat.st_size,
|
226 |
-
'mime_type': mimetypes.guess_type(file.name)[0],
|
227 |
-
'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
|
228 |
-
'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
|
229 |
-
'content': content,
|
230 |
-
'timestamp': datetime.now().isoformat()
|
231 |
-
}]
|
232 |
except Exception as e:
|
233 |
logger.error(f"File processing error: {e}")
|
234 |
return []
|
|
|
167 |
"""Check if the file is a text file based on its extension."""
|
168 |
return any(file_path.lower().endswith(ext) for ext in self.supported_text_extensions)
|
169 |
|
170 |
+
|
171 |
+
def validate_filepath(path: Path) -> bool:
    """Return True when *path* is an existing regular file with an accepted suffix.

    The suffix is lower-cased before it is looked up in ``valid_extensions``
    (defined outside this function). Any exception raised while inspecting
    the path is logged as an error and reported as ``False``.
    """
    try:
        # Guard clauses: bail out on the first check that fails.
        if not path.exists():
            return False
        if not path.is_file():
            return False
        return path.suffix.lower() in valid_extensions
    except Exception as e:
        logger.error(f"Validation error for {path}: {str(e)}")
        return False
|
178 |
+
|
179 |
+
def process_files(base_path: str = "/app/data") -> list:
    """Walk *base_path* recursively and collect one result per valid file.

    If the base directory does not exist it is created (including parents)
    and the creation is logged. Every entry found by the recursive glob is
    then handled as follows:

    * directories are skipped (debug log);
    * entries rejected by ``validate_filepath`` are skipped (warning log);
    * everything else is passed to ``process_single_file`` and its return
      value is appended to the result list.

    An exception raised while handling one entry is logged as an error and
    does not stop the walk.

    Returns:
        The list of values returned by ``process_single_file``.
    """
    root = Path(base_path)
    if not root.exists():
        root.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created data directory at {root}")

    results = []
    for entry in root.glob('**/*'):
        try:
            if entry.is_dir():
                logger.debug(f"Skipping directory: {entry}")
            elif not validate_filepath(entry):
                logger.warning(f"Invalid file skipped: {entry}")
            else:
                logger.info(f"Processing valid file: {entry.name}")
                results.append(process_single_file(entry))
        except Exception as e:
            # One bad entry must not abort the whole walk.
            logger.error(f"Failed processing {entry}: {str(e)}")

    return results
|
|
|
211 |
|
212 |
+
def process_single_file(file_path: Path) -> dict:
    """Build the result record for a single file (placeholder implementation).

    Args:
        file_path: Path of the file to describe; only its ``name`` is read.

    Returns:
        A dict with the keys ``'filename'`` (the final path component),
        ``'content'`` (currently a constant placeholder string) and
        ``'metadata'`` (currently an empty dict). If building the record
        raises, the error is logged and an empty dict is returned.
    """
    # The handler below previously had no matching ``try`` (its original
    # ``try`` was deleted with the old implementation), which made the
    # module fail to parse. The body is wrapped so the handler is valid.
    try:
        # TODO: replace the placeholder values with real processing logic.
        return {
            'filename': file_path.name,
            'content': "processed content",  # Replace with real content
            'metadata': {},  # Add actual metadata
        }
    except Exception as e:
        logger.error(f"File processing error: {e}")
        # Empty dict (not a list) so the failure value matches ``-> dict``.
        return {}
|