acecalisto3 committed
Commit 776de71 · verified · 1 Parent(s): 0ee778a

Update app.py

Files changed (1)
  1. app.py +34 -34
app.py CHANGED
@@ -227,43 +227,43 @@ class FileProcessor:
 
         return chunks
 
-    def _process_single_file(self, file) -> List[Dict]:
-        """Process a single file"""
-        try:
-            file_stat = os.stat(file.name)
-
-            # For very large files, read in chunks and summarize
-            if file_stat.st_size > 100 * 1024 * 1024:  # 100MB
-                logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
-
-                # Read first and last 1MB for extremely large files
-                content = ""
-                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
-                    content = f.read(1 * 1024 * 1024)  # First 1MB
-                    content += "\n...[Content truncated due to large file size]...\n"
-
-                    # Seek to the last 1MB
-                    f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
-                    content += f.read()  # Last 1MB
-            else:
-                # Regular file processing
-                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
-                    content = f.read()
-
-            return [{
-                'source',
-                'filename': os.path.basename(file.name),
-                'file_size': file_stat.st_size,
-                'mime_type': mimetypes.guess_type(file.name)[0],
-                'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
-                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
-                'content': content,
-                'timestamp': datetime.now().isoformat()
-            }]
-        except Exception as e:
-            logger.error(f"File processing error: {e}")
-            return []
-
+    def _process_single_file(self, file) -> List[Dict]:
+        """Process a single file"""
+        try:
+            file_stat = os.stat(file.name)
+
+            # For very large files, read in chunks and summarize
+            if file_stat.st_size > 100 * 1024 * 1024:  # 100MB
+                logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
+
+                # Read first and last 1MB for extremely large files
+                content = ""
+                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read(1 * 1024 * 1024)  # First 1MB
+                    content += "\n...[Content truncated due to large file size]...\n"
+
+                    # Seek to the last 1MB
+                    f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
+                    content += f.read()  # Last 1MB
+            else:
+                # Regular file processing
+                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read()
+
+            return [{
+                'source': 'filename',  # Assuming 'source' should be a string value
+                'filename': os.path.basename(file.name),
+                'file_size': file_stat.st_size,
+                'mime_type': mimetypes.guess_type(file.name)[0],
+                'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                'content': content,
+                'timestamp': datetime.now().isoformat()
+            }]
+        except Exception as e:
+            logger.error(f"File processing error: {e}")
+            return []
+
 def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
     """Clean and validate JSON data"""
     try:
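
The substantive change is on line 254: the removed version's bare `'source',` inside a dict literal is a SyntaxError in Python, and the commit replaces it with the key-value pair `'source': 'filename'`. The large-file branch is carried over unchanged, and it has one caveat: `f.seek()` with an arbitrary byte offset is only well-defined on binary-mode files, while this code opens the file in text mode. Below is a minimal standalone sketch of the same head-plus-tail truncation reading in binary mode instead; the function name and constant names are illustrative, not from the commit.

import os

# Thresholds matching the commit (names are ours)
TRUNCATE_THRESHOLD = 100 * 1024 * 1024  # 100MB
CHUNK_SIZE = 1 * 1024 * 1024            # 1MB head and tail

def read_head_and_tail(path: str) -> str:
    """Return the whole file as text, or its first and last 1MB
    when it exceeds the truncation threshold."""
    size = os.stat(path).st_size
    with open(path, 'rb') as f:  # binary mode: byte-offset seek is well-defined
        if size <= TRUNCATE_THRESHOLD:
            return f.read().decode('utf-8', errors='ignore')
        head = f.read(CHUNK_SIZE)
        f.seek(max(0, size - CHUNK_SIZE))  # jump to the last 1MB
        tail = f.read()
    marker = "\n...[Content truncated due to large file size]...\n"
    return (head.decode('utf-8', errors='ignore') + marker
            + tail.decode('utf-8', errors='ignore'))

Decoding after the raw reads also means a multi-byte UTF-8 sequence split at the seek boundary cannot raise; errors='ignore' simply drops the partial character, mirroring the commit's lenient decoding.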