acecalisto3 committed on
Commit
c42909d
·
verified ·
1 Parent(s): e8a15e5

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +35 -19
app2.py CHANGED
@@ -166,10 +166,10 @@ class URLProcessor:
166
  class FileProcessor:
167
  """Class to handle file processing"""
168
 
169
- def __init__(self, max_file_size: int = 10 * 1024 * 1024): # 10MB default
170
  self.max_file_size = max_file_size
171
  self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
172
-
173
  def is_text_file(self, filepath: str) -> bool:
174
  """Check if file is a text file"""
175
  try:
@@ -226,25 +226,41 @@ class FileProcessor:
226
  logger.error(f"Error reading file {filename}: {str(e)}")
227
  return results
228
 
229
- def _process_single_file(self, file) -> List[Dict]:
230
- try:
231
- file_stat = os.stat(file.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
233
  content = f.read()
234
-
235
- return [{
236
- 'source': 'file',
237
- 'filename': os.path.basename(file.name),
238
- 'file_size': file_stat.st_size,
239
- 'mime_type': mimetypes.guess_type(file.name)[0],
240
- 'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
241
- 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
242
- 'content': content,
243
- 'timestamp': datetime.now().isoformat()
244
- }]
245
- except Exception as e:
246
- logger.error(f"File processing error: {e}")
247
- return []
248
 
249
  def create_interface():
250
  """Create a comprehensive Gradio interface with advanced features"""
 
166
  class FileProcessor:
167
  """Class to handle file processing"""
168
 
169
+ def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
170
  self.max_file_size = max_file_size
171
  self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
172
+
173
  def is_text_file(self, filepath: str) -> bool:
174
  """Check if file is a text file"""
175
  try:
 
226
  logger.error(f"Error reading file {filename}: {str(e)}")
227
  return results
228
 
229
def _process_single_file(
    self,
    file,
    large_file_threshold: int = 100 * 1024 * 1024,
    excerpt_size: int = 1 * 1024 * 1024,
) -> List[Dict]:
    """Read one uploaded file and return its content plus basic metadata.

    Files no larger than ``large_file_threshold`` bytes are read whole as
    UTF-8 text. Larger files are summarised: only the first and the last
    ``excerpt_size`` bytes are kept, joined by a truncation marker, so the
    whole file is never loaded into memory.

    Args:
        file: Any object exposing the file's path as ``file.name``.
        large_file_threshold: Size in bytes above which the file is
            excerpted instead of read in full (default 100MB).
        excerpt_size: Number of bytes kept from each end of a large file
            (default 1MB). Expected to be a positive integer.

    Returns:
        A one-element list holding a dict with the keys ``source``,
        ``filename``, ``file_size``, ``mime_type``, ``created``,
        ``modified``, ``content`` and ``timestamp``; an empty list if
        anything goes wrong (the error is logged, not raised).
    """
    try:
        file_stat = os.stat(file.name)

        if file_stat.st_size > large_file_threshold:
            logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")

            # Excerpts are taken in binary mode: on a text-mode handle
            # read(n) counts characters rather than bytes, and seeking to
            # an arbitrary byte offset is not supported for text streams.
            with open(file.name, 'rb') as f:
                head = f.read(excerpt_size)
                # Never seek back before the end of the head, so the two
                # excerpts cannot overlap when the file is short.
                f.seek(max(f.tell(), file_stat.st_size - excerpt_size))
                tail = f.read(excerpt_size)

            # errors='ignore' drops a multi-byte character that was cut in
            # half at an excerpt boundary; newlines are normalised the same
            # way the text-mode branch below does it.
            pieces = [
                raw.decode('utf-8', errors='ignore').replace('\r\n', '\n').replace('\r', '\n')
                for raw in (head, tail)
            ]
            content = "\n...[Content truncated due to large file size]...\n".join(pieces)
        else:
            # Regular file processing
            with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

        return [{
            'source': 'file',
            'filename': os.path.basename(file.name),
            'file_size': file_stat.st_size,
            'mime_type': mimetypes.guess_type(file.name)[0],
            'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
            'content': content,
            'timestamp': datetime.now().isoformat()
        }]
    except Exception as e:
        # Best-effort by design: a file that cannot be processed is
        # reported in the log and skipped rather than aborting the caller.
        logger.error(f"File processing error: {e}")
        return []
264
 
265
  def create_interface():
266
  """Create a comprehensive Gradio interface with advanced features"""