Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -166,10 +166,10 @@ class URLProcessor:
|
|
166 |
class FileProcessor:
|
167 |
"""Class to handle file processing"""
|
168 |
|
169 |
-
def __init__(self, max_file_size: int =
|
170 |
self.max_file_size = max_file_size
|
171 |
self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
|
172 |
-
|
173 |
def is_text_file(self, filepath: str) -> bool:
|
174 |
"""Check if file is a text file"""
|
175 |
try:
|
@@ -226,25 +226,41 @@ class FileProcessor:
|
|
226 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
227 |
return results
|
228 |
|
229 |
-
|
230 |
-
|
231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
|
233 |
content = f.read()
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
|
249 |
def create_interface():
|
250 |
"""Create a comprehensive Gradio interface with advanced features"""
|
|
|
166 |
class FileProcessor:
|
167 |
"""Class to handle file processing"""
|
168 |
|
169 |
+
def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
|
170 |
self.max_file_size = max_file_size
|
171 |
self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
|
172 |
+
|
173 |
def is_text_file(self, filepath: str) -> bool:
|
174 |
"""Check if file is a text file"""
|
175 |
try:
|
|
|
226 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
227 |
return results
|
228 |
|
229 |
+
def _process_single_file(
    self,
    file,
    large_file_threshold: int = 100 * 1024 * 1024,
    excerpt_size: int = 1 * 1024 * 1024,
) -> List[Dict]:
    """Read one file and return its text content together with basic metadata.

    Files larger than ``large_file_threshold`` bytes are not loaded whole:
    only the first and last ``excerpt_size`` bytes are kept, joined by a
    truncation marker. Smaller files are read completely.

    Args:
        file: Object exposing a ``name`` attribute holding the path on disk.
        large_file_threshold: Size in bytes above which the file is
            excerpted instead of read in full (default 100MB).
        excerpt_size: Number of bytes (expected to be positive) kept from
            each end of a large file (default 1MB).

    Returns:
        A one-element list holding a dict with the keys ``source``,
        ``filename``, ``file_size``, ``mime_type``, ``created``,
        ``modified``, ``content`` and ``timestamp``; an empty list when
        anything goes wrong (the error is logged, not raised).
    """
    try:
        file_stat = os.stat(file.name)

        if file_stat.st_size > large_file_threshold:
            logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")

            # Read the excerpts in binary mode: text-mode seek() only
            # accepts offsets previously returned by tell(), and text-mode
            # read(n) counts characters rather than bytes.
            with open(file.name, 'rb') as f:
                head = f.read(excerpt_size)
                # Never start the tail inside the head, so the two excerpts
                # cannot overlap when the file is shorter than two excerpts.
                tail_start = max(len(head), file_stat.st_size - excerpt_size)
                f.seek(tail_start)
                tail = f.read(excerpt_size)

            # errors='ignore' drops a multi-byte sequence cut at an excerpt
            # boundary; newlines are normalised to match what the text-mode
            # branch below produces.
            excerpts = [
                raw.decode('utf-8', errors='ignore')
                .replace('\r\n', '\n')
                .replace('\r', '\n')
                for raw in (head, tail)
            ]
            content = "\n...[Content truncated due to large file size]...\n".join(excerpts)
        else:
            # Regular file processing
            with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

        return [{
            'source': 'file',
            'filename': os.path.basename(file.name),
            'file_size': file_stat.st_size,
            'mime_type': mimetypes.guess_type(file.name)[0],
            'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
            'content': content,
            'timestamp': datetime.now().isoformat()
        }]
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the
        # caller, so the failure is logged and reported as "no results".
        logger.error(f"File processing error: {e}")
        return []
|
264 |
|
265 |
def create_interface():
|
266 |
"""Create a comprehensive Gradio interface with advanced features"""
|