Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ class URLProcessor:
|
|
32 |
self.session = requests.Session()
|
33 |
self.timeout = 10 # seconds
|
34 |
self.session.headers.update({
|
35 |
-
'User
|
36 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
37 |
'Accept-Language': 'en-US,en;q=0.5',
|
38 |
'Accept-Encoding': 'gzip, deflate, br',
|
@@ -173,11 +173,8 @@ class FileProcessor:
|
|
173 |
combined_data = []
|
174 |
try:
|
175 |
for file in files:
|
|
|
176 |
file_name = file.name if isinstance(file, gr.File) else file
|
177 |
-
if os.path.isdir(file_name):
|
178 |
-
logger.warning(f"Skipping directory: {file_name}")
|
179 |
-
continue # Skip directories
|
180 |
-
|
181 |
file_size = os.path.getsize(file_name)
|
182 |
if file_size > self.max_file_size:
|
183 |
logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
|
@@ -192,8 +189,6 @@ class FileProcessor:
|
|
192 |
logger.error(f"Error processing files: {str(e)}")
|
193 |
return []
|
194 |
|
195 |
-
return combined_data
|
196 |
-
|
197 |
def _process_zip_file(self, zip_path: str) -> List[Dict]:
|
198 |
"""Process ZIP file contents"""
|
199 |
results = []
|
@@ -225,13 +220,13 @@ class FileProcessor:
|
|
225 |
content = f.read()
|
226 |
|
227 |
return [{
|
228 |
-
'source': 'file',
|
229 |
'filename': os.path.basename(file_path),
|
230 |
'file_size': file_stat.st_size,
|
231 |
'mime_type': mimetypes.guess_type(file_path)[0],
|
232 |
'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
|
233 |
'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
|
234 |
-
'
|
235 |
'timestamp': datetime.now().isoformat()
|
236 |
}]
|
237 |
except Exception as e:
|
@@ -291,8 +286,7 @@ def create_interface():
|
|
291 |
with gr.Tab("File Input"):
|
292 |
file_input = gr.File(
|
293 |
label="Upload text files or ZIP archives",
|
294 |
-
file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
|
295 |
-
multiple=True # Allow multiple file uploads
|
296 |
)
|
297 |
|
298 |
with gr.Tab("Text Input"):
|
@@ -341,7 +335,7 @@ def create_interface():
|
|
341 |
content = processor.fetch_content(url)
|
342 |
if content:
|
343 |
results.append({
|
344 |
-
'source': 'url',
|
345 |
'url': url,
|
346 |
'content': content,
|
347 |
'timestamp': datetime.now().isoformat()
|
@@ -354,11 +348,12 @@ def create_interface():
|
|
354 |
|
355 |
# Process text input
|
356 |
if text:
|
357 |
-
cleaned_text = processor.advanced_text_cleaning(text
|
|
|
358 |
'source': 'direct_input',
|
359 |
'content': cleaned_text,
|
360 |
-
'timestamp': datetime.now().isoformat()
|
361 |
-
})
|
362 |
|
363 |
# Generate output
|
364 |
if results:
|
@@ -383,14 +378,53 @@ def create_interface():
|
|
383 |
return chatbot.load_data(json_data)
|
384 |
|
385 |
def chat_with_data(user_input):
|
386 |
-
"""
|
387 |
return chatbot.chat(user_input)
|
388 |
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
|
394 |
-
interface.launch() ```python
|
395 |
if __name__ == "__main__":
|
396 |
-
|
|
|
32 |
self.session = requests.Session()
|
33 |
self.timeout = 10 # seconds
|
34 |
self.session.headers.update({
|
35 |
+
'User-Agent': UserAgent().random,
|
36 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
37 |
'Accept-Language': 'en-US,en;q=0.5',
|
38 |
'Accept-Encoding': 'gzip, deflate, br',
|
|
|
173 |
combined_data = []
|
174 |
try:
|
175 |
for file in files:
|
176 |
+
# Check if the file is a Gradio File object or a string path
|
177 |
file_name = file.name if isinstance(file, gr.File) else file
|
|
|
|
|
|
|
|
|
178 |
file_size = os.path.getsize(file_name)
|
179 |
if file_size > self.max_file_size:
|
180 |
logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
|
|
|
189 |
logger.error(f"Error processing files: {str(e)}")
|
190 |
return []
|
191 |
|
|
|
|
|
192 |
def _process_zip_file(self, zip_path: str) -> List[Dict]:
|
193 |
"""Process ZIP file contents"""
|
194 |
results = []
|
|
|
220 |
content = f.read()
|
221 |
|
222 |
return [{
|
223 |
+
'source': 'file',
|
224 |
'filename': os.path.basename(file_path),
|
225 |
'file_size': file_stat.st_size,
|
226 |
'mime_type': mimetypes.guess_type(file_path)[0],
|
227 |
'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
|
228 |
'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
|
229 |
+
'content': content,
|
230 |
'timestamp': datetime.now().isoformat()
|
231 |
}]
|
232 |
except Exception as e:
|
|
|
286 |
with gr.Tab("File Input"):
|
287 |
file_input = gr.File(
|
288 |
label="Upload text files or ZIP archives",
|
289 |
+
file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
|
|
|
290 |
)
|
291 |
|
292 |
with gr.Tab("Text Input"):
|
|
|
335 |
content = processor.fetch_content(url)
|
336 |
if content:
|
337 |
results.append({
|
338 |
+
'source': 'url',
|
339 |
'url': url,
|
340 |
'content': content,
|
341 |
'timestamp': datetime.now().isoformat()
|
|
|
348 |
|
349 |
# Process text input
|
350 |
if text:
|
351 |
+
cleaned_text = processor.advanced_text_cleaning(text)
|
352 |
+
results.append({
|
353 |
'source': 'direct_input',
|
354 |
'content': cleaned_text,
|
355 |
+
'timestamp': datetime.now().isoformat()
|
356 |
+
})
|
357 |
|
358 |
# Generate output
|
359 |
if results:
|
|
|
378 |
return chatbot.load_data(json_data)
|
379 |
|
380 |
def chat_with_data(user_input):
|
381 |
+
"""Chat with the loaded data."""
|
382 |
return chatbot.chat(user_input)
|
383 |
|
384 |
+
process_btn.click(
|
385 |
+
process_all_inputs,
|
386 |
+
inputs=[url_input, file_input, text_input],
|
387 |
+
outputs=[output_file, output_text]
|
388 |
+
)
|
389 |
+
|
390 |
+
load_btn.click(
|
391 |
+
load_chat_data,
|
392 |
+
inputs=json_input,
|
393 |
+
outputs=chat_output
|
394 |
+
)
|
395 |
+
|
396 |
+
chat_input.submit(
|
397 |
+
chat_with_data,
|
398 |
+
inputs=chat_input,
|
399 |
+
outputs=chat_output
|
400 |
+
)
|
401 |
+
|
402 |
+
gr.Markdown("""
|
403 |
+
### Usage Guidelines
|
404 |
+
- **URL Processing**: Enter valid HTTP/HTTPS URLs
|
405 |
+
- **File Input**: Upload multiple text files or ZIP archives
|
406 |
+
- **Text Input**: Direct text processing
|
407 |
+
- **Chat**: Load your JSON data and ask questions about it
|
408 |
+
- Advanced cleaning and validation included
|
409 |
+
""")
|
410 |
+
|
411 |
+
return interface
|
412 |
+
|
413 |
+
def main():
|
414 |
+
# Configure system settings
|
415 |
+
mimetypes.init()
|
416 |
+
|
417 |
+
# Create and launch interface
|
418 |
+
interface = create_interface()
|
419 |
+
|
420 |
+
# Launch with proper configuration
|
421 |
+
interface.launch(
|
422 |
+
server_name="0.0.0.0",
|
423 |
+
server_port=7860,
|
424 |
+
share=True, # Enable public sharing
|
425 |
+
inbrowser=False,
|
426 |
+
debug=False
|
427 |
+
)
|
428 |
|
|
|
429 |
if __name__ == "__main__":
|
430 |
+
main()
|