"""
Advanced URL & Text Processing Suite - Main Application
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A sophisticated Gradio interface with URL processing, file manipulation, QR operations,
and advanced data chat capabilities.
"""
import json
import logging
import os
import shutil
import sys
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import gradio as gr
import numpy as np
import pandas as pd
# Configure logging | |
logging.basicConfig( | |
level=logging.INFO, | |
format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s - %(message)s', | |
datefmt='%Y-%m-%d %H:%M:%S' | |
) | |
logger = logging.getLogger(__name__) | |
# Modern UI Configuration | |
THEME = gr.themes.Soft( | |
primary_hue="indigo", | |
secondary_hue="blue", | |
neutral_hue="slate", | |
spacing_size=gr.themes.sizes.spacing_md, | |
radius_size=gr.themes.sizes.radius_md, | |
text_size=gr.themes.sizes.text_md, | |
) | |
class DataChatProcessor:
    """Process uploaded ZIP archives of CSV files and answer simple chat queries.

    Two modes are supported:
      * ``TrainedOnData``  -- ingest every CSV and remember it in ``self.trained_data``.
      * ``TalkAboutData``  -- record lightweight per-file summaries in
        ``self.current_dataset``.
    """

    def __init__(self):
        # Set by _train_on_data: {'datasets': [...], 'timestamp': iso-string}
        self.trained_data = {}
        # Set by _analyze_data: {'analyses': [...], 'timestamp': iso-string}
        self.current_dataset = None

    def process_zip_file(self, file_obj, mode):
        """Extract a ZIP upload and process its CSVs according to *mode*.

        Args:
            file_obj: Gradio file object (exposes a ``.name`` path) or a plain
                path string. Falsy values yield a prompt to upload.
            mode: ``"TrainedOnData"`` trains; any other value analyzes.

        Returns:
            Tuple of (status_text, chat_messages) for the Status box and Chatbot.
        """
        if not file_obj:
            return "Please upload a ZIP file", []
        # Accept both Gradio file objects and plain path strings.
        zip_path = getattr(file_obj, 'name', file_obj)
        temp_dir = None
        try:
            # Fresh private directory per call: the old shared 'temp_data'
            # directory was never cleaned up, so stale CSVs from a previous
            # upload would pollute later runs and leak to disk.
            temp_dir = Path(tempfile.mkdtemp(prefix='data_chat_'))
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # NOTE(review): extractall trusts archive member names;
                # confirm uploads are from trusted users or sanitize names.
                zip_ref.extractall(temp_dir)
            if mode == "TrainedOnData":
                return self._train_on_data(temp_dir)
            return self._analyze_data(temp_dir)  # TalkAboutData
        except Exception as e:
            logger.error(f"Error processing ZIP file: {e}")
            return f"Error: {str(e)}", []
        finally:
            # Best-effort cleanup of the extracted contents; the DataFrames
            # are already loaded into memory by the time we get here.
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)

    def _train_on_data(self, data_dir):
        """Load every CSV under *data_dir* and store it with a small summary.

        Returns (status_text, chat_messages); on failure the error text and
        an empty message list.
        """
        try:
            datasets = []
            for file in data_dir.glob('**/*.csv'):
                df = pd.read_csv(file)
                datasets.append({
                    'name': file.name,
                    'data': df,
                    'summary': {
                        'rows': len(df),
                        'columns': len(df.columns),
                        'dtypes': df.dtypes.astype(str).to_dict()
                    }
                })
            self.trained_data = {
                'datasets': datasets,
                'timestamp': datetime.now().isoformat()
            }
            summary = f"Trained on {len(datasets)} datasets"
            messages = [
                {"role": "assistant", "content": "Training completed successfully."},
                {"role": "assistant", "content": summary}
            ]
            return summary, messages
        except Exception as e:
            logger.error(f"Error training on data: {e}")
            return f"Error during training: {str(e)}", []

    def _analyze_data(self, data_dir):
        """Record shape/dtype metadata for every CSV under *data_dir*.

        Unlike _train_on_data, the DataFrames themselves are not retained.
        Returns (status_text, chat_messages).
        """
        try:
            analyses = []
            for file in data_dir.glob('**/*.csv'):
                df = pd.read_csv(file)
                analyses.append({
                    'file': file.name,
                    'shape': df.shape,
                    'dtypes': df.dtypes.astype(str).to_dict()
                })
            self.current_dataset = {
                'analyses': analyses,
                'timestamp': datetime.now().isoformat()
            }
            summary = f"Analyzed {len(analyses)} files"
            messages = [
                {"role": "assistant", "content": "Analysis completed successfully."},
                {"role": "assistant", "content": summary}
            ]
            return summary, messages
        except Exception as e:
            logger.error(f"Error analyzing data: {e}")
            return f"Error during analysis: {str(e)}", []

    def chat(self, message, history, mode):
        """Append *message* and a generated reply to *history*.

        Args:
            message: user text; empty/None is a no-op.
            history: list of {"role", "content"} dicts (may be None).
            mode: "TrainedOnData" or "TalkAboutData" — selects the responder.

        Returns:
            ("", new_history) — the empty string clears the input textbox.
        """
        if not message:
            return "", history
        # Copy so the caller's list is never mutated, and tolerate history=None.
        history = list(history or [])
        history.append({"role": "user", "content": message})
        try:
            if mode == "TrainedOnData":
                if not self.trained_data:
                    response = "Please upload and train on data first."
                else:
                    response = self._generate_trained_response(message)
            else:
                if not self.current_dataset:
                    response = "Please upload data for analysis first."
                else:
                    response = self._generate_analysis_response(message)
            history.append({"role": "assistant", "content": response})
            return "", history
        except Exception as e:
            logger.error(f"Error in chat: {e}")
            history.append({"role": "assistant", "content": f"Error: {str(e)}"})
            return "", history

    def _generate_trained_response(self, message):
        """Keyword-matched reply about the trained datasets ("how many"/"summary")."""
        datasets = self.trained_data['datasets']
        if "how many" in message.lower():
            return f"There are {len(datasets)} datasets."
        if "summary" in message.lower():
            summaries = []
            for ds in datasets:
                summaries.append(
                    f"Dataset '{ds['name']}': {ds['summary']['rows']} rows, "
                    f"{ds['summary']['columns']} columns"
                )
            return "\n".join(summaries)
        return "I can help you analyze the trained datasets. Ask about number of datasets or summaries."

    def _generate_analysis_response(self, message):
        """Keyword-matched reply about the analyzed files ("how many"/"summary")."""
        analyses = self.current_dataset['analyses']
        if "how many" in message.lower():
            return f"There are {len(analyses)} files."
        if "summary" in message.lower():
            summaries = []
            for analysis in analyses:
                summaries.append(
                    f"File '{analysis['file']}': {analysis['shape'][0]} rows, "
                    f"{analysis['shape'][1]} columns"
                )
            return "\n".join(summaries)
        return "I can help you explore the current dataset. Ask about file count or summaries."
def create_interface():
    """Build and return the Gradio Blocks UI for the data-chat application."""
    processor = DataChatProcessor()

    with gr.Blocks(theme=THEME) as app:
        gr.Markdown(
            """
            # π Advanced Data Processing & Analysis Suite
            Enterprise-grade toolkit for data processing, analysis, and interactive chat capabilities.
            """
        )
        with gr.Tab("π¬ DataChat"):
            with gr.Row():
                # Upload and mode controls on the left.
                with gr.Column(scale=1):
                    data_file = gr.File(label="Upload ZIP File", file_types=[".zip"])
                    mode = gr.Radio(
                        choices=["TrainedOnData", "TalkAboutData"],
                        value="TrainedOnData",
                        label="Chat Mode",
                    )
                    process_btn = gr.Button("Process Data", variant="primary")
                    status_output = gr.Textbox(label="Status", interactive=False)
                # Conversation area on the right.
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(
                        label="Chat History",
                        height=400,
                        show_label=True,
                        type="messages",  # OpenAI-style {"role", "content"} dicts
                    )
                    msg = gr.Textbox(
                        label="Your Message",
                        placeholder="Ask questions about your data...",
                        lines=2,
                    )
                    with gr.Row():
                        submit_btn = gr.Button("Send", variant="primary")
                        clear_btn = gr.Button("Clear Chat", variant="secondary")

        # Wire events: file processing, two ways to send a message, clearing.
        process_btn.click(
            fn=processor.process_zip_file,
            inputs=[data_file, mode],
            outputs=[status_output, chatbot],
        )
        for trigger in (submit_btn.click, msg.submit):
            trigger(
                fn=processor.chat,
                inputs=[msg, chatbot, mode],
                outputs=[msg, chatbot],
            )
        clear_btn.click(
            fn=lambda: ([], "Chat cleared"),
            outputs=[chatbot, status_output],
        )

    return app
def main():
    """Entry point: build the UI and serve it on all interfaces, port 8000."""
    try:
        app = create_interface()
        if not app:
            logger.error("Failed to create interface")
            sys.exit(1)
        app.launch(
            server_name="0.0.0.0",
            server_port=8000,
        )
    except Exception as e:
        logger.error(f"Application startup error: {e}", exc_info=True)
        sys.exit(1)


if __name__ == "__main__":
    main()