llm-excel-plotter-agent / data_processor.py
“Transcendental-Programmer”
feat: initial project files and Docker setup
d773e1b
raw
history blame
1.49 kB
import pandas as pd
import os
import logging
class DataProcessor:
    """Load a CSV/Excel file into a pandas DataFrame and expose small helpers.

    The loaded frame is stored on ``self.data`` and the path it was read from
    on ``self.data_path``. Loading is best-effort: when the file cannot be
    read, ``self.data`` is an empty DataFrame rather than an exception.
    """

    def __init__(self, data_path=None):
        """Resolve the data file to use and load it.

        Args:
            data_path: Optional path to a user-supplied file. When it is
                falsy or does not exist on disk, the bundled
                ``data/sample_data.xlsx`` (located one directory above this
                module's directory) is used instead.
        """
        logging.info("Initializing DataProcessor")
        default_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx'
        )
        # Allow dynamic data path (for user uploads), fallback to default
        if data_path and os.path.exists(data_path):
            self.data_path = data_path
        else:
            if data_path:
                # A path was explicitly requested but is not there. Say so,
                # otherwise the caller silently gets the sample data and has
                # no way to notice the requested file was never read.
                logging.warning(
                    f"Data path {data_path} does not exist; "
                    f"falling back to default {default_path}"
                )
            self.data_path = default_path
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read ``path`` into a DataFrame based on its file extension.

        ``.csv`` is read with ``pd.read_csv``; ``.xls`` and ``.xlsx`` with
        ``pd.read_excel``. The extension comparison is case-insensitive.

        Args:
            path: Path of the file to read.

        Returns:
            The loaded DataFrame, or an empty DataFrame when the extension is
            unsupported or reading fails (the failure is logged as an error).
        """
        ext = os.path.splitext(path)[1].lower()
        try:
            if ext == '.csv':
                data = pd.read_csv(path)
            elif ext in ('.xls', '.xlsx'):
                data = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {ext}")
        except Exception as e:
            # Deliberate best-effort boundary: callers always get a DataFrame
            # back, so any read/parse problem is logged instead of raised.
            logging.error(f"Failed to load data: {e}")
            return pd.DataFrame()
        logging.info(f"Loaded data from {path} with shape {data.shape}")
        return data

    def validate_columns(self, required_columns):
        """Check that every name in ``required_columns`` is a loaded column.

        Args:
            required_columns: Iterable of column names expected in the data.

        Returns:
            A ``(ok, missing)`` tuple: ``(True, [])`` when all columns are
            present, otherwise ``(False, missing)`` where ``missing`` lists
            the absent names in the order they were requested.
        """
        missing = [col for col in required_columns if col not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
            return False, missing
        return True, []

    def get_columns(self):
        """Return the loaded data's column labels as a list."""
        return list(self.data.columns)

    def preview(self, n=5):
        """Return the first ``n`` rows as a list of ``{column: value}`` dicts.

        Args:
            n: Number of rows passed to ``DataFrame.head`` (default 5).
        """
        return self.data.head(n).to_dict(orient='records')