import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import torch
from torch import nn
# GAN-based anomaly detection for financial analysis
class GANRiskAnalyzer:
    def __init__(self, input_dim, hidden_dim, output_dim):
        # Generator maps random noise vectors to synthetic feature vectors
        self.generator = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Tanh()
        )
        # Discriminator scores how "real" a feature vector looks
        self.discriminator = nn.Sequential(
            nn.Linear(output_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
        self.loss = nn.BCELoss()
        self.generator_optimizer = torch.optim.Adam(self.generator.parameters(), lr=0.0002)
        self.discriminator_optimizer = torch.optim.Adam(self.discriminator.parameters(), lr=0.0002)

    def train(self, data, epochs=100):
        real_labels = torch.ones(data.size(0), 1)
        fake_labels = torch.zeros(data.size(0), 1)
        for epoch in range(epochs):
            # Train Discriminator on real data and on detached generator output
            self.discriminator_optimizer.zero_grad()
            real_output = self.discriminator(data)
            real_loss = self.loss(real_output, real_labels)
            z = torch.randn(data.size(0), data.size(1))
            fake_data = self.generator(z)
            fake_output = self.discriminator(fake_data.detach())
            fake_loss = self.loss(fake_output, fake_labels)
            d_loss = real_loss + fake_loss
            d_loss.backward()
            self.discriminator_optimizer.step()
            # Train Generator to fool the updated discriminator
            self.generator_optimizer.zero_grad()
            fake_output = self.discriminator(fake_data)
            g_loss = self.loss(fake_output, real_labels)
            g_loss.backward()
            self.generator_optimizer.step()

    def generate(self, n_samples, input_dim):
        # Sample noise and return synthetic points as a NumPy array
        z = torch.randn(n_samples, input_dim)
        generated_data = self.generator(z)
        return generated_data.detach().numpy()
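# Illustrative usage of the analyzer (not executed by the app; the tensor below is
# a stand-in for the scaled, PCA-reduced features produced later in this file):
#
#   features = torch.randn(256, 2)
#   analyzer = GANRiskAnalyzer(input_dim=2, hidden_dim=128, output_dim=2)
#   analyzer.train(features, epochs=200)
#   synthetic = analyzer.generate(n_samples=5, input_dim=2)  # 5 synthetic rows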
# Risk Analysis
def analyze_financial_data(file):
    # Gradio passes either a file path (newer versions) or a tempfile-like object
    file_path = file if isinstance(file, str) else file.name
    try:
        # Read the uploaded CSV file
        data = pd.read_csv(file_path, encoding="utf-8", on_bad_lines='skip')
    except UnicodeDecodeError:
        try:
            data = pd.read_csv(file_path, encoding="ISO-8859-1", on_bad_lines='skip')
        except Exception as e:
            return {"error": f"Failed to read file: {str(e)}"}
    except Exception as e:
        return {"error": f"An unexpected error occurred: {str(e)}"}
    if data.empty:
        return {"error": "The uploaded file is empty or has an invalid structure."}
    # Dynamically map required columns to similarly named columns in the CSV
    expected_columns = ["Revenue", "Profit", "Loss", "Expenses", "Risk_Level"]
    available_columns = data.columns.tolist()
    column_mapping = {}
    for expected_col in expected_columns:
        for available_col in available_columns:
            if expected_col.lower() in available_col.lower():
                column_mapping[expected_col] = available_col
                break
    if len(column_mapping) != len(expected_columns):
        return {"error": f"The CSV must contain columns similar to: {', '.join(expected_columns)}"}
    # Rename the matched columns to the canonical names used below
    data.rename(columns={v: k for k, v in column_mapping.items()}, inplace=True)
    try:
        # Data Preprocessing: drop rows with missing values in any required column
        cleaned = data.dropna(subset=expected_columns)
        if cleaned.empty:
            return {"error": "The data contains missing values or invalid rows after cleaning."}
        X = cleaned[["Revenue", "Profit", "Loss", "Expenses"]]
        y = cleaned["Risk_Level"]
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        # Dimensionality Reduction
        pca = PCA(n_components=2)
        X_pca = pca.fit_transform(X_scaled)
        # Train-Test Split
        X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
        # Gradient Boosting Classifier
        model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred, output_dict=True)
        # GAN-based Anomaly Detection
        gan = GANRiskAnalyzer(input_dim=X_pca.shape[1], hidden_dim=128, output_dim=X_pca.shape[1])
        gan.train(torch.tensor(X_pca, dtype=torch.float32), epochs=200)
        anomalies = gan.generate(n_samples=5, input_dim=X_pca.shape[1])
        total_revenue = data["Revenue"].sum()
        total_profit = data["Profit"].sum()
        total_loss = data["Loss"].sum()
        return {
            "Accuracy": f"{accuracy * 100:.2f}%",
            "Classification Report": report,
            "Generated Anomalies (GAN)": anomalies.tolist(),
            "Financial Summary": {
                "Total Revenue": f"${total_revenue:,.2f}",
                "Total Profit": f"${total_profit:,.2f}",
                "Total Loss": f"${total_loss:,.2f}",
                "Net Balance": f"${(total_revenue - total_loss):,.2f}"
            }
        }
    except Exception as e:
        return {"error": f"An error occurred during analysis: {str(e)}"}
# Gradio Interface
with gr.Blocks(theme=gr.themes.Monochrome()) as interface:
    gr.Markdown("# **AI Risk Analyst Agent**")
    gr.Markdown("Analyze your financial risks and identify anomalies using AI models.")
    with gr.Row():
        with gr.Column():
            data_file = gr.File(label="Upload Financial Data (CSV)", file_types=[".csv"])
            submit_button = gr.Button("Analyze")
        with gr.Column():
            output = gr.JSON(label="Risk Analysis Insights")
    submit_button.click(analyze_financial_data, inputs=data_file, outputs=output)

interface.launch()
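# To run locally (assumed environment): pip install gradio pandas numpy scikit-learn torch
# then start the app with:  python app.py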