gaur3009 committed on
Commit
20877cb
·
verified ·
1 Parent(s): 1df2fec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -45
app.py CHANGED
@@ -64,60 +64,73 @@ class GANRiskAnalyzer:
64
  def analyze_financial_data(file):
65
  try:
66
  # Attempt to read the CSV file
67
- data = pd.read_csv(file, encoding="utf-8")
68
  except UnicodeDecodeError:
69
  # Fallback for files with non-UTF-8 encodings
70
- data = pd.read_csv(file, encoding="ISO-8859-1")
71
-
 
 
 
 
 
 
72
  # Check required columns
73
  required_columns = ["Revenue", "Profit", "Loss", "Expenses", "Risk_Level"]
74
  if not all(column in data.columns for column in required_columns):
75
  return "The uploaded CSV must include these columns: Revenue, Profit, Loss, Expenses, Risk_Level."
76
 
77
  # Data Preprocessing
78
- X = data[["Revenue", "Profit", "Loss", "Expenses"]]
79
- y = data["Risk_Level"]
80
-
81
- scaler = StandardScaler()
82
- X_scaled = scaler.fit_transform(X)
83
-
84
- # Dimensionality Reduction
85
- pca = PCA(n_components=2)
86
- X_pca = pca.fit_transform(X_scaled)
87
-
88
- # Train-Test Split
89
- X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
90
-
91
- # Gradient Boosting Classifier
92
- model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5)
93
- model.fit(X_train, y_train)
94
- y_pred = model.predict(X_test)
95
-
96
- accuracy = accuracy_score(y_test, y_pred)
97
- report = classification_report(y_test, y_pred, output_dict=True)
98
-
99
- # GAN-based Anomaly Detection
100
- gan = GANRiskAnalyzer(input_dim=X_pca.shape[1], hidden_dim=128, output_dim=X_pca.shape[1])
101
- gan.train(torch.tensor(X_pca, dtype=torch.float32), epochs=200)
102
- anomalies = gan.generate(n_samples=5, input_dim=X_pca.shape[1])
103
-
104
- # Analysis Insights
105
- total_revenue = data["Revenue"].sum()
106
- total_profit = data["Profit"].sum()
107
- total_loss = data["Loss"].sum()
108
-
109
- insights = {
110
- "Accuracy": f"{accuracy * 100:.2f}%",
111
- "Classification Report": report,
112
- "Generated Anomalies (GAN)": anomalies.tolist(),
113
- "Financial Summary": {
114
- "Total Revenue": f"${total_revenue:,.2f}",
115
- "Total Profit": f"${total_profit:,.2f}",
116
- "Total Loss": f"${total_loss:,.2f}",
117
- "Net Balance": f"${(total_revenue - total_loss):,.2f}"
 
 
 
 
 
 
118
  }
119
- }
120
- return insights
 
121
 
122
  with gr.Blocks(theme=gr.themes.Monochrome()) as interface:
123
  gr.Markdown("# **AI Risk Analyst Agent**")
 
64
  def analyze_financial_data(file):
65
  try:
66
  # Attempt to read the CSV file
67
+ data = pd.read_csv(file, encoding="utf-8", error_bad_lines=False)
68
  except UnicodeDecodeError:
69
  # Fallback for files with non-UTF-8 encodings
70
+ data = pd.read_csv(file, encoding="ISO-8859-1", error_bad_lines=False)
71
+ except Exception as e:
72
+ return f"An error occurred while reading the file: {str(e)}"
73
+
74
+ # Handle empty or malformed data
75
+ if data.empty:
76
+ return "The uploaded file is empty or has an invalid structure."
77
+
78
  # Check required columns
79
  required_columns = ["Revenue", "Profit", "Loss", "Expenses", "Risk_Level"]
80
  if not all(column in data.columns for column in required_columns):
81
  return "The uploaded CSV must include these columns: Revenue, Profit, Loss, Expenses, Risk_Level."
82
 
83
  # Data Preprocessing
84
+ try:
85
+ X = data[["Revenue", "Profit", "Loss", "Expenses"]].dropna()
86
+ y = data["Risk_Level"].dropna()
87
+
88
+ # Check for empty rows after cleaning
89
+ if X.empty or y.empty:
90
+ return "The data has missing values or invalid rows after cleaning. Please check the file."
91
+
92
+ scaler = StandardScaler()
93
+ X_scaled = scaler.fit_transform(X)
94
+
95
+ # Dimensionality Reduction
96
+ pca = PCA(n_components=2)
97
+ X_pca = pca.fit_transform(X_scaled)
98
+
99
+ # Train-Test Split
100
+ X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
101
+
102
+ # Gradient Boosting Classifier
103
+ model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5)
104
+ model.fit(X_train, y_train)
105
+ y_pred = model.predict(X_test)
106
+
107
+ accuracy = accuracy_score(y_test, y_pred)
108
+ report = classification_report(y_test, y_pred, output_dict=True)
109
+
110
+ # GAN-based Anomaly Detection
111
+ gan = GANRiskAnalyzer(input_dim=X_pca.shape[1], hidden_dim=128, output_dim=X_pca.shape[1])
112
+ gan.train(torch.tensor(X_pca, dtype=torch.float32), epochs=200)
113
+ anomalies = gan.generate(n_samples=5, input_dim=X_pca.shape[1])
114
+
115
+ # Analysis Insights
116
+ total_revenue = data["Revenue"].sum()
117
+ total_profit = data["Profit"].sum()
118
+ total_loss = data["Loss"].sum()
119
+
120
+ insights = {
121
+ "Accuracy": f"{accuracy * 100:.2f}%",
122
+ "Classification Report": report,
123
+ "Generated Anomalies (GAN)": anomalies.tolist(),
124
+ "Financial Summary": {
125
+ "Total Revenue": f"${total_revenue:,.2f}",
126
+ "Total Profit": f"${total_profit:,.2f}",
127
+ "Total Loss": f"${total_loss:,.2f}",
128
+ "Net Balance": f"${(total_revenue - total_loss):,.2f}"
129
+ }
130
  }
131
+ return insights
132
+ except Exception as e:
133
+ return f"An error occurred during analysis: {str(e)}"
134
 
135
  with gr.Blocks(theme=gr.themes.Monochrome()) as interface:
136
  gr.Markdown("# **AI Risk Analyst Agent**")