Spaces:
Sleeping
Sleeping
Test spaces.GPU
Browse files
app.py
CHANGED
@@ -67,6 +67,7 @@ import matplotlib.colors as mcolors
|
|
67 |
import textwrap
|
68 |
import pandas as pd
|
69 |
import streamlit as st
|
|
|
70 |
|
71 |
# Configure logging
|
72 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -150,7 +151,7 @@ class UAPAnalyzer:
|
|
150 |
self.embeddings = self._extract_embeddings(string_data)
|
151 |
logging.info("Data preprocessing complete")
|
152 |
|
153 |
-
|
154 |
def _extract_embeddings(self, data_column):
|
155 |
"""
|
156 |
Extracts embeddings from the given data column.
|
@@ -165,6 +166,7 @@ class UAPAnalyzer:
|
|
165 |
# convert to str
|
166 |
return embed_model.encode(data_column.tolist(), show_progress_bar=True)
|
167 |
|
|
|
168 |
def reduce_dimensionality(self, method='UMAP', n_components=2, **kwargs):
|
169 |
"""
|
170 |
Reduces the dimensionality of embeddings using specified method.
|
@@ -184,7 +186,8 @@ class UAPAnalyzer:
|
|
184 |
|
185 |
self.reduced_embeddings = reducer.fit_transform(self.embeddings)
|
186 |
logging.info(f"Dimensionality reduced using {method}")
|
187 |
-
|
|
|
188 |
def cluster_data(self, method='HDBSCAN', **kwargs):
|
189 |
"""
|
190 |
Clusters the reduced dimensionality data using the specified clustering method.
|
@@ -205,7 +208,7 @@ class UAPAnalyzer:
|
|
205 |
self.cluster_labels = clusterer.labels_
|
206 |
logging.info(f"Data clustering complete using {method}")
|
207 |
|
208 |
-
|
209 |
def get_tf_idf_clusters(self, top_n=2):
|
210 |
"""
|
211 |
Names clusters using the most frequent terms based on TF-IDF analysis.
|
@@ -387,7 +390,8 @@ class UAPAnalyzer:
|
|
387 |
# Update string labels to reflect merged labels
|
388 |
updated_string_labels = [cluster_terms[label] for label in updated_cluster_labels]
|
389 |
return updated_string_labels
|
390 |
-
|
|
|
391 |
def cluster_cosine(self, cluster_terms, cluster_labels, similarity_threshold):
|
392 |
from sklearn.metrics.pairwise import cosine_similarity
|
393 |
|
@@ -650,6 +654,7 @@ class UAPAnalyzer:
|
|
650 |
|
651 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
652 |
|
|
|
653 |
def analyze_and_predict(data, analyzers, col_names):
|
654 |
"""
|
655 |
Performs analysis on the data using provided analyzers and makes predictions on specified columns.
|
@@ -677,6 +682,8 @@ def analyze_and_predict(data, analyzers, col_names):
|
|
677 |
logging.error(f"Error processing {col}: {e}")
|
678 |
return new_data
|
679 |
|
|
|
|
|
680 |
def train_xgboost(x_train, y_train, x_test, y_test, num_classes):
|
681 |
"""
|
682 |
Trains an XGBoost model and evaluates its performance.
|
@@ -788,7 +795,7 @@ def plot_cramers_v_heatmap(data, significance_level=0.05):
|
|
788 |
class UAPVisualizer:
|
789 |
def __init__(self, data=None):
|
790 |
pass # Initialization can be added if needed
|
791 |
-
|
792 |
def analyze_and_predict(self, data, analyzers, col_names):
|
793 |
new_data = pd.DataFrame()
|
794 |
for i, (column, analyzer) in enumerate(zip(col_names, analyzers)):
|
|
|
67 |
import textwrap
|
68 |
import pandas as pd
|
69 |
import streamlit as st
|
70 |
+
import spaces
|
71 |
|
72 |
# Configure logging
|
73 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
151 |
self.embeddings = self._extract_embeddings(string_data)
|
152 |
logging.info("Data preprocessing complete")
|
153 |
|
154 |
+
@spaces.GPU
|
155 |
def _extract_embeddings(self, data_column):
|
156 |
"""
|
157 |
Extracts embeddings from the given data column.
|
|
|
166 |
# convert to str
|
167 |
return embed_model.encode(data_column.tolist(), show_progress_bar=True)
|
168 |
|
169 |
+
@spaces.GPU
|
170 |
def reduce_dimensionality(self, method='UMAP', n_components=2, **kwargs):
|
171 |
"""
|
172 |
Reduces the dimensionality of embeddings using specified method.
|
|
|
186 |
|
187 |
self.reduced_embeddings = reducer.fit_transform(self.embeddings)
|
188 |
logging.info(f"Dimensionality reduced using {method}")
|
189 |
+
|
190 |
+
@spaces.GPU
|
191 |
def cluster_data(self, method='HDBSCAN', **kwargs):
|
192 |
"""
|
193 |
Clusters the reduced dimensionality data using the specified clustering method.
|
|
|
208 |
self.cluster_labels = clusterer.labels_
|
209 |
logging.info(f"Data clustering complete using {method}")
|
210 |
|
211 |
+
@spaces.GPU
|
212 |
def get_tf_idf_clusters(self, top_n=2):
|
213 |
"""
|
214 |
Names clusters using the most frequent terms based on TF-IDF analysis.
|
|
|
390 |
# Update string labels to reflect merged labels
|
391 |
updated_string_labels = [cluster_terms[label] for label in updated_cluster_labels]
|
392 |
return updated_string_labels
|
393 |
+
|
394 |
+
@spaces.GPU
|
395 |
def cluster_cosine(self, cluster_terms, cluster_labels, similarity_threshold):
|
396 |
from sklearn.metrics.pairwise import cosine_similarity
|
397 |
|
|
|
654 |
|
655 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
656 |
|
657 |
+
@spaces.GPU
|
658 |
def analyze_and_predict(data, analyzers, col_names):
|
659 |
"""
|
660 |
Performs analysis on the data using provided analyzers and makes predictions on specified columns.
|
|
|
682 |
logging.error(f"Error processing {col}: {e}")
|
683 |
return new_data
|
684 |
|
685 |
+
|
686 |
+
@spaces.GPU
|
687 |
def train_xgboost(x_train, y_train, x_test, y_test, num_classes):
|
688 |
"""
|
689 |
Trains an XGBoost model and evaluates its performance.
|
|
|
795 |
class UAPVisualizer:
|
796 |
def __init__(self, data=None):
|
797 |
pass # Initialization can be added if needed
|
798 |
+
|
799 |
def analyze_and_predict(self, data, analyzers, col_names):
|
800 |
new_data = pd.DataFrame()
|
801 |
for i, (column, analyzer) in enumerate(zip(col_names, analyzers)):
|