Ashoka74 committed
Commit 70f1c51 · verified · 1 Parent(s): 702acaf

Test spaces.GPU

Files changed (1)
  1. app.py +12 -5
app.py CHANGED
@@ -67,6 +67,7 @@ import matplotlib.colors as mcolors
 import textwrap
 import pandas as pd
 import streamlit as st
+import spaces
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -150,7 +151,7 @@ class UAPAnalyzer:
         self.embeddings = self._extract_embeddings(string_data)
         logging.info("Data preprocessing complete")
 
-
+    @spaces.GPU
     def _extract_embeddings(self, data_column):
         """
         Extracts embeddings from the given data column.
@@ -165,6 +166,7 @@ class UAPAnalyzer:
         # convert to str
         return embed_model.encode(data_column.tolist(), show_progress_bar=True)
 
+    @spaces.GPU
     def reduce_dimensionality(self, method='UMAP', n_components=2, **kwargs):
         """
         Reduces the dimensionality of embeddings using specified method.
@@ -184,7 +186,8 @@ class UAPAnalyzer:
 
         self.reduced_embeddings = reducer.fit_transform(self.embeddings)
         logging.info(f"Dimensionality reduced using {method}")
-
+
+    @spaces.GPU
     def cluster_data(self, method='HDBSCAN', **kwargs):
         """
         Clusters the reduced dimensionality data using the specified clustering method.
@@ -205,7 +208,7 @@ class UAPAnalyzer:
         self.cluster_labels = clusterer.labels_
         logging.info(f"Data clustering complete using {method}")
 
-
+    @spaces.GPU
     def get_tf_idf_clusters(self, top_n=2):
         """
         Names clusters using the most frequent terms based on TF-IDF analysis.
@@ -387,7 +390,8 @@ class UAPAnalyzer:
         # Update string labels to reflect merged labels
         updated_string_labels = [cluster_terms[label] for label in updated_cluster_labels]
         return updated_string_labels
-
+
+    @spaces.GPU
     def cluster_cosine(self, cluster_terms, cluster_labels, similarity_threshold):
         from sklearn.metrics.pairwise import cosine_similarity
 
@@ -650,6 +654,7 @@ class UAPAnalyzer:
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
+@spaces.GPU
 def analyze_and_predict(data, analyzers, col_names):
     """
     Performs analysis on the data using provided analyzers and makes predictions on specified columns.
@@ -677,6 +682,8 @@ def analyze_and_predict(data, analyzers, col_names):
             logging.error(f"Error processing {col}: {e}")
     return new_data
 
+
+@spaces.GPU
 def train_xgboost(x_train, y_train, x_test, y_test, num_classes):
     """
     Trains an XGBoost model and evaluates its performance.
@@ -788,7 +795,7 @@ def plot_cramers_v_heatmap(data, significance_level=0.05):
 class UAPVisualizer:
     def __init__(self, data=None):
         pass  # Initialization can be added if needed
-
+
     def analyze_and_predict(self, data, analyzers, col_names):
         new_data = pd.DataFrame()
         for i, (column, analyzer) in enumerate(zip(col_names, analyzers)):
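
For context, spaces.GPU is the ZeroGPU decorator from Hugging Face's spaces package: a function decorated with it is granted a GPU for the duration of each call and releases it when the call returns. The sketch below is a minimal illustration of the pattern this commit applies; the duration argument, the model name, and the encode_texts helper are assumptions for illustration, not code taken from app.py.

import spaces
import torch
from sentence_transformers import SentenceTransformer

# Loaded once at startup; on a ZeroGPU Space a device is only attached
# while a @spaces.GPU-decorated call is running.
embed_model = SentenceTransformer('all-MiniLM-L6-v2')  # assumed model, for illustration

@spaces.GPU(duration=120)  # optional per-call duration in seconds; plain @spaces.GPU also works
def encode_texts(texts):
    # CUDA is available inside the decorated call, so the model can be moved to the GPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    embed_model.to(device)
    return embed_model.encode(texts, show_progress_bar=True)

The decorator only pays off for code that actually runs on CUDA (here, the sentence-embedding step); purely CPU-bound routines such as the XGBoost training still run when decorated, but they reserve a GPU slot for the length of the call.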