umarigan committed on
Commit
97c72c9
·
verified ·
1 Parent(s): 864b189

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -52
app.py CHANGED
@@ -32,53 +32,6 @@ def create_embeddings(text):
32
  print("Embeddings created successfully.")
33
  return embeddings, sentences
34
 
35
- def generate_plot_bokeh(query, pdf_file):
36
- logging.info("Generating plot.")
37
- # Generate embeddings for the query
38
- query_embedding = model.encode([query])[0]
39
-
40
- # Process the PDF and create embeddings
41
- text = process_pdf(pdf_file.name)
42
- embeddings, sentences = create_embeddings(text)
43
-
44
- logging.info("Data prepared for UMAP.")
45
- # Prepare the data for UMAP and visualization
46
- all_embeddings = np.vstack([embeddings, query_embedding])
47
- all_sentences = sentences + [query]
48
-
49
- # UMAP transformation
50
- umap_transform = umap.UMAP(n_neighbors=15, min_dist=0.0, n_components=2, random_state=42)
51
- umap_embeddings = umap_transform.fit_transform(all_embeddings)
52
-
53
- logging.info("UMAP transformation completed.")
54
- # Find the closest sentences to the query
55
- distances = cosine_similarity([query_embedding], embeddings)[0]
56
- closest_indices = distances.argsort()[-5:][::-1] # Adjust the number as needed
57
-
58
- # Prepare data for plotting
59
- data = {
60
- 'x': umap_embeddings[:-1, 0], # Exclude the query point itself
61
- 'y': umap_embeddings[:-1, 1], # Exclude the query point itself
62
- 'content': all_sentences[:-1], # Exclude the query sentence itself
63
- 'color': ['red' if i in closest_indices else 'blue' for i in range(len(sentences))],
64
- }
65
- source = ColumnDataSource(data)
66
-
67
- # Create the Bokeh plot
68
- p = figure(title="UMAP Projection of Sentences", width=700, height=700)
69
- p.scatter('x', 'y', color='color', source=source)
70
-
71
- hover = HoverTool(tooltips=[("Content", "@content")])
72
- p.add_tools(hover)
73
-
74
- logging.info("Plot created successfully.")
75
- # Save the plot to an HTML file
76
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
77
- logging.info(f"temp file is {temp_file.name}")
78
- output_file(temp_file.name)
79
- save(p)
80
- logging.info("Plot saved to file.")
81
- return temp_file.name
82
  import plotly.express as px
83
  import plotly.graph_objects as go
84
 
@@ -106,16 +59,22 @@ def generate_plotly_figure(query, pdf_file):
106
  closest_indices = distances.argsort()[-5:][::-1] # Adjust the number as needed
107
 
108
  # Prepare data for plotting
109
- colors = ['red' if i in closest_indices else 'blue' for i in range(len(sentences))]
110
- fig = go.Figure()
111
- fig.add_trace(go.Scatter(x=umap_embeddings[:-1, 0], y=umap_embeddings[:-1, 1], mode='markers',
112
- marker=dict(color=colors), text=all_sentences[:-1]))
 
 
 
 
 
 
 
113
 
114
  fig.update_layout(title="UMAP Projection of Sentences", xaxis_title="UMAP 1", yaxis_title="UMAP 2")
115
 
116
  logging.info("Plotly figure created successfully.")
117
  return fig
118
-
119
  def gradio_interface(pdf_file, query):
120
  logging.info("Gradio interface called.")
121
  fig = generate_plotly_figure(query, pdf_file)
 
32
  print("Embeddings created successfully.")
33
  return embeddings, sentences
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  import plotly.express as px
36
  import plotly.graph_objects as go
37
 
 
59
  closest_indices = distances.argsort()[-5:][::-1] # Adjust the number as needed
60
 
61
  # Prepare data for plotting
62
+ colors = ['green' if i in closest_indices else 'blue' for i in range(len(sentences))] # Target points in green
63
+ colors.append('red') # Query point in red
64
+
65
+ # Add the scatter plot for sentences and query
66
+ fig = go.Figure(data=go.Scatter(x=umap_embeddings[:-1, 0], y=umap_embeddings[:-1, 1], mode='markers',
67
+ marker=dict(color=colors[:-1]), text=all_sentences[:-1],
68
+ name='Sentences'))
69
+
70
+ # Add the scatter plot for the query point
71
+ fig.add_trace(go.Scatter(x=[umap_embeddings[-1, 0]], y=[umap_embeddings[-1, 1]], mode='markers',
72
+ marker=dict(color='red'), text=[query], name='Query'))
73
 
74
  fig.update_layout(title="UMAP Projection of Sentences", xaxis_title="UMAP 1", yaxis_title="UMAP 2")
75
 
76
  logging.info("Plotly figure created successfully.")
77
  return fig
 
78
  def gradio_interface(pdf_file, query):
79
  logging.info("Gradio interface called.")
80
  fig = generate_plotly_figure(query, pdf_file)