Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,53 +32,6 @@ def create_embeddings(text):
|
|
32 |
print("Embeddings created successfully.")
|
33 |
return embeddings, sentences
|
34 |
|
35 |
-
def generate_plot_bokeh(query, pdf_file):
|
36 |
-
logging.info("Generating plot.")
|
37 |
-
# Generate embeddings for the query
|
38 |
-
query_embedding = model.encode([query])[0]
|
39 |
-
|
40 |
-
# Process the PDF and create embeddings
|
41 |
-
text = process_pdf(pdf_file.name)
|
42 |
-
embeddings, sentences = create_embeddings(text)
|
43 |
-
|
44 |
-
logging.info("Data prepared for UMAP.")
|
45 |
-
# Prepare the data for UMAP and visualization
|
46 |
-
all_embeddings = np.vstack([embeddings, query_embedding])
|
47 |
-
all_sentences = sentences + [query]
|
48 |
-
|
49 |
-
# UMAP transformation
|
50 |
-
umap_transform = umap.UMAP(n_neighbors=15, min_dist=0.0, n_components=2, random_state=42)
|
51 |
-
umap_embeddings = umap_transform.fit_transform(all_embeddings)
|
52 |
-
|
53 |
-
logging.info("UMAP transformation completed.")
|
54 |
-
# Find the closest sentences to the query
|
55 |
-
distances = cosine_similarity([query_embedding], embeddings)[0]
|
56 |
-
closest_indices = distances.argsort()[-5:][::-1] # Adjust the number as needed
|
57 |
-
|
58 |
-
# Prepare data for plotting
|
59 |
-
data = {
|
60 |
-
'x': umap_embeddings[:-1, 0], # Exclude the query point itself
|
61 |
-
'y': umap_embeddings[:-1, 1], # Exclude the query point itself
|
62 |
-
'content': all_sentences[:-1], # Exclude the query sentence itself
|
63 |
-
'color': ['red' if i in closest_indices else 'blue' for i in range(len(sentences))],
|
64 |
-
}
|
65 |
-
source = ColumnDataSource(data)
|
66 |
-
|
67 |
-
# Create the Bokeh plot
|
68 |
-
p = figure(title="UMAP Projection of Sentences", width=700, height=700)
|
69 |
-
p.scatter('x', 'y', color='color', source=source)
|
70 |
-
|
71 |
-
hover = HoverTool(tooltips=[("Content", "@content")])
|
72 |
-
p.add_tools(hover)
|
73 |
-
|
74 |
-
logging.info("Plot created successfully.")
|
75 |
-
# Save the plot to an HTML file
|
76 |
-
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
|
77 |
-
logging.info(f"temp file is {temp_file.name}")
|
78 |
-
output_file(temp_file.name)
|
79 |
-
save(p)
|
80 |
-
logging.info("Plot saved to file.")
|
81 |
-
return temp_file.name
|
82 |
import plotly.express as px
|
83 |
import plotly.graph_objects as go
|
84 |
|
@@ -106,16 +59,22 @@ def generate_plotly_figure(query, pdf_file):
|
|
106 |
closest_indices = distances.argsort()[-5:][::-1] # Adjust the number as needed
|
107 |
|
108 |
# Prepare data for plotting
|
109 |
-
colors = ['
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
fig.update_layout(title="UMAP Projection of Sentences", xaxis_title="UMAP 1", yaxis_title="UMAP 2")
|
115 |
|
116 |
logging.info("Plotly figure created successfully.")
|
117 |
return fig
|
118 |
-
|
119 |
def gradio_interface(pdf_file, query):
|
120 |
logging.info("Gradio interface called.")
|
121 |
fig = generate_plotly_figure(query, pdf_file)
|
|
|
32 |
print("Embeddings created successfully.")
|
33 |
return embeddings, sentences
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
import plotly.express as px
|
36 |
import plotly.graph_objects as go
|
37 |
|
|
|
59 |
closest_indices = distances.argsort()[-5:][::-1] # Adjust the number as needed
|
60 |
|
61 |
# Prepare data for plotting
|
62 |
+
colors = ['green' if i in closest_indices else 'blue' for i in range(len(sentences))] # Target points in green
|
63 |
+
colors.append('red') # Query point in red
|
64 |
+
|
65 |
+
# Add the scatter plot for the sentences (the query point is excluded here and drawn as its own trace below)
|
66 |
+
fig = go.Figure(data=go.Scatter(x=umap_embeddings[:-1, 0], y=umap_embeddings[:-1, 1], mode='markers',
|
67 |
+
marker=dict(color=colors[:-1]), text=all_sentences[:-1],
|
68 |
+
name='Sentences'))
|
69 |
+
|
70 |
+
# Add the scatter plot for the query point
|
71 |
+
fig.add_trace(go.Scatter(x=[umap_embeddings[-1, 0]], y=[umap_embeddings[-1, 1]], mode='markers',
|
72 |
+
marker=dict(color='red'), text=[query], name='Query'))
|
73 |
|
74 |
fig.update_layout(title="UMAP Projection of Sentences", xaxis_title="UMAP 1", yaxis_title="UMAP 2")
|
75 |
|
76 |
logging.info("Plotly figure created successfully.")
|
77 |
return fig
|
|
|
78 |
def gradio_interface(pdf_file, query):
|
79 |
logging.info("Gradio interface called.")
|
80 |
fig = generate_plotly_figure(query, pdf_file)
|