lambdaofgod committed on
Commit
28aae69
·
1 Parent(s): 524b123

added dependency graph visualizations

Browse files
Files changed (2) hide show
  1. app.py +26 -7
  2. graph_visualizations.py +522 -0
app.py CHANGED
@@ -5,7 +5,8 @@ import re
5
  from task_visualizations import TaskVisualizations
6
  import plotly.graph_objects as go
7
  from functools import partial
8
- from text_visualization import WordCloudExtractor, EmbeddingVisualizer
 
9
 
10
  logging.basicConfig(level=logging.INFO)
11
 
@@ -56,7 +57,15 @@ def setup_repository_representations_tab(repos, representation_types):
56
 
57
  wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
58
  gr.Markdown("## Wordclouds")
59
- gr.Gallery([(wordcloud, representation_type) for representation_type, wordcloud in wordcloud_dict.items()], columns=[3], rows=[4], height=300)
 
 
 
 
 
 
 
 
60
 
61
  gr.Markdown("Select a repository and two representation types to compare them.")
62
  with gr.Row():
@@ -115,8 +124,11 @@ def load_embeddings_intro_description():
115
  In the first scatterplot we display PapersWithCode tasks that are colored by area.
116
  """
117
 
 
118
  def load_embeddings_description():
119
  return
 
 
120
  ## main
121
  repos_df = load_repo_df(AppConfig.repo_representations_path)
122
  repos = list(repos_df["repo_name"].unique())
@@ -135,8 +147,7 @@ embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
135
 
136
  descriptions = {
137
  "intro": load_embeddings_intro_description(),
138
-
139
- "Basic representations": """Now we show the embeddings of tasks and repos, using various texts or representations.
140
 
141
  The fact that selected code and/or dependency signatures (containing mostly repo's file names) are dissimilar from task names
142
  should not be surprising. For our problem this illustrates the fact that these representations work poorly for retrieval.
@@ -151,10 +162,12 @@ descriptions = {
151
 
152
  Below we can also see embeddings of task names with MPNet after dimensionality reduction with UMAP.
153
  MPNet, a sentence-transformer model, the embeddings visibly separate tasks by area.
154
- """
155
  }
156
 
157
  with gr.Blocks() as demo:
 
 
158
  with gr.Tab("Explore Repository Embeddings"):
159
 
160
  tab_elems = [
@@ -163,8 +176,14 @@ with gr.Blocks() as demo:
163
  gr.Plot(embedding_visualizer.make_task_area_scatterplot()),
164
  ]
165
 
166
- embedding_plots = embedding_visualizer.make_embedding_plots(color_col="representation")
167
- for plot_name in ["Basic representations", "Dependency graph based representations", "READMEs"]:
 
 
 
 
 
 
168
  tab_elems.append(gr.Markdown(f"## {plot_name}"))
169
  if descriptions.get(plot_name):
170
  tab_elems.append(gr.Markdown(descriptions[plot_name]))
 
5
  from task_visualizations import TaskVisualizations
6
  import plotly.graph_objects as go
7
  from functools import partial
8
+ from text_visualization import WordCloudExtractor, EmbeddingVisualizer
9
+ from graph_visualizations import graph_tab
10
 
11
  logging.basicConfig(level=logging.INFO)
12
 
 
57
 
58
  wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
59
  gr.Markdown("## Wordclouds")
60
+ gr.Gallery(
61
+ [
62
+ (wordcloud, representation_type)
63
+ for representation_type, wordcloud in wordcloud_dict.items()
64
+ ],
65
+ columns=[3],
66
+ rows=[4],
67
+ height=300,
68
+ )
69
 
70
  gr.Markdown("Select a repository and two representation types to compare them.")
71
  with gr.Row():
 
124
  In the first scatterplot we display PapersWithCode tasks that are colored by area.
125
  """
126
 
127
+
128
  def load_embeddings_description():
129
  return
130
+
131
+
132
  ## main
133
  repos_df = load_repo_df(AppConfig.repo_representations_path)
134
  repos = list(repos_df["repo_name"].unique())
 
147
 
148
  descriptions = {
149
  "intro": load_embeddings_intro_description(),
150
+ "Basic representations": """Now we show the embeddings of tasks and repos, using various texts or representations.
 
151
 
152
  The fact that selected code and/or dependency signatures (containing mostly repo's file names) are dissimilar from task names
153
  should not be surprising. For our problem this illustrates the fact that these representations work poorly for retrieval.
 
162
 
163
  Below we can also see embeddings of task names with MPNet after dimensionality reduction with UMAP.
164
  MPNet, a sentence-transformer model, the embeddings visibly separate tasks by area.
165
+ """,
166
  }
167
 
168
  with gr.Blocks() as demo:
169
+ with gr.Tab("Explore Dependency Graphs"):
170
+ graph_tab()
171
  with gr.Tab("Explore Repository Embeddings"):
172
 
173
  tab_elems = [
 
176
  gr.Plot(embedding_visualizer.make_task_area_scatterplot()),
177
  ]
178
 
179
+ embedding_plots = embedding_visualizer.make_embedding_plots(
180
+ color_col="representation"
181
+ )
182
+ for plot_name in [
183
+ "Basic representations",
184
+ "Dependency graph based representations",
185
+ "READMEs",
186
+ ]:
187
  tab_elems.append(gr.Markdown(f"## {plot_name}"))
188
  if descriptions.get(plot_name):
189
  tab_elems.append(gr.Markdown(descriptions[plot_name]))
graph_visualizations.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import networkx as nx
4
+ import tqdm
5
+ import plotly.graph_objects as go
6
+ import plotly.express as px
7
+ from datasets import load_dataset
8
+ import pandas as pd
9
+
10
+
11
def load_graph_from_edge_df(
    repo_name: str,
    edge_df: pd.DataFrame,
) -> nx.DiGraph:
    """
    Create a NetworkX directed graph from the dependency edge DataFrame.

    Only the rows whose ``repo_name`` column equals ``repo_name`` are used.
    Every edge type is kept, so degree-based statistics computed on the
    result reflect the whole repository graph.

    Args:
        repo_name: Name of the repository to filter by
        edge_df: DataFrame with columns [repo_name, target, source, edge_type]

    Returns:
        NetworkX DiGraph with one edge per (source, target) pair; each edge
        carries ``edge_type`` and ``repo_name`` attributes. When the same
        pair occurs in several rows, the last row's attributes win.
    """
    repo_edge_df = edge_df[edge_df["repo_name"] == repo_name]

    G = nx.DiGraph()
    # itertuples over just the needed columns avoids building one Series per
    # row, which is what makes DataFrame.iterrows() slow on large edge lists.
    edge_rows = repo_edge_df[["source", "target", "edge_type"]].itertuples(
        index=False, name=None
    )
    for source, target, edge_type in edge_rows:
        G.add_edge(source, target, edge_type=edge_type, repo_name=repo_name)

    return G
39
+
40
+
41
def init_graphs():
    """Initialize graphs from dependency data on startup.

    Downloads ``sample_repo_dependency_records.parquet`` from the
    ``lambdaofgod/pwc_github_search`` dataset on the HuggingFace Hub and
    builds one directed graph per repository.

    Returns:
        dict mapping repository name to the graph returned by
        ``load_graph_from_edge_df``, in order of first appearance of each
        repository in the data.
    """
    print("Loading dependency data from HuggingFace Hub...")
    dataset = load_dataset(
        "lambdaofgod/pwc_github_search",
        data_files="sample_repo_dependency_records.parquet",
    )
    graph_dependencies_df = dataset["train"].to_pandas()

    # Split the frame once instead of re-filtering every row for every
    # repository. sort=False keeps repositories in order of appearance;
    # rows with a missing repo_name are skipped by groupby's default.
    edges_by_repo = graph_dependencies_df.groupby("repo_name", sort=False)

    print(f"Loading {edges_by_repo.ngroups} graphs...")
    graphs = {
        repo_name: load_graph_from_edge_df(repo_name, repo_edge_df)
        for repo_name, repo_edge_df in tqdm.tqdm(
            edges_by_repo, total=edges_by_repo.ngroups
        )
    }

    print("Graphs loaded successfully!")
    return graphs
60
+
61
+
62
def get_node_type(node, graph):
    """Determine node type based on edge relationships.

    The type is inferred from the ``edge_type`` attribute of the edges that
    enter or leave ``node``. Checks run in a fixed priority order and the
    first match wins: repository, file, import, class, function, method.

    Args:
        node: A node of ``graph``.
        graph: Directed graph providing ``in_edges`` / ``out_edges``
            (e.g. ``networkx.DiGraph``) whose edges carry ``edge_type``.

    Returns:
        One of "repository", "file", "import", "class", "function",
        "method" or "unknown".
    """
    node_str = str(node)

    # Only edges touching the node matter, so read them directly rather than
    # scanning every edge of the graph once per check.
    outgoing = [
        data.get("edge_type", "") for _, _, data in graph.out_edges(node, data=True)
    ]
    incoming = [
        data.get("edge_type", "") for _, _, data in graph.in_edges(node, data=True)
    ]

    # Repository: has '/' in its name and is the source of a repo-file edge
    if "/" in node_str and "repo-file" in outgoing:
        return "repository"

    # File: target of a repo-file edge or source of any file-* edge
    if ".py" in node_str:
        if "repo-file" in incoming:
            return "file"
        # isinstance guard: a missing/non-string edge type must not crash here
        if any(
            isinstance(edge_type, str) and edge_type.startswith("file-")
            for edge_type in outgoing
        ):
            return "file"

    # Import: target of file-import, or either end of import-import
    if (
        "file-import" in incoming
        or "import-import" in incoming
        or "import-import" in outgoing
    ):
        return "import"

    # Class: target of file-class, or source of class-method / inheritance
    if (
        "file-class" in incoming
        or "class-method" in outgoing
        or "inheritance" in outgoing
    ):
        return "class"

    # Function: target of file-function, or either end of function-function
    if (
        "file-function" in incoming
        or "function-function" in incoming
        or "function-function" in outgoing
    ):
        return "function"

    # Method: target of a class-method edge
    if "class-method" in incoming:
        return "method"

    # Default fallback
    return "unknown"
115
+
116
+
117
def _compute_node_positions(graph, layout_type):
    """Return a {node: (x, y)} layout of ``graph`` for the named layout."""
    if layout_type == "planar":
        try:
            return nx.planar_layout(graph)
        except nx.NetworkXException:
            # Fallback to spring layout if graph is not planar
            return nx.spring_layout(graph, k=1, iterations=50)

    layouts = {
        "spring": lambda g: nx.spring_layout(g, k=1, iterations=100),
        "circular": nx.circular_layout,
        "kamada_kawai": nx.kamada_kawai_layout,
        "fruchterman_reingold": lambda g: nx.fruchterman_reingold_layout(
            g, k=1, iterations=100
        ),
        "shell": nx.shell_layout,
        "spectral": nx.spectral_layout,
    }
    layout_fn = layouts.get(layout_type)
    if layout_fn is None:
        # Unknown layout name: shorter spring layout as a default
        return nx.spring_layout(graph, k=1, iterations=50)
    return layout_fn(graph)


def create_interactive_plotly_graph(
    repo_name, graph, layout_type="spring", selected_edge_types=None
):
    """Create an interactive Plotly graph with node names and edge types.

    Args:
        repo_name: Repository name, used only in the figure title.
        graph: NetworkX graph whose edges carry an ``edge_type`` attribute.
        layout_type: One of "spring", "circular", "kamada_kawai",
            "fruchterman_reingold", "shell", "spectral", "planar"; any other
            value falls back to a spring layout.
        selected_edge_types: Collection of edge types to draw. ``None`` or an
            empty collection means no filtering (all edges and all nodes).

    Returns:
        A ``plotly.graph_objects.Figure`` with one trace for edges and one
        for nodes. Node size and opacity grow with the node's degree in the
        full (unfiltered) graph; node color encodes the inferred node type.
    """
    if selected_edge_types is None:
        selected_edge_types = set()

    # Positions are computed on the full graph so nodes keep their place
    # regardless of which edge types are currently shown.
    pos = _compute_node_positions(graph, layout_type)

    # Filter edges based on selected edge types (empty selection = all)
    filtered_edges = [
        edge
        for edge in graph.edges(data=True)
        if not selected_edge_types
        or edge[2].get("edge_type", "unknown") in selected_edge_types
    ]

    # Line segments for a single Scatter trace; None breaks the line
    edge_x = []
    edge_y = []
    for source, target, _ in filtered_edges:
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    # Create edge trace
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=1, color="#888"),
        hoverinfo="none",
        mode="lines",
        name="Edges",
    )

    # Define color scheme for node types
    node_type_colors = {
        "repository": "#FF6B6B",  # Red
        "file": "#4ECDC4",  # Teal
        "class": "#45B7D1",  # Blue
        "function": "#96CEB4",  # Green
        "method": "#FFEAA7",  # Yellow
        "import": "#FF9F43",  # Orange
        "unknown": "#DDA0DD",  # Plum
    }

    if selected_edge_types:
        # Only nodes touched by a visible edge are drawn
        connected_nodes = set()
        for source, target, _ in filtered_edges:
            connected_nodes.add(source)
            connected_nodes.add(target)
    else:
        # If no edges are selected, show all nodes
        connected_nodes = set(graph.nodes())

    # Calculate degree statistics for opacity normalization
    degrees = [graph.degree(node) for node in connected_nodes]
    min_degree = min(degrees) if degrees else 0
    max_degree = max(degrees) if degrees else 1
    degree_range = max_degree - min_degree if max_degree > min_degree else 1

    # Extract node information
    node_x = []
    node_y = []
    node_text = []
    node_info = []
    node_colors = []
    node_sizes = []
    node_opacities = []

    for node in connected_nodes:
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        node_type = get_node_type(node, graph)
        degree = graph.degree(node)

        # Scale size between 8 and 25 based on degree
        node_sizes.append(max(8, min(25, 8 + degree * 1.5)))

        # Opacity from normalized degree, range 0.3 to 1.0
        normalized_degree = (degree - min_degree) / degree_range
        node_opacities.append(0.3 + (normalized_degree * 0.7))

        # Truncate long node names for display
        display_name = str(node)
        if len(display_name) > 30:
            display_name = display_name[:27] + "..."
        node_text.append(display_name)

        node_info.append(f"Node: {node}<br>Type: {node_type}<br>Degree: {degree}")

        # Color nodes by type
        node_colors.append(node_type_colors.get(node_type, node_type_colors["unknown"]))

    # Create node trace
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers+text",
        hoverinfo="text",
        hovertext=node_info,
        text=node_text,
        textposition="middle center",
        textfont=dict(size=8, color="rgba(0,0,0,0.6)"),  # Semi-transparent text
        marker=dict(
            size=node_sizes,
            color=node_colors,
            line=dict(width=1, color="black"),
            opacity=node_opacities,  # Variable opacity based on degree
        ),
        name="Nodes",
    )

    # Create the figure
    fig = go.Figure(data=[edge_trace, node_trace])

    fig.update_layout(
        title=dict(
            text=f"Interactive Dependency Graph: {repo_name}", font=dict(size=16)
        ),
        showlegend=True,
        hovermode="closest",
        margin=dict(b=20, l=5, r=5, t=40),
        annotations=[
            dict(
                text="Hover over nodes for details. Zoom and pan to explore.",
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.005,
                y=-0.002,
            )
        ],
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor="white",
    )

    return fig
292
+
293
+
294
def get_available_edge_types(graph):
    """Get all unique edge types in the graph, in display order.

    Args:
        graph: Graph whose ``edges(data=True)`` yields ``(source, target,
            data)`` triples; a missing ``edge_type`` counts as "unknown".

    Returns:
        List of the edge types present in the graph: the well-known types
        first, in their preferred order, followed by any other types sorted
        alphabetically.
    """
    present_types = {
        data.get("edge_type", "unknown") for _, _, data in graph.edges(data=True)
    }

    # Define preferred order
    preferred_order = [
        "repo-file",
        "file-class",
        "file-import",
        "inheritance",
        "import-import",
        "file-function",
        "class-method",
        "function-function",
    ]

    # Known types in preferred order, then the remaining ones alphabetically
    ordered_types = [
        edge_type for edge_type in preferred_order if edge_type in present_types
    ]
    ordered_types.extend(sorted(present_types.difference(preferred_order)))

    return ordered_types
324
+
325
+
326
def visualize_graph(
    repo_name, graphs_dict, layout_type="spring", selected_edge_types=None
):
    """Visualize the selected repository's graph.

    Args:
        repo_name: Key of the repository in ``graphs_dict``; ``None`` means
            nothing is selected.
        graphs_dict: Mapping of repository name to its dependency graph.
        layout_type: Layout name passed to ``create_interactive_plotly_graph``.
        selected_edge_types: Collection of edge types to show. ``None`` or
            an empty collection means all edges and all nodes are shown.

    Returns:
        Tuple ``(figure, stats)``. ``figure`` is ``None`` and ``stats`` is an
        explanatory message when no repository is selected or the repository
        is unknown; otherwise ``stats`` is a text summary of the visible
        nodes and edges by type.
    """
    from collections import Counter

    # The None check must come first: None is normally not a key of
    # graphs_dict, so checking membership first would hide this message.
    if repo_name is None:
        return None, "Please select a repository."

    if repo_name not in graphs_dict:
        return None, f"Repository '{repo_name}' not found in loaded graphs."

    graph = graphs_dict[repo_name]

    # Create interactive Plotly graph
    fig = create_interactive_plotly_graph(
        repo_name, graph, layout_type, selected_edge_types
    )

    # Count visible edges per type and collect the nodes they touch
    edge_type_counts = Counter()
    connected_nodes = set()
    for source, target, data in graph.edges(data=True):
        edge_type = data.get("edge_type", "unknown")
        if not selected_edge_types or edge_type in selected_edge_types:
            edge_type_counts[edge_type] += 1
            connected_nodes.add(source)
            connected_nodes.add(target)
    filtered_edge_count = sum(edge_type_counts.values())

    # Without a filter every node is visible, including isolated ones
    if not selected_edge_types:
        connected_nodes = set(graph.nodes())

    edge_type_summary = "\n".join(
        f" {edge_type}: {count}" for edge_type, count in edge_type_counts.items()
    )

    # Node type statistics for visible nodes
    node_type_counts = Counter(get_node_type(node, graph) for node in connected_nodes)
    node_type_summary = "\n".join(
        f" {node_type}: {count}" for node_type, count in node_type_counts.items()
    )

    stats = f"""Repository: {repo_name}
Visible nodes: {len(connected_nodes)} / {graph.number_of_nodes()}
Visible edges: {filtered_edge_count} / {graph.number_of_edges()}

Visible node types:
{node_type_summary}

Visible edge types:
{edge_type_summary}
"""

    return fig, stats
389
+
390
+
391
def graph_tab():
    """Build the "dependency graph" tab inside the current Gradio context.

    Loads all repository graphs via ``init_graphs`` and creates a repository
    dropdown, a layout dropdown, a fixed pool of edge-type checkboxes, a
    "Visualize Graph" button, a statistics textbox and the plot. The plot is
    redrawn when the button is clicked or when the repository, the layout or
    any checkbox changes.
    """
    # Size of the checkbox pool; edge types beyond this cannot be toggled
    max_edge_type_checkboxes = 8

    gr.Markdown("# Dependency Graph Visualization")
    gr.Markdown("Select a repository to visualize its dependency graph.")
    graphs_dict = init_graphs()
    repo_names = list(graphs_dict.keys())

    def is_checked_by_default(edge_type):
        # function-function should be unchecked by default
        return edge_type != "function-function"

    def plot_selected_repo(repo_name, layout_type, *edge_type_checkboxes):
        """Redraw the graph using the checked edge types."""
        # Checkboxes are positional: the i-th checkbox stands for the i-th
        # available edge type of the selected repository.
        edge_types = (
            get_available_edge_types(graphs_dict[repo_name])
            if repo_name in graphs_dict
            else []
        )
        selected_edge_types = {
            edge_type
            for edge_type, is_selected in zip(edge_types, edge_type_checkboxes)
            if is_selected
        }
        return visualize_graph(
            repo_name, graphs_dict, layout_type, selected_edge_types
        )

    def update_edge_checkboxes(repo_name):
        """Update edge type checkboxes when repository changes"""
        if repo_name not in graphs_dict:
            return [
                gr.Checkbox(visible=False) for _ in range(max_edge_type_checkboxes)
            ]

        edge_types = get_available_edge_types(graphs_dict[repo_name])
        shown = [
            gr.Checkbox(
                label=edge_type,
                value=is_checked_by_default(edge_type),
                visible=True,
            )
            for edge_type in edge_types[:max_edge_type_checkboxes]
        ]
        # Hide the unused remainder of the pool
        hidden = [
            gr.Checkbox(visible=False)
            for _ in range(max_edge_type_checkboxes - len(shown))
        ]
        return shown + hidden

    # Get initial edge types for the first repository
    initial_edge_types = []
    if repo_names:
        initial_edge_types = get_available_edge_types(graphs_dict[repo_names[0]])

    with gr.Row():
        with gr.Column(scale=1):
            repo_dropdown = gr.Dropdown(
                choices=repo_names,
                label="Select Repository",
                value=repo_names[0] if repo_names else None,
            )

            layout_dropdown = gr.Dropdown(
                choices=[
                    ("Spring Layout (Force-directed)", "spring"),
                    ("Circular Layout", "circular"),
                    ("Kamada-Kawai Layout", "kamada_kawai"),
                    ("Fruchterman-Reingold Layout", "fruchterman_reingold"),
                    ("Shell Layout", "shell"),
                    ("Spectral Layout", "spectral"),
                    ("Planar Layout", "planar"),
                ],
                label="Select Layout",
                value="spring",
            )

            gr.Markdown("### Edge Type Filters")
            gr.Markdown("Select which edge types to display:")

            # Create checkboxes for edge types with initial values; the
            # defaults match the ones applied when the repository changes.
            edge_checkboxes = []
            for i in range(max_edge_type_checkboxes):
                if i < len(initial_edge_types):
                    checkbox = gr.Checkbox(
                        label=initial_edge_types[i],
                        value=is_checked_by_default(initial_edge_types[i]),
                        visible=True,
                    )
                else:
                    checkbox = gr.Checkbox(label=f"Edge Type {i+1}", visible=False)
                edge_checkboxes.append(checkbox)

            visualize_btn = gr.Button("Visualize Graph", variant="primary")

            stats_text = gr.Textbox(
                label="Graph Statistics", lines=6, interactive=False
            )

        with gr.Column(scale=2):
            graph_plot = gr.Plot(label="Interactive Dependency Graph")

    # Set up event handlers
    all_inputs = [repo_dropdown, layout_dropdown] + edge_checkboxes
    plot_outputs = [graph_plot, stats_text]

    visualize_btn.click(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Update checkboxes when repository changes
    repo_dropdown.change(
        fn=update_edge_checkboxes,
        inputs=[repo_dropdown],
        outputs=edge_checkboxes,
    )

    # Auto-visualize on dropdown change
    repo_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Auto-visualize on layout change
    layout_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Auto-visualize on checkbox changes
    for checkbox in edge_checkboxes:
        checkbox.change(
            fn=plot_selected_repo,
            inputs=all_inputs,
            outputs=plot_outputs,
        )