Spaces:

lambdaofgod
/

github_search_visualizations

Sleeping

lambdaofgod committed on Jul 21, 2025

Commit

c752e68

1 Parent(s): a9b2499

updated embeddings

Files changed (4) hide show

app.py CHANGED Viewed

@@ -50,7 +50,10 @@ task_visualizations = TaskVisualizations(
     AppConfig.selected_task_counts_path,
     AppConfig.tasks_path,
 )
-display_df = pd.read_parquet("data/selected_repos_representations_umap2d.parquet")
 display_df["is_task"] = display_df["representation"] == "task"
 embedding_visualizer = EmbeddingVisualizer(display_df=display_df)

     AppConfig.selected_task_counts_path,
     AppConfig.tasks_path,
 )
+display_df = datasets.load_dataset(
+    "lambdaofgod/pwc_github_search",
+    data_files="selected_repos_representations_umap2d.parquet",
+)["train"].to_pandas()
 display_df["is_task"] = display_df["representation"] == "task"
 embedding_visualizer = EmbeddingVisualizer(display_df=display_df)

data/selected_repos_representations_umap2d.parquet DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:26f519620fb265574be6034ed18419b58fa7d345d17b9dc180a938ef3f37ecc8
-size 18983840

gradio_tabs.py CHANGED Viewed

@@ -35,10 +35,9 @@ def load_embeddings_description():
     return
-def setup_repository_representations_tab(repos_df, repos, representation_types):
     wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
-    gr.Markdown("## Wordclouds")
     gr.Gallery(
         [
             (wordcloud, representation_type)
@@ -49,6 +48,10 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
         height=300,
     )
     gr.Markdown("Select a repository and two representation types to compare them.")
     with gr.Row():
         repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
@@ -100,6 +103,8 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
             outputs=[displayed_tasks, text1, text2],
         )
 def setup_tasks_tab(descriptions, task_visualizations):

     return
+def display_wordclouds(representation_types, repos_df):
     wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
+    gr.Markdown("# Wordclouds")
     gr.Gallery(
         [
             (wordcloud, representation_type)
         height=300,
     )
+def setup_repository_representations_tab(repos_df, repos, representation_types):
+    gr.Markdown("# Comparing repository representations")
     gr.Markdown("Select a repository and two representation types to compare them.")
     with gr.Row():
         repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
             outputs=[displayed_tasks, text1, text2],
         )
+    display_wordclouds(representation_types, repos_df)
 def setup_tasks_tab(descriptions, task_visualizations):

text_visualization.py CHANGED Viewed

@@ -84,7 +84,9 @@ class EmbeddingVisualizer(BaseModel):
         display_df = display_df.sort_values("representation", ascending=False)
         readme_df = display_df[
-            display_df["representation"].isin(["readme", "generated_readme", "task"])
         ]
         raw_df = display_df[
             display_df["representation"].isin(

         display_df = display_df.sort_values("representation", ascending=False)
         readme_df = display_df[
+            display_df["representation"].isin(
+                ["readme", "code2doc_generated_readme", "task"]
+            )
         ]
         raw_df = display_df[
             display_df["representation"].isin(