Commit
·
c752e68
1
Parent(s):
a9b2499
updated embeddings
Browse files
- app.py +4 -1
- data/selected_repos_representations_umap2d.parquet +0 -3
- gradio_tabs.py +8 -3
- text_visualization.py +3 -1
app.py
CHANGED
|
@@ -50,7 +50,10 @@ task_visualizations = TaskVisualizations(
|
|
| 50 |
AppConfig.selected_task_counts_path,
|
| 51 |
AppConfig.tasks_path,
|
| 52 |
)
|
| 53 |
-
display_df =
|
|
|
|
|
|
|
|
|
|
| 54 |
display_df["is_task"] = display_df["representation"] == "task"
|
| 55 |
embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
|
| 56 |
|
|
|
|
| 50 |
AppConfig.selected_task_counts_path,
|
| 51 |
AppConfig.tasks_path,
|
| 52 |
)
|
| 53 |
+
display_df = datasets.load_dataset(
|
| 54 |
+
"lambdaofgod/pwc_github_search",
|
| 55 |
+
data_files="selected_repos_representations_umap2d.parquet",
|
| 56 |
+
)["train"].to_pandas()
|
| 57 |
display_df["is_task"] = display_df["representation"] == "task"
|
| 58 |
embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
|
| 59 |
|
data/selected_repos_representations_umap2d.parquet
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:26f519620fb265574be6034ed18419b58fa7d345d17b9dc180a938ef3f37ecc8
|
| 3 |
-
size 18983840
|
|
|
|
|
|
|
|
|
|
|
|
gradio_tabs.py
CHANGED
|
@@ -35,10 +35,9 @@ def load_embeddings_description():
|
|
| 35 |
return
|
| 36 |
|
| 37 |
|
| 38 |
-
def setup_repository_representations_tab(repos_df, repos, representation_types):
|
| 39 |
-
|
| 40 |
wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
|
| 41 |
-
gr.Markdown("
|
| 42 |
gr.Gallery(
|
| 43 |
[
|
| 44 |
(wordcloud, representation_type)
|
|
@@ -49,6 +48,10 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
|
|
| 49 |
height=300,
|
| 50 |
)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
gr.Markdown("Select a repository and two representation types to compare them.")
|
| 53 |
with gr.Row():
|
| 54 |
repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
|
|
@@ -100,6 +103,8 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
|
|
| 100 |
outputs=[displayed_tasks, text1, text2],
|
| 101 |
)
|
| 102 |
|
|
|
|
|
|
|
| 103 |
|
| 104 |
def setup_tasks_tab(descriptions, task_visualizations):
|
| 105 |
|
|
|
|
| 35 |
return
|
| 36 |
|
| 37 |
|
| 38 |
+
def display_wordclouds(representation_types, repos_df):
|
|
|
|
| 39 |
wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
|
| 40 |
+
gr.Markdown("# Wordclouds")
|
| 41 |
gr.Gallery(
|
| 42 |
[
|
| 43 |
(wordcloud, representation_type)
|
|
|
|
| 48 |
height=300,
|
| 49 |
)
|
| 50 |
|
| 51 |
+
|
| 52 |
+
def setup_repository_representations_tab(repos_df, repos, representation_types):
|
| 53 |
+
|
| 54 |
+
gr.Markdown("# Comparing repository representations")
|
| 55 |
gr.Markdown("Select a repository and two representation types to compare them.")
|
| 56 |
with gr.Row():
|
| 57 |
repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
|
|
|
|
| 103 |
outputs=[displayed_tasks, text1, text2],
|
| 104 |
)
|
| 105 |
|
| 106 |
+
display_wordclouds(representation_types, repos_df)
|
| 107 |
+
|
| 108 |
|
| 109 |
def setup_tasks_tab(descriptions, task_visualizations):
|
| 110 |
|
text_visualization.py
CHANGED
|
@@ -84,7 +84,9 @@ class EmbeddingVisualizer(BaseModel):
|
|
| 84 |
|
| 85 |
display_df = display_df.sort_values("representation", ascending=False)
|
| 86 |
readme_df = display_df[
|
| 87 |
-
display_df["representation"].isin(
|
|
|
|
|
|
|
| 88 |
]
|
| 89 |
raw_df = display_df[
|
| 90 |
display_df["representation"].isin(
|
|
|
|
| 84 |
|
| 85 |
display_df = display_df.sort_values("representation", ascending=False)
|
| 86 |
readme_df = display_df[
|
| 87 |
+
display_df["representation"].isin(
|
| 88 |
+
["readme", "code2doc_generated_readme", "task"]
|
| 89 |
+
)
|
| 90 |
]
|
| 91 |
raw_df = display_df[
|
| 92 |
display_df["representation"].isin(
|