Spaces:
Runtime error
Runtime error
debug
Browse files
- .gitignore +2 -1
- lrt/lrt.py +43 -11
- widgets/body.py +3 -2
- widgets/sidebar.py +2 -2
.gitignore
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
venv
|
| 2 |
test.py
|
| 3 |
.config.json
|
| 4 |
-
__pycache__
|
|
|
|
|
|
| 1 |
venv
|
| 2 |
test.py
|
| 3 |
.config.json
|
| 4 |
+
__pycache__
|
| 5 |
+
.idea
|
lrt/lrt.py
CHANGED
|
@@ -3,6 +3,9 @@ from typing import List
|
|
| 3 |
import textdistance as td
|
| 4 |
from .utils import UnionFind, ArticleList
|
| 5 |
from .academic_query import AcademicQuery
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
class LiteratureResearchTool:
|
| 8 |
def __init__(self, cluster_config: Configuration = None):
|
|
@@ -45,7 +48,6 @@ class LiteratureResearchTool:
|
|
| 45 |
platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
|
| 46 |
best_k: int = 5,
|
| 47 |
loading_ctx_manager = None,
|
| 48 |
-
decorator: callable = None
|
| 49 |
):
|
| 50 |
|
| 51 |
|
|
@@ -68,27 +70,57 @@ class LiteratureResearchTool:
|
|
| 68 |
end_year: int,
|
| 69 |
best_k: int = 5
|
| 70 |
) -> (ClusterList,ArticleList):
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
| 75 |
clusters = self.__postprocess_clusters__(clusters)
|
| 76 |
-
return clusters,articles
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
articles = ArticleList.parse_arxiv_articles(
|
| 79 |
-
|
| 80 |
abstracts = articles.getAbstracts() # List[str]
|
| 81 |
-
clusters = self.cluster_pipeline(abstracts,best_k=best_k)
|
| 82 |
clusters = self.__postprocess_clusters__(clusters)
|
| 83 |
return clusters, articles
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
articles = ArticleList.parse_pwc_articles(
|
| 86 |
-
|
| 87 |
abstracts = articles.getAbstracts() # List[str]
|
| 88 |
-
clusters = self.cluster_pipeline(abstracts,best_k=best_k)
|
| 89 |
clusters = self.__postprocess_clusters__(clusters)
|
| 90 |
return clusters, articles
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
|
| 94 |
|
|
|
|
| 3 |
import textdistance as td
|
| 4 |
from .utils import UnionFind, ArticleList
|
| 5 |
from .academic_query import AcademicQuery
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from tokenizers import Tokenizer
|
| 8 |
+
|
| 9 |
|
| 10 |
class LiteratureResearchTool:
|
| 11 |
def __init__(self, cluster_config: Configuration = None):
|
|
|
|
| 48 |
platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
|
| 49 |
best_k: int = 5,
|
| 50 |
loading_ctx_manager = None,
|
|
|
|
| 51 |
):
|
| 52 |
|
| 53 |
|
|
|
|
| 70 |
end_year: int,
|
| 71 |
best_k: int = 5
|
| 72 |
) -> (ClusterList,ArticleList):
|
| 73 |
+
|
| 74 |
+
@st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
|
| 75 |
+
def ieee_process(
|
| 76 |
+
query: str,
|
| 77 |
+
num_papers: int,
|
| 78 |
+
start_year: int,
|
| 79 |
+
end_year: int,
|
| 80 |
+
best_k: int = 5
|
| 81 |
+
):
|
| 82 |
+
articles = ArticleList.parse_ieee_articles(
|
| 83 |
+
self.literature_search.ieee(query, start_year, end_year, num_papers)) # ArticleList
|
| 84 |
+
abstracts = articles.getAbstracts() # List[str]
|
| 85 |
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
| 86 |
clusters = self.__postprocess_clusters__(clusters)
|
| 87 |
+
return clusters, articles
|
| 88 |
+
|
| 89 |
+
@st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
|
| 90 |
+
def arxiv_process(
|
| 91 |
+
query: str,
|
| 92 |
+
num_papers: int,
|
| 93 |
+
best_k: int = 5
|
| 94 |
+
):
|
| 95 |
articles = ArticleList.parse_arxiv_articles(
|
| 96 |
+
self.literature_search.arxiv(query, num_papers)) # ArticleList
|
| 97 |
abstracts = articles.getAbstracts() # List[str]
|
| 98 |
+
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
| 99 |
clusters = self.__postprocess_clusters__(clusters)
|
| 100 |
return clusters, articles
|
| 101 |
+
|
| 102 |
+
@st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
|
| 103 |
+
def pwc_process(
|
| 104 |
+
query: str,
|
| 105 |
+
num_papers: int,
|
| 106 |
+
best_k: int = 5
|
| 107 |
+
):
|
| 108 |
articles = ArticleList.parse_pwc_articles(
|
| 109 |
+
self.literature_search.paper_with_code(query, num_papers)) # ArticleList
|
| 110 |
abstracts = articles.getAbstracts() # List[str]
|
| 111 |
+
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
| 112 |
clusters = self.__postprocess_clusters__(clusters)
|
| 113 |
return clusters, articles
|
| 114 |
|
| 115 |
+
if platforn_name == 'IEEE':
|
| 116 |
+
return ieee_process(query,num_papers,start_year,end_year,best_k)
|
| 117 |
+
elif platforn_name == 'Arxiv':
|
| 118 |
+
return arxiv_process(query,num_papers,best_k)
|
| 119 |
+
elif platforn_name == 'Paper with Code':
|
| 120 |
+
return pwc_process(query,num_papers,best_k)
|
| 121 |
+
else:
|
| 122 |
+
raise RuntimeError('This platform is not supported. Please open an issue on the GitHub.')
|
| 123 |
+
|
| 124 |
|
| 125 |
|
| 126 |
|
widgets/body.py
CHANGED
|
@@ -60,8 +60,9 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre
|
|
| 60 |
|
| 61 |
|
| 62 |
# lrt results
|
| 63 |
-
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k,
|
| 64 |
-
|
|
|
|
| 65 |
for plat in platforms:
|
| 66 |
clusters, articles = next(generator)
|
| 67 |
print(clusters)
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
# lrt results
|
| 63 |
+
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k,
|
| 64 |
+
# loading_ctx_manager= st.spinner,
|
| 65 |
+
)
|
| 66 |
for plat in platforms:
|
| 67 |
clusters, articles = next(generator)
|
| 68 |
print(clusters)
|
widgets/sidebar.py
CHANGED
|
@@ -38,13 +38,13 @@ def render_sidebar():
|
|
| 38 |
'IEEE',
|
| 39 |
# 'Google Scholar',
|
| 40 |
'Arxiv',
|
| 41 |
-
'
|
| 42 |
], default=[
|
| 43 |
# 'Elvsier',
|
| 44 |
'IEEE',
|
| 45 |
# 'Google Scholar',
|
| 46 |
'Arxiv',
|
| 47 |
-
'
|
| 48 |
])
|
| 49 |
|
| 50 |
|
|
|
|
| 38 |
'IEEE',
|
| 39 |
# 'Google Scholar',
|
| 40 |
'Arxiv',
|
| 41 |
+
'Paper with Code'
|
| 42 |
], default=[
|
| 43 |
# 'Elvsier',
|
| 44 |
'IEEE',
|
| 45 |
# 'Google Scholar',
|
| 46 |
'Arxiv',
|
| 47 |
+
'Paper with Code'
|
| 48 |
])
|
| 49 |
|
| 50 |
|