ahmedess0 committed on
Commit
2b38079
·
verified ·
1 Parent(s): e0b448e
Files changed (1) hide show
  1. app.py +12 -68
app.py CHANGED
@@ -1,8 +1,4 @@
1
  from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
2
- import xml.etree.ElementTree as ET
3
- from wordcloud import WordCloud
4
- from collections import Counter
5
- import re
6
  import datetime
7
  import requests
8
  import pytz
@@ -13,68 +9,30 @@ import os
13
 
14
  # Custom agent tools are defined below.
15
  @tool
16
- def search_arxiv(query: str):
17
- """Searches arXiv for academic papers and returns structured results.
18
 
19
  Args:
20
  query (str): The topic or keywords to search for.
21
 
22
  Returns:
23
- list: A list of tuples containing titles, summaries, and links.
24
  """
25
- max_results = 5
26
  url = f"http://export.arxiv.org/api/query?search_query={query}&max_results={max_results}"
27
  response = requests.get(url)
28
 
29
  if response.status_code == 200:
30
  papers = []
31
- root = ET.fromstring(response.text)
32
- for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
33
- title = entry.find("{http://www.w3.org/2005/Atom}title").text
34
- summary = entry.find("{http://www.w3.org/2005/Atom}summary").text
35
- link = entry.find("{http://www.w3.org/2005/Atom}id").text
36
- papers.append((title, summary, link))
37
 
38
- return papers
39
 
40
- return []
41
-
42
- def generate_visuals(query):
43
- results = search_arxiv(query)
44
- if not results:
45
- return "No papers found.", None, None
46
-
47
- # Extract text data
48
- titles = [title for title, _, _ in results]
49
- summaries = " ".join(summary for _, summary, _ in results)
50
-
51
- # Generate Bar Chart for Keyword Frequency in Titles
52
- words = [word.lower() for title in titles for word in re.findall(r'\b\w+\b', title) if len(word) > 3]
53
- word_counts = Counter(words).most_common(10)
54
-
55
- # Save Bar Chart Image
56
- bar_chart_path = "/tmp/bar_chart.png"
57
- plt.figure(figsize=(8, 5))
58
- plt.bar(*zip(*word_counts), color='skyblue')
59
- plt.xticks(rotation=45)
60
- plt.title("Top Keywords in Titles")
61
- plt.xlabel("Keywords")
62
- plt.ylabel("Frequency")
63
- plt.tight_layout()
64
- plt.savefig(bar_chart_path)
65
- plt.close()
66
-
67
- # Generate Word Cloud for Summary Text
68
- wordcloud = WordCloud(width=500, height=300, background_color="white").generate(summaries)
69
- wordcloud_path = "/tmp/wordcloud.png"
70
- wordcloud.to_file(wordcloud_path)
71
-
72
- # Display Search Results as Clickable Links
73
- markdown_text = "\n\n".join(
74
- [f"**[{title}]({link})**\n\n{summary}" for title, summary, link in results]
75
- )
76
-
77
- return markdown_text, bar_chart_path, wordcloud_path
78
 
79
  @tool
80
  def summarize_text(text: str) -> str:
@@ -131,18 +89,4 @@ agent = CodeAgent(
131
  prompt_templates=prompt_templates
132
  )
133
 
134
- # Gradio Interface for arXiv research search and visualization
135
- iface = gr.Interface(
136
- fn=generate_visuals,
137
- inputs="text",
138
- outputs=["markdown", "image", "image"],
139
- title="🔎 arXiv Research Paper Search",
140
- description="Enter a topic or keywords to search for academic papers on arXiv. Get a list of papers with visual analysis.",
141
- examples=[["Machine Learning"], ["Quantum Computing"], ["Climate Change"]]
142
- )
143
-
144
- # Launch Gradio Interface
145
- iface.launch()
146
-
147
- # The Gradio UI component (not needed if you already have the Gradio interface launched above)
148
- # GradioUI(agent).launch()
 
1
  from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
 
 
 
 
2
  import datetime
3
  import requests
4
  import pytz
 
9
 
10
  # Custom agent tools are defined below.
11
  @tool
12
def search_arxiv(query: str) -> str:
    """Searches arXiv for academic papers.

    Sends the query to the arXiv export API and formats up to five of the
    returned Atom entries as plain text.

    Args:
        query (str): The topic or keywords to search for.

    Returns:
        str: A formatted list of found papers with titles, summaries, and links;
            "No papers found." when the request is not answered with HTTP 200 or
            the feed contains no entries; or a short error message when the
            request itself fails or the response is not well-formed XML.
    """
    # Imported locally so the function does not depend on a module-level import.
    import xml.etree.ElementTree as ET

    max_results = 5  # Static value for maximum results
    atom = "{http://www.w3.org/2005/Atom}"  # Atom namespace used by the feed

    try:
        # Passing the query through `params` URL-encodes it, so characters such
        # as '&' or '#' can no longer corrupt the query string. The timeout
        # keeps a stalled connection from blocking the caller indefinitely.
        response = requests.get(
            "http://export.arxiv.org/api/query",
            params={"search_query": query, "max_results": max_results},
            timeout=30,
        )
    except requests.RequestException as error:
        return f"Error fetching papers from arXiv: {error}"

    if response.status_code != 200:
        return "No papers found."

    try:
        # Parse the raw bytes so the XML parser honours the declared encoding
        # and decodes entities (e.g. "&amp;") that plain string splitting
        # would leave untouched.
        root = ET.fromstring(response.content)
    except ET.ParseError as error:
        return f"Error parsing arXiv response: {error}"

    papers = []
    for entry in root.findall(f"{atom}entry")[:max_results]:
        # findtext() with a default tolerates entries that lack a field, and
        # split()/join collapses the line breaks embedded in feed text.
        title = " ".join(entry.findtext(f"{atom}title", default="").split())
        summary = " ".join(entry.findtext(f"{atom}summary", default="").split())
        link = entry.findtext(f"{atom}id", default="").strip()
        papers.append(f"Title: {title}\nSummary: {summary}\nLink: {link}\n")

    # An empty feed previously produced an empty string; report it explicitly.
    if not papers:
        return "No papers found."

    return "\n\n".join(papers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  @tool
38
  def summarize_text(text: str) -> str:
 
89
  prompt_templates=prompt_templates
90
  )
91
 
92
+ GradioUI(agent).launch()