gaur3009 committed on
Commit
79c9666
·
verified ·
1 Parent(s): a809898

Create agents/scout.py

Browse files
Files changed (1) hide show
  1. agents/scout.py +47 -0
agents/scout.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from sentence_transformers import SentenceTransformer, util
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ import torch
6
+
7
# Shared model objects, created once when this module is imported and reused
# by every call to search_and_summarize().

# Sentence-embedding model used to compare a query against paper abstracts.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Hugging Face model id handed to both from_pretrained() calls below;
# replace if needed.
model_id = "TheBloke/Mistral-7B-Instruct-v0.1"

# The causal LM and the tensors fed to it are placed on this device.
device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(model_id)
llm = AutoModelForCausalLM.from_pretrained(model_id)
llm = llm.to(device)
13
+
14
def search_and_summarize(query, max_papers=5):
    """Rank the arXiv cs.AI RSS feed against *query* and explain the best hits.

    Downloads ``https://arxiv.org/rss/cs.AI``, embeds the query and every
    item's description with the module-level ``embedder``, orders the items by
    cosine similarity to the query, and asks the module-level ``llm`` to
    explain each of the top ``max_papers`` items.

    Args:
        query: Free-text search string to match against the feed items.
        max_papers: Maximum number of results to return. ``None`` returns
            every item in the feed; zero or a negative value returns ``[]``
            without touching the network.

    Returns:
        A list of dicts with the keys ``'title'``, ``'summary'`` and
        ``'link'``, ordered from most to least similar to the query. The list
        is empty when the feed contains no usable items.

    Raises:
        requests.RequestException: If the feed cannot be downloaded, which
            includes timeouts and non-2xx HTTP responses.
    """
    if max_papers is not None and max_papers <= 0:
        return []

    url = "https://arxiv.org/rss/cs.AI"
    # A timeout keeps a stalled connection from blocking the caller forever,
    # and raise_for_status() stops an HTTP error page being parsed as a feed.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    # Hand the parser raw bytes so it honours the encoding declared in the
    # XML itself instead of whatever charset requests guessed for res.text.
    soup = BeautifulSoup(res.content, 'xml')

    papers = []
    for item in soup.find_all('item'):
        title = item.find('title')
        abstract = item.find('description')
        link = item.find('link')
        # Skip malformed entries rather than failing on `.text` of None.
        if title is None or abstract is None or link is None:
            continue
        papers.append({
            'title': title.text,
            'abstract': abstract.text,
            'link': link.text,
        })

    # Nothing to rank: avoid running similarity on an empty batch.
    if not papers:
        return []

    # Embed the query and all abstracts, then order abstracts by similarity.
    query_emb = embedder.encode(query)
    paper_embs = embedder.encode([p['abstract'] for p in papers])
    sims = util.cos_sim(query_emb, paper_embs)[0]
    # tolist() turns the index tensor into plain ints for list indexing.
    top_idx = sims.argsort(descending=True)[:max_papers].tolist()

    results = []
    for idx in top_idx:
        paper = papers[idx]
        context = f"Title: {paper['title']}\nAbstract: {paper['abstract']}"
        prompt = f"{context}\n\nExplain this paper in simple terms for an AI researcher:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = llm.generate(**inputs, max_new_tokens=200)
        # The generated sequence begins with the prompt tokens. Drop them by
        # token count: slicing the decoded string by len(prompt) is wrong
        # whenever decoding does not reproduce the prompt character for
        # character.
        prompt_len = inputs["input_ids"].shape[1]
        explanation = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()
        results.append({
            'title': paper['title'],
            'summary': explanation,
            'link': paper['link'],
        })
    return results