Spaces:
Sleeping
Sleeping
Commit
·
3baeead
1
Parent(s):
ede2b7a
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ from langchain.document_loaders import ArxivLoader
|
|
4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
from langchain.vectorstores import Chroma
|
6 |
from langchain.embeddings import HuggingFaceEmbeddings
|
7 |
-
|
8 |
|
9 |
CHUNK_SIZE = 1000
|
10 |
LOAD_MAX_DOCS = 5
|
@@ -17,22 +17,27 @@ loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
|
|
17 |
|
18 |
embeddings = HuggingFaceEmbeddings()
|
19 |
|
|
|
|
|
|
|
|
|
|
|
20 |
def get_data(user_query: str):
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
for doc in texts:
|
25 |
-
doc.page_content = doc.metadata["Summary"]
|
26 |
-
db = Chroma.from_documents(texts, embeddings)
|
27 |
retriever = db.as_retriever()
|
28 |
-
|
29 |
-
print(
|
30 |
-
|
|
|
|
|
|
|
31 |
|
32 |
demo = gr.Interface(
|
33 |
fn=get_data,
|
34 |
inputs="text",
|
35 |
-
outputs=
|
36 |
title="Document Filter",
|
37 |
description="Enter a query to filter the list of documents."
|
38 |
)
|
|
|
4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
from langchain.vectorstores import Chroma
|
6 |
from langchain.embeddings import HuggingFaceEmbeddings
|
7 |
+
from langchain.document_loaders import Document
|
8 |
|
9 |
CHUNK_SIZE = 1000
|
10 |
LOAD_MAX_DOCS = 5
|
|
|
17 |
|
18 |
embeddings = HuggingFaceEmbeddings()
|
19 |
|
20 |
+
def process_document(doc: Document) -> Document:
    """Return a new document whose searchable text is the source's summary.

    The returned document's ``page_content`` is ``doc.metadata["Summary"]``;
    the original ``page_content`` is preserved in the new metadata under the
    ``"Body"`` key, alongside every metadata entry of the source document.

    Args:
        doc: Source document. Its metadata must contain a ``"Summary"`` key.

    Returns:
        A new ``Document``; the input document and its metadata dict are
        left unmodified.

    Raises:
        KeyError: If ``doc.metadata`` has no ``"Summary"`` entry.
    """
    # Build a fresh dict rather than writing "Body" into doc.metadata, so the
    # caller's document is not mutated as a side effect of this call.
    metadata = {**doc.metadata, "Body": doc.page_content}
    return Document(page_content=metadata["Summary"], metadata=metadata)
|
24 |
+
|
25 |
def get_data(user_query: str):
    """Return a Markdown listing of the loaded papers most relevant to a query.

    Loads documents through the module-level ``loader``, swaps each one's
    text for its summary via ``process_document``, indexes them in a Chroma
    store with the module-level ``embeddings``, and formats the retriever's
    hits as Markdown.

    Args:
        user_query: Free-text query typed by the user.

    Returns:
        A Markdown string with one "Title / Abstract" entry per retrieved
        document, or a short notice when nothing could be loaded or matched.
    """
    no_results = "No matching documents found."

    # NOTE(review): the loader is re-run and the vector store rebuilt on every
    # call; consider building the index once at start-up if latency matters.
    docs = [process_document(doc) for doc in loader.load()]
    if not docs:
        return no_results

    db = Chroma.from_documents(docs, embeddings)
    retriever = db.as_retriever()
    relevant_docs = retriever.get_relevant_documents(user_query)
    if not relevant_docs:
        # Guard: indexing relevant_docs[0] below would raise IndexError.
        return no_results

    # Debug aid kept from the original: log the top hit's metadata.
    print(relevant_docs[0].metadata)

    # Document objects are not subscriptable; the fields live in .metadata.
    # ArxivLoader names the title key "Title" (capitalised). The values are
    # pulled into locals first so the f-string does not nest double quotes,
    # which is a SyntaxError before Python 3.12.
    entries = []
    for doc in relevant_docs:
        title = doc.metadata.get("Title", "Untitled")
        summary = doc.metadata.get("Summary", doc.page_content)
        entries.append(f"**Title: {title}**\nAbstract: {summary}\n\n")
    return "".join(entries)
|
36 |
|
37 |
# Gradio front end: a single text box feeds get_data, and the string it
# returns is rendered as Markdown.
demo = gr.Interface(
    get_data,
    "text",
    gr.Markdown(),
    description="Enter a query to filter the list of documents.",
    title="Document Filter",
)
|