vincentmin committed on
Commit
3baeead
·
1 Parent(s): ede2b7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -4,7 +4,7 @@ from langchain.document_loaders import ArxivLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.vectorstores import Chroma
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
- # from langchain.document_loaders import Document
8
 
9
  CHUNK_SIZE = 1000
10
  LOAD_MAX_DOCS = 5
@@ -17,22 +17,27 @@ loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
17
 
18
  embeddings = HuggingFaceEmbeddings()
19
 
 
 
 
 
 
20
  def get_data(user_query: str):
21
- documents = loader.load()
22
- # texts = text_splitter.split_documents(documents)
23
- texts = documents
24
- for doc in texts:
25
- doc.page_content = doc.metadata["Summary"]
26
- db = Chroma.from_documents(texts, embeddings)
27
  retriever = db.as_retriever()
28
- docs = retriever.get_relevant_documents(user_query)
29
- print(docs[0].metadata)
30
- return "\n\n".join([d.page_content for d in docs])
 
 
 
31
 
32
  demo = gr.Interface(
33
  fn=get_data,
34
  inputs="text",
35
- outputs="text",
36
  title="Document Filter",
37
  description="Enter a query to filter the list of documents."
38
  )
 
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.vectorstores import Chroma
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.document_loaders import Document
8
 
9
  CHUNK_SIZE = 1000
10
  LOAD_MAX_DOCS = 5
 
17
 
18
  embeddings = HuggingFaceEmbeddings()
19
 
20
def process_document(doc: Document):
    """Return a new Document that uses the paper summary as its content.

    The returned document's ``page_content`` is ``doc.metadata["Summary"]``
    and its metadata is a copy of ``doc.metadata`` with an extra ``"Body"``
    entry holding the original ``page_content``.

    Raises:
        KeyError: if ``doc.metadata`` has no ``"Summary"`` entry.
    """
    # Build a fresh dict instead of aliasing doc.metadata, so the input
    # document is not modified as a side effect of this call.
    metadata = {**doc.metadata, "Body": doc.page_content}
    return Document(page_content=metadata["Summary"], metadata=metadata)
24
+
25
  def get_data(user_query: str):
26
+ docs = loader.load()
27
+ docs = [process_document(doc) for doc in docs]
28
+ db = Chroma.from_documents(docs, embeddings)
 
 
 
29
  retriever = db.as_retriever()
30
+ relevant_docs = retriever.get_relevant_documents(user_query)
31
+ print(relevant_docs[0].metadata)
32
+ output = ""
33
+ for doc in relevant_docs:
34
+ output += f"**Title: {doc["title"]}**\nAbstract: {doc["Summary"]}\n\n"
35
+ return output
36
 
37
  demo = gr.Interface(
38
  fn=get_data,
39
  inputs="text",
40
+ outputs=gr.Markdown(),
41
  title="Document Filter",
42
  description="Enter a query to filter the list of documents."
43
  )