Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -1,69 +1,63 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
import chromadb
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
from transformers import pipeline
|
6 |
from langchain.llms import HuggingFacePipeline
|
7 |
|
8 |
-
|
9 |
chroma_client = chromadb.PersistentClient(path="data_db")
|
10 |
|
11 |
-
|
12 |
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")
|
13 |
|
14 |
-
|
15 |
collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)
|
16 |
|
17 |
-
|
|
|
18 |
st.title("ChromaDB and HuggingFace Pipeline Integration")
|
|
|
19 |
query = st.text_input("Enter your query:", value="director")
|
20 |
|
21 |
if st.button("Search"):
|
22 |
-
# Query the collection
|
23 |
results = collection.query(
|
24 |
query_texts=[query],
|
25 |
-
n_results=
|
26 |
include=['documents', 'distances', 'metadatas']
|
27 |
)
|
28 |
-
|
29 |
st.write("Query Results:")
|
30 |
st.write(results['metadatas'])
|
31 |
|
32 |
if results['documents']:
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
model
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
answer = local_llm(l)
|
64 |
-
st.write("Answer:")
|
65 |
-
st.write(answer)
|
66 |
-
else:
|
67 |
-
st.write("No valid context found in the results.")
|
68 |
-
else:
|
69 |
-
st.write("No documents found for the query.")
|
|
|
1 |
import streamlit as st
import csv
import chromadb
from chromadb.utils import embedding_functions
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# Hugging Face checkpoint used to generate the answer from the retrieved context.
LLM_CHECKPOINT = "MBZUAI/LaMini-T5-738M"

# Prompt sent to the LLM; {context} and {query} are filled in per search.
PROMPT_TEMPLATE = """
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {query}
Helpful Answer:
"""

# Persistent on-disk Chroma store, embedded with a sentence-transformers model.
chroma_client = chromadb.PersistentClient(path="data_db")

sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")

collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)


@st.cache_resource
def _load_local_llm():
    """Build the text2text-generation pipeline wrapped as a LangChain LLM.

    Cached with ``st.cache_resource`` so the tokenizer and model are loaded
    once per server process instead of on every "Search" click (Streamlit
    re-executes the whole script on each interaction).

    Returns:
        A ``HuggingFacePipeline`` wrapping the LaMini-T5 generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(LLM_CHECKPOINT)
    model = AutoModelForSeq2SeqLM.from_pretrained(LLM_CHECKPOINT)

    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512
    )

    return HuggingFacePipeline(pipeline=pipe)


# Streamlit app layout
st.title("ChromaDB and HuggingFace Pipeline Integration")

query = st.text_input("Enter your query:", value="director")

if st.button("Search"):
    results = collection.query(
        query_texts=[query],
        n_results=3,
        include=['documents', 'distances', 'metadatas']
    )
    st.write("Query Results:")
    st.write(results['metadatas'])

    # Chroma returns one inner list of documents per query text, so the outer
    # list is non-empty even when nothing matched; check the inner list too
    # before indexing into it.
    documents = results['documents']
    if documents and documents[0]:
        # Only the top-ranked document is used as the LLM context.
        context = documents[0][0]
        st.write("Context:")
        st.write(context)

        local_llm = _load_local_llm()

        prompt = PROMPT_TEMPLATE.format(context=context, query=query)

        answer = local_llm(prompt)
        st.write("Answer:")
        st.write(answer)
    else:
        st.write("No documents found for the query.")
|
61 |
+
|
62 |
+
|
63 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|