Adarsh-aot committed
Commit d23f120 · verified · 1 Parent(s): 59cb95a

Update app.py

Files changed (1)
  1. app.py +43 -40
app.py CHANGED
@@ -1,66 +1,69 @@
-
 import streamlit as st
-import csv
 import chromadb
 from chromadb.utils import embedding_functions
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from transformers import pipeline
 from langchain.llms import HuggingFacePipeline
-
-
 
+# Initialize ChromaDB client
 chroma_client = chromadb.PersistentClient(path="data_db")
 
-
+# Define the embedding function
 sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")
 
-
+# Get or create a collection
 collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)
 
-
-
-
+# Streamlit UI elements
 st.title("ChromaDB and HuggingFace Pipeline Integration")
-
 query = st.text_input("Enter your query:", value="director")
 
 if st.button("Search"):
+    # Query the collection
     results = collection.query(
         query_texts=[query],
         n_results=1,
         include=['documents', 'distances', 'metadatas']
     )
+
     st.write("Query Results:")
     st.write(results['metadatas'])
 
     if results['documents']:
-        context = results['documents']
-        st.write("Context:")
-        st.write(context)
-        tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
-        model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
-
-        pipe = pipeline(
-            "text2text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            max_length=512
-        )
-
-        local_llm = HuggingFacePipeline(pipeline=pipe)
-
-        l = f"""
-        Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-        {context}
-
-        Question: {query}
-        Helpful Answer:
-        """
-
-        answer = local_llm(l)
-        st.write("Answer:")
-        st.write(answer)
-
-
-
+        # Check if the structure of results['documents'] is as expected
+        if len(results['documents']) > 0 and isinstance(results['documents'][0], list) and len(results['documents'][0]) > 0:
+            context = results['documents'][0][0]
+            st.write("Context:")
+            st.write(context)
+
+            # Load tokenizer and model
+            tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
+            model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
+
+            # Create pipeline
+            pipe = pipeline(
+                "text2text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                max_length=512
+            )
+
+            local_llm = HuggingFacePipeline(pipeline=pipe)
+
+            l = f"""
+            Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+            {context}
+
+            Question: {query}
+            Helpful Answer:
+            """
+
+            # Generate answer
+            answer = local_llm(l)
+            st.write("Answer:")
+            st.write(answer)
+        else:
+            st.write("No valid context found in the results.")
+    else:
+        st.write("No documents found for the query.")