Adarsh-aot committed
Commit f0c50f4 · verified · 1 Parent(s): 791a4d3

Update app.py

Files changed (1)
  1. app.py +51 -41
app.py CHANGED
@@ -1,63 +1,73 @@
  import streamlit as st
- import csv
  import chromadb
  from chromadb.utils import embedding_functions
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
  from transformers import pipeline
- from langchain.llms import HuggingFacePipeline
-
-
- chroma_client = chromadb.PersistentClient(path="./data_db")
+ from langchain_community.llms import HuggingFacePipeline

+ # Initialize ChromaDB client
+ chroma_client = chromadb.PersistentClient(path="data_db")

+ # Define the embedding function
  sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")

-
+ # Get or create a collection
  collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)

-
- # Streamlit app layout
+ # Streamlit UI elements
  st.title("ChromaDB and HuggingFace Pipeline Integration")
-
  query = st.text_input("Enter your query:", value="director")

  if st.button("Search"):
+     # Query the collection
      results = collection.query(
          query_texts=[query],
-         n_results=3,
+         n_results=1,
          include=['documents', 'distances', 'metadatas']
      )
+
      st.write("Query Results:")
      st.write(results['metadatas'])

-     if results['documents']:
-         context = results['documents'][0][0]
-         st.write("Context:")
-         st.write(context)
-         tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
-         model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
-
-         pipe = pipeline(
-             "text2text-generation",
-             model=model,
-             tokenizer=tokenizer,
-             max_length=512
-         )
-
-         local_llm = HuggingFacePipeline(pipeline=pipe)
-
-         l = f"""
-         Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-         {context}
-
-         Question: {query}
-         Helpful Answer:
-         """
-
-         answer = local_llm(l)
-         st.write("Answer:")
-         st.write(answer)
-
-
-
+     # Log the structure of results
+     st.write("Results Structure:")
+     st.write(results)
+
+     if 'documents' in results and results['documents']:
+         # Check if the structure of results['documents'] is as expected
+         if len(results['documents']) > 0 and isinstance(results['documents'][0], list) and len(results['documents'][0]) > 0:
+             context = results['documents'][0][0]
+             st.write("Context:")
+             st.write(context)
+
+             # Load tokenizer and model
+             tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
+             model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
+
+             # Create pipeline
+             pipe = pipeline(
+                 "text2text-generation",
+                 model=model,
+                 tokenizer=tokenizer,
+                 max_length=512
+             )
+
+             local_llm = HuggingFacePipeline(pipeline=pipe)
+
+             l = f"""
+             Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+             {context}
+
+             Question: {query}
+             Helpful Answer:
+             """
+
+             # Generate answer
+             answer = local_llm(l)
+             st.write("Answer:")
+             st.write(answer)
+         else:
+             st.write("No valid context found in the results.")
+     else:
+         st.write("No documents found for the query.")