Adarsh-aot committed on
Commit
784ac8a
·
verified ·
1 Parent(s): d23f120

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -44
app.py CHANGED
@@ -1,69 +1,63 @@
import streamlit as st
import csv  # NOTE(review): unused in this script — candidate for removal
import chromadb
from chromadb.utils import embedding_functions
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# Persistent ChromaDB client backed by the local "data_db" directory.
chroma_client = chromadb.PersistentClient(path="data_db")

# Sentence-transformer embedding function; must match the one used at indexing time.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")

# Get (or lazily create) the collection that holds the searchable documents.
collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)


@st.cache_resource
def _load_llm():
    """Load the LaMini-T5 model once per process and wrap it for LangChain.

    Returns:
        HuggingFacePipeline: a LangChain LLM wrapper around a
        text2text-generation pipeline.

    Without caching, the tokenizer, model, and pipeline were rebuilt inside
    the button handler on every single click, making each search pay the
    full model-load cost.
    """
    tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
    model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512
    )
    return HuggingFacePipeline(pipeline=pipe)


# Streamlit app layout
st.title("ChromaDB and HuggingFace Pipeline Integration")

query = st.text_input("Enter your query:", value="director")

if st.button("Search"):
    results = collection.query(
        query_texts=[query],
        n_results=3,
        include=['documents', 'distances', 'metadatas']
    )

    st.write("Query Results:")
    st.write(results['metadatas'])

    # Chroma returns a list-of-lists (one inner list per query text).
    # Guard both levels: indexing [0][0] on an empty result raised IndexError.
    documents = results.get('documents') or []
    if documents and documents[0]:
        # n_results=3 was requested, so use every retrieved document as
        # context instead of silently discarding all but the top hit.
        context = "\n\n".join(documents[0])
        st.write("Context:")
        st.write(context)

        local_llm = _load_llm()

        # E741: the original named this prompt variable `l` (ambiguous).
        prompt = f"""
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {query}
Helpful Answer:
"""

        answer = local_llm(prompt)
        st.write("Answer:")
        st.write(answer)
    else:
        st.write("No documents found for the query.")