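"""Streamlit app: queries a persistent ChromaDB collection and feeds the
top-matching document as context to a local LaMini-T5 model for
retrieval-augmented question answering.

Run with `streamlit run <this file>` (the filename is not part of this
snippet).
"""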
import streamlit as st
import chromadb
from chromadb.utils import embedding_functions
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Note: in langchain >= 0.2 this import moved to langchain_community.llms.
from langchain.llms import HuggingFacePipeline

# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="data_db")

# Define the embedding function
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")

# Get or create a collection
collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)
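
# Note: this script only queries the collection; it assumes documents were
# added to "data_db" beforehand, e.g. with something like the following
# (the ids, texts, and metadata here are hypothetical, for illustration):
#
#   collection.add(
#       ids=["doc-1"],
#       documents=["Christopher Nolan directed Inception."],
#       metadatas=[{"source": "example"}],
#   )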

# Streamlit UI elements
st.title("ChromaDB and HuggingFace Pipeline Integration")
query = st.text_input("Enter your query:", value="director")

if st.button("Search"):
    # Query the collection for the single nearest document
    results = collection.query(
        query_texts=[query],
        n_results=1,
        include=['documents', 'distances', 'metadatas']
    )
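
    # For a single query text, results has the shape
    # {'ids': [[...]], 'documents': [[...]], 'metadatas': [[...]],
    #  'distances': [[...]]} -- one inner list per query text.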

    st.write("Query Results:")
    st.write(results['metadatas'])

    if results['documents']:
        # The first (and only) hit for the query lives at
        # results['documents'][0][0]; an empty inner list means no match.
        if results['documents'][0]:
            context = results['documents'][0][0]
            st.write("Context:")
            st.write(context)

            # Load tokenizer and model (note: these reload on every click;
            # see the cached-loader sketch below for a way to avoid that)
            tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
            model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")

            # Create pipeline
            pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=512
            )

            local_llm = HuggingFacePipeline(pipeline=pipe)
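
            # Reloading a 738M-parameter model on every button press is slow.
            # A minimal sketch of a cached loader (assuming Streamlit >= 1.18,
            # where st.cache_resource is available; load_local_llm is a
            # hypothetical helper name):
            #
            #   @st.cache_resource
            #   def load_local_llm():
            #       tok = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
            #       mdl = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
            #       gen = pipeline("text2text-generation", model=mdl,
            #                      tokenizer=tok, max_length=512)
            #       return HuggingFacePipeline(pipeline=gen)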

            prompt = f"""
            Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

            {context}

            Question: {query}
            Helpful Answer:
            """

            # Generate the answer (direct call works on older langchain;
            # newer versions use local_llm.invoke(prompt) instead)
            answer = local_llm(prompt)
            st.write("Answer:")
            st.write(answer)
        else:
            st.write("No matching documents found for the query.")
    else:
        st.write("No documents found for the query.")