|
import streamlit as st |
|
import os |
|
import logging |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain_community.vectorstores import Chroma |
|
from langchain_community.llms import HuggingFacePipeline |
|
from langchain.chains import RetrievalQA |
|
from ingest import create_chroma_db |
|
|
|
|
|
# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

# Identifier handed to ``from_pretrained`` in ``load_llm`` for both the
# tokenizer and the seq2seq model.
# NOTE(review): this looks like a local directory name rather than a full hub
# id -- confirm the model files exist at this path in the deployment.
checkpoint = "LaMini-T5-738M"
|
|
|
@st.cache_resource
def load_llm():
    """Build the text2text generation pipeline and wrap it for LangChain.

    The tokenizer and seq2seq model are both loaded from the module-level
    ``checkpoint``. The function is decorated with ``st.cache_resource`` so
    Streamlit can reuse the returned object instead of reloading the model
    on every script rerun.

    Returns:
        A ``HuggingFacePipeline`` wrapping the configured transformers
        pipeline.
    """
    # Sampling configuration forwarded verbatim to the transformers pipeline.
    generation_settings = {
        "max_length": 256,
        "do_sample": True,
        "temperature": 0.3,
        "top_p": 0.95,
    }

    # Load order (tokenizer first, then model) matches the original code.
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    generator = pipeline(
        'text2text-generation',
        model=seq2seq_model,
        tokenizer=seq2seq_tokenizer,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=generator)
|
|
|
def load_chroma_db():
    """Open the persisted Chroma store and return a retriever over it.

    If the ``chroma_db`` directory does not exist, ``create_chroma_db()``
    (imported from ``ingest``) is called once to build it. If the directory
    is still missing afterwards, the failure is shown in the Streamlit UI and
    a ``RuntimeError`` is raised.

    Returns:
        The retriever produced by ``db.as_retriever()``.

    Raises:
        RuntimeError: If the database directory is missing even after
            attempting to create it.
        Exception: Any error raised while building the embeddings or opening
            the store is reported in the UI, logged, and re-raised unchanged.
    """
    chroma_dir = "chroma_db"

    if not os.path.exists(chroma_dir):
        st.warning("Chroma database not found. Creating a new one...")
        create_chroma_db()

        # Re-check: creation is only considered successful if the directory
        # now exists.
        # NOTE(review): assumes create_chroma_db() persists to "chroma_db" --
        # verify against ingest.py.
        if not os.path.exists(chroma_dir):
            st.error("Failed to create the Chroma database. Please check the 'docs' directory and try again.")
            raise RuntimeError("Chroma database creation failed.")

    try:
        # NOTE(review): presumably this must be the same embedding model that
        # ingest.create_chroma_db() used to build the index -- confirm.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # The Chroma vector store has no ``load_local`` classmethod (that is
        # the FAISS API), so the previous call always raised AttributeError.
        # A persisted collection is reopened by pointing the constructor at
        # its directory.
        db = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)
        logger.info("Chroma database loaded successfully from %s", chroma_dir)
        return db.as_retriever()
    except Exception as e:
        st.error(f"Failed to load Chroma database: {e}")
        logger.exception("Exception in load_chroma_db")
        raise
|
|
|
def process_answer(instruction):
    """Run the retrieval-QA chain for one question.

    Args:
        instruction: The user's question, passed directly to the chain's
            ``invoke`` call.

    Returns:
        A ``(answer, response)`` tuple: ``answer`` is the chain output's
        ``'result'`` entry and ``response`` is the full chain output. If
        anything raises, the error is shown in the Streamlit UI, logged with
        its traceback, and a fallback message with an empty dict is returned
        instead.
    """
    try:
        # The retriever is obtained before the LLM, as in the original flow.
        doc_retriever = load_chroma_db()
        language_model = load_llm()

        chain = RetrievalQA.from_chain_type(
            llm=language_model,
            chain_type="stuff",
            retriever=doc_retriever,
            return_source_documents=True,
        )
        response = chain.invoke(instruction)
        return response['result'], response
    except Exception as exc:
        # Deliberate catch-all: the caller always receives a displayable
        # answer rather than an exception.
        st.error(f"An error occurred while processing the answer: {exc}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}
|
|
|
def main():
    """Render the Streamlit page: title, description, question box, answer.

    When the "Ask" button is pressed with a non-blank question, the question
    is echoed back, ``process_answer`` is called, and both the answer and the
    full chain output are written to the page. A blank question only produces
    a warning, so the retrieval chain is not run on empty input.
    """
    # NOTE(review): the trailing "ππ" looks like mis-encoded emoji; the string
    # is left untouched because the intended characters are unknown -- confirm
    # and restore.
    st.title("Search Your PDF ππ")

    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    question = st.text_area("Enter your Question")

    if st.button("Ask"):
        # Guard: skip the model call for an empty or whitespace-only
        # question.
        if not question or not question.strip():
            st.warning("Please enter a question before pressing Ask.")
            return

        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            # Safety net for failures while rendering the results;
            # process_answer already reports its own errors.
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")
|
|
|
# Run the app only when this file is executed as the entry script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|
|