Spaces:

jarif
/

AI-Powered-PDF-Document-Search-and-QA

Sleeping

App Files Files Community

AI-Powered-PDF-Document-Search-and-QA / app.py

jarif

Update app.py

90c47ef verified 12 months ago

raw

history blame

3.13 kB

	import streamlit as st
	import os
	import logging
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import Chroma
	from langchain_community.llms import HuggingFacePipeline
	from langchain.chains import RetrievalQA
	from ingest import create_chroma_db

	# Configure root logging at INFO level and create this module's logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Model identifier handed to the transformers `from_pretrained` calls.
	# NOTE(review): there is no organisation prefix, so this presumably resolves
	# to a local directory named "LaMini-T5-738M" -- confirm it exists at runtime.
	checkpoint = "LaMini-T5-738M"

	@st.cache_resource
	def load_llm():
	    """Build the LangChain LLM wrapper used to generate answers.

	    Loads the tokenizer and seq2seq model named by the module-level
	    ``checkpoint``, wires them into a ``text2text-generation`` pipeline with
	    sampling enabled, and wraps that pipeline in ``HuggingFacePipeline``.

	    The function is decorated with ``st.cache_resource``.

	    Returns:
	        HuggingFacePipeline: wrapper around the configured pipeline.
	    """
	    # Generation settings forwarded unchanged to the transformers pipeline.
	    generation_settings = {
	        "max_length": 256,
	        "do_sample": True,
	        "temperature": 0.3,
	        "top_p": 0.95,
	    }

	    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

	    generator = pipeline(
	        'text2text-generation',
	        model=seq2seq_model,
	        tokenizer=text_tokenizer,
	        **generation_settings,
	    )
	    return HuggingFacePipeline(pipeline=generator)

	def load_chroma_db():
	    """Open the persisted Chroma vector store and return it as a retriever.

	    If the ``chroma_db`` directory does not exist, ``create_chroma_db()`` is
	    called to build it first. If the directory is still missing afterwards,
	    an error is shown in the UI and ``RuntimeError`` is raised.

	    Returns:
	        The retriever produced by ``Chroma.as_retriever()``.

	    Raises:
	        RuntimeError: if the database directory could not be created.
	        Exception: any error raised while building the embeddings or opening
	            the store is reported in the UI, logged, and re-raised.
	    """
	    chroma_dir = "chroma_db"
	    if not os.path.exists(chroma_dir):
	        st.warning("Chroma database not found. Creating a new one...")
	        create_chroma_db()

	        # Re-check: the ingest step may have produced nothing.
	        if not os.path.exists(chroma_dir):
	            st.error("Failed to create the Chroma database. Please check the 'docs' directory and try again.")
	            raise RuntimeError("Chroma database creation failed.")

	    try:
	        # NOTE(review): this must be the same embedding model the ingest step
	        # used to build the store -- confirm against ingest.create_chroma_db.
	        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	        # The Chroma vector store has no `load_local` (that is the FAISS API);
	        # a persisted collection is reopened by pointing the constructor at
	        # its directory.
	        # NOTE(review): assumes the default collection name was used at
	        # ingest time -- confirm.
	        db = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)
	        logger.info("Chroma database loaded successfully from %s", chroma_dir)
	        return db.as_retriever()
	    except Exception as e:
	        st.error(f"Failed to load Chroma database: {e}")
	        logger.exception("Exception in load_chroma_db")
	        raise

	def process_answer(instruction):
	    """Answer ``instruction`` with a retrieval-augmented QA chain.

	    Builds a "stuff" ``RetrievalQA`` chain from the retriever returned by
	    ``load_chroma_db()`` and the LLM returned by ``load_llm()``, invokes it
	    with ``instruction`` and extracts the ``'result'`` entry.

	    Args:
	        instruction: the user's question, passed straight to the chain.

	    Returns:
	        tuple: ``(answer, chain_output)`` on success. On any exception the
	        error is shown in the UI and logged, and a generic message paired
	        with an empty dict is returned instead.
	    """
	    try:
	        # Retriever first, then the LLM -- same order as the calls were made.
	        doc_retriever = load_chroma_db()
	        language_model = load_llm()

	        chain = RetrievalQA.from_chain_type(
	            llm=language_model,
	            chain_type="stuff",
	            retriever=doc_retriever,
	            return_source_documents=True,
	        )
	        chain_output = chain.invoke(instruction)
	        return chain_output['result'], chain_output
	    except Exception as err:
	        st.error(f"An error occurred while processing the answer: {err}")
	        logger.exception("Exception in process_answer")
	        return "An error occurred while processing your request.", {}

	def main():
	    """Render the Streamlit page and answer the question the user submits.

	    Shows the title, an "About the App" expander and a question text area.
	    When "Ask" is clicked with a non-blank question, the question is echoed
	    back and the answer plus the raw chain output from ``process_answer`` are
	    written to the page. A blank question produces a warning instead of
	    running the QA pipeline.
	    """
	    st.title("Search Your PDF 📚📝")

	    with st.expander("About the App"):
	        st.markdown(
	            """
	This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
	"""
	        )

	    question = st.text_area("Enter your Question")

	    if st.button("Ask"):
	        # Guard: do not run retrieval and generation on an empty or
	        # whitespace-only query.
	        if not (question or "").strip():
	            st.warning("Please enter a question before clicking Ask.")
	            return

	        st.info("Your Question: " + question)
	        st.info("Your Answer")
	        try:
	            answer, metadata = process_answer(question)
	            st.write(answer)
	            st.write(metadata)
	        except Exception as e:
	            # process_answer handles its own errors; this catches failures
	            # while rendering the results.
	            st.error(f"An unexpected error occurred: {e}")
	            logger.exception("Unexpected error in main function")

	# Script entry point: render the app only when this file is run directly.
	if __name__ == "__main__":
	    main()