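"""Gradio PDF question-answering Space.

Loads file.pdf, splits it into chunks, indexes the chunks in FAISS with
all-MiniLM-L6-v2 embeddings, and answers chat queries with a local
google/flan-t5-small model through a LangChain RetrievalQA chain.
"""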
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
def create_qa_system():
    try:
        # Validate that the PDF has been uploaded alongside the app
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("Upload PDF via Files tab")

        # Load the PDF into LangChain documents (one per page)
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if len(documents) == 0:
            raise ValueError("PDF is empty or corrupted")

        # Split text into small, overlapping chunks for retrieval
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50
        )
        texts = text_splitter.split_documents(documents)

        # Embed each chunk with a compact sentence-transformer model
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Build an in-memory FAISS vector store over the chunks
        db = FAISS.from_documents(texts, embeddings)

        # Initialize a local seq2seq model with the LangChain wrapper
        model_name = "google/flan-t5-small"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        pipe = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=128,
            do_sample=True,   # sampling must be on for temperature to take effect
            temperature=0.2,
            device_map="auto"
        )
        llm = HuggingFacePipeline(pipeline=pipe)

        # "stuff" chain: concatenate the top-k retrieved chunks into one prompt
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 2})
        )
    except Exception as e:
        raise gr.Error(f"Initialization failed: {str(e)}")
# Initialize system once at startup
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Fatal error: {str(e)}")
    raise
def chat_response(message, history):
    try:
        # RetrievalQA expects a dict with a "query" key; .invoke is the
        # non-deprecated equivalent of calling the chain directly
        response = qa.invoke({"query": message})
        return response["result"]
    except Exception as e:
        print(f"Error during query: {str(e)}")
        return f"⚠️ Error: {str(e)[:100]}"

gr.ChatInterface(chat_response).launch()
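# Note: on a Hugging Face Space this file is typically accompanied by a
# requirements.txt. A plausible one for the imports above (package names
# only; this list is inferred from the code, not taken from the repo):
#
#   gradio
#   langchain
#   langchain-community
#   langchain-text-splitters
#   transformers
#   sentence-transformers
#   faiss-cpu
#   pymupdf
#   accelerate  # needed for device_map="auto"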