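"""Study Assistant: a Gradio chat app that answers questions about local
PDF/TXT study materials via a LangChain RetrievalQA pipeline (MiniLM
embeddings, a FAISS vector store, and a Flan-T5 generator)."""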
import os
import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer
def load_documents(file_path="study_materials"):
    """Load every PDF and TXT file in `file_path` into LangChain documents."""
    documents = []
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        if filename.endswith(".pdf"):
            loader = PyMuPDFLoader(path)
            documents.extend(loader.load())
        elif filename.endswith(".txt"):
            loader = TextLoader(path)
            documents.extend(loader.load())
    return documents
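# Expected layout (an assumption, not stated in this file): a "study_materials"
# folder next to app.py, e.g.
#   study_materials/lecture_notes.pdf
#   study_materials/chapter1.txt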
def create_qa_system():
    try:
        # Load documents
        documents = load_documents()
        if not documents:
            raise ValueError("No study materials found")
        # Text splitting
        text_splitter = CharacterTextSplitter(
            chunk_size=1100,
            chunk_overlap=200,
            separator="\n\n"
        )
        texts = text_splitter.split_documents(documents)
        # Embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # Vector store
        db = FAISS.from_documents(texts, embeddings)
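        # Optional extension (assumption, not in the original): persist the
        # index so restarts skip re-embedding, e.g. db.save_local("faiss_index")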
        # LLM setup with proper LangChain wrapper
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-large",
            max_length=600,
            temperature=0.7,
            tokenizer=tokenizer,
            do_sample=True,
            top_k=50,
            device=-1  # CPU
        )
        # Wrap pipeline in LangChain component
        llm = HuggingFacePipeline(pipeline=pipe)
        # Create QA chain
        return RetrievalQA.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")
# Initialize system
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Startup failed: {str(e)}")
    raise
def ask_question(question, history):
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        sources = list({doc.metadata['source'] for doc in result['source_documents']})
        return f"{answer}\n\nSources: {', '.join(sources)}"
    except Exception as e:
        return f"Error: {str(e)[:150]}"
gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
    theme="soft"
).launch()
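# Running this file (e.g. `python app.py`, or as a Space's entry point) starts
# the Gradio server on its default local port.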