Spaces:
Sleeping
Sleeping
File size: 1,616 Bytes
6674899 21206fd 6674899 8a6b9ad 6674899 1084bdb 6674899 137d750 62390c0 21206fd 137d750 ebee81a 137d750 ebee81a 5a6f9e0 6674899 5a6f9e0 62390c0 6674899 62390c0 6674899 137d750 62390c0 137d750 6674899 62390c0 6674899 62390c0 6674899 21206fd 137d750 62390c0 6674899 137d750 62390c0 6674899 137d750 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint # Updated import
from huggingface_hub import login
# 1. Authentication — log in to the Hugging Face Hub so the Inference
# Endpoint below can authenticate.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # login(token=None) fails deep inside huggingface_hub with an opaque
    # error; fail fast here with an actionable message instead.
    raise RuntimeError(
        "HF_TOKEN environment variable is not set. "
        "Add it as a secret in the Space settings."
    )
login(token=hf_token)
# 2. PDF Processing
def create_qa_system():
    """Build a RetrievalQA chain over a local ``file.pdf``.

    Pipeline: load the PDF -> split into overlapping chunks -> embed the
    chunks with a sentence-transformers model -> index them in FAISS ->
    wire the index to a Hugging Face Inference Endpoint LLM.

    Returns:
        A ``RetrievalQA`` chain; call it with ``{"query": ...}``.

    Raises:
        gr.Error: if ``file.pdf`` is not present in the working directory.
    """
    if not os.path.exists("file.pdf"):
        raise gr.Error("❗ Upload file.pdf in Files tab")

    loader = PyMuPDFLoader("file.pdf")
    documents = loader.load()

    # Chunk the document so each retrieved piece fits comfortably in the
    # LLM context window; 50-char overlap preserves sentence continuity.
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    # Local CPU-friendly embedding model; FAISS holds the vectors in memory.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    db = FAISS.from_documents(texts, embeddings)

    # 3. LLM served remotely via the HF Inference Endpoint API.
    llm = HuggingFaceEndpoint(
        repo_id="google/flan-t5-base",
        max_length=256,
        temperature=0.2,  # low temperature: favour factual, grounded answers
        huggingfacehub_api_token=os.environ.get('HF_TOKEN'),  # explicit token passing
    )

    # "stuff" chain: concatenate the top-k retrieved chunks into one prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 2}),
    )
# 4. Initialize system once at import time so the (expensive) index build
# happens a single time and the chain is reused for every chat turn.
qa = create_qa_system()
# 5. Chat interface
def chat_response(message, history):
    """Answer one chat turn by querying the RetrievalQA chain.

    Args:
        message: the user's question (str).
        history: prior turns supplied by gr.ChatInterface; unused because
            each query is answered independently against the PDF index.

    Returns:
        The chain's answer text.
    """
    # Chain.__call__ is deprecated in modern LangChain; invoke() is the
    # supported entry point and returns the same result dict.
    response = qa.invoke({"query": message})
    return response["result"]

gr.ChatInterface(chat_response).launch()