import os
import sys
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient
import gradio as gr
# Workaround for the outdated sqlite3 on HF Spaces: swap in pysqlite3 before Chroma is used.
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
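# (chromadb requires sqlite3 >= 3.35.0; the pysqlite3-binary wheel ships a newer build,
# so swapping it into sys.modules keeps Chroma working on the Spaces base image.)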

# Load documents
docs = []
for f in os.listdir("multiple_docs"):
    if f.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join("multiple_docs", f))
        docs.extend(loader.load())
    elif f.endswith(".docx") or f.endswith(".doc"):
        loader = Docx2txtLoader(os.path.join("multiple_docs", f))
        docs.extend(loader.load())
    elif f.endswith(".txt"):
        loader = TextLoader(os.path.join("multiple_docs", f))
        docs.extend(loader.load())
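# Note: Docx2txtLoader is built on docx2txt and targets .docx; legacy .doc files may
# not extract cleanly even though they pass the extension check above.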

# Split into chunks
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)
texts = [doc.page_content for doc in docs]
metadatas = [{"id": i} for i in range(len(texts))]
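# Each chunk carries a sequential integer id as metadata so retrieved chunks can be told
# apart later. CharacterTextSplitter splits on "\n\n" by default, so a single paragraph
# longer than 1,000 characters can still produce a chunk above chunk_size.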

# Embeddings
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Vectorstore
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)
vectorstore.add_texts(texts=texts, metadatas=metadatas)
vectorstore.persist()
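# Caveat: add_texts() runs on every startup, so a persisted ./db directory accumulates
# duplicate chunks across restarts; wiping ./db (or checking whether the collection is
# already populated) avoids that.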

# Get HF token from env variable
HF_API_KEY = os.getenv("HF_API_KEY")
if HF_API_KEY is None:
    raise ValueError("HF_API_KEY environment variable is not set.")
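# On HF Spaces the token can be supplied as a repository secret named HF_API_KEY;
# secrets are exposed to the running app as environment variables.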
HF_MODEL = "deepseek-ai/deepseek-llm-7b-base"

# Create an InferenceClient bound to the model
client = InferenceClient(model=HF_MODEL, token=HF_API_KEY)
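# client.text_generation() sends the prompt to the model's hosted text-generation
# endpoint and returns the generated string; whether this particular model is deployed
# on the serverless Inference API can vary, so a 4xx/5xx here usually means the model
# is not available for serverless use.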

# Wrap the HF client in the LangChain LLM interface
class HuggingFaceInferenceLLM(LLM):
    """LLM that queries the Hugging Face Inference API."""

    client: InferenceClient = client

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        response = self.client.text_generation(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
        )
        return response

    @property
    def _llm_type(self) -> str:
        return "huggingface_inference_api"

llm = HuggingFaceInferenceLLM()
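# Optional sanity check (commented out): the wrapper can be invoked directly, e.g.
#   print(llm("Briefly introduce yourself."))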

# Conversational chain
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
    return_source_documents=True,
    verbose=False
)
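# ConversationalRetrievalChain first condenses the question and chat history into a
# standalone query, retrieves the k=6 most similar chunks from Chroma, and asks the LLM
# to answer from them; return_source_documents=True keeps the retrieved chunks in
# result["source_documents"].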

# Gradio UI
chat_history = []
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Hello, I'm Thierry Decae's chatbot. Ask me about my experience, skills, eligibility, etc.")],
        avatar_images=["./multiple_docs/Guest.jpg", "./multiple_docs/Thierry Picture.jpg"]
    )
    msg = gr.Textbox(placeholder="Type your question here...")
    clear = gr.Button("Clear")

    def user(query, chat_history):
        # Convert the Chatbot value into (user, bot) tuples for the chain, run the
        # chain, then append the new turn and clear the textbox.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)  # add share=True only when running locally; it is not needed on HF Spaces