Spaces:
Sleeping
Sleeping
File size: 2,527 Bytes
b13872f 4e3a79b b13872f 4e3a79b b13872f 3b6dd97 b13872f 0ffbfee a62dca0 b13872f a62dca0 b13872f 4e3a79b b13872f 4e3a79b a62dca0 b13872f 4e3a79b a62dca0 4e3a79b a62dca0 b13872f 4e3a79b b13872f a62dca0 b13872f 0ffbfee b13872f a503e7e 4e3a79b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import os
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import CTransformers
# Read the Hugging Face access token from the Space's Secrets
# (set HF_TOKEN in the Space settings; None when not configured).
# NOTE(review): the original Arabic comments in this section were garbled
# across lines by extraction; restored here as English.
HF_TOKEN = os.getenv("HF_TOKEN")

# Load a 4-bit quantized Mistral-7B-Instruct model locally via ctransformers.
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    hf_token=HF_TOKEN,
    config={"max_new_tokens": 512, "temperature": 0.7},
)
# Load the uploaded document into LangChain Document objects.
def load_documents(file_path):
    """Load *file_path* with the loader matching its file extension.

    Supports ``.pdf``, ``.txt`` and ``.docx`` (extension matching is
    case-insensitive, so ``report.PDF`` also works).

    Raises:
        ValueError: if the extension is not one of the supported types.
    """
    suffix = file_path.lower()
    if suffix.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif suffix.endswith(".txt"):
        # Explicit UTF-8 so Arabic text files decode correctly.
        loader = TextLoader(file_path, encoding="utf-8")
    elif suffix.endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError("Unsupported file type.")
    return loader.load()
# Prepare the uploaded document for question answering.
def process_file(file):
    """Index *file* and return a RetrievalQA chain that answers over it.

    Splits the document into overlapping chunks, embeds them with a
    multilingual sentence-transformer (covers Arabic queries), stores
    them in an in-memory Chroma index, and wires the retriever to the
    local LLM.
    """
    docs = load_documents(file.name)
    # 500-char chunks with 50-char overlap keep retrieved passages coherent.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    embedding = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectordb = Chroma.from_documents(chunks, embedding)
    retriever = vectordb.as_retriever()
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain
# Cached QA chain and the file it was built from. Building the index is
# expensive, so reuse the chain while the same file stays loaded.
qa_chain = None
_indexed_file = None

# Gradio callback: answer *question* from the uploaded *file*.
def ask_question(file, question):
    """Return the model's answer wrapped in an RTL-styled HTML div."""
    global qa_chain, _indexed_file
    # Guard: asking before uploading would crash on file.name.
    if file is None:
        return "<div dir='rtl' style='text-align: right;'>الرجاء تحميل ملف أولاً.</div>"
    # Bug fix: the original cached the first chain forever, so uploading a
    # different file kept answering from the old document. Rebuild whenever
    # the file path changes.
    if qa_chain is None or _indexed_file != file.name:
        qa_chain = process_file(file)
        _indexed_file = file.name
    answer = qa_chain.run(question)
    return f"<div dir='rtl' style='text-align: right;'>{answer}</div>"
# Gradio UI (RTL layout for Arabic).
# NOTE(review): the original Arabic labels were split across lines and
# mojibake-encoded by extraction; the text below is reconstructed from the
# fragments — emoji are best-guess, verify against the live Space.
with gr.Blocks(css="body {direction: rtl; text-align: right;}") as demo:
    # "Smart document assistant — query your files in Arabic"
    gr.Markdown("## مساعد الوثائق الذكي - استعلام باللغة العربية من ملفاتك")
    # "Upload a file (PDF / DOCX / TXT)"
    file_input = gr.File(
        label="📁 حمل ملفا (PDF / DOCX / TXT)",
        file_types=[".pdf", ".txt", ".docx"],
    )
    # "Enter your question in Arabic" / placeholder: "What is this file about?"
    question_input = gr.Textbox(
        label="❓ أدخل سؤالك بالعربية",
        placeholder="ما هو موضوع هذا الملف؟",
    )
    output = gr.HTML()
    # "Query"
    submit_btn = gr.Button("🔍 استعلم")
    submit_btn.click(
        fn=ask_question,
        inputs=[file_input, question_input],
        outputs=output,
    )

demo.launch()
|