M17idd committed on
Commit
053d440
·
1 Parent(s): 76f6398

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -13
app.py CHANGED
@@ -16,6 +16,8 @@ import faiss
16
  from langchain.indexes import VectorstoreIndexCreator
17
  from langchain.vectorstores import FAISS
18
  from langchain.embeddings import SentenceTransformerEmbeddings
 
 
19
 
20
 
21
  # ----------------- تنظیمات صفحه -----------------
@@ -99,24 +101,15 @@ st.markdown("""
99
  </div>
100
  """, unsafe_allow_html=True)
101
 
102
- # ----------------- لود PDF و ساخت ایندکس -----------------
103
-
104
  # ----------------- لود PDF و ساخت ایندکس -----------------
105
  @st.cache_resource
106
  def get_pdf_index():
107
  with st.spinner('📄 در حال پردازش فایل PDF...'):
108
- loader = PyPDFLoader('test1.pdf')
109
-
110
- splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=128)
111
-
112
- embedding_function = SentenceTransformer("togethercomputer/m2-bert-80M-8k-retrieval", trust_remote_code=True)
113
-
114
- embedding = SentenceTransformerEmbeddings(model=embedding_function)
115
 
116
- return VectorstoreIndexCreator(
117
- embedding=embedding,
118
- text_splitter=splitter
119
- ).from_loaders(loader)
120
 
121
  # ----------------- بارگذاری دیتا -----------------
122
  index = get_pdf_index()
 
16
  from langchain.indexes import VectorstoreIndexCreator
17
  from langchain.vectorstores import FAISS
18
  from langchain.embeddings import SentenceTransformerEmbeddings
19
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
20
+
21
 
22
 
23
  # ----------------- تنظیمات صفحه -----------------
 
101
  </div>
102
  """, unsafe_allow_html=True)
103
 
 
 
104
  # ----------------- لود PDF و ساخت ایندکس -----------------
105
  @st.cache_resource
106
  def get_pdf_index():
107
  with st.spinner('📄 در حال پردازش فایل PDF...'):
108
+ pdf_reader = PyPDFLoader('test1.pdf')
 
 
 
 
 
 
109
 
110
+ embeddings = HuggingFaceInstructEmbeddings(model_name="togethercomputer/m2-bert-80M-8k-retrieval", trust_remote_code=True)
111
+ index = VectorstoreIndexCreator( embedding=embeddings, text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=128)).from_loaders(pdf_reader)
112
+ return index
 
113
 
114
  # ----------------- بارگذاری دیتا -----------------
115
  index = get_pdf_index()