M17idd committed on
Commit
1c463e8
·
1 Parent(s): 5ba7663

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -93,20 +93,31 @@ st.markdown("""
93
  """, unsafe_allow_html=True)
94
 
95
  # ----------------- لود PDF و ساخت ایندکس -----------------
 
 
 
 
 
 
 
 
 
 
 
 
96
@st.cache_resource
def get_pdf_index():
    """Load ``test1.pdf`` and build a vector-store index over it.

    The Together API key is taken from the ``TOGETHER_API_KEY`` environment
    variable when it is set and non-empty; otherwise the key that was
    previously embedded in this file is used so existing deployments keep
    working.

    Returns:
        The index object produced by
        ``VectorstoreIndexCreator(...).from_loaders``.
    """
    import os  # local import so this block does not depend on file-level imports

    # NOTE(review): the fallback key below is committed to version control and
    # must be treated as leaked -- rotate it, set TOGETHER_API_KEY in the
    # deployment environment, and then delete this literal.
    api_key = (
        os.environ.get("TOGETHER_API_KEY")
        or "0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
    )

    with st.spinner('📄 در حال پردازش فایل PDF...'):
        pdf_loader = PyPDFLoader('test1.pdf')
        embeddings = TogetherEmbeddings(api_key=api_key)
        index = VectorstoreIndexCreator(embedding=embeddings).from_loaders([pdf_loader])
        return index
108
-
109
- # ----------------- بارگذاری دیتا -----------------
110
  index = get_pdf_index()
111
 
112
  llm = ChatOpenAI(
@@ -152,7 +163,6 @@ if st.session_state.pending_prompt:
152
  thinking.empty()
153
  full_response = ""
154
  placeholder = st.empty()
155
-
156
  for word in answer.split():
157
  full_response += word + " "
158
  placeholder.markdown(full_response + "▌")
 
93
  """, unsafe_allow_html=True)
94
 
95
  # ----------------- لود PDF و ساخت ایندکس -----------------
96
class TogetherEmbeddings(Embeddings):
    """Embeddings adapter that delegates to the Together client.

    Implements the two methods of the ``Embeddings`` interface
    (``embed_documents`` and ``embed_query``) by calling
    ``client.embeddings.create`` with the configured model.
    """

    def __init__(self, model_name: str, api_key: str):
        """Remember the model name and create the Together client.

        Args:
            model_name: Sent as ``model`` on every embeddings request.
            api_key: Passed to ``Together(api_key=...)``.
        """
        self.model_name = model_name
        self.client = Together(api_key=api_key)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts with a single embeddings request.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding vector per entry of ``response.data``, in the order
            the response lists them. An empty ``texts`` yields ``[]``.
        """
        if not texts:
            # Nothing to embed: avoid sending a request with an empty input.
            return []
        response = self.client.embeddings.create(model=self.model_name, input=texts)
        return [item.embedding for item in response.data]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            The embedding vector for ``text``.
        """
        return self.embed_documents([text])[0]
107
+
108
@st.cache_resource
def get_pdf_index():
    """Load ``test1.pdf``, split it into chunks, and build a vector index.

    The PDF is split with ``RecursiveCharacterTextSplitter`` (chunk size 300,
    no overlap) and embedded through ``TogetherEmbeddings`` using the
    ``togethercomputer/m2-bert-80M-8k-retrieval`` model.

    The Together API key is taken from the ``TOGETHER_API_KEY`` environment
    variable when it is set and non-empty; otherwise the key that was
    previously embedded in this file is used so existing deployments keep
    working.

    Returns:
        The index object produced by
        ``VectorstoreIndexCreator(...).from_loaders``.
    """
    import os  # local import so this block does not depend on file-level imports

    # NOTE(review): the fallback key below is committed to version control and
    # must be treated as leaked -- rotate it, set TOGETHER_API_KEY in the
    # deployment environment, and then delete this literal.
    api_key = (
        os.environ.get("TOGETHER_API_KEY")
        or "0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
    )

    with st.spinner('📄 در حال پردازش فایل PDF...'):
        loader = [PyPDFLoader('test1.pdf')]
        embeddings = TogetherEmbeddings(
            model_name="togethercomputer/m2-bert-80M-8k-retrieval",
            api_key=api_key,
        )
        return VectorstoreIndexCreator(
            embedding=embeddings,
            text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0),
        ).from_loaders(loader)
120
 
 
 
 
 
 
121
  index = get_pdf_index()
122
 
123
  llm = ChatOpenAI(
 
163
  thinking.empty()
164
  full_response = ""
165
  placeholder = st.empty()
 
166
  for word in answer.split():
167
  full_response += word + " "
168
  placeholder.markdown(full_response + "▌")