Update app.py
Browse files
app.py
CHANGED
@@ -93,20 +93,31 @@ st.markdown("""
|
|
93 |
""", unsafe_allow_html=True)
|
94 |
|
95 |
# ----------------- لود PDF و ساخت ایندکس -----------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
@st.cache_resource
|
97 |
def get_pdf_index():
|
98 |
with st.spinner('📄 در حال پردازش فایل PDF...'):
|
99 |
-
|
100 |
-
|
101 |
embeddings = TogetherEmbeddings(
|
|
|
102 |
api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
|
103 |
)
|
|
|
|
|
|
|
|
|
104 |
|
105 |
-
index = VectorstoreIndexCreator(embedding=embeddings).from_loaders([pdf_loader])
|
106 |
-
|
107 |
-
return index
|
108 |
-
|
109 |
-
# ----------------- بارگذاری دیتا -----------------
|
110 |
index = get_pdf_index()
|
111 |
|
112 |
llm = ChatOpenAI(
|
@@ -152,7 +163,6 @@ if st.session_state.pending_prompt:
|
|
152 |
thinking.empty()
|
153 |
full_response = ""
|
154 |
placeholder = st.empty()
|
155 |
-
|
156 |
for word in answer.split():
|
157 |
full_response += word + " "
|
158 |
placeholder.markdown(full_response + "▌")
|
|
|
93 |
""", unsafe_allow_html=True)
|
94 |
|
95 |
# ----------------- لود PDF و ساخت ایندکس -----------------
|
96 |
+
class TogetherEmbeddings(Embeddings):
    """``Embeddings`` implementation backed by a ``Together`` client.

    Each call forwards the input text(s) to ``client.embeddings.create`` using
    the model name given at construction time.
    """

    def __init__(self, model_name: str, api_key: str):
        """Store the model name and create the Together client.

        Args:
            model_name: Model identifier sent with every embeddings request.
            api_key: Credential handed to the ``Together`` client.
        """
        self.model_name = model_name
        self.client = Together(api_key=api_key)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per input text.

        Args:
            texts: Strings to embed in a single request.

        Returns:
            The ``embedding`` attribute of every item in the response data,
            in the order the response lists them. An empty list when
            ``texts`` is empty.
        """
        if not texts:
            # Nothing to embed: avoid a pointless (and possibly rejected)
            # request with an empty input list.
            return []
        response = self.client.embeddings.create(model=self.model_name, input=texts)
        return [item.embedding for item in response.data]

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding vector for a single query string.

        Implemented by embedding a one-element batch and taking its only
        result.
        """
        return self.embed_documents([text])[0]
|
107 |
+
|
108 |
@st.cache_resource
def get_pdf_index(pdf_path: str = 'test1.pdf'):
    """Load a PDF and build a vector-store index over its text chunks.

    The PDF is read with ``PyPDFLoader``, split by a
    ``RecursiveCharacterTextSplitter`` (chunk size 300, no overlap) and
    embedded through ``TogetherEmbeddings``. A spinner is shown while the
    index is being built. The function is wrapped in ``st.cache_resource``.

    Args:
        pdf_path: Path of the PDF to index. Defaults to ``'test1.pdf'``, the
            file the app has always used, so existing zero-argument calls
            behave as before.

    Returns:
        The object produced by ``VectorstoreIndexCreator(...).from_loaders``.
    """
    import os

    # Prefer a key supplied through the environment so the secret does not
    # have to live in source control.
    # NOTE(review): the literal fallback below is kept only so current
    # deployments keep working; it is committed to the repository and should
    # be rotated and removed once TOGETHER_API_KEY is configured.
    api_key = (
        os.environ.get("TOGETHER_API_KEY")
        or "0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
    )

    with st.spinner('📄 در حال پردازش فایل PDF...'):
        loader = [PyPDFLoader(pdf_path)]
        embeddings = TogetherEmbeddings(
            model_name="togethercomputer/m2-bert-80M-8k-retrieval",
            api_key=api_key,
        )
        return VectorstoreIndexCreator(
            embedding=embeddings,
            text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0),
        ).from_loaders(loader)
|
120 |
|
|
|
|
|
|
|
|
|
|
|
121 |
index = get_pdf_index()
|
122 |
|
123 |
llm = ChatOpenAI(
|
|
|
163 |
thinking.empty()
|
164 |
full_response = ""
|
165 |
placeholder = st.empty()
|
|
|
166 |
for word in answer.split():
|
167 |
full_response += word + " "
|
168 |
placeholder.markdown(full_response + "▌")
|