File size: 7,705 Bytes
6dfbc1e
02a2d80
 
d3311ac
8a8d9dd
 
 
 
0189c6f
8dfc9e7
597f25d
 
 
26608f4
 
cdbe11c
6dfbc1e
6631b7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a8d9dd
eac86d2
26608f4
 
 
2a0ad25
8a8d9dd
8a2dda5
8a8d9dd
 
548b3b7
8a8d9dd
 
 
 
 
 
 
 
 
 
 
 
 
 
0c4ad43
8a2dda5
5597954
26608f4
8a2dda5
e5ae991
8a2dda5
548b3b7
e5ae991
 
 
 
 
 
 
5597954
af2b6fa
 
 
8a2dda5
8a8d9dd
2a0ad25
8a8d9dd
eac86d2
26608f4
6e6b1e1
 
ffdd46f
 
 
 
597f25d
f408c43
597f25d
d71c024
8e71560
e7019aa
f408c43
 
e7019aa
0c4ad43
26608f4
2a0ad25
e7019aa
 
d71c024
e7019aa
 
 
 
 
597f25d
 
9153fe9
 
03d1abd
597f25d
03d1abd
 
 
 
 
 
597f25d
8a8d9dd
02a2d80
 
 
 
 
 
6dfbc1e
80d2c6b
 
 
02a2d80
6dfbc1e
8a8d9dd
02a2d80
 
 
 
 
 
597f25d
02a2d80
 
26608f4
548b3b7
26608f4
6dfbc1e
8a8d9dd
 
21dbe18
 
02a2d80
26608f4
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import os
import time
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document as LangchainDocument
from langchain.chains import RetrievalQA
import torch
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from typing import List
from pydantic import Field
from sentence_transformers import SentenceTransformer
import numpy as np

# ----------------- Page settings -----------------
st.set_page_config(page_title="چت‌ بات توانا", page_icon="🪖", layout="wide")


@st.cache_data(show_spinner=False)
def _background_css_url(image_path: str = "military_bg.jpeg") -> str:
    """Return a CSS ``url(...)`` value for the page background image.

    A relative URL inside injected CSS is resolved by the browser against the
    Streamlit server, which does not serve files from the app directory, so
    the image is embedded as a base64 ``data:`` URI instead.

    Args:
        image_path: Path of the image file, relative to the working directory.

    Returns:
        A ``url("data:...")`` string when the file can be read; otherwise the
        plain relative ``url("./<image_path>")`` so the page still renders
        (without a background) instead of failing.
    """
    import base64
    import mimetypes

    try:
        with open(image_path, "rb") as image_file:
            payload = base64.b64encode(image_file.read()).decode("ascii")
    except OSError:
        return f'url("./{image_path}")'

    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    return f'url("data:{mime_type};base64,{payload}")'


# Global stylesheet. ``__BACKGROUND_URL__`` is a placeholder that is swapped
# for the real background value below; plain ``str.replace`` is used so the
# CSS braces do not need escaping as they would in an f-string.
_PAGE_CSS = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
    html, body, [class*="css"] {
        font-family: 'Vazirmatn', Tahoma, sans-serif;
        direction: rtl;
        text-align: right;
    }
    .stApp {
        background: __BACKGROUND_URL__ no-repeat center center fixed;
        background-size: cover;
        backdrop-filter: blur(2px);
    }
    .stChatMessage {
        background-color: rgba(255,255,255,0.8);
        border: 1px solid #4e8a3e;
        border-radius: 12px;
        padding: 16px;
        margin-bottom: 15px;
        box-shadow: 0 4px 10px rgba(0,0,0,0.2);
        animation: fadeIn 0.4s ease-in-out;
    }
    .stTextInput > div > input, .stTextArea textarea {
        background-color: rgba(255,255,255,0.9) !important;
        border-radius: 8px !important;
        direction: rtl;
        text-align: right;
        font-family: 'Vazirmatn', Tahoma;
    }
    .stButton>button {
        background-color: #4e8a3e !important;
        color: white !important;
        font-weight: bold;
        border-radius: 10px;
        padding: 8px 20px;
        transition: 0.3s;
    }
    .stButton>button:hover {
        background-color: #3c6d30 !important;
    }
    .header-text {
        text-align: center;
        margin-top: 20px;
        margin-bottom: 40px;
        background-color: rgba(255, 255, 255, 0.75);
        padding: 20px;
        border-radius: 20px;
        box-shadow: 0 4px 12px rgba(0,0,0,0.2);
    }
    .header-text h1 {
        font-size: 42px;
        color: #2c3e50;
        margin: 0;
        font-weight: bold;
    }
    .subtitle {
        font-size: 18px;
        color: #34495e;
        margin-top: 8px;
    }
    @keyframes fadeIn {
        from { opacity: 0; transform: translateY(10px); }
        to { opacity: 1; transform: translateY(0); }
    }
    </style>
"""

st.markdown(
    _PAGE_CSS.replace("__BACKGROUND_URL__", _background_css_url()),
    unsafe_allow_html=True,
)

# Logo: three equal-width columns, with the image placed in the middle one.
_left_gutter, logo_column, _right_gutter = st.columns([1, 1, 1])
logo_column.image("army.png", width=240)

# Title banner; styled by the ``header-text`` / ``subtitle`` CSS classes.
header_html = """
    <div class="header-text">
        <h1>چت‌ بات توانا</h1>
        <div class="subtitle">دستیار هوشمند برای تصمیم‌گیری در میدان نبرد</div>
    </div>
"""
st.markdown(header_html, unsafe_allow_html=True)

# ----------------- Load the FarsiBERT model (disabled) -----------------
# model_name = "HooshvareLab/bert-fa-zwnj-base"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)

# ----------------- Load the PDF and build the index -----------------

@st.cache_resource
def build_pdf_index(
    pdf_path: str = "test1.pdf",
    model_name: str = "aubmindlab/bert-base-arabert",
    chunk_size: int = 500,
    chunk_overlap: int = 50,
    batch_size: int = 128,
):
    """Split a PDF into text chunks and embed every chunk.

    The result is cached by ``st.cache_resource``, so the PDF is parsed and
    embedded once per distinct argument set rather than on every rerun.

    Args:
        pdf_path: Path of the PDF file to index.
        model_name: SentenceTransformer model used to embed the chunks.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        batch_size: Number of chunks encoded per ``encode`` call; also the
            granularity of the progress bar.

    Returns:
        A ``(documents, embeddings)`` pair: ``documents`` is a list with one
        document per chunk and ``embeddings`` is a 2-D NumPy array whose row
        ``i`` is the embedding of ``documents[i]``. When the PDF yields no
        text, ``documents`` is empty and ``embeddings`` has zero rows.
    """
    with st.spinner('📄 در حال پردازش فایل ...'):
        pages = PyPDFLoader(pdf_path).load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        chunk_texts = []
        for page in pages:
            chunk_texts.extend(splitter.split_text(page.page_content))

        documents = [LangchainDocument(page_content=text) for text in chunk_texts]

        sentence_model = SentenceTransformer(model_name)

        if not chunk_texts:
            # Nothing to embed: keep the array 2-D (0 rows, embedding-width
            # columns) so a later matrix/vector product yields an empty
            # score vector instead of raising a shape error.
            width = sentence_model.get_sentence_embedding_dimension() or 0
            return documents, np.empty((0, width), dtype=np.float32)

        progress_bar = st.progress(0)
        total_docs = len(chunk_texts)

        batch_embeddings = []
        for start in range(0, total_docs, batch_size):
            batch_texts = chunk_texts[start:start + batch_size]
            batch_embeddings.append(
                sentence_model.encode(batch_texts, convert_to_numpy=True)
            )
            # The last batch may be short, so clamp the fraction to 1.0.
            progress_bar.progress(min((start + batch_size) / total_docs, 1.0))

        progress_bar.empty()

        return documents, np.vstack(batch_embeddings)

# ----------------- LLM definition (Together AI, OpenAI-compatible endpoint) -----------------
# The API key is read from the TOGETHER_API_KEY environment variable so that
# no secret lives in the source tree.
# NOTE: any key that was ever committed to this file must be treated as
# compromised and rotated with the provider.
together_api_key = os.environ.get("TOGETHER_API_KEY")
if not together_api_key:
    # Fail early with a visible message instead of an opaque authentication
    # error on the first question.
    st.error("کلید API تنظیم نشده است. لطفاً متغیر محیطی TOGETHER_API_KEY را مقداردهی کنید.")
    st.stop()

llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=together_api_key,
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
)

# ----------------- SimpleRetriever definition -----------------
@st.cache_resource
def _load_query_encoder(model_name: str = "aubmindlab/bert-base-arabert"):
    """Load the query-embedding model once and reuse it across queries.

    Constructing a ``SentenceTransformer`` loads the model weights, so doing
    it inside every retrieval call makes each question pay the full load cost.
    """
    return SentenceTransformer(model_name)


class SimpleRetriever(BaseRetriever):
    """Retriever that ranks pre-embedded documents by cosine similarity."""

    # Candidate documents; documents[i] is described by embeddings[i].
    documents: List[Document] = Field(...)
    # One embedding vector per document, in the same order as ``documents``.
    embeddings: List[np.ndarray] = Field(...)
    # Maximum number of documents returned per query.
    top_k: int = 5

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Return the ``top_k`` documents most similar to ``query``.

        Args:
            query: The user's question as plain text.

        Returns:
            Up to ``top_k`` documents ordered from most to least similar;
            an empty list when there is nothing to search.
        """
        doc_matrix = np.asarray(self.embeddings, dtype=float)
        if doc_matrix.ndim != 2 or doc_matrix.shape[0] == 0:
            return []

        # Embed the query with the same model family used for the documents.
        query_vector = np.asarray(
            _load_query_encoder().encode(query, convert_to_numpy=True),
            dtype=float,
        )

        # Cosine similarity: a raw dot product would favour vectors with a
        # large norm regardless of direction, because the embeddings are not
        # normalised when they are produced.
        dot_products = doc_matrix @ query_vector
        norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vector)
        similarities = np.divide(
            dot_products,
            norms,
            out=np.zeros_like(dot_products),
            where=norms > 0,  # zero-norm vectors get similarity 0, not NaN
        )

        # Highest similarity first, truncated to the requested count.
        best_first = np.argsort(similarities)[::-1][: max(self.top_k, 0)]
        return [self.documents[int(i)] for i in best_first]

# ----------------- Build the index -----------------
index_parts = build_pdf_index()
documents, embeddings = index_parts
retriever = SimpleRetriever(embeddings=embeddings, documents=documents)

# ----------------- Build the chain -----------------
# Retrieval-QA chain wired to the LLM and retriever defined above.
qa_chain_options = {"chain_type": "stuff", "input_key": "question"}
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    **qa_chain_options,
)

# ----------------- Chat state -----------------
# Seed each session key only when it is missing, so values written by earlier
# runs of the script are preserved.
for state_key, initial_value in (('messages', []), ('pending_prompt', None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# ----------------- Render previous messages -----------------
# Message contents originate from the user and from the LLM, so they are
# rendered as ordinary Markdown; raw HTML is deliberately not enabled here to
# keep untrusted text from injecting markup into the page.
for msg in st.session_state.messages:
    with st.chat_message(msg['role']):
        st.markdown(f"🗨️ {msg['content']}")

# ----------------- Chat input -----------------
prompt = st.chat_input("سوالی در مورد فایل بپرس...")

if prompt:
    # Record the question, mark it as awaiting an answer, then rerun the
    # script so the history loop draws the question before the answer starts.
    user_turn = {'role': 'user', 'content': prompt}
    st.session_state.pending_prompt = prompt
    st.session_state.messages.append(user_turn)
    st.rerun()

# ----------------- Model response -----------------
if st.session_state.pending_prompt:
    with st.chat_message('ai'):
        # Temporary "thinking" indicator shown while the chain runs.
        thinking = st.empty()
        thinking.markdown("🤖 در حال فکر کردن  ...")

        try:
            response = chain.run(f"سوال: {st.session_state.pending_prompt}")
            answer = response.strip()
        except Exception as e:
            # Top-level UI boundary: show the failure in the chat instead of
            # crashing the page.
            answer = f"خطا در پاسخ‌دهی: {e}"

        thinking.empty()

        # Persist the result before animating, so a rerun that interrupts the
        # typing effect neither loses the answer nor asks the model again.
        st.session_state.messages.append({'role': 'ai', 'content': answer})
        st.session_state.pending_prompt = None

        # Typing effect. Splitting on single spaces (rather than on arbitrary
        # whitespace) keeps newlines inside the tokens, so paragraphs, lists
        # and other Markdown structure in the answer survive.
        placeholder = st.empty()
        shown = ""
        for token in answer.split(" "):
            shown += token + " "
            placeholder.markdown(shown + "▌")
            time.sleep(0.03)

        # Final render without the cursor, using the exact answer text.
        placeholder.markdown(answer)