File size: 6,999 Bytes
02a2d80 80d2c6b cdbe11c c28d5e0 1650168 80d2c6b c28d5e0 02a2d80 abfcbe7 c28d5e0 abfcbe7 c28d5e0 1650168 abfcbe7 c28d5e0 abfcbe7 02a2d80 abfcbe7 c28d5e0 abfcbe7 21bf972 abfcbe7 c28d5e0 abfcbe7 c28d5e0 abfcbe7 c28d5e0 abfcbe7 c28d5e0 abfcbe7 408d87c abfcbe7 c28d5e0 abfcbe7 c28d5e0 02a2d80 c28d5e0 1650168 abfcbe7 408d87c abfcbe7 1650168 abfcbe7 408d87c 3962050 1650168 02a2d80 1650168 02a2d80 1650168 d2055dc 1650168 40b0e5e 1650168 d2055dc 40b0e5e d2055dc 40b0e5e d2055dc 40b0e5e d2055dc 1650168 d2055dc 40b0e5e 1650168 d2055dc 40b0e5e d2055dc 40b0e5e d2055dc 40b0e5e 02a2d80 80d2c6b 38908cd f9ee1b6 02a2d80 80d2c6b 02a2d80 80d2c6b 02a2d80 80d2c6b 02a2d80 80d2c6b 02a2d80 1650168 80d2c6b 02a2d80 80d2c6b 02a2d80 80d2c6b 02a2d80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
import base64
import logging
import os
import re
import time
from pathlib import Path
from typing import List

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.base import Embeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from together import Together
# Page chrome: title, icon and wide layout must be configured before any
# other Streamlit element is emitted.
st.set_page_config(page_title="چت بات ارتش", page_icon="🪖", layout="wide")

# Global stylesheet: right-to-left Vazirmatn typography, translucent chat
# bubbles, green buttons and the header banner used further down the page.
_PAGE_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
html, body, [class*="css"] {
font-family: 'Vazirmatn', Tahoma, sans-serif;
direction: rtl;
text-align: right;
}
.stApp {
background: url("./military_bg.jpeg") no-repeat center center fixed;
background-size: cover;
backdrop-filter: blur(2px);
}
.stChatMessage {
background-color: rgba(255,255,255,0.8);
border: 1px solid #4e8a3e;
border-radius: 12px;
padding: 16px;
margin-bottom: 15px;
box-shadow: 0 4px 10px rgba(0,0,0,0.2);
animation: fadeIn 0.4s ease-in-out;
}
.stTextInput > div > input, .stTextArea textarea {
background-color: rgba(255,255,255,0.9) !important;
border-radius: 8px !important;
direction: rtl;
text-align: right;
font-family: 'Vazirmatn', Tahoma;
}
.stButton>button {
background-color: #4e8a3e !important;
color: white !important;
font-weight: bold;
border-radius: 10px;
padding: 8px 20px;
transition: 0.3s;
}
.stButton>button:hover {
background-color: #3c6d30 !important;
}
.header-text {
text-align: center;
margin-top: 20px;
margin-bottom: 40px;
background-color: rgba(255, 255, 255, 0.75);
padding: 20px;
border-radius: 20px;
box-shadow: 0 4px 12px rgba(0,0,0,0.2);
}
.header-text h1 {
font-size: 42px;
color: #2c3e50;
margin: 0;
font-weight: bold;
}
.subtitle {
font-size: 18px;
color: #34495e;
margin-top: 8px;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
</style>
"""


@st.cache_data(show_spinner=False)
def _background_data_uri(image_path: str = "military_bg.jpeg"):
    """Return the background image as a ``data:`` URI, or ``None`` if unreadable.

    The path is resolved against the working directory, the same way the
    logo and the PDF are located elsewhere in this script. The result is
    cached so the file is not re-read and re-encoded on every rerun.
    """
    try:
        raw = Path(image_path).read_bytes()
    except OSError:
        return None
    return "data:image/jpeg;base64," + base64.b64encode(raw).decode("ascii")


def _page_css() -> str:
    """Return the stylesheet with the background image inlined when possible.

    The browser resolves ``url("./military_bg.jpeg")`` against the app's
    URL, and Streamlit does not serve arbitrary files that sit next to the
    script, so the relative reference never reaches the image. Embedding the
    bytes as a data URI makes the background independent of how the app is
    hosted. If the file cannot be read, the stylesheet is returned unchanged.
    """
    data_uri = _background_data_uri()
    if data_uri is None:
        return _PAGE_CSS
    return _PAGE_CSS.replace('url("./military_bg.jpeg")', f'url("{data_uri}")')


st.markdown(_page_css(), unsafe_allow_html=True)
# Three equal-width columns; only the middle one is populated, which
# centres the logo horizontally on the wide layout.
col1, col2, col3 = st.columns([1, 1, 1])
# NOTE(review): indentation was lost in the copy this was restored from;
# this assumes only the logo lives inside the column and the title banner
# spans the full page width -- confirm against the deployed layout.
with col2:
    st.image("army.png", width=240)

# Title banner, styled by the .header-text / .subtitle CSS rules.
st.markdown(
    """
<div class="header-text">
<h1>چت بات ارتش</h1>
<div class="subtitle">دستیار هوشمند برای تصمیمگیری در میدان نبرد</div>
</div>
""",
    unsafe_allow_html=True,
)
class TogetherEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter that calls the Together embeddings API.

    Args:
        model_name: Identifier of the embedding model to request.
        api_key: Together API key used to build the client.
        batch_size: Maximum number of texts sent in one request. Documents
            are embedded in consecutive batches of this size so that a large
            corpus is not pushed through a single oversized request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, model_name: str, api_key: str, batch_size: int = 128):
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.model_name = model_name
        self.batch_size = batch_size
        self.client = Together(api_key=api_key)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed ``texts`` and return one vector per text, in input order.

        An empty input returns an empty list without contacting the API.
        """
        texts = list(texts)
        vectors: List[List[float]] = []
        for start in range(0, len(texts), self.batch_size):
            batch = texts[start:start + self.batch_size]
            response = self.client.embeddings.create(
                model=self.model_name,
                input=batch,
            )
            vectors.extend(item.embedding for item in response.data)
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string using the same model as documents."""
        return self.embed_documents([text])[0]
@st.cache_resource
def get_pdf_index():
    """Build the vector index over ``test1.pdf``.

    The PDF is loaded page by page, each page is cut into small overlapping
    chunks, blank chunks are discarded, and the remaining chunks are embedded
    through ``TogetherEmbeddings`` and handed to ``VectorstoreIndexCreator``.
    The function is wrapped in ``st.cache_resource``.

    Returns:
        The index wrapper produced by ``VectorstoreIndexCreator.from_documents``.

    Raises:
        RuntimeError: If the ``TOGETHER_API_KEY`` environment variable is unset.
        ValueError: If the PDF contains no extractable text.
    """
    # Kept as a function-local import, as in the original code.
    from langchain.schema import Document as LangchainDocument

    # The key used to be committed in this file. It is now taken from the
    # environment; the previously committed key should be treated as leaked
    # and revoked.
    api_key = os.environ.get("TOGETHER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "TOGETHER_API_KEY is not set; export it (or add it as a secret) "
            "before starting the app."
        )

    with st.spinner('📄 در حال پردازش فایل PDF...'):
        # Load the file.
        pages = PyPDFLoader('test1.pdf').load()

        # First pass: regular chunking at 124 characters with 25 overlap.
        # Pages that already fit are kept whole.
        first_pass = RecursiveCharacterTextSplitter(
            chunk_size=124,
            chunk_overlap=25
        )
        small_chunks = []
        for page in pages:
            text = page.page_content
            if len(text) > 124:
                small_chunks.extend(first_pass.split_text(text))
            else:
                small_chunks.append(text)

        # Second pass: make sure no chunk is longer than 2000 characters.
        # With the 124-character first pass this is normally a no-op and
        # only acts as a safety net.
        second_pass = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=200
        )
        final_chunks = []
        for chunk in small_chunks:
            if len(chunk) > 2000:
                final_chunks.extend(second_pass.split_text(chunk))
            else:
                final_chunks.append(chunk)

        # Pages without extractable text (e.g. image-only pages) yield empty
        # strings; embedding those wastes a request slot and adds useless
        # vectors to the index, so drop them here.
        final_chunks = [chunk for chunk in final_chunks if chunk.strip()]
        if not final_chunks:
            raise ValueError("test1.pdf contains no extractable text to index")

        # Wrap the chunks as documents for the index.
        documents = [LangchainDocument(page_content=text) for text in final_chunks]

        # Build the embedding client.
        embeddings = TogetherEmbeddings(
            model_name="togethercomputer/m2-bert-80M-32k-retrieval",
            api_key=api_key
        )

        # Build the index.
        return VectorstoreIndexCreator(
            embedding=embeddings,
            text_splitter=RecursiveCharacterTextSplitter(
                chunk_size=2000,
                chunk_overlap=200
            )
        ).from_documents(documents)
# Credentials come from the environment rather than from source control.
# The key that used to be committed here should be treated as leaked and
# revoked.
together_api_key = os.environ.get("TOGETHER_API_KEY")
if not together_api_key:
    st.error(
        "TOGETHER_API_KEY is not set; export it (or add it as a secret) "
        "before starting the app."
    )
    st.stop()

# Vector index over the PDF, built by get_pdf_index().
index = get_pdf_index()

# Chat model served through Together's OpenAI-compatible endpoint.
llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=together_api_key,
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
)

# Retrieval QA chain: the retrieved chunks are "stuffed" into one prompt,
# and the user's text is passed under the 'question' input key.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=index.vectorstore.as_retriever(),
    input_key='question'
)
# Per-session state: the transcript, and the prompt that still awaits an
# answer (set on submit, consumed on the following rerun).
if 'messages' not in st.session_state:
    st.session_state.messages = []
if 'pending_prompt' not in st.session_state:
    st.session_state.pending_prompt = None

# Replay the transcript. Message text comes from the user and from the
# model, so it is rendered as plain markdown: raw HTML is not allowed here,
# which also matches how the answer is rendered while it is being typed out.
for msg in st.session_state.messages:
    with st.chat_message(msg['role']):
        st.markdown(f"🗨️ {msg['content']}")

prompt = st.chat_input("چطور میتونم کمک کنم؟")
if prompt:
    # Store the question and rerun so it is drawn as part of the transcript
    # before the (slow) model call starts.
    st.session_state.messages.append({'role': 'user', 'content': prompt})
    st.session_state.pending_prompt = prompt
    st.rerun()

if st.session_state.pending_prompt:
    with st.chat_message('ai'):
        thinking = st.empty()
        thinking.markdown("🤖 در حال فکر کردن...")
        try:
            response = chain.run(f'question:پاسخ را فقط به زبان فارسی جواب بده {st.session_state.pending_prompt}')
        except Exception:
            # UI boundary: log the failure and answer with an apology instead
            # of crashing. Without this the pending prompt stayed set and
            # every rerun retried the same failing request.
            logging.getLogger(__name__).exception("RetrievalQA chain failed")
            answer = "متأسفم، در دریافت پاسخ خطایی رخ داد. لطفاً دوباره تلاش کنید."
        else:
            # Keep only the text after the prompt template's answer marker,
            # if the model echoed it.
            answer = response.split("Helpful Answer:")[-1].strip()
            if not answer:
                answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."
        thinking.empty()

        # Typewriter effect. Each token keeps its trailing whitespace so
        # line breaks and list formatting in the answer survive.
        shown = ""
        placeholder = st.empty()
        for token in re.findall(r"\S+\s*", answer):
            shown += token
            placeholder.markdown(shown + "▌")
            time.sleep(0.03)
        placeholder.markdown(answer)

        st.session_state.messages.append({'role': 'ai', 'content': answer})
        st.session_state.pending_prompt = None
|