Update app.py
Browse files
app.py
CHANGED
@@ -5,30 +5,27 @@ import docx
|
|
5 |
from langchain.chat_models import ChatOpenAI
|
6 |
from langchain.schema import SystemMessage, HumanMessage
|
7 |
from rapidfuzz import fuzz
|
|
|
8 |
|
9 |
# ---------- استایل ----------
|
10 |
st.markdown("""
|
11 |
<style>
|
12 |
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
|
13 |
-
|
14 |
html, body, [class*="css"] {
|
15 |
font-family: 'Vazirmatn', Tahoma, sans-serif;
|
16 |
direction: rtl;
|
17 |
text-align: right;
|
18 |
}
|
19 |
-
|
20 |
.stApp {
|
21 |
background: linear-gradient(to left, #4b5e40, #2e3b2e);
|
22 |
color: #ffffff;
|
23 |
}
|
24 |
-
|
25 |
[data-testid="stSidebar"] {
|
26 |
width: 260px !important;
|
27 |
background-color: #1a2b1e;
|
28 |
border: none !important;
|
29 |
padding-top: 20px;
|
30 |
}
|
31 |
-
|
32 |
.menu-item {
|
33 |
display: flex;
|
34 |
align-items: center;
|
@@ -39,17 +36,14 @@ st.markdown("""
|
|
39 |
cursor: pointer;
|
40 |
transition: background-color 0.3s ease;
|
41 |
}
|
42 |
-
|
43 |
.menu-item:hover {
|
44 |
background-color: #2e3b2e;
|
45 |
color: #b8860b;
|
46 |
}
|
47 |
-
|
48 |
.menu-item img {
|
49 |
width: 24px;
|
50 |
height: 24px;
|
51 |
}
|
52 |
-
|
53 |
.stButton>button {
|
54 |
background-color: #b8860b !important;
|
55 |
color: #1a2b1e !important;
|
@@ -63,13 +57,11 @@ st.markdown("""
|
|
63 |
width: 100%;
|
64 |
margin: 10px 0;
|
65 |
}
|
66 |
-
|
67 |
.stButton>button:hover {
|
68 |
background-color: #8b6508 !important;
|
69 |
transform: translateY(-2px);
|
70 |
box-shadow: 0 4px 8px rgba(0,0,0,0.3);
|
71 |
}
|
72 |
-
|
73 |
.header-text {
|
74 |
text-align: center;
|
75 |
margin: 20px 0;
|
@@ -78,20 +70,17 @@ st.markdown("""
|
|
78 |
border-radius: 15px;
|
79 |
box-shadow: 0 6px 12px rgba(0,0,0,0.4);
|
80 |
}
|
81 |
-
|
82 |
.header-text h1 {
|
83 |
font-size: 42px;
|
84 |
color: #b8860b;
|
85 |
margin: 0;
|
86 |
font-weight: 700;
|
87 |
}
|
88 |
-
|
89 |
.subtitle {
|
90 |
font-size: 18px;
|
91 |
color: #d4d4d4;
|
92 |
margin-top: 10px;
|
93 |
}
|
94 |
-
|
95 |
.chat-message {
|
96 |
background-color: rgba(26, 43, 30, 0.95);
|
97 |
border: 2px solid #b8860b;
|
@@ -106,12 +95,10 @@ st.markdown("""
|
|
106 |
align-items: center;
|
107 |
gap: 15px;
|
108 |
}
|
109 |
-
|
110 |
@keyframes fadeIn {
|
111 |
from { opacity: 0; transform: translateY(10px); }
|
112 |
to { opacity: 1; transform: translateY(0); }
|
113 |
}
|
114 |
-
|
115 |
.stTextInput>div>input, .stTextArea textarea {
|
116 |
background-color: rgba(26, 43, 30, 0.95) !important;
|
117 |
border-radius: 10px !important;
|
@@ -121,12 +108,10 @@ st.markdown("""
|
|
121 |
font-size: 16px;
|
122 |
color: #d4d4d4 !important;
|
123 |
}
|
124 |
-
|
125 |
hr {
|
126 |
border: 1px solid #b8860b;
|
127 |
margin: 15px 0;
|
128 |
}
|
129 |
-
|
130 |
[data-testid="stSidebar"] > div {
|
131 |
border: none !important;
|
132 |
}
|
@@ -186,33 +171,41 @@ llm = ChatOpenAI(
|
|
186 |
model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
|
187 |
)
|
188 |
|
189 |
-
# ---------- پردازش فایلها ----------
|
190 |
-
folder_path = '
|
191 |
-
texts = []
|
192 |
-
|
193 |
-
for filename in os.listdir(folder_path):
|
194 |
-
if filename.endswith(".docx"):
|
195 |
-
full_path = os.path.join(folder_path, filename)
|
196 |
-
doc = docx.Document(full_path)
|
197 |
-
file_text = "\n".join([para.text for para in doc.paragraphs])
|
198 |
-
if file_text.strip():
|
199 |
-
texts.append(file_text)
|
200 |
-
|
201 |
normalizer = Normalizer()
|
202 |
sentence_tokenizer = SentenceTokenizer()
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
# ---------- ورودی جستجو ----------
|
211 |
query = st.text_input("🔎 کلمه یا عبارت موردنظر خود را وارد کنید:")
|
212 |
|
213 |
if query:
|
214 |
found = False
|
215 |
-
threshold =
|
216 |
|
217 |
for idx, sentence in enumerate(all_sentences):
|
218 |
similarity = fuzz.partial_ratio(query, sentence)
|
@@ -240,4 +233,4 @@ if query:
|
|
240 |
HumanMessage(content=prompt)
|
241 |
])
|
242 |
rewritten = response.content.strip()
|
243 |
-
st.markdown(f'<div class="chat-message">{rewritten}</div>', unsafe_allow_html=True)
|
|
|
5 |
from langchain.chat_models import ChatOpenAI
|
6 |
from langchain.schema import SystemMessage, HumanMessage
|
7 |
from rapidfuzz import fuzz
|
8 |
+
import concurrent.futures
|
9 |
|
10 |
# ---------- استایل ----------
|
11 |
st.markdown("""
|
12 |
<style>
|
13 |
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
|
|
|
14 |
html, body, [class*="css"] {
|
15 |
font-family: 'Vazirmatn', Tahoma, sans-serif;
|
16 |
direction: rtl;
|
17 |
text-align: right;
|
18 |
}
|
|
|
19 |
.stApp {
|
20 |
background: linear-gradient(to left, #4b5e40, #2e3b2e);
|
21 |
color: #ffffff;
|
22 |
}
|
|
|
23 |
[data-testid="stSidebar"] {
|
24 |
width: 260px !important;
|
25 |
background-color: #1a2b1e;
|
26 |
border: none !important;
|
27 |
padding-top: 20px;
|
28 |
}
|
|
|
29 |
.menu-item {
|
30 |
display: flex;
|
31 |
align-items: center;
|
|
|
36 |
cursor: pointer;
|
37 |
transition: background-color 0.3s ease;
|
38 |
}
|
|
|
39 |
.menu-item:hover {
|
40 |
background-color: #2e3b2e;
|
41 |
color: #b8860b;
|
42 |
}
|
|
|
43 |
.menu-item img {
|
44 |
width: 24px;
|
45 |
height: 24px;
|
46 |
}
|
|
|
47 |
.stButton>button {
|
48 |
background-color: #b8860b !important;
|
49 |
color: #1a2b1e !important;
|
|
|
57 |
width: 100%;
|
58 |
margin: 10px 0;
|
59 |
}
|
|
|
60 |
.stButton>button:hover {
|
61 |
background-color: #8b6508 !important;
|
62 |
transform: translateY(-2px);
|
63 |
box-shadow: 0 4px 8px rgba(0,0,0,0.3);
|
64 |
}
|
|
|
65 |
.header-text {
|
66 |
text-align: center;
|
67 |
margin: 20px 0;
|
|
|
70 |
border-radius: 15px;
|
71 |
box-shadow: 0 6px 12px rgba(0,0,0,0.4);
|
72 |
}
|
|
|
73 |
.header-text h1 {
|
74 |
font-size: 42px;
|
75 |
color: #b8860b;
|
76 |
margin: 0;
|
77 |
font-weight: 700;
|
78 |
}
|
|
|
79 |
.subtitle {
|
80 |
font-size: 18px;
|
81 |
color: #d4d4d4;
|
82 |
margin-top: 10px;
|
83 |
}
|
|
|
84 |
.chat-message {
|
85 |
background-color: rgba(26, 43, 30, 0.95);
|
86 |
border: 2px solid #b8860b;
|
|
|
95 |
align-items: center;
|
96 |
gap: 15px;
|
97 |
}
|
|
|
98 |
@keyframes fadeIn {
|
99 |
from { opacity: 0; transform: translateY(10px); }
|
100 |
to { opacity: 1; transform: translateY(0); }
|
101 |
}
|
|
|
102 |
.stTextInput>div>input, .stTextArea textarea {
|
103 |
background-color: rgba(26, 43, 30, 0.95) !important;
|
104 |
border-radius: 10px !important;
|
|
|
108 |
font-size: 16px;
|
109 |
color: #d4d4d4 !important;
|
110 |
}
|
|
|
111 |
hr {
|
112 |
border: 1px solid #b8860b;
|
113 |
margin: 15px 0;
|
114 |
}
|
|
|
115 |
[data-testid="stSidebar"] > div {
|
116 |
border: none !important;
|
117 |
}
|
|
|
171 |
model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
|
172 |
)
|
173 |
|
174 |
+
# ---------- پردازش فایلها با کش و موازی ----------
|
175 |
+
folder_path = '46'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
normalizer = Normalizer()
|
177 |
sentence_tokenizer = SentenceTokenizer()
|
178 |
|
179 |
+
@st.cache_data(show_spinner="در حال پردازش اسناد... لطفاً صبور باشید.")
def load_and_process_documents(path):
    """Load every ``.docx`` file in *path* and return its sentences.

    Each document is read with ``docx.Document``, its paragraphs are joined
    with newlines, and the text is passed through the module-level
    ``normalizer`` and ``sentence_tokenizer``. Documents are processed
    concurrently in a thread pool, and the result is cached by Streamlit
    through the ``st.cache_data`` decorator.

    Args:
        path: Directory to scan for ``.docx`` files (non-recursive).

    Returns:
        A flat list with the sentences of all readable, non-empty documents,
        ordered by file name.

    Raises:
        OSError: If *path* cannot be listed (propagated from ``os.listdir``).
    """
    # Local import keeps this block self-contained; the file's top-level
    # import section is not fully visible from here.
    import logging

    logger = logging.getLogger(__name__)

    def process_docx(filename):
        """Return the tokenized sentences of one document, or [] on failure."""
        full_path = os.path.join(path, filename)
        try:
            doc = docx.Document(full_path)
            file_text = "\n".join(para.text for para in doc.paragraphs)
            if not file_text.strip():
                return []
            normalized = normalizer.normalize(file_text)
            return sentence_tokenizer.tokenize(normalized)
        except Exception:
            # Best-effort: one unreadable document must not abort the whole
            # load, but the failure is logged instead of silently dropped.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            logger.warning(
                "Skipping document that could not be processed: %s",
                full_path,
                exc_info=True,
            )
            return []

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # sentence order (and therefore sentence indices) stable between runs.
    docx_files = sorted(f for f in os.listdir(path) if f.endswith(".docx"))

    all_sentences = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in the order of `docx_files`.
        for sentences in executor.map(process_docx, docx_files):
            if sentences:
                all_sentences.extend(sentences)
    return all_sentences
|
200 |
+
|
201 |
+
all_sentences = load_and_process_documents(folder_path)
|
202 |
|
203 |
# ---------- ورودی جستجو ----------
|
204 |
query = st.text_input("🔎 کلمه یا عبارت موردنظر خود را وارد کنید:")
|
205 |
|
206 |
if query:
|
207 |
found = False
|
208 |
+
threshold = 70
|
209 |
|
210 |
for idx, sentence in enumerate(all_sentences):
|
211 |
similarity = fuzz.partial_ratio(query, sentence)
|
|
|
233 |
HumanMessage(content=prompt)
|
234 |
])
|
235 |
rewritten = response.content.strip()
|
236 |
+
st.markdown(f'<div class="chat-message">{rewritten}</div>', unsafe_allow_html=True)
|