Spaces:

M17idd
/

army

Running

App Files Community

M17idd committed on May 4

Commit

1b8e48e

1 Parent(s): 192d589

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -24

app.py CHANGED Viewed

@@ -539,44 +539,59 @@ def load_and_process_documents(path):
 doc_texts = load_and_process_documents(folder_path)
 def clean_text(text):
     return re.sub(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+', '', text)
-def find_closest_filenames(query, filenames, top_n=3):
-    # گرفتن نزدیک‌ترین فایل‌ها بر اساس شباهت
-    scores = [(f, fuzz.partial_ratio(query, f)) for f in filenames]
-    scores.sort(key=lambda x: x[1], reverse=True)
-    return [score[0] for score in scores[:top_n]]  # برگشت دادن N فایل با بیشترین شباهت
-def find_best_answer(query, top_files, doc_texts):
-    best_match = None
-    best_score = 0
     # بررسی محتوای فایل‌ها
-    for filename in top_files:
-        text = doc_texts[filename]
-        similarity = fuzz.partial_ratio(query, text)  # مقایسه سوال با محتوای فایل
-        if similarity > best_score:
-            best_score = similarity
-            best_match = filename
-    return best_match, doc_texts.get(best_match, "")
 # حالا این رو در کد اصلی استفاده می‌کنیم:
 if query:
-    top_files = find_closest_filenames(query, list(doc_texts.keys()), top_n=3)
-    best_file, matched_text = find_best_answer(query, top_files, doc_texts)
-    if best_file:
         prompt = f"""
-        لطفاً با توجه به سؤال زیر و محتوای سند موجود، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن. فقط از متن سند استفاده کن. اگر اطلاعات کافی در متن وجود ندارد، صادقانه اعلام کن.
         سوال:
         {query}
-        محتوای سند:
-        {matched_text}
         پاسخ نهایی:
         """
@@ -589,4 +604,4 @@ if query:
         st.markdown(f'<div class="chat-message">{rewritten}</div>', unsafe_allow_html=True)
     else:
-        st.warning("هیچ سند مرتبطی پیدا نشد.")

 doc_texts = load_and_process_documents(folder_path)
# Collect the lines of a text that mention any of the query words
def extract_keywords_from_text(text, query_words):
    """Return the lines of ``text`` that contain at least one query word.

    Matching is a plain, case-sensitive substring test, so a word also
    matches when it appears inside a longer word.

    Args:
        text: Document text; it is split into lines on any line boundary.
        query_words: Iterable of words to look for. Empty strings are
            ignored.

    Returns:
        The matching lines, in their original order and without line-ending
        characters. Empty when ``text`` or ``query_words`` is empty.
    """
    # An empty string is a substring of every line; dropping it keeps a stray
    # "" (e.g. from ``query.split(" ")``) from selecting the whole document.
    words = [word for word in query_words if word]
    if not words:
        return []

    # splitlines() also handles "\r\n", so matched lines carry no trailing
    # "\r" the way ``text.split("\n")`` would leave them.
    return [
        line
        for line in text.splitlines()
        if any(word in line for word in words)
    ]
+# تابعی برای پاکسازی متن
 def clean_text(text):
     """Return ``text`` with every character outside the allowed set removed.

     The allowed set is spelled out by the pattern below: the آ-ی letter
     range, the digits ۰-۹ and 0-9, the punctuation ، . ؟ ! ؛ the operators
     + - * and the space character. Everything else, including newlines,
     is deleted.
     """
     disallowed_run = re.compile(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+')
     return disallowed_run.sub('', text)
# Rank the document lines that mention a query word by similarity to the query
def find_closest_lines(query, doc_texts, top_n=20, exclude_line=None):
    """Return up to ``top_n`` document lines most similar to ``query``.

    Candidate lines are those that ``extract_keywords_from_text`` selects for
    the whitespace-separated words of ``query``. Each candidate is scored
    with ``fuzz.partial_ratio(query, line)`` and the highest scores come
    first; ties keep the order in which the lines were encountered.

    Args:
        query: The user's question.
        doc_texts: Mapping whose values are document texts. The keys are not
            used here.
        top_n: Maximum number of lines to return.
        exclude_line: Optional line to leave out of the result. A falsy value
            excludes nothing.

    Returns:
        A list of distinct lines, best match first. Empty when nothing
        matches or ``query`` has no words.
    """
    query_words = query.split()
    if not query_words:
        return []

    # Keyed by line text: a line repeated within or across documents is
    # scored once and occupies a single slot in the result.
    scores = {}
    for text in doc_texts.values():
        for line in extract_keywords_from_text(text, query_words):
            # Exclude before ranking so the caller still receives a full
            # top_n whenever enough other candidates exist.
            if line == exclude_line or line in scores:
                continue
            scores[line] = fuzz.partial_ratio(query, line)

    # sorted() is stable, so equally scored lines stay in encounter order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [line for line, _ in ranked[:top_n]]
 # حالا این رو در کد اصلی استفاده می‌کنیم:
 if query:
+    # پیدا کردن ۲۰ خط نزدیک‌تر به سوال (و حذف یک خط خاص)
+    closest_lines = find_closest_lines(query, doc_texts, top_n=20, exclude_line=None)
+    if closest_lines:
         prompt = f"""
+        لطفاً با توجه به سؤال زیر و محتوای خطوط مرتبط، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن. فقط از متن خطوط مرتبط استفاده کن. اگر اطلاعات کافی در متن وجود ندارد، صادقانه اعلام کن.
         سوال:
         {query}
+        خطوط مرتبط:
+        {closest_lines}
         پاسخ نهایی:
         """
         st.markdown(f'<div class="chat-message">{rewritten}</div>', unsafe_allow_html=True)
     else:
+        st.warning("هیچ خط مرتبطی پیدا نشد.")