Spaces:

M17idd
/

army

Sleeping

App Files Files Community

M17idd committed on May 4

Commit

e513ab7

1 Parent(s): 80e301b

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -21

app.py CHANGED Viewed

@@ -540,57 +540,43 @@ def load_and_process_documents(path):
 doc_texts = load_and_process_documents(folder_path)
-# Stop-word list
-stop_words = [
-    "است", "و", "با", "که", "در", "از", "برای", "به", "بر", "تا", "این", "آن", "یک", "کدام", "کجا", "هم", "همه",
-    "یا", "از", "بر", "همچنین", "می", "باید", "شود", "شد", "گفت", "گویا", "داشت", "داشتن", "کنند", "کنیم",
-    "کرد", "کردن", "نیز", "یا", "اگر", "ای", "اینکه", "نه", "باشید", "باشم", "باشی", "در حالی که", "مگر", "چرا"
-]
-# Helper that strips the stop words out of the question
 def remove_stop_words(text, stop_words):
     """Return ``text`` with every token found in ``stop_words`` removed.

     ``text`` is split on whitespace, tokens that are members of
     ``stop_words`` are dropped, and the rest are re-joined with single
     spaces.
     """
     words = text.split()
     return " ".join([word for word in words if word not in stop_words])
-# Helper that extracts the matching lines from a text
 def extract_keywords_from_text(text, query_words):
     """Return the lines of ``text`` that contain at least one query word.

     Matching is plain substring containment (``query_word in line``), and
     the matched lines are returned in their original order.
     """
     matched_lines = []
     lines = text.split("\n")
-    # Look for each query word in every line
     for line in lines:
         if any(query_word in line for query_word in query_words):
             matched_lines.append(line)
     return matched_lines
-# Helper that cleans up a text
 def clean_text(text):
     """Delete every character of ``text`` that is not on the allow-list.

     Kept characters: the range آ-ی, the digits ۰-۹ and 0-9, the
     punctuation marks ، . ؟ ! ؛ + - * and the space character. Anything
     else (including newlines) is removed.
     """
     return re.sub(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+', '', text)
-# Helper that finds the lines closest to the question
 def find_closest_lines(query, doc_texts, stop_words, top_n=10):
     """Return up to ``top_n`` document lines most similar to ``query``.

     ``doc_texts`` is iterated with ``.items()`` and each value is treated
     as a text to search. Candidate lines are those containing at least one
     non-stop-word of the query; they are ranked by
     ``fuzz.partial_ratio`` against the original (unfiltered) ``query``.
     """
-    # Remove the stop words from the question
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
     all_matched_lines = []
-    # Scan the contents of every file
     for filename, text in doc_texts.items():
         matched_lines = extract_keywords_from_text(text, query_words)
         for line in matched_lines:
             similarity = fuzz.partial_ratio(query, line)  # similarity between this line and the question
             all_matched_lines.append((line, similarity))
-    # Sort by similarity, highest first
     all_matched_lines.sort(key=lambda x: x[1], reverse=True)
-    # Keep only the top_n closest lines
     closest_lines = [line for line, _ in all_matched_lines[:top_n]]
     return closest_lines
-# تابعی برای حذف کلمات توقف از یک لیست از خطوط
 def remove_stop_words_from_lines(lines, stop_words):
     cleaned_lines = []
     for line in lines:
@@ -599,12 +585,9 @@ def remove_stop_words_from_lines(lines, stop_words):
         cleaned_lines.append(" ".join(cleaned_words))
     return cleaned_lines
-# حالا این رو در کد اصلی استفاده می‌کنیم:
 if query:
-    # پیدا کردن ۱۰ خط نزدیک‌تر به سوال
-    closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=3)
-    # حذف کلمات توقف از خطوط نزدیک
     cleaned_closest_lines = remove_stop_words_from_lines(closest_lines, stop_words)
     if cleaned_closest_lines:

 doc_texts = load_and_process_documents(folder_path)
 # Load the stop words from a UTF-8 text file, one entry per line, into a set.
 # NOTE(review): this is an absolute path on a local Windows machine —
 # presumably it does not exist in the deployed environment; confirm.
+with open('C:/Users/ici/Downloads/Telegram Desktop/45/stopwords.txt', 'r', encoding='utf-8') as file:
+    stop_words = set(file.read().splitlines())
 def remove_stop_words(text, stop_words):
     """Drop every whitespace-separated token of ``text`` found in ``stop_words``.

     The surviving tokens are returned joined by single spaces, so any run
     of whitespace in the input collapses to one space in the result.
     """
     kept_tokens = []
     for token in text.split():
         if token in stop_words:
             continue
         kept_tokens.append(token)
     return " ".join(kept_tokens)
 def extract_keywords_from_text(text, query_words):
     """Collect the lines of ``text`` that mention any of ``query_words``.

     ``text`` is split on newline characters and a line is kept when at
     least one query word occurs in it as a substring. Lines come back in
     their original order; an empty ``query_words`` yields an empty list.
     """
     return [
         candidate
         for candidate in text.split("\n")
         if any(word in candidate for word in query_words)
     ]
 def clean_text(text):
     """Strip from ``text`` every character outside the permitted set.

     Permitted: characters in the range آ-ی, the digits ۰-۹ and 0-9, the
     punctuation marks ، . ؟ ! ؛ + - * and the space character. Each run of
     other characters (Latin letters, newlines, ...) is deleted outright.
     """
     disallowed_run = re.compile(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+')
     return disallowed_run.sub('', text)
 def find_closest_lines(query, doc_texts, stop_words, top_n=10):
     """Return up to ``top_n`` document lines ranked by fuzzy similarity to ``query``.

     Stop words are removed from ``query`` only to pick the search terms
     that select candidate lines. Each candidate is scored with
     ``fuzz.partial_ratio`` against the original, unfiltered ``query``.
     Candidates are ordered from highest to lowest score; equal scores keep
     the order in which the lines were encountered.
     """
     search_terms = remove_stop_words(query, stop_words).split()

     scored = []
     for text in doc_texts.values():
         for candidate in extract_keywords_from_text(text, search_terms):
             # Score against the full question, not the stop-word-free version.
             scored.append((candidate, fuzz.partial_ratio(query, candidate)))

     ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
     return [candidate for candidate, _score in ranked[:top_n]]
 def remove_stop_words_from_lines(lines, stop_words):
     cleaned_lines = []
     for line in lines:
         cleaned_lines.append(" ".join(cleaned_words))
     return cleaned_lines
 if query:
+    closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=10)
     cleaned_closest_lines = remove_stop_words_from_lines(closest_lines, stop_words)
     if cleaned_closest_lines: