Spaces:

M17idd
/

army

Sleeping

App Files Files Community

M17idd committed on May 4

Commit

34701b1

verified ·

1 Parent(s): e54fee7

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -35

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import streamlit as st
 from hazm import Normalizer, SentenceTokenizer
 import os
@@ -517,10 +518,7 @@ import concurrent.futures
 from hazm import Normalizer
 from rapidfuzz import fuzz
 from langchain.schema import SystemMessage, HumanMessage
-from collections import Counter
-import heapq
-# مسیر پوشه اسناد
 folder_path = '46'
 normalizer = Normalizer()
@@ -546,34 +544,27 @@ def load_and_process_documents(path):
     return doc_texts
-# پردازش فایل‌ها
 doc_texts = load_and_process_documents(folder_path)
-# خواندن استاپ وردها
 with open("stopwords.txt", "r", encoding="utf-8") as f:
     stop_words = set(line.strip() for line in f if line.strip())
-# حذف استاپ‌وردها از متن
 def remove_stop_words(text, stop_words):
     """Drop every whitespace-separated token of ``text`` that is in ``stop_words``.

     The text is tokenized with ``str.split()`` (any run of whitespace is a
     separator) and the surviving tokens are joined back with single spaces.
     """
     kept_tokens = []
     for token in text.split():
         # Skip tokens that match a stop word exactly; substrings are untouched.
         if token in stop_words:
             continue
         kept_tokens.append(token)
     return " ".join(kept_tokens)
-# حذف عبارات ایست
-def remove_stop_phrases(text, stop_words):
-    for phrase in stop_words:
-        text = text.replace(phrase, "")
-    return text
-# استخراج خطوط حاوی کلمات کوئری
 def extract_keywords_from_text(text, query_words):
     """Return the lines of ``text`` that contain at least one query word.

     ``text`` is split on ``"\\n"``; a line is kept when any element of
     ``query_words`` occurs in it as a substring. Original line order is kept.
     """
     return [
         candidate
         for candidate in text.split("\n")
         if any(word in candidate for word in query_words)
     ]
-# خلاصه‌سازی بر اساس فراوانی واژگان
 def summarize_text_by_frequency(text, num_sentences=1):
     sentences = text.split('\n')
     word_freq = Counter()
@@ -592,43 +583,48 @@ def summarize_text_by_frequency(text, num_sentences=1):
     summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
     return "\n".join(summarized_sentences)
-# پیدا کردن خطوط مشابه
 def find_closest_lines(query, doc_texts, stop_words, top_n=15):
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
     all_matched_lines = []
     for filename, text in doc_texts.items():
         matched_lines = extract_keywords_from_text(text, query_words)
         for line in matched_lines:
-            similarity = fuzz.partial_ratio(query, line)
             all_matched_lines.append((line, similarity))
     all_matched_lines.sort(key=lambda x: x[1], reverse=True)
     closest_lines = [line for line, _ in all_matched_lines[:top_n]]
     return closest_lines
-# رابط کاربری Streamlit
-st.title("پاسخ‌دهی به سوالات بر اساس اسناد بارگذاری‌شده")
-query = st.text_input("سوال خود را وارد کنید:")
 if query:
     closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=15)
-    # حذف استاپ‌وردها از خطوط
     cleaned_closest_lines = [
-        remove_stop_phrases(line, stop_words)
-        for line in closest_lines
     ]
-    # خلاصه‌سازی
     summarized_text = summarize_text_by_frequency("\n".join(cleaned_closest_lines), num_sentences=1)
-    # نمایش خلاصه
     st.markdown(summarized_text)
     if summarized_text:
         prompt = f"""
         لطفاً با توجه به سؤال زیر و محتوای خطوط مرتبط، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن.
@@ -640,16 +636,18 @@ if query:
         {summarized_text}
         پاسخ نهایی:
         """
         response = llm([
-            SystemMessage(content="تو رزم یار ارتش هستی و از کتاب و دیتای موجود به سوالات پاسخ میدی."),
             HumanMessage(content=prompt)
         ])
         rewritten = response.content.strip()
         # نمایش نتیجه
         st.markdown(f'<div class="chat-message">{rewritten}</div>', unsafe_allow_html=True)
     else:
-        st.warning("هیچ خط مرتبطی پیدا نشد.")

 import streamlit as st
 from hazm import Normalizer, SentenceTokenizer
 import os
 from hazm import Normalizer
 from rapidfuzz import fuzz
 from langchain.schema import SystemMessage, HumanMessage
 folder_path = '46'
 normalizer = Normalizer()
     return doc_texts
 doc_texts = load_and_process_documents(folder_path)
 with open("stopwords.txt", "r", encoding="utf-8") as f:
     stop_words = set(line.strip() for line in f if line.strip())
 def remove_stop_words(text, stop_words):
     """Return ``text`` without the tokens listed in ``stop_words``.

     Tokens come from ``str.split()`` (whitespace-delimited); the remaining
     ones are re-joined with a single space between them.
     """
     surviving = filter(lambda token: token not in stop_words, text.split())
     return " ".join(surviving)
 def extract_keywords_from_text(text, query_words):
     """Select the newline-delimited lines of ``text`` that mention a query word.

     A line qualifies when at least one entry of ``query_words`` is a
     substring of it. Lines are returned in their original order.
     """
     def mentions_query(candidate):
         # Substring test, not token equality: "bc" matches "abc".
         for word in query_words:
             if word in candidate:
                 return True
         return False

     return list(filter(mentions_query, text.split("\n")))
+from collections import Counter
+import heapq
 def summarize_text_by_frequency(text, num_sentences=1):
     sentences = text.split('\n')
     word_freq = Counter()
     summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
     return "\n".join(summarized_sentences)
 def find_closest_lines(query, doc_texts, stop_words, top_n=15):
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
     all_matched_lines = []
     for filename, text in doc_texts.items():
         matched_lines = extract_keywords_from_text(text, query_words)
         for line in matched_lines:
+            similarity = fuzz.partial_ratio(query, line)  # محاسبه شباهت خط با سوال
             all_matched_lines.append((line, similarity))
     all_matched_lines.sort(key=lambda x: x[1], reverse=True)
     closest_lines = [line for line, _ in all_matched_lines[:top_n]]
     return closest_lines
+def remove_stop_phrases(text, stop_words):
+    for phrase in stop_words:
+        text = text.replace(phrase, "")
+    return text
 if query:
     closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=15)
+    # حذف استپ‌وردها از خطوط و سپس پاکسازی نهایی متن
     cleaned_closest_lines = [
+    remove_stop_phrases(line, stop_words)
+    for line in closest_lines
     ]
     summarized_text = summarize_text_by_frequency("\n".join(cleaned_closest_lines), num_sentences=1)
+    summarized_cleaned = remove_stop_phrases(summarized_text, stop_words)
     st.markdown(summarized_text)
     if summarized_text:
         prompt = f"""
         لطفاً با توجه به سؤال زیر و محتوای خطوط مرتبط، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن.
         {summarized_text}
         پاسخ نهایی:
         """
+        # ارسال پیام به مدل به صورت صحیح
         response = llm([
+            SystemMessage(content="تو رزم یار ارتش هستی و  از کتاب و دیتای موجود به سوالات پاسخ میدی."),
             HumanMessage(content=prompt)
         ])
+        # The model reply is read through its `.content` attribute (not as a dictionary)
         rewritten = response.content.strip()
         # نمایش نتیجه
         st.markdown(f'<div class="chat-message">{rewritten}</div>', unsafe_allow_html=True)
     else:
+        st.warning("هیچ خط مرتبطی پیدا نشد.")