import streamlit as st from hazm import Normalizer, SentenceTokenizer import os import docx from langchain.chat_models import ChatOpenAI from langchain.schema import SystemMessage, HumanMessage from rapidfuzz import fuzz import concurrent.futures import time # from sentence_transformers import SentenceTransformer import numpy as np from hazm import * import re import nltk nltk.download('punkt') st.markdown(""" """, unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) # ---------- احراز هویت ---------- if "authenticated" not in st.session_state: st.session_state.authenticated = False if not st.session_state.authenticated: st.markdown('', unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) username = st.text_input("نام کاربری:", placeholder="شناسه خود را وارد کنید", label_visibility="visible") password = st.text_input("رمز عبور:", placeholder="رمز عبور ", type="password", label_visibility="visible") st.markdown(""" """, unsafe_allow_html=True) if st.button("ورود"): if username == "admin" and password == "123": st.session_state.authenticated = True st.rerun() else: st.markdown("""

نام کاربری یا رمز عبور اشتباه است.

""", unsafe_allow_html=True) st.stop() # ---------- سایدبار ---------- with st.sidebar: st.image("log.png", use_container_width=True) menu_items = [ ("گزارش عملیاتی", "https://cdn-icons-png.flaticon.com/512/3596/3596165.png", "https://m17idd-reporting.hf.space"), ("تاریخچه ماموریت‌ها", "https://cdn-icons-png.flaticon.com/512/709/709496.png", None), ("تحلیل داده‌های نظامی", "https://cdn-icons-png.flaticon.com/512/1828/1828932.png", "https://m17idd-test.hf.space"), ("مدیریت منابع", "https://cdn-icons-png.flaticon.com/512/681/681494.png", None), ("دستیار فرماندهی", "https://cdn-icons-png.flaticon.com/512/3601/3601646.png", None), ("تنظیمات امنیتی", "https://cdn-icons-png.flaticon.com/512/2099/2099058.png", None), ("پشتیبانی فنی", "https://cdn-icons-png.flaticon.com/512/597/597177.png", None), ] st.markdown(""" """, unsafe_allow_html=True) for idx, (text, icon, link) in enumerate(menu_items): content = f""" """ if link: content = f'{content}' st.markdown(content, unsafe_allow_html=True) if idx in [1, 3, 5]: st.markdown("

", unsafe_allow_html=True) st.markdown("""

رزم‌‌یار‌ارتش

دستیارهوشمندارتش جمهوری اسلامی ایران

""", unsafe_allow_html=True) # ---------- مدل زبانی ---------- llm = ChatOpenAI( base_url="https://api.together.xyz/v1", api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979', model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free", ) # from transformers import pipeline # hf_api_key = os.getenv("tavana55") # model_name = "Qwen/Qwen3-0.6B" # llm = pipeline("text-generation", model=model_name) # ---------- ورودی جستجو ---------- st.markdown(""" """, unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) st.markdown(""" """, unsafe_allow_html=True) query = st.chat_input("چطور می‌تونم کمک کنم؟") if query: st.markdown(f'

{query}

', unsafe_allow_html=True) think = st.markdown("""

در حال فکر کردن...

""", unsafe_allow_html=True) else: st.markdown("") # استایل‌ها برای چرخش و پیام در حال فکر کردن st.markdown(""" """, unsafe_allow_html=True) import os import re import docx import streamlit as st import concurrent.futures from hazm import Normalizer from rapidfuzz import fuzz from langchain.schema import SystemMessage, HumanMessage from collections import Counter import heapq # مسیر پوشه اسناد folder_path = '46' normalizer = Normalizer() @st.cache_data(show_spinner="در حال پردازش اسناد... لطفاً صبور باشید.") def load_and_process_documents(path): def process_docx(filename): try: full_path = os.path.join(path, filename) doc = docx.Document(full_path) text = "\n".join([para.text for para in doc.paragraphs]) normalized = normalizer.normalize(text) return filename, normalized except Exception as e: print(f"Error processing {filename}: {e}") return filename, "" filenames = [f for f in os.listdir(path) if f.endswith(".docx")] doc_texts = {} with concurrent.futures.ThreadPoolExecutor() as executor: for filename, content in executor.map(process_docx, filenames): doc_texts[filename] = content return doc_texts # پردازش فایل‌ها doc_texts = load_and_process_documents(folder_path) # خواندن استاپ وردها with open("stopwords.txt", "r", encoding="utf-8") as f: stop_words = set(line.strip() for line in f if line.strip()) # حذف استاپ‌وردها از متن def remove_stop_words(text, stop_words): words = text.split() return " ".join([word for word in words if word not in stop_words]) # حذف عبارات ایست def remove_stop_phrases(text, stop_words): for phrase in stop_words: text = text.replace(phrase, "") return text # استخراج خطوط حاوی کلمات کوئری def extract_keywords_from_text(text, query_words): matched_lines = [] lines = text.split("\n") for line in lines: if any(query_word in line for query_word in query_words): matched_lines.append(line) return matched_lines # خلاصه‌سازی بر اساس فراوانی واژگان def summarize_text_by_frequency(text, num_sentences=1): sentences = text.split('\n') word_freq = Counter() for sentence in sentences: for word in sentence.split(): if word not in stop_words: word_freq[word] += 1 sentence_scores = {} for sentence in sentences: for word in sentence.split(): if word in word_freq: sentence_scores[sentence] = sentence_scores.get(sentence, 0) + word_freq[word] summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get) return "\n".join(summarized_sentences) # پیدا کردن خطوط مشابه def find_closest_lines(query, doc_texts, stop_words, top_n=15): cleaned_query = remove_stop_words(query, stop_words) query_words = cleaned_query.split() all_matched_lines = [] for filename, text in doc_texts.items(): matched_lines = extract_keywords_from_text(text, query_words) for line in matched_lines: similarity = fuzz.partial_ratio(query, line) all_matched_lines.append((line, similarity)) all_matched_lines.sort(key=lambda x: x[1], reverse=True) closest_lines = [line for line, _ in all_matched_lines[:top_n]] return closest_lines # رابط کاربری Streamlit st.title("پاسخ‌دهی به سوالات بر اساس اسناد بارگذاری‌شده") query = st.text_input("سوال خود را وارد کنید:") if query: closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=15) # حذف استاپ‌وردها از خطوط cleaned_closest_lines = [ remove_stop_phrases(line, stop_words) for line in closest_lines ] # خلاصه‌سازی summarized_text = summarize_text_by_frequency("\n".join(cleaned_closest_lines), num_sentences=1) # نمایش خلاصه st.markdown(summarized_text) if summarized_text: prompt = f""" لطفاً با توجه به سؤال زیر و محتوای خطوط مرتبط، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن. فقط از متن خطوط مرتبط استفاده کن و خلاصه بنویس. اطلاعات اضافی ننویس و فقط به سوال پاسخ بده. در صورتی که اطلاعات کافی در متن وجود ندارد، صادقانه اعلام کن که اطلاعات کافی برای پاسخ‌دهی موجود نیست. سوال: {query} خطوط مرتبط: {summarized_text} پاسخ نهایی: """ response = llm([ SystemMessage(content="تو رزم یار ارتش هستی و از کتاب و دیتای موجود به سوالات پاسخ میدی."), HumanMessage(content=prompt) ]) rewritten = response.content.strip() # نمایش نتیجه st.markdown(f'

{rewritten}

', unsafe_allow_html=True) else: st.warning("هیچ خط مرتبطی پیدا نشد.")