|
import os |
|
import gradio as gr |
|
import numpy as np |
|
import pickle |
|
from sentence_transformers import SentenceTransformer |
|
from pypdf import PdfReader |
|
import re |
|
import google.generativeai as genai |
|
|
|
|
|
# Authenticate the Gemini client. The key is read with a plain subscript, so a
# missing GEMINI_API_KEY raises KeyError as soon as the module is loaded.
gemini_api_key = os.environ["GEMINI_API_KEY"]
genai.configure(api_key=gemini_api_key)

# Sentence encoder shared by the indexing step and the query step below, so
# document chunks and questions are embedded by the same model.
EMBEDDING_MODEL_NAME = 'dimitriz/st-greek-media-bert-base-uncased'
st_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
|
|
|
|
|
def cosine_similarity(a, b):
    """Return the pairwise cosine similarity between the rows of two matrices.

    Args:
        a: 2-D array of shape (n, d); each row is one vector.
        b: 2-D array of shape (m, d); each row is one vector.

    Returns:
        Array of shape (n, m) whose entry [i, j] is the cosine similarity of
        a[i] and b[j]. Pairs that involve an all-zero row score 0.0 rather
        than NaN.
    """
    dot_products = np.dot(a, b.T)
    # Outer product of the row norms: entry [i, j] is |a[i]| * |b[j]|.
    norm_products = (
        np.linalg.norm(a, axis=1)[:, np.newaxis] * np.linalg.norm(b, axis=1)
    )
    # A zero-norm row would produce 0/0 = NaN. np.argsort places NaN last, so
    # a descending ranking built by reversing it would list NaN entries first.
    # Divide only where the denominator is non-zero and leave 0.0 elsewhere.
    similarities = np.zeros(
        dot_products.shape,
        dtype=np.result_type(dot_products, norm_products),
    )
    np.divide(
        dot_products,
        norm_products,
        out=similarities,
        where=norm_products != 0,
    )
    return similarities
|
|
|
|
|
def replace_pattern(text, pattern=r" ν\. (\d+)", replacement=r" ν.\1"):
    """Replace every match of a regular expression inside ``text``.

    With the default arguments, the space between " ν." and the digits that
    follow it is removed (for example " ν. 4412" becomes " ν.4412").

    Args:
        text: String to process.
        pattern: Regular expression to search for.
        replacement: Replacement template; may use group references.

    Returns:
        The text with all non-overlapping matches substituted.
    """
    matcher = re.compile(pattern)
    return matcher.sub(replacement, text)
|
|
|
|
|
# Load the cached (chunks, embeddings) pair when the cache file exists;
# otherwise build it from the PDFs under "archive" and write the cache.
# The cache is never invalidated here: delete embedded_data.pkl to rebuild.
if os.path.exists('embedded_data.pkl'):
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only keep a cache file here that this script produced itself.
    with open('embedded_data.pkl', 'rb') as cache_file:
        chunks, embedded_data = pickle.load(cache_file)
else:
    chunks = []
    # os.listdir returns names in arbitrary order; sorting keeps the chunk
    # order (and therefore the cached index) reproducible between runs.
    for pdf_name in sorted(os.listdir("archive")):
        if not pdf_name.endswith(".pdf"):
            continue
        reader = PdfReader(f'archive/{pdf_name}')
        # Join pages with a newline so the last line of one page is not glued
        # to the first line of the next. `or ""` guards against a page that
        # yields no text.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        text = replace_pattern(text, pattern=r" ν\. (\d+)", replacement=r" ν.\1")
        # One chunk per line of extracted text. Blank lines are skipped so
        # they are not embedded and cannot occupy retrieval slots.
        chunks += [line for line in text.split("\n") if line.strip()]
    embedded_data = st_model.encode(chunks)
    with open('embedded_data.pkl', 'wb') as cache_file:
        pickle.dump((chunks, embedded_data), cache_file)
|
|
|
|
|
def rag_response(query):
    """Answer ``query`` using the most similar indexed chunks as context.

    The query is embedded with ``st_model``, compared against
    ``embedded_data`` by cosine similarity, and the 20 best-scoring entries
    of ``chunks`` are added to the prompt sent to the text-generation API.

    Args:
        query: The user's question as a string.

    Returns:
        The generated answer, or a string starting with "Σφάλμα:" when the
        API call fails or produces no answer.
    """
    embedded_query = st_model.encode(query)
    # cosine_similarity works on 2-D inputs, so lift the query to one row.
    similarities = cosine_similarity(embedded_query[np.newaxis, :], embedded_data)
    # Indices of the 20 highest-scoring chunks, best first.
    top_indices = np.argsort(similarities[0])[::-1][:20]
    # Send the context as plain text, one chunk per line. Interpolating the
    # list itself would put its Python repr (brackets, quotes, escape
    # sequences) into the prompt.
    context = "\n".join(chunks[index] for index in top_indices)

    augmented_prompt = (
        "Είσαι ένα σύστημα εμπειρογνώμονας και απαντάς ερωτήσεις.\n"
        "Θα σου δώσω μια ερώτηση και ένα πλαίσιο και θα επιστρέψεις την απάντηση.\n"
        f"Ερώτηση : {query} Πλαίσιο : {context}"
    )

    try:
        response = genai.generate_text(
            # Use the fully qualified model id ("models/..."), which is the
            # form the SDK uses for its own default model name.
            model="models/text-bison-001",
            prompt=augmented_prompt,
        )
        answer = response.result
    except Exception as e:
        # Boundary with the UI: report the failure as text instead of raising.
        return f"Σφάλμα: {str(e)}"

    # The result may be None (e.g. when no candidate is returned); show an
    # explicit message rather than handing None to the output textbox.
    if answer is None:
        return "Σφάλμα: το μοντέλο δεν επέστρεψε απάντηση."
    return answer
|
|
|
|
|
# Gradio UI: a two-line question box in, a single answer box out, with
# rag_response as the handler that turns one into the other.
question_box = gr.Textbox(
    label="Η ερώτησή σας:",
    lines=2,
    placeholder="Πληκτρολογήστε την ερώτησή σας εδώ...",
)
answer_box = gr.Textbox(label="Απάντηση από το Gemini:")

interface = gr.Interface(
    fn=rag_response,
    inputs=question_box,
    outputs=answer_box,
    title="RAG Gemini Assistant",
    description="Χρησιμοποιεί RAG για να παρέχει ακριβείς και τεκμηριωμένες απαντήσεις.",
)


# Start the app only when the file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch(share=True)
|
|