|
import gradio as gr |
|
from pathlib import Path |
|
import hashlib |
|
import google.generativeai as genai |
|
from PyPDF2 import PdfReader |
|
|
|
import os |
|
# Read the Gemini API key from the TOKEN environment variable and pass it to
# the SDK. `token` is None when the variable is not set.
token = os.getenv("TOKEN")
genai.configure(api_key=token)
|
|
|
|
|
# Generation parameters passed to the GenerativeModel constructor.
generation_config = dict(max_output_tokens=8192)
|
|
|
# One entry per harm category, each using the BLOCK_NONE threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]
|
|
|
# French system prompt: answer from the supplied context as a legal
# professional, admit when the answer is unknown, give no explanation, and
# cite sources and articles.
# NOTE(review): the prompt contains several spelling mistakes; they are
# reproduced verbatim because this text is sent to the model as-is.
system_instruction = (
    "Utillisez les éléments de contexte pour répondre. "
    "Si vous ne connaissez pas la réponse n'essayez pas d'inventer une réponse "
    "et preciser que vous ne la connaissez pas. "
    "Je veux que tu agisses comme un profesionel du droit. "
    "Je pose une questiob et tu reponds en te basant sur le contexte. "
    "Je ne veux aucune explication. "
    "Juste la réponse..réponds en citant tes sources et articles"
)
|
|
|
|
|
# Gemini model handle built from the generation config, system prompt and
# safety settings defined earlier in this file.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-latest",
    system_instruction=system_instruction,
    safety_settings=safety_settings,
    generation_config=generation_config,
)
|
|
|
|
|
# Files uploaded by upload_if_needed during this run; the end of the script
# iterates this list to delete them.
uploaded_files = []


def upload_if_needed(pathname: str) -> list[str]:
    """Return the remote URI for the file at *pathname*, uploading it if needed.

    The file's SHA-256 hex digest is used both as the name passed to
    ``genai.get_file`` and as the ``display_name`` of a fresh upload.

    Args:
        pathname: Path of the local file to make available remotely.

    Returns:
        A one-element list holding the file's URI.

    Raises:
        OSError: If the local file cannot be read (e.g. it does not exist).
    """
    path = Path(pathname)
    hash_id = hashlib.sha256(path.read_bytes()).hexdigest()
    try:
        existing_file = genai.get_file(name=hash_id)
    except Exception:
        # Best-effort lookup: any SDK failure is treated as "not uploaded
        # yet". Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        pass
    else:
        return [existing_file.uri]

    uploaded = genai.upload_file(path=path, display_name=hash_id)
    # Remember the upload so it can be deleted at shutdown.
    uploaded_files.append(uploaded)
    return [uploaded.uri]
|
|
|
|
|
|
|
def extract_pdf_pages(pathname: str) -> list[str]:
    """Extract the text of every page of a PDF as a flat list of strings.

    The list starts with a ``--- START OF PDF <pathname> ---`` marker, then
    holds, for each page, a ``--- PAGE <n> ---`` marker (``n`` counts from 0)
    followed by that page's extracted text.

    Args:
        pathname: Path of the PDF file to read.

    Returns:
        The marker and text strings described above. If the file does not
        exist, an error is printed and only the start marker is returned.
    """
    # The original marker read "${pathname}"; the "$" was a stray character
    # that ended up verbatim in the text.
    parts = [f"--- START OF PDF {pathname} ---"]
    try:
        reader = PdfReader(pathname)
        for page_num, page in enumerate(reader.pages):
            text = page.extract_text()
            parts.append(f"--- PAGE {page_num} ---")
            parts.append(text)
    except FileNotFoundError:
        print(f"Erreur: Fichier PDF '{pathname}' introuvable.")
    return parts
|
|
|
|
|
def load_pdfs_from_data():
    """Start a chat session and feed it the text of every PDF in ``data/``.

    Each file whose name ends in ``.pdf`` is converted with
    ``extract_pdf_pages`` and sent to the chat as its own user message.

    Returns:
        The chat session object returned by ``model.start_chat()``, after all
        PDFs have been sent.

    Raises:
        FileNotFoundError: If the ``data`` directory does not exist.
    """
    data_dir = "data"
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # in which documents enter the chat history the same on every run.
    pdf_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".pdf"))
    convo = model.start_chat()
    for pdf_file in pdf_files:
        pdf_path = os.path.join(data_dir, pdf_file)
        convo.send_message({"role": "user", "parts": extract_pdf_pages(pdf_path)})
    return convo
|
|
|
# Chat session shared by every request, created once when the script loads.
convo = load_pdfs_from_data()


def respond(user_input):
    """Send *user_input* to the shared chat and return the reply text.

    The reply is also printed to standard output.
    """
    convo.send_message(user_input)
    answer = convo.last.text
    print(answer)
    return answer
|
|
|
# Text-in / text-out Gradio UI backed by respond().
# NOTE(review): the title mentions a "Fang to French Translator", which does
# not match the legal-assistant system prompt. It looks like a leftover;
# confirm the intended title before changing this user-facing string.
iface = gr.Interface(fn=respond, inputs="text", outputs="text", title="Fang to French Translator")

try:
    iface.launch()
finally:
    # Delete every file uploaded during this run, even if launch() raised.
    for uploaded_file in uploaded_files:
        genai.delete_file(name=uploaded_file.name)