# legalrci / app.py
# Docfile's picture
# Update app.py
# 0438389 verified
import gradio as gr
from pathlib import Path
import hashlib
import google.generativeai as genai
from PyPDF2 import PdfReader
import os
# Read the API key from the TOKEN environment variable and pass it to the SDK.
token = os.environ.get("TOKEN")
genai.configure(api_key=token)

# Generation parameters handed to the model.
generation_config = {"max_output_tokens": 8192}

# One entry per harm category, all using the same "BLOCK_NONE" threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# System prompt sent to the model. The text is runtime data and is reproduced
# exactly as written (adjacent literals are concatenated by the parser).
system_instruction = (
    "Utillisez les éléments de contexte pour répondre. "
    "Si vous ne connaissez pas la réponse n'essayez pas d'inventer une réponse "
    "et preciser que vous ne la connaissez pas. "
    "Je veux que tu agisses comme un profesionel du droit. "
    "Je pose une questiob et tu reponds en te basant sur le contexte. "
    "Je ne veux aucune explication. "
    "Juste la réponse..réponds en citant tes sources et articles"
)

# Model instance configured with the settings defined above.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-latest",
    generation_config=generation_config,
    system_instruction=system_instruction,
    safety_settings=safety_settings,
)

# Handles returned by genai.upload_file, recorded by upload_if_needed.
uploaded_files = []
def upload_if_needed(pathname: str) -> list[str]:
    """Return the remote URI of the file at *pathname*, uploading it if needed.

    The SHA-256 hex digest of the file's bytes is used both as the name for
    the lookup and as the display name of a new upload. Newly uploaded files
    are appended to the module-level ``uploaded_files`` list.

    Args:
        pathname: Path of the local file.

    Returns:
        A one-element list containing the file's URI.

    Raises:
        OSError: If the local file cannot be read.
    """
    path = Path(pathname)
    hash_id = hashlib.sha256(path.read_bytes()).hexdigest()
    try:
        existing_file = genai.get_file(name=hash_id)
    except Exception:
        # Best-effort lookup: any failure is treated as "not uploaded yet".
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    else:
        return [existing_file.uri]
    uploaded = genai.upload_file(path=path, display_name=hash_id)
    uploaded_files.append(uploaded)
    return [uploaded.uri]
def extract_pdf_pages(pathname: str) -> list[str]:
    """Extract the text of every page of a PDF as a flat list of strings.

    The list starts with a ``--- START OF PDF <pathname> ---`` marker. For
    each page it then contains a ``--- PAGE <n> ---`` marker (``n`` is
    zero-based) followed by the text extracted from that page.

    Args:
        pathname: Path of the PDF file to read.

    Returns:
        The marker and text strings. If the file does not exist, an error is
        printed and the parts collected so far are returned.
    """
    # Plain f-string placeholder: the original "${pathname}" left a stray "$"
    # in front of the path.
    parts = [f"--- START OF PDF {pathname} ---"]
    try:
        # Read the PDF with PyPDF2.
        reader = PdfReader(pathname)
        for page_num, page in enumerate(reader.pages):
            text = page.extract_text()
            parts.append(f"--- PAGE {page_num} ---")
            parts.append(text)
    except FileNotFoundError:
        print(f"Erreur: Fichier PDF '{pathname}' introuvable.")
    return parts
def load_pdfs_from_data():
    """Start a chat session and send it the text of every PDF in ``data``.

    Each file in the ``data`` directory whose name ends with ``.pdf`` is
    converted with ``extract_pdf_pages`` and sent to the chat as one user
    message.

    Returns:
        The chat session returned by ``model.start_chat()`` after all PDFs
        have been sent.

    Raises:
        OSError: If the ``data`` directory cannot be listed.
    """
    data_dir = "data"
    # os.listdir returns names in arbitrary order; sort them so the documents
    # are always sent to the chat in the same order.
    pdf_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".pdf"))
    convo = model.start_chat()
    for pdf_file in pdf_files:
        pdf_path = os.path.join(data_dir, pdf_file)
        convo.send_message({"role": "user", "parts": extract_pdf_pages(pdf_path)})
    return convo
# Build the module-level chat session once, when the script is loaded;
# respond() sends every user message through this same session.
convo = load_pdfs_from_data()
def respond(user_input):
    """Send *user_input* to the module-level chat and return the reply text.

    The reply text is also printed to standard output.
    """
    convo.send_message(user_input)
    answer = convo.last.text
    print(answer)
    return answer
# Text-in / text-out web interface around respond().
# NOTE(review): the title mentions a "Fang to French Translator", while the
# system instruction in this file describes a legal assistant — confirm the
# intended title.
iface = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
    title="Fang to French Translator",
)
try:
    iface.launch()
finally:
    # Delete the recorded uploads even when launch() exits with an error, so
    # they are not left behind on the remote service.
    for uploaded_file in uploaded_files:
        genai.delete_file(name=uploaded_file.name)