File size: 2,907 Bytes
ea114c9
510ec12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a25995c
 
510ec12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0438389
 
 
 
 
 
 
 
 
 
 
510ec12
 
 
 
 
 
a25995c
510ec12
 
a25995c
510ec12
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gradio as gr
from pathlib import Path
import hashlib
import google.generativeai as genai
from PyPDF2 import PdfReader  

import os 
# Read the API key from the TOKEN environment variable and hand it to the SDK.
token = os.environ.get("TOKEN")
genai.configure(api_key=token)

# Generation parameters: only the output-token cap is set explicitly.
generation_config = {"max_output_tokens": 8192}

# Every listed harm category is given the same "BLOCK_NONE" threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# System prompt (French): answer from the supplied context as a legal
# professional, do not invent answers, give no explanation, cite sources and
# articles. The text is sent to the model verbatim.
system_instruction = "Utillisez les éléments de contexte pour répondre. Si vous ne connaissez pas la réponse n'essayez pas d'inventer une réponse et preciser que vous ne la connaissez pas.  Je veux que tu agisses comme un profesionel du droit. Je pose une questiob  et tu reponds en te basant sur le contexte. Je ne veux aucune explication. Juste la réponse..réponds en citant tes sources et articles"

# Single model handle shared by the rest of the script.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-latest",
    generation_config=generation_config,
    system_instruction=system_instruction,
    safety_settings=safety_settings,
)


# Handles returned by genai.upload_file() during this run; the end of the
# script iterates this list to delete them.
uploaded_files = []


def upload_if_needed(pathname: str) -> list[str]:
  """Return the URI of the remote copy of a file, uploading it when needed.

  The file's SHA-256 hex digest is used as its identifier: it is first looked
  up with ``genai.get_file`` and, when that lookup fails, the file is uploaded
  with the digest as its display name and remembered in ``uploaded_files``.

  Args:
    pathname: Path of the local file to make available remotely.

  Returns:
    A one-element list containing the file's URI.

  Raises:
    OSError: If the local file cannot be read.
  """
  path = Path(pathname)
  hash_id = hashlib.sha256(path.read_bytes()).hexdigest()

  # Best-effort lookup: any SDK failure is treated as "not uploaded yet".
  # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt
  # and SystemExit propagate instead of being silently swallowed.
  # NOTE(review): the lookup passes the hash as `name` while the upload below
  # stores it as `display_name` — confirm that get_file can resolve it.
  try:
    existing_file = genai.get_file(name=hash_id)
  except Exception:
    existing_file = None
  if existing_file is not None:
    return [existing_file.uri]

  uploaded_files.append(genai.upload_file(path=path, display_name=hash_id))
  return [uploaded_files[-1].uri]



def extract_pdf_pages(pathname: str) -> list[str]:
    """Extract the text of every page of a PDF as a flat list of strings.

    The list starts with a ``--- START OF PDF <pathname> ---`` header, followed
    for each page by a ``--- PAGE <n> ---`` marker (``n`` is zero-based) and
    the text extracted from that page.

    Args:
        pathname: Path of the PDF file to read.

    Returns:
        The header, then alternating page markers and page texts. When the
        file does not exist, an error is printed and only the entries
        collected so far (normally just the header) are returned.
    """
    # The header previously contained a stray "$" (template-literal leftover)
    # that ended up verbatim in the text sent to the model.
    parts = [f"--- START OF PDF {pathname} ---"]
    try:
        # Read the PDF with PyPDF2.
        reader = PdfReader(pathname)
        for page_num, page in enumerate(reader.pages):
            text = page.extract_text()
            parts.append(f"--- PAGE {page_num} ---")
            parts.append(text)
    except FileNotFoundError:
        print(f"Erreur: Fichier PDF '{pathname}' introuvable.")
    return parts

# Load every PDF found in the data folder into one chat session.
def load_pdfs_from_data():
  """Start a chat session and feed it the text of every PDF under ``data``.

  Each file whose name ends with ``.pdf`` is converted to text with
  ``extract_pdf_pages`` and sent to the chat as its own user message.

  Returns:
    The chat session returned by ``model.start_chat()``, after all PDFs have
    been sent.

  Raises:
    OSError: If the ``data`` directory cannot be listed.
  """
  data_dir = "data"
  # os.listdir() returns names in arbitrary order; sorting makes the order in
  # which documents enter the conversation history reproducible across runs.
  pdf_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.pdf'))
  convo = model.start_chat()
  for pdf_file in pdf_files:
    pdf_path = os.path.join(data_dir, pdf_file)
    convo.send_message({"role": "user", "parts": extract_pdf_pages(pdf_path)})
  return convo

# Chat session created once when the module is loaded, with the PDFs from the
# data folder already sent to it; respond() reuses this same session for every
# request.
convo = load_pdfs_from_data()

def respond(user_input):
  """Forward the user's text to the shared chat session and return the reply.

  The reply text is also printed to standard output before being returned.
  """
  convo.send_message(user_input)
  reply_text = convo.last.text
  print(reply_text)
  return reply_text

# Text-in / text-out web UI around respond().
# NOTE(review): the title mentions translation, while respond() forwards the
# text to a chat session configured with a legal-assistant system prompt —
# confirm the intended title.
iface = gr.Interface(fn=respond, inputs="text", outputs="text", title="Fang to French Translator")
try:
  iface.launch()
finally:
  # Remove the files uploaded during this run even when launch() raises, so
  # they are not left behind on the remote side.
  for uploaded_file in uploaded_files:
    genai.delete_file(name=uploaded_file.name)