Update app.py
Browse files
app.py
CHANGED
@@ -1,146 +1,105 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
import
|
4 |
-
|
5 |
-
import
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
g4f.debug.logging = True
|
30 |
-
from llama_index.core import Settings
|
31 |
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
32 |
-
|
33 |
-
|
34 |
-
llm= LLM = G4FLLM(
|
35 |
-
model=models.gpt_35_turbo_16k,
|
36 |
-
)
|
37 |
-
|
38 |
-
llm = LangChainLLM(llm=llm)
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
safe = [
|
43 |
-
{
|
44 |
-
"category": "HARM_CATEGORY_HARASSMENT",
|
45 |
-
"threshold": "BLOCK_NONE",
|
46 |
-
},
|
47 |
-
{
|
48 |
-
"category": "HARM_CATEGORY_HATE_SPEECH",
|
49 |
-
"threshold": "BLOCK_NONE",
|
50 |
-
},
|
51 |
-
{
|
52 |
-
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
53 |
-
"threshold": "BLOCK_NONE",
|
54 |
-
},
|
55 |
-
{
|
56 |
-
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
57 |
-
"threshold": "BLOCK_NONE",
|
58 |
-
},
|
59 |
]
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
model_name = "models/embedding-001"
|
66 |
-
|
67 |
-
|
68 |
-
#llm = Gemini()
|
69 |
-
embed_model = GeminiEmbedding(
|
70 |
-
model_name=model_name, api_key=GOOGLE_API_KEY, title="this is a document"
|
71 |
-
)
|
72 |
-
Settings.embed_model = embed_model
|
73 |
-
# Reads pdfs at "./" path
|
74 |
-
|
75 |
-
|
76 |
-
"""
|
77 |
-
parser = LlamaParse(
|
78 |
-
api_key="llx-KMCDGpt3Yn89wwOYJXaFDfJLHTbUQbnTKVccaGVHJLfAN96w", # can also be set in your env as LLAMA_CLOUD_API_KEY
|
79 |
-
result_type="markdown", # "markdown" and "text" are available
|
80 |
-
verbose=True
|
81 |
-
)
|
82 |
-
|
83 |
-
file_extractor = {".pdf": parser}
|
84 |
-
documents = SimpleDirectoryReader("./data", file_extractor=file_extractor).load_data()
|
85 |
-
|
86 |
-
"""
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
documents = (
|
91 |
-
SimpleDirectoryReader(
|
92 |
-
input_dir = 'data',
|
93 |
-
required_exts = [".pdf"])
|
94 |
-
.load_data()
|
95 |
-
)
|
96 |
-
|
97 |
-
# ServiceContext is a bundle of commonly used
|
98 |
-
# resources used during the indexing and
|
99 |
-
# querying stage
|
100 |
-
service_context = (
|
101 |
-
ServiceContext
|
102 |
-
.from_defaults(
|
103 |
-
llm=llm,
|
104 |
-
embed_model=embed_model,
|
105 |
-
chunk_size=8045
|
106 |
-
)
|
107 |
-
)
|
108 |
-
set_global_service_context(service_context)
|
109 |
-
print("node passer11")
|
110 |
-
# Node represents a “chunk” of a source Document
|
111 |
-
nodes = (
|
112 |
-
service_context
|
113 |
-
.node_parser
|
114 |
-
.get_nodes_from_documents(documents)
|
115 |
-
)
|
116 |
-
print("node passer")
|
117 |
-
# offers core abstractions around storage of Nodes,
|
118 |
-
# indices, and vectors
|
119 |
-
storage_context = StorageContext.from_defaults()
|
120 |
-
storage_context.docstore.add_documents(nodes)
|
121 |
-
print("node passer")
|
122 |
-
# Create the vectorstore index
|
123 |
-
index = (
|
124 |
-
VectorStoreIndex
|
125 |
-
.from_documents(
|
126 |
-
documents,
|
127 |
-
storage_context=storage_context,
|
128 |
-
llm=llm
|
129 |
-
)
|
130 |
-
)
|
131 |
-
print("node passer")
|
132 |
-
|
133 |
-
query_engine = index.as_query_engine()
|
134 |
-
# Query the index
|
135 |
-
|
136 |
-
|
137 |
-
def greet(name):
|
138 |
-
ss = name + ".réponds en citant tes sources et articles"
|
139 |
-
response = query_engine.query(ss)
|
140 |
-
|
141 |
-
print("question :",name)
|
142 |
-
print("réponse :", response)
|
143 |
-
return response
|
144 |
-
|
145 |
-
iface = gr.Interface(fn=greet, inputs=gr.Textbox(label="Question:", lines=4), outputs="text")
|
146 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from pathlib import Path
|
3 |
+
import hashlib
|
4 |
+
import google.generativeai as genai
|
5 |
+
from PyPDF2 import PdfReader
|
6 |
+
|
7 |
+
import os
|
8 |
+
token=os.environ.get("TOKEN")
|
9 |
+
genai.configure(api_key=token)
|
10 |
+
|
11 |
+
# Set up the model
|
12 |
+
generation_config = {
|
13 |
+
"max_output_tokens": 8192,
|
14 |
+
}
|
15 |
+
|
16 |
+
safety_settings = [
|
17 |
+
{
|
18 |
+
"category": "HARM_CATEGORY_HARASSMENT","threshold": "BLOCK_NONE"
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"category": "HARM_CATEGORY_HATE_SPEECH","threshold": "BLOCK_NONE"
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"category": "HARM_CATEGORY_DANGEROUS_CONTENT","threshold": "BLOCK_NONE"
|
28 |
+
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
]
|
30 |
|
31 |
+
system_instruction = "Utillisez les éléments de contexte pour répondre. Si vous ne connaissez pas la réponse n'essayez pas d'inventer une réponse et preciser que vous ne la connaissez pas. Je veux que tu agisses comme un profesionel du droit. Je pose une questiob et tu reponds en te basant sur le contexte. Je ne veux aucune explication. Juste la réponse..réponds en citant tes sources et articles"
|
32 |
+
|
33 |
+
|
34 |
+
model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest",
|
35 |
+
generation_config=generation_config,
|
36 |
+
system_instruction=system_instruction, safety_settings=safety_settings)
|
37 |
+
|
38 |
+
|
39 |
+
code_civil = "data/CODE_CIVIL_I_-_2023 (1).pdf"
|
40 |
+
code_civil_II = "data/CODE_civil_II_-_2.pdf"
|
41 |
+
Code_de_procedure_pénale = "data/Code_de_proc._pénale-2023.pdf"
|
42 |
+
code_penal = "data/NOUVEAU_CODE_PENAL_2024.pdf"
|
43 |
+
|
44 |
+
uploaded_files = []
|
45 |
+
def upload_if_needed(pathname: str) -> list[str]:
    """Upload a local file to the Gemini Files API, reusing a prior upload when possible.

    The file's SHA-256 digest is used as a stable, content-addressed
    identifier so identical bytes are never uploaded twice.

    Args:
        pathname: Path to the local file to upload.

    Returns:
        A single-element list with the URI of the (existing or newly
        uploaded) remote file.

    Raises:
        FileNotFoundError: If *pathname* does not exist on disk.
    """
    path = Path(pathname)
    # Content-addressed id: identical bytes -> identical digest, enabling reuse.
    hash_id = hashlib.sha256(path.read_bytes()).hexdigest()
    try:
        # NOTE(review): get_file() expects the server-assigned resource name
        # ("files/..."), while upload_file() below only sets display_name to
        # hash_id — this lookup probably never matches; confirm against the
        # Files API and switch to scanning genai.list_files() if so.
        existing_file = genai.get_file(name=hash_id)
        return [existing_file.uri]
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Treat any lookup failure as "not uploaded yet".
        pass
    # Remember the handle so the shutdown loop can delete the remote file.
    uploaded_files.append(genai.upload_file(path=path, display_name=hash_id))
    return [uploaded_files[-1].uri]
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
def extract_pdf_pages(pathname: str) -> list[str]:
    """Extract every page of a PDF as a list of labelled text parts.

    Args:
        pathname: Path to the PDF file to read.

    Returns:
        A list beginning with a "START OF PDF" marker, followed by a
        "--- PAGE n ---" marker (0-based, matching the original output)
        and the extracted text of each page. If the file is missing,
        only the start marker is returned.
    """
    # Bug fix: the original f-string used JS-style `${pathname}`, which
    # emitted a literal "$" before the path.
    parts = [f"--- START OF PDF {pathname} ---"]
    try:
        # Lecture du PDF avec PyPDF2 (read the PDF with PyPDF2).
        reader = PdfReader(pathname)
        # enumerate() replaces manual range(len(...)) indexing; numbering
        # stays 0-based to preserve the existing marker format.
        for page_num, page in enumerate(reader.pages):
            text = page.extract_text()
            parts.append(f"--- PAGE {page_num} ---")
            # extract_text() can return None for image-only pages; the chat
            # history parts must be strings, so normalise to "".
            parts.append(text or "")
    except FileNotFoundError:
        print(f"Erreur: Fichier PDF '{pathname}' introuvable.")
    return parts
|
71 |
+
|
72 |
+
convo = model.start_chat(history=[
|
73 |
+
{
|
74 |
+
"role": "user",
|
75 |
+
"parts": extract_pdf_pages(code_civil)
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"role": "user",
|
79 |
+
"parts": extract_pdf_pages(code_civil_II)
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"role": "user",
|
83 |
+
"parts": extract_pdf_pages(Code_de_procedure_pénale)
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"role": "user",
|
87 |
+
"parts": extract_pdf_pages(code_penal)
|
88 |
+
},
|
89 |
+
{ "role": "model",
|
90 |
+
"parts": [""]
|
91 |
+
},
|
92 |
+
])
|
93 |
+
|
94 |
+
|
95 |
+
def respond(user_input):
    """Forward the user's question to the ongoing chat session and return the reply text."""
    # send_message() appends the exchange to the session history; the model's
    # reply then becomes the last turn of the conversation.
    convo.send_message(user_input)
    answer = convo.last.text
    # Echo the reply to the server log for debugging.
    print(answer)
    return answer
|
100 |
|
101 |
+
iface = gr.Interface(fn=respond, inputs="text", outputs="text", title="Fang to French Translator")
|
102 |
+
iface.launch()
|
103 |
|
104 |
+
for uploaded_file in uploaded_files:
|
105 |
+
genai.delete_file(name=uploaded_file.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|