Docfile committed on
Commit 510ec12 · verified · 1 Parent(s): 41b8d5c

Update app.py

Files changed (1):
app.py +100 -141

app.py CHANGED
@@ -1,146 +1,105 @@
  import gradio as gr
- import os
- import logging
- from llama_index.llms.gemini import Gemini
- import sys
- logging.basicConfig(stream=sys.stdout, level=logging.INFO)
- logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-
- GOOGLE_API_KEY = "AIzaSyDYhyRoOWBJWOb4bqY5wmFLrBo4HTwQDko"  # add your GOOGLE API key here
- os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
-
- from llama_index.core import SimpleDirectoryReader
- from g4f import Provider, models
- from langchain.llms.base import LLM
-
- from llama_index.llms.langchain import LangChainLLM
- from langchain_g4f import G4FLLM
-
- from llama_index.core import (
-     ServiceContext,
-     SimpleDirectoryReader,
-     StorageContext,
-     VectorStoreIndex,
-     set_global_service_context,
- )
- #from llama_index.llms import Gemini
- from llama_index.embeddings.gemini import GeminiEmbedding
- import g4f
- g4f.debug.logging = True
- from llama_index.core import Settings
- from langchain_google_genai import ChatGoogleGenerativeAI
-
-
- llm = LLM = G4FLLM(
-     model=models.gpt_35_turbo_16k,
- )
-
- llm = LangChainLLM(llm=llm)
-
-
- safe = [
-     {
-         "category": "HARM_CATEGORY_HARASSMENT",
-         "threshold": "BLOCK_NONE",
-     },
-     {
-         "category": "HARM_CATEGORY_HATE_SPEECH",
-         "threshold": "BLOCK_NONE",
-     },
-     {
-         "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-         "threshold": "BLOCK_NONE",
-     },
-     {
-         "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-         "threshold": "BLOCK_NONE",
-     },
  ]
-
-
- #llm = Gemini(model="models/gemini-pro", safety_settings=safe)
-
- model_name = "models/embedding-001"
-
-
- #llm = Gemini()
- embed_model = GeminiEmbedding(
-     model_name=model_name, api_key=GOOGLE_API_KEY, title="this is a document"
- )
- Settings.embed_model = embed_model
- # Reads pdfs at "./" path
-
-
- """
- parser = LlamaParse(
-     api_key="llx-KMCDGpt3Yn89wwOYJXaFDfJLHTbUQbnTKVccaGVHJLfAN96w",  # can also be set in your env as LLAMA_CLOUD_API_KEY
-     result_type="markdown",  # "markdown" and "text" are available
-     verbose=True
- )
-
- file_extractor = {".pdf": parser}
- documents = SimpleDirectoryReader("./data", file_extractor=file_extractor).load_data()
-
- """
-
-
-
- documents = (
-     SimpleDirectoryReader(
-         input_dir = 'data',
-         required_exts = [".pdf"])
-     .load_data()
- )
-
- # ServiceContext is a bundle of commonly used
- # resources used during the indexing and
- # querying stage
- service_context = (
-     ServiceContext
-     .from_defaults(
-         llm=llm,
-         embed_model=embed_model,
-         chunk_size=8045
-     )
- )
- set_global_service_context(service_context)
- print("node passer11")
- # Node represents a “chunk” of a source Document
- nodes = (
-     service_context
-     .node_parser
-     .get_nodes_from_documents(documents)
- )
- print("node passer")
- # offers core abstractions around storage of Nodes,
- # indices, and vectors
- storage_context = StorageContext.from_defaults()
- storage_context.docstore.add_documents(nodes)
- print("node passer")
- # Create the vectorstore index
- index = (
-     VectorStoreIndex
-     .from_documents(
-         documents,
-         storage_context=storage_context,
-         llm=llm
-     )
- )
- print("node passer")
-
- query_engine = index.as_query_engine()
- # Query the index
-
-
- def greet(name):
-     ss = name + ".réponds en citant tes sources et articles"
-     response = query_engine.query(ss)
-
-     print("question :", name)
-     print("réponse :", response)
-     return response
-
- iface = gr.Interface(fn=greet, inputs=gr.Textbox(label="Question:", lines=4), outputs="text")
- iface.launch()
 
  import gradio as gr
+ from pathlib import Path
+ import hashlib
+ import google.generativeai as genai
+ from PyPDF2 import PdfReader
+
+ import os
+ token = os.environ.get("TOKEN")
+ genai.configure(api_key=token)
+
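A note on the configuration above: `os.environ.get("TOKEN")` returns `None` when the secret is missing, and `genai.configure(api_key=None)` only fails later, at the first API call. A minimal guard, assuming the same `TOKEN` variable, would be:

```python
# Sketch: fail fast when the TOKEN secret is not set, instead of
# surfacing an authentication error on the first request.
if not token:
    raise RuntimeError("Set the TOKEN environment variable to a Google API key")
```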
+ # Set up the model
+ generation_config = {
+     "max_output_tokens": 8192,
+ }
+
+ safety_settings = [
+     {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+     {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+     {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
  ]

+ system_instruction = "Utilisez les éléments de contexte pour répondre. Si vous ne connaissez pas la réponse, n'essayez pas d'inventer une réponse et précisez que vous ne la connaissez pas. Je veux que tu agisses comme un professionnel du droit. Je pose une question et tu réponds en te basant sur le contexte. Je ne veux aucune explication, juste la réponse. Réponds en citant tes sources et articles."
+
+
+ model = genai.GenerativeModel(
+     model_name="gemini-1.5-flash-latest",
+     generation_config=generation_config,
+     system_instruction=system_instruction,
+     safety_settings=safety_settings,
+ )
+
+
+ code_civil = "data/CODE_CIVIL_I_-_2023 (1).pdf"
+ code_civil_II = "data/CODE_civil_II_-_2.pdf"
+ Code_de_procedure_pénale = "data/Code_de_proc._pénale-2023.pdf"
+ code_penal = "data/NOUVEAU_CODE_PENAL_2024.pdf"
+
+ uploaded_files = []
+ def upload_if_needed(pathname: str) -> list[str]:
+     path = Path(pathname)
+     hash_id = hashlib.sha256(path.read_bytes()).hexdigest()
+     try:
+         existing_file = genai.get_file(name=hash_id)
+         return [existing_file.uri]
+     except Exception:
+         pass
+     uploaded_files.append(genai.upload_file(path=path, display_name=hash_id))
+     return [uploaded_files[-1].uri]
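Note that `upload_if_needed` is defined but never called in this file, and as written it passes a SHA-256 digest to `genai.get_file`, which only resolves server-assigned names of the form `files/...`; the lookup therefore always raises and every call would re-upload. A sketch of a dedupe that searches by `display_name` instead (the function name is hypothetical; it assumes the `google-generativeai` File API):

```python
# Sketch: deduplicate uploads by scanning existing files' display_name,
# since genai.get_file() expects the server-assigned "files/..." name,
# not an arbitrary hash.
def upload_if_needed_by_display_name(pathname: str) -> list[str]:
    path = Path(pathname)
    hash_id = hashlib.sha256(path.read_bytes()).hexdigest()
    for existing_file in genai.list_files():
        if existing_file.display_name == hash_id:
            return [existing_file.uri]
    uploaded = genai.upload_file(path=path, display_name=hash_id)
    uploaded_files.append(uploaded)
    return [uploaded.uri]
```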
+
+
+
+ def extract_pdf_pages(pathname: str) -> list[str]:
+     parts = [f"--- START OF PDF {pathname} ---"]
+     try:
+         # Read the PDF with PyPDF2
+         reader = PdfReader(pathname)
+         for page_num in range(len(reader.pages)):
+             page = reader.pages[page_num]
+             text = page.extract_text()
+             parts.append(f"--- PAGE {page_num} ---")
+             parts.append(text)
+     except FileNotFoundError:
+         print(f"Erreur: Fichier PDF '{pathname}' introuvable.")
+     return parts
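`PdfReader.extract_text()` returns an empty string for scanned or image-only pages, which would silently leave the model without context. A quick sanity check over the four PDFs, using the helper and paths defined above:

```python
# Sketch: report how much text was actually recovered from each PDF,
# so empty extractions are caught before the chat is seeded.
for pdf_path in (code_civil, code_civil_II, Code_de_procedure_pénale, code_penal):
    pages = extract_pdf_pages(pdf_path)
    total_chars = sum(len(part) for part in pages if part)
    print(f"{pdf_path}: {total_chars} characters extracted")
```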
+
+ convo = model.start_chat(history=[
+     {
+         "role": "user",
+         "parts": extract_pdf_pages(code_civil)
+     },
+     {
+         "role": "user",
+         "parts": extract_pdf_pages(code_civil_II)
+     },
+     {
+         "role": "user",
+         "parts": extract_pdf_pages(Code_de_procedure_pénale)
+     },
+     {
+         "role": "user",
+         "parts": extract_pdf_pages(code_penal)
+     },
+     {
+         "role": "model",
+         "parts": ["\u200b"]  # zero-width space placeholder; the API rejects empty text parts
+     },
+ ])
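Seeding the chat with four full legal codes makes a very large opening history. `GenerativeModel.count_tokens` accepts a chat history, so a one-line check can confirm the seed actually fits the model's context window before the first question:

```python
# Sketch: measure the seeded history against the model's context window.
token_count = model.count_tokens(convo.history)
print(f"Seeded history: {token_count.total_tokens} tokens")
```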
+
+
+ def respond(user_input):
+     convo.send_message(user_input)
+     response = convo.last.text
+     print(response)
+     return response
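`convo` is module-level state, so every Gradio visitor appends to the same shared conversation, and each answer pays for the ever-growing transcript. A per-request variant (a sketch; `respond_stateless` is a hypothetical name, reusing the seeded history built above):

```python
# Sketch: answer each question in a fresh chat so users don't share
# (and endlessly grow) a single conversation.
seeded_history = convo.history[:]  # the context turns built above

def respond_stateless(user_input: str) -> str:
    chat = model.start_chat(history=seeded_history)
    chat.send_message(user_input)
    return chat.last.text
```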
+
+ iface = gr.Interface(fn=respond, inputs="text", outputs="text", title="Fang to French Translator")
+ iface.launch()
+
+ for uploaded_file in uploaded_files:
+     genai.delete_file(name=uploaded_file.name)
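One caveat on this cleanup loop: `iface.launch()` blocks until the server stops, so `genai.delete_file` runs only at shutdown, and not at all if the process is killed. A `try`/`finally` around the launch makes that intent explicit (same objects as above; note that `uploaded_files` stays empty unless `upload_if_needed` is actually called):

```python
# Sketch: guarantee uploaded files are deleted even if launch() raises.
try:
    iface.launch()
finally:
    for uploaded_file in uploaded_files:
        genai.delete_file(name=uploaded_file.name)
```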