# legalrci / app.py
# Docfile's picture
# Update app.py
# 41b8d5c verified
# raw
# history blame
# 3.49 kB
import gradio as gr
import os
import logging
from llama_index.llms.gemini import Gemini
import sys
# Send log records to stdout at INFO level.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig above already attaches a stdout handler to the
# root logger, so this second handler looks like it makes every record
# print twice -- confirm it is wanted before removing it.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# The Google API key is a secret: it is read from the environment (for
# example a deployment secret) and must never be written into this file.
# Any key that was previously committed here should be revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
if not GOOGLE_API_KEY:
    logging.error(
        "GOOGLE_API_KEY is not set in the environment; "
        "calls that need the Google API key are expected to fail."
    )
from llama_index.core import SimpleDirectoryReader
from g4f import Provider, models
from langchain.llms.base import LLM
from llama_index.llms.langchain import LangChainLLM
from langchain_g4f import G4FLLM
from llama_index.core import (
ServiceContext,
SimpleDirectoryReader,
StorageContext,
VectorStoreIndex,
set_global_service_context,
)
#from llama_index.llms import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
import g4f
# Turn on g4f's debug logging flag.
g4f.debug.logging = True
from llama_index.core import Settings
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model served through g4f, exposed as a LangChain LLM.
# It is annotated with (not assigned to) `LLM`, so the langchain base class
# imported at the top of the file keeps its name.
g4f_llm: LLM = G4FLLM(
    model=models.gpt_35_turbo_16k,
)
# Adapter that lets llama_index use the LangChain model as its LLM.
llm = LangChainLLM(llm=g4f_llm)
# Safety settings in the category/threshold dict format: one entry per harm
# category, each with the threshold "BLOCK_NONE".
# NOTE(review): nothing visible in this file passes `safe` to a model at
# runtime -- confirm whether it is still needed.
_HARM_CATEGORIES = (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)
safe = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in _HARM_CATEGORIES
]
# Embedding model: Gemini "models/embedding-001", authenticated with
# GOOGLE_API_KEY.
# (Disabled alternative for the LLM, kept for reference:
#  llm = Gemini(model="models/gemini-pro", safety_settings=safe))
model_name = "models/embedding-001"
embed_model = GeminiEmbedding(
    api_key=GOOGLE_API_KEY,
    model_name=model_name,
    title="this is a document",
)
# Make it the embedding model on llama_index's `Settings` object.
Settings.embed_model = embed_model
# Load the .pdf files from the local "data" directory.
#
# Disabled alternative: parse the PDFs with LlamaParse instead. Its API key
# must come from the environment (LLAMA_CLOUD_API_KEY), never from a literal
# in this file; any key previously committed here should be revoked.
#
#   parser = LlamaParse(
#       result_type="markdown",  # "markdown" and "text" are available
#       verbose=True,
#   )
#   file_extractor = {".pdf": parser}
#   documents = SimpleDirectoryReader(
#       "./data", file_extractor=file_extractor
#   ).load_data()
documents = SimpleDirectoryReader(
    input_dir="data",
    required_exts=[".pdf"],
).load_data()
# Bundle the LLM, the embedding model and the chunk size into a single
# ServiceContext, then install it as llama_index's global service context.
# NOTE(review): ServiceContext appears to be deprecated in newer llama_index
# releases in favour of `Settings` -- confirm against the installed version.
service_context = ServiceContext.from_defaults(
    chunk_size=8045,
    embed_model=embed_model,
    llm=llm,
)
set_global_service_context(service_context)
# Split the loaded documents into nodes (chunks of a source document) using
# the service context's node parser.
print("node passer11")
nodes = service_context.node_parser.get_nodes_from_documents(documents)
print("node passer")

# Default storage context (docstore, index store, vector store); its
# docstore is seeded with the parsed nodes.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)
print("node passer")

# Build the vector index from the nodes parsed above. Building it from
# `documents` instead would start again from the raw documents and leave
# the nodes computed here unused by the index.
index = VectorStoreIndex(nodes, storage_context=storage_context)
print("node passer")

# Query engine with default options; the LLM and embedding model are
# expected to come from the global service context set earlier.
query_engine = index.as_query_engine()
# Query the index
def greet(name: str) -> str:
    """Answer a user question using the module-level query engine.

    Args:
        name: The question text submitted from the Gradio textbox.

    Returns:
        The query engine's response converted to text, or an empty string
        when the question is missing, empty or only whitespace.
    """
    # Nothing to ask: do not spend a model call on an empty question.
    if not name or not name.strip():
        return ""

    # The French suffix asks the model to answer while citing its sources
    # and articles.
    prompt = name + ".réponds en citant tes sources et articles"
    # Log the question first so it is visible even if the query fails.
    print("question :", name)
    response = query_engine.query(prompt)
    print("réponse :", response)
    # The Gradio output component is "text", so return a plain string.
    return str(response)
# Gradio UI: a four-line "Question:" textbox wired to greet(), with the
# result shown as text. launch() starts the app.
question_box = gr.Textbox(label="Question:", lines=4)
iface = gr.Interface(fn=greet, inputs=question_box, outputs="text")
iface.launch()