legalrci

Runtime error

App Files Files Community

legalrci / app.py

Docfile

Update app.py

41b8d5c verified over 1 year ago

raw

history blame

3.49 kB

	import gradio as gr
	import os
	import logging
	from llama_index.llms.gemini import Gemini
	import sys
	# Send INFO-and-above log records to stdout. basicConfig() already installs
	# a stdout StreamHandler on the root logger, so no second handler is added
	# here (an extra one would make every record print twice).
	logging.basicConfig(stream=sys.stdout, level=logging.INFO)

	# The Gemini API key is read from the environment (e.g. a deployment secret)
	# instead of being committed to the source file.
	# NOTE(review): the key that was previously hard-coded here has been exposed
	# publicly and should be revoked.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
	if not GOOGLE_API_KEY:
	    logging.warning("GOOGLE_API_KEY is not set; Gemini requests are expected to fail.")

	from llama_index.core import SimpleDirectoryReader
	from g4f import Provider, models
	from langchain.llms.base import LLM

	from llama_index.llms.langchain import LangChainLLM
	from langchain_g4f import G4FLLM

	from llama_index.core import (
	ServiceContext,
	SimpleDirectoryReader,
	StorageContext,
	VectorStoreIndex,
	set_global_service_context,
	)
	#from llama_index.llms import Gemini
	from llama_index.embeddings.gemini import GeminiEmbedding
	import g4f
	g4f.debug.logging = True
	from llama_index.core import Settings
	from langchain_google_genai import ChatGoogleGenerativeAI


	# Instantiate the g4f-backed LangChain model using `models.gpt_35_turbo_16k`.
	# The type annotation replaces the former chained assignment
	# (`llm= LLM = ...`), which rebound the imported `LLM` base class to this
	# instance.
	g4f_llm: LLM = G4FLLM(
	    model=models.gpt_35_turbo_16k,
	)

	# Wrap the LangChain model in LlamaIndex's LangChain adapter; this wrapped
	# object is the `llm` used by the rest of the script.
	llm = LangChainLLM(llm=g4f_llm)



	# Safety settings: one entry per harm category, each with its threshold set
	# to "BLOCK_NONE". Only referenced by the disabled Gemini LLM setup in this
	# file.
	safe = [
	    {"category": harm_category, "threshold": "BLOCK_NONE"}
	    for harm_category in (
	        "HARM_CATEGORY_HARASSMENT",
	        "HARM_CATEGORY_HATE_SPEECH",
	        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
	        "HARM_CATEGORY_DANGEROUS_CONTENT",
	    )
	]



	# NOTE: a Gemini LLM (`Gemini(model="models/gemini-pro", safety_settings=safe)`)
	# is left disabled here; only the Gemini embedding model is active in this
	# section.
	model_name = "models/embedding-001"

	# Create the embedding model and register it as the default on the global
	# Settings object.
	embed_model = GeminiEmbedding(
	    model_name=model_name,
	    api_key=GOOGLE_API_KEY,
	    title="this is a document",
	)
	Settings.embed_model = embed_model
	# Load every PDF found in the local "data" directory.
	#
	# NOTE: an alternative loader based on LlamaParse (markdown output, passed
	# to the reader via `file_extractor={".pdf": parser}`) used to be kept here
	# inside a string literal together with a hard-coded API key. It has been
	# removed; if it is reinstated, supply the key through the
	# LLAMA_CLOUD_API_KEY environment variable.
	# NOTE(review): the key that was embedded here has been exposed publicly and
	# should be revoked.
	documents = SimpleDirectoryReader(
	    input_dir="data",
	    required_exts=[".pdf"],
	).load_data()

	# Configure the global Settings object (the replacement for the deprecated
	# ServiceContext) so that indexing and querying share one LLM, embedding
	# model and chunk size.
	Settings.llm = llm
	Settings.embed_model = embed_model
	Settings.chunk_size = 8045

	# A Node represents a "chunk" of a source Document. The documents are split
	# exactly once here; the index below is built from these same nodes instead
	# of re-parsing the documents a second time.
	nodes = Settings.node_parser.get_nodes_from_documents(documents)
	logging.info("Parsed %d documents into %d nodes", len(documents), len(nodes))

	# The storage context offers core abstractions around storage of nodes,
	# indices and vectors.
	storage_context = StorageContext.from_defaults()
	storage_context.docstore.add_documents(nodes)

	# Create the vector store index over the parsed nodes.
	index = VectorStoreIndex(nodes, storage_context=storage_context)
	logging.info("Vector index built")

	# Query engine used by the UI callback to answer questions.
	query_engine = index.as_query_engine()
	# Query the index


	def greet(name: str) -> str:
	    """Answer a question using the document query engine.

	    Args:
	        name: The user's question as typed in the UI.

	    Returns:
	        The query engine's response rendered as text, or an empty string
	        when no question was supplied.
	    """
	    # Nothing to ask: skip the query entirely for None or blank input.
	    if name is None or not name.strip():
	        return ""

	    # Append a (French) instruction asking the model to cite its sources
	    # and articles.
	    ss = name + ".réponds en citant tes sources et articles"
	    response = query_engine.query(ss)

	    print("question :", name)
	    print("réponse :", response)
	    # Return plain text so the "text" output receives a string rather than
	    # the query engine's response object.
	    return str(response)

	# Build the web UI: a four-line question box wired to greet(), with
	# plain-text output.
	question_box = gr.Textbox(label="Question:", lines=4)
	iface = gr.Interface(
	    fn=greet,
	    inputs=question_box,
	    outputs="text",
	)
	# Start serving the interface.
	iface.launch()