import gradio as gr
import os
import logging
from llama_index.llms.gemini import Gemini
import sys
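# Mirror llama_index's internal logs to stdout so indexing and query steps are visible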
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

GOOGLE_API_KEY = "AIzaSyDYhyRoOWBJWOb4bqY5wmFLrBo4HTwQDko"  # add your GOOGLE API key here
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
 
from g4f import models
from langchain.llms.base import LLM

from llama_index.llms.langchain import LangChainLLM
from langchain_g4f import G4FLLM

from llama_index.core import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    set_global_service_context,
)
#from llama_index.llms import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
import g4f 
g4f.debug.logging = True 
from llama_index.core import Settings


llm: LLM = G4FLLM(
    model=models.gpt_35_turbo_16k,
)

# Wrap the LangChain-compatible g4f LLM so llama_index can drive it
llm = LangChainLLM(llm=llm)



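# Safety settings that disable Gemini's content filters; only used by the
# commented-out Gemini LLM configuration below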
safe = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]



#llm = Gemini(model="models/gemini-pro", safety_settings=safe)

model_name = "models/embedding-001"


#llm = Gemini()
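# Gemini embedding model: embeds document chunks and queries into vectors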
embed_model = GeminiEmbedding(
    model_name=model_name, api_key=GOOGLE_API_KEY, title="this is a document"
)
Settings.embed_model = embed_model
# Read PDFs from the ./data directory


"""
parser = LlamaParse(
    api_key="llx-KMCDGpt3Yn89wwOYJXaFDfJLHTbUQbnTKVccaGVHJLfAN96w",  # can also be set in your env as LLAMA_CLOUD_API_KEY
    result_type="markdown",  # "markdown" and "text" are available
    verbose=True
)

file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader("./data", file_extractor=file_extractor).load_data()

"""



documents = SimpleDirectoryReader(
    input_dir="data",
    required_exts=[".pdf"],
).load_data()

# ServiceContext is a bundle of commonly used 
# resources used during the indexing and 
# querying stage 
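# (Note: newer llama_index releases deprecate ServiceContext in favor of Settings.)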
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=8045,
)
set_global_service_context(service_context)
print("node passer11")
# Node represents a “chunk” of a source Document
nodes = service_context.node_parser.get_nodes_from_documents(documents)
print("node passer")
# offers core abstractions around storage of Nodes, 
# indices, and vectors
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)
print("node passer")
# Create the vectorstore index
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)
print("node passer")

# Expose the index as a query engine; greet() below runs the actual queries
query_engine = index.as_query_engine()
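# Example of a direct query (hypothetical question):
#   print(query_engine.query("What are the key points of the document?"))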


def greet(name):
    # Ask the model to answer while citing its sources and articles
    question = name + ". Answer citing your sources and articles."
    response = query_engine.query(question)

    print("question:", name)
    print("answer:", response)
    return str(response)

iface = gr.Interface(fn=greet, inputs=gr.Textbox(label="Question:", lines=4), outputs="text")
iface.launch()
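# Pass share=True to iface.launch() to expose a temporary public URL if needed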