Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,10 +7,68 @@ from langchain_community.vectorstores import FAISS
|
|
| 7 |
from langchain.chains import RetrievalQA
|
| 8 |
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
|
| 9 |
from langchain.prompts import PromptTemplate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# --- CONFIGURAÇÕES DE MODELOS ---
|
| 12 |
-
|
| 13 |
-
LLM_MODEL = 'google/gemma-3-1b-it'
|
| 14 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
| 15 |
|
| 16 |
# --- CONFIGURAÇÃO DO TOKEN HF ---
|
|
|
|
| 7 |
from langchain.chains import RetrievalQA
|
| 8 |
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
|
| 9 |
from langchain.prompts import PromptTemplate
|
| 10 |
+
from langchain_community.document_loaders import WebBaseLoader
|
| 11 |
+
from langchain_text_splitters import CharacterTextSplitter
|
| 12 |
+
from langchain_community.vectorstores import FAISS
|
| 13 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
import os
|
| 16 |
+
import logging
|
| 17 |
+
logging.getLogger("langchain.text_splitter").setLevel(logging.ERROR)
|
| 18 |
+
import warnings
|
| 19 |
+
warnings.filterwarnings("ignore")
|
| 20 |
+
from langchain_community.document_loaders import RecursiveUrlLoader
|
| 21 |
+
import yaml
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ------------ building the vector store -----------------

# Offline knowledge base: the pages that are fetched and indexed into the
# vector store.
# NOTE(review): the last three CloudWalk entries differ from the CloudWalk home
# page only by their "#fragment". A fragment is resolved client-side, so these
# presumably download the same document as "https://www.cloudwalk.io/" --
# confirm whether they are wanted as separate entries.
url_list = [
    "https://www.infinitepay.io",
    "https://www.infinitepay.io/maquininha",
    "https://www.infinitepay.io/maquininha-celular",
    "https://www.infinitepay.io/tap-to-pay",
    "https://www.infinitepay.io/pdv",
    "https://www.infinitepay.io/receba-na-hora",
    "https://www.infinitepay.io/gestao-de-cobranca",
    "https://www.infinitepay.io/gestao-de-cobranca-2",
    "https://www.infinitepay.io/link-de-pagamento",
    "https://www.infinitepay.io/loja-online",
    "https://www.infinitepay.io/boleto",
    "https://www.infinitepay.io/conta-digital",
    "https://www.infinitepay.io/conta-pj",
    "https://www.infinitepay.io/pix",
    "https://www.infinitepay.io/pix-parcelado",
    "https://www.infinitepay.io/emprestimo",
    "https://www.infinitepay.io/cartao",
    "https://www.infinitepay.io/rendimento",
    "https://www.infinitepay.io/taxas",
    "https://www.cloudwalk.io/",
    "https://www.cloudwalk.io/#our-mission",
    "https://www.cloudwalk.io/#our-pillars",
    "https://www.cloudwalk.io/#our-products",
]
|
| 53 |
+
|
| 54 |
+
# --- MODEL CONFIGURATION ---
# Defined ahead of the indexing steps because the embedding step below reads
# EMBEDDING_MODEL while the module is being imported; assigning it afterwards
# raises NameError.
LLM_MODEL = 'google/gemma-3-4b-it'
# Smaller alternative: 'google/gemma-3-1b-it'
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Pages to fetch: url_list with "#fragment" suffixes stripped and repeats
# removed, keeping first-seen order. The fragment is not part of what is
# requested from the server, so fragment-only variants would otherwise index
# the same page several times.
url_list_unique = list(dict.fromkeys(url.split("#", 1)[0] for url in url_list))

# Load the web page contents as LangChain documents.
loader = WebBaseLoader(web_paths=url_list_unique)
docs = loader.load()
print(f"Total de páginas carregadas: {len(docs)}")

# Split the pages into overlapping chunks, embed them and build the FAISS index.
text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
split_docs = text_splitter.split_documents(docs)
# NOTE(review): cache_folder and VS_BASE are not defined anywhere in the
# visible part of this file -- confirm they are assigned before this point.
# An earlier commented-out line suggested "../vector_store/vs_base" as the
# vector-store directory.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL,
                                   cache_folder=cache_folder)
vector_store = FAISS.from_documents(split_docs, embeddings)

# Persist the index so it can be reloaded without fetching the pages again.
os.makedirs(VS_BASE, exist_ok=True)
vector_store.save_local(VS_BASE)
print(f"vs_base salva em {VS_BASE}")
|
| 73 |
|
| 74 |
# --- CONFIGURAÇÃO DO TOKEN HF ---
|