chunkr-api / app.py
KJ24's picture
Update app.py
d5e5243 verified
raw
history blame
2.35 kB
import logging
from functools import lru_cache
from typing import Optional

from fastapi import FastAPI
from llama_index.core import Document
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from pydantic import BaseModel
# ASGI application object; the route decorators in this module register on it
# and the uvicorn entry point refers to it as "app:app".
app = FastAPI()
# Schema of the JSON body sent to POST /chunk.
class ChunkRequest(BaseModel):
    # Raw text to split into chunks; the only required field.
    text: str

    # Optional caller-supplied fields. They default to None when omitted.
    # NOTE: `type` shadows the builtin inside the class body, but the name is
    # part of the JSON contract and must stay as is.
    source_id: Optional[str] = None
    titre: Optional[str] = None
    source: Optional[str] = None
    type: Optional[str] = None
# Module-level logger so that chunking failures reported to the client as an
# "error" payload still leave a traceback in the server logs.
_logger = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def _get_llm():
    """Build the LlamaCPP LLM once and reuse the same instance afterwards."""
    return LlamaCPP(
        # The model is referenced by its Hugging Face URL (no local .gguf path).
        model_url="https://huggingface.co/leafspark/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        temperature=0.1,
        max_new_tokens=512,
        context_window=2048,
        generate_kwargs={"top_p": 0.95},
        # NOTE(review): the original comment said "leave 1 if CPU"; confirm
        # against the llama-cpp-python docs whether a CPU-only host wants 0.
        model_kwargs={"n_gpu_layers": 1},
    )


@lru_cache(maxsize=1)
def _get_embed_model():
    """Build the Hugging Face embedding model once and reuse it afterwards."""
    return HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    """Split the submitted text into semantic chunks.

    Args:
        data: Request body. ``text`` is chunked; ``source_id``, ``titre``,
            ``source`` and ``type`` are echoed back unchanged.

    Returns:
        On success, a dict with ``chunks`` (text of each node), ``metadatas``
        (metadata of each node) and the echoed request fields. If chunking
        raises, a dict ``{"error": <message>}`` is returned instead and the
        exception is logged.
    """
    # Models are expensive to construct, so they are created on first use and
    # cached instead of being rebuilt for every request.
    llm = _get_llm()
    embed_model = _get_embed_model()

    # Keep the global LlamaIndex settings pointing at the same instances.
    Settings.llm = llm
    Settings.embed_model = embed_model

    try:
        # Pass the embedding model explicitly: from_defaults() does not read
        # Settings.embed_model and would otherwise fall back to its own
        # default embedding backend.
        parser = SemanticSplitterNodeParser.from_defaults(embed_model=embed_model)
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])
        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    except Exception as e:
        # Endpoint boundary: keep the existing error payload for clients, but
        # record the traceback so the failure is not silently swallowed.
        _logger.exception("Semantic chunking failed")
        return {"error": str(e)}
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    from uvicorn import run as _serve

    # Serve the application on all interfaces, port 7860.
    _serve("app:app", host="0.0.0.0", port=7860)