from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional
from llama_index.core.settings import Settings
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SemanticSplitterNodeParser
app = FastAPI()


# 📥 Model for the JSON request body sent to /chunk
class ChunkRequest(BaseModel):
    text: str
    source_id: Optional[str] = None
    titre: Optional[str] = None
    source: Optional[str] = None
    type: Optional[str] = None
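
# For illustration, a hypothetical /chunk request body (only "text" is
# required; the other fields are optional metadata echoed back in the response):
#
#   {
#       "text": "Long document to split into semantic chunks...",
#       "source_id": "doc-42",
#       "titre": "Example title",
#       "source": "upload",
#       "type": "article"
#   }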


@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    # ✅ Load a model hosted on Hugging Face directly (no local .gguf file).
    # Note: the LLM is instantiated inside the handler, so it is reloaded on every request.
    llm = LlamaCPP(
        model_url="https://huggingface.co/leafspark/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        temperature=0.1,
        max_new_tokens=512,
        context_window=2048,
        generate_kwargs={"top_p": 0.95},
        model_kwargs={"n_gpu_layers": 1},  # leave at 1 if running on CPU
    )

    # ✅ Open-source embedding model from Hugging Face
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # ✅ Legacy service configuration (kept for reference):
    # service_context = ServiceContext.from_defaults(
    #     llm=llm,
    #     embed_model=embed_model
    # )

    # ✅ Recommended approach now: configure Settings globally
    Settings.llm = llm
    Settings.embed_model = embed_model

    try:
        # ✅ Smart semantic chunking
        # parser = SemanticSplitterNodeParser.from_defaults(service_context=service_context)

        # ✅ Call the parser without a service_context; pass the embedding
        # model explicitly so it does not fall back to a default
        parser = SemanticSplitterNodeParser.from_defaults(embed_model=embed_model)
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])

        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    except Exception as e:
        return {"error": str(e)}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app:app", host="0.0.0.0", port=7860)
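
# A minimal sketch of a client call, assuming the Space is reachable at
# http://localhost:7860 (adjust the URL to your deployment):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chunk",
#       json={"text": "Some long text to split into semantic chunks.", "titre": "Demo"},
#   )
#   print(resp.json()["chunks"])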