Spaces:
Runtime error
Runtime error
File size: 2,931 Bytes
d11e1fe bec7021 dbd9820 ec7f6a1 037a839 200fee8 ec7f6a1 d11e1fe ec7f6a1 d11e1fe 2583cf2 d11e1fe ec7f6a1 037a839 ec7f6a1 037a839 ec7f6a1 d11e1fe 5583ab1 037a839 d11e1fe bec7021 ec7f6a1 bec7021 ec7f6a1 d11e1fe d00f6f0 d5e5243 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import logging
import os
from functools import lru_cache
from typing import List, Optional

import torch
import torch.nn.functional as F
from fastapi import FastAPI
from pydantic import BaseModel

# Local embedding through transformers
from transformers import AutoTokenizer, AutoModel

# LlamaIndex modules
from llama_index.core import Document
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.core.settings import Settings
from llama_index.llms.llama_cpp import LlamaCPP
# Directory used for every Hugging Face cache location.
CACHE_DIR = "/data"

# Point all Hugging Face cache environment variables at CACHE_DIR.
# NOTE(review): `transformers` is imported before these variables are set;
# confirm the library still honours them at this point. The explicit
# `cache_dir` arguments below do not depend on the environment.
os.environ.update(
    {
        "HF_HOME": CACHE_DIR,
        "TRANSFORMERS_CACHE": CACHE_DIR,
        "HF_MODULES_CACHE": CACHE_DIR,
        "HF_HUB_CACHE": CACHE_DIR,
    }
)

# Local embedding model (e.g. BGE / Nomic / GTE); loaded once at import time
# and shared by get_embedding().
MODEL_NAME = "BAAI/bge-small-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
model = AutoModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

# ASGI application object the route decorators attach to.
app = FastAPI()
def get_embedding(text: str):
    """Embed *text* with the module-level tokenizer and model.

    The vector at position 0 of the model's last hidden state is taken for
    each sequence, scaled to unit L2 norm along dimension 1, stripped of
    size-1 dimensions and converted to plain Python lists.

    Args:
        text: Input passed to the tokenizer (truncated and padded by it).

    Returns:
        The normalised embedding as nested/flat Python list(s) of floats,
        as produced by ``Tensor.squeeze().tolist()``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
        first_token_vectors = hidden_states[:, 0]
        unit_vectors = F.normalize(first_token_vectors, p=2, dim=1)
    return unit_vectors.squeeze().tolist()
# Request body accepted by the POST /chunk endpoint.
# Only `text` is required; every other field defaults to None.
class ChunkRequest(BaseModel):
    # Raw text to be split into chunks.
    text: str
    # Optional caller-supplied fields; the endpoint returns them in its
    # response exactly as received.
    source_id: Optional[str] = None
    titre: Optional[str] = None
    source: Optional[str] = None
    # `type` shadows the builtin inside the class body, but it is the field
    # name used in the request/response payload, so it is kept as is.
    type: Optional[str] = None
_logger = logging.getLogger(__name__)


class _LocalEmbedding(BaseEmbedding):
    """Expose `get_embedding` through LlamaIndex's embedding interface.

    LlamaIndex only accepts `BaseEmbedding` instances as embedding models,
    so the local transformers model is wrapped in a minimal subclass that
    implements the required hooks.
    """

    def _get_text_embedding(self, text: str) -> List[float]:
        return get_embedding(text)

    def _get_query_embedding(self, query: str) -> List[float]:
        return get_embedding(query)

    async def _aget_query_embedding(self, query: str) -> List[float]:
        return get_embedding(query)


@lru_cache(maxsize=1)
def _get_llm() -> LlamaCPP:
    """Build the llama.cpp-backed LLM once and reuse it across requests."""
    return LlamaCPP(
        model_url="https://huggingface.co/leafspark/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        temperature=0.1,
        max_new_tokens=512,
        context_window=2048,
        generate_kwargs={"top_p": 0.95},
        model_kwargs={"n_gpu_layers": 1},
    )


@lru_cache(maxsize=1)
def _get_semantic_parser() -> SemanticSplitterNodeParser:
    """Configure the global LlamaIndex Settings and build the splitter once.

    `lru_cache` does not cache exceptions, so a failed initialisation is
    retried on the next request.
    """
    embed_model = _LocalEmbedding()
    # NOTE(review): the splitter itself appears to need only embeddings;
    # confirm whether loading the LLM is required at all.
    Settings.llm = _get_llm()
    Settings.embed_model = embed_model
    # Pass the embedding model explicitly so the splitter uses the local
    # model instead of falling back to its own default.
    return SemanticSplitterNodeParser.from_defaults(embed_model=embed_model)


@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    """Split `data.text` into semantic chunks.

    Args:
        data: Request payload; `text` is chunked, the remaining fields are
            returned unchanged.

    Returns:
        On success, a dict with `chunks` (chunk texts), `metadatas` (one
        metadata dict per chunk) and the echoed `source_id`, `titre`,
        `source` and `type`. Blank input yields empty `chunks` and
        `metadatas`. On failure, a dict with a single `error` key holding
        the exception message (or the exception class name when the
        message is empty); the failure is also logged with its traceback.
    """
    try:
        if data.text.strip():
            parser = _get_semantic_parser()
            nodes = parser.get_nodes_from_documents([Document(text=data.text)])
        else:
            # Nothing to split; skip model initialisation entirely.
            nodes = []
        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    except Exception as e:
        # Top-level boundary: keep the existing {"error": ...} response
        # contract, but record the traceback instead of dropping it.
        _logger.exception("Semantic chunking failed")
        return {"error": str(e) or type(e).__name__}
if __name__ == "__main__":
    # Direct execution: serve the application referenced by the import
    # string "app:app" on all interfaces, port 7860.
    from uvicorn import run as serve

    serve("app:app", host="0.0.0.0", port=7860)
|