from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional
# ✅ LlamaIndex modules
from llama_index.core.settings import Settings
from llama_index.core import Document
from llama_index.core.embeddings import BaseEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SemanticSplitterNodeParser
# ✅ Local embeddings via transformers
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import os
app = FastAPI()
# ✅ Local Hugging Face cache configuration
CACHE_DIR = "/data"
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR
os.environ["HF_HUB_CACHE"] = CACHE_DIR
# ✅ Local embedding model configuration (e.g. BGE / Nomic / GTE, etc.)
MODEL_NAME = "BAAI/bge-small-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
model = AutoModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
def get_embedding(text: str):
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        outputs = model(**inputs)
        # CLS-token pooling followed by L2 normalization (the recommended setup for BGE models)
        embeddings = outputs.last_hidden_state[:, 0]
        return F.normalize(embeddings, p=2, dim=1).squeeze().tolist()
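# Illustrative usage (assumption: bge-small-en-v1.5 produces 384-dimensional vectors):
# get_embedding("hello world") returns a plain Python list of 384 floats, L2-normalized,
# so the dot product of two embeddings equals their cosine similarity.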
# ✅ Incoming POST payload
class ChunkRequest(BaseModel):
    text: str
    source_id: Optional[str] = None
    titre: Optional[str] = None
    source: Optional[str] = None
    type: Optional[str] = None
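# Illustrative request body for POST /chunk (hypothetical values; only "text" is required):
# {
#   "text": "Long document to split into semantic chunks...",
#   "source_id": "doc-001",
#   "titre": "Example title",
#   "source": "upload",
#   "type": "article"
# }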
@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    try:
        # ✅ Load the LLM from Hugging Face over the network (no local .gguf file);
        # note that the GGUF weights (several GB) are downloaded on the first request
        llm = LlamaCPP(
            model_url="https://huggingface.co/leafspark/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
            temperature=0.1,
            max_new_tokens=512,
            context_window=2048,
            generate_kwargs={"top_p": 0.95},
            model_kwargs={"n_gpu_layers": 1},
        )
        # ✅ Wire the local embedding into LlamaIndex Settings. The wrapper subclasses
        # BaseEmbedding so it is accepted wherever LlamaIndex expects an embed model.
        class SimpleEmbedding(BaseEmbedding):
            def _get_text_embedding(self, text: str):
                return get_embedding(text)
            def _get_query_embedding(self, query: str):
                return get_embedding(query)
            async def _aget_query_embedding(self, query: str):
                return get_embedding(query)
        Settings.llm = llm
        Settings.embed_model = SimpleEmbedding()
        # ✅ Smart semantic splitting, passing the local embedding model explicitly
        parser = SemanticSplitterNodeParser.from_defaults(embed_model=Settings.embed_model)
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])
        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    except Exception as e:
        return {"error": str(e)}
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860)
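# Minimal client sketch (illustrative, not part of the service): assumes the API is
# reachable at http://localhost:7860 and the `requests` package is installed.
#
#   import requests
#   payload = {"text": "Some long text to split into chunks...", "source_id": "doc-001"}
#   resp = requests.post("http://localhost:7860/chunk", json=payload)
#   print(resp.json()["chunks"])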