from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional

# ✅ LlamaIndex modules
from llama_index.core.settings import Settings
from llama_index.core import Document
from llama_index.core.embeddings import BaseEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SemanticSplitterNodeParser

# ✅ LOCAL embedding via transformers
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import os

app = FastAPI()

# ✅ Local Hugging Face cache configuration
CACHE_DIR = "/data"
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR
os.environ["HF_HUB_CACHE"] = CACHE_DIR

# ✅ Local embedding model configuration (e.g. BGE / Nomic / GTE, etc.)
MODEL_NAME = "BAAI/bge-small-en-v1.5"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
model = AutoModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

def get_embedding(text: str):
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        outputs = model(**inputs)
        # Use the [CLS] token representation, L2-normalized (standard usage for BGE models)
        embeddings = outputs.last_hidden_state[:, 0]
        return F.normalize(embeddings, p=2, dim=1).squeeze().tolist()

# ✅ Incoming POST payload
class ChunkRequest(BaseModel):
    text: str
    source_id: Optional[str] = None
    titre: Optional[str] = None
    source: Optional[str] = None
    type: Optional[str] = None

@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    try:
        # ✅ Load the LLM from Hugging Face at runtime (no local .gguf file)
        llm = LlamaCPP(
            model_url="https://huggingface.co/leafspark/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
            temperature=0.1,
            max_new_tokens=512,
            context_window=2048,
            generate_kwargs={"top_p": 0.95},
            model_kwargs={"n_gpu_layers": 1},
        )

        # ✅ Plug the local embedding into Settings
        # (Settings.embed_model expects a BaseEmbedding instance, so wrap get_embedding in a minimal subclass)
        class SimpleEmbedding(BaseEmbedding):
            def _get_text_embedding(self, text: str):
                return get_embedding(text)

            def _get_query_embedding(self, query: str):
                return get_embedding(query)

            async def _aget_query_embedding(self, query: str):
                return get_embedding(query)

        Settings.llm = llm
        Settings.embed_model = SimpleEmbedding()

        # ✅ Semantic chunking
        parser = SemanticSplitterNodeParser.from_defaults()
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])

        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    
    except Exception as e:
        return {"error": str(e)}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860)
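
# Example request (a sketch; assumes the service runs locally on port 7860 and
# uses the field names defined in the ChunkRequest model above):
#
#   curl -X POST http://localhost:7860/chunk \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Long document to split into semantic chunks...", "titre": "Demo", "source": "manual", "type": "note"}'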