from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional

from llama_index.core.settings import Settings
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SemanticSplitterNodeParser

app = FastAPI()

# 📥 Schema of the JSON request body sent to /chunk
class ChunkRequest(BaseModel):
    text: str
    source_id: Optional[str] = None
    titre: Optional[str] = None
    source: Optional[str] = None
    type: Optional[str] = None

@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    # ✅ Load a GGUF model hosted on Hugging Face directly (no local .gguf file).
    # Note: instantiated inside the handler, so the weights are reloaded on every request.
    llm = LlamaCPP(
        model_url="https://huggingface.co/leafspark/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        temperature=0.1,
        max_new_tokens=512,
        context_window=2048,
        generate_kwargs={"top_p": 0.95},
        model_kwargs={"n_gpu_layers": 1},  # Laisse 1 si CPU
    )

    # ✅ Open-source embedding model from Hugging Face
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # ✅ Recommended approach (replaces the deprecated ServiceContext):
    # configure the LLM and embedding model globally via Settings
    Settings.llm = llm
    Settings.embed_model = embed_model

    try:
        # ✅ Smart semantic chunking: pass the embedding model explicitly
        # (the old service_context argument is no longer used)
        parser = SemanticSplitterNodeParser.from_defaults(embed_model=embed_model)
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])

        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type
        }
    except Exception as e:
        return {"error": str(e)}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860)