Spaces:

KJ24
/

chunkr-api

Runtime error

KJ24 committed on Jun 11

Commit

200fee8

verified ·

1 Parent(s): cbb6dd7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,9 +3,10 @@ from pydantic import BaseModel
 from typing import Optional
 from llama_index.core import Document, ServiceContext
-from llama_index.llms.openai import OpenAI
-from llama_index.core.node_parser import SemanticSplitterNodeParser
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 import os
 app = FastAPI()
@@ -19,19 +20,20 @@ class ChunkRequest(BaseModel):
     type: Optional[str] = None
 # 🔹 Endpoint principal
 @app.post("/chunk")
 async def chunk_text(data: ChunkRequest):
-    # Modèle LLM (OpenRouter - Llama 4 Maverick)
-    llm = OpenAI(
-        model="meta-llama/llama-4-maverick:free",
-        api_base="https://openrouter.ai/api/v1",
-        api_key=os.getenv("OPENROUTER_API_KEY")
     )
-    # 🔹 Embedding open source gratuit
     embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-    # 🔹 Service Context avec LLM + embeddings
     service_context = ServiceContext.from_defaults(
         llm=llm,
         embed_model=embed_model
@@ -51,3 +53,8 @@ async def chunk_text(data: ChunkRequest):
         }
     except Exception as e:
         return {"error": str(e)}

 from typing import Optional
 from llama_index.core import Document, ServiceContext
+from llama_index.llms.llama_cpp import LlamaCPP
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core.node_parser import SemanticSplitterNodeParser
 import os
 app = FastAPI()
     type: Optional[str] = None
 # 🔹 Endpoint principal
 @app.post("/chunk")
 async def chunk_text(data: ChunkRequest):
+    llm = LlamaCPP(
+        model_path="/models/mistral-7b-instruct.gguf",
+        temperature=0.1,
+        max_new_tokens=512,
+        context_window=2048,
+        generate_kwargs={"top_p": 0.95},
+        model_kwargs={"n_gpu_layers": 1},
     )
     embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
     service_context = ServiceContext.from_defaults(
         llm=llm,
         embed_model=embed_model
         }
     except Exception as e:
         return {"error": str(e)}