HamidOmarov committed on
Commit
64fd9b7
·
1 Parent(s): 39d9e01

Initial commit: FastAPI RAG API

Browse files
.gitignore CHANGED
@@ -1,6 +1,12 @@
1
- venv/
2
  .venv/
3
  __pycache__/
4
  *.pyc
5
  *.pyo
6
  *.pyd
 
 
 
 
 
 
 
1
+ venv/
2
  .venv/
3
  __pycache__/
4
  *.pyc
5
  *.pyo
6
  *.pyd
7
+ *.log
8
+ .env
9
+ data/uploads/
10
+ data/index/
11
+ .git
12
+ .gitignore
app/__init__.py ADDED
File without changes
app/api.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api.py
2
+ from fastapi import FastAPI, UploadFile, File, Form
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.responses import JSONResponse
5
+ from pathlib import Path
6
+ import shutil
7
+ import traceback
8
+
9
+ from .rag_system import SimpleRAG, UPLOAD_DIR
10
+ from .schemas import AskRequest, AskResponse, UploadResponse, HistoryResponse, HistoryItem
11
+ from .store import add_history, get_history
12
+ from .utils import ensure_session, http400
13
+
14
app = FastAPI(title="RAG API", version="1.0.0")

# CORS for the Streamlit/UI front end.
# FastAPI's CORS documentation states that a wildcard origin must not be
# combined with allow_credentials=True, so credentialed requests stay disabled
# until allow_origins is narrowed to explicit domains.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # replace with the real domain(s) in production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# One module-level retriever instance, constructed at import time and used by
# the endpoints below.
rag = SimpleRAG()
26
+
27
@app.get("/health")
def health():
    """Liveness probe: always answers with a fixed ``ok`` status."""
    status_payload = {"status": "ok"}
    return status_payload
30
+
31
@app.post("/upload_pdf", response_model=UploadResponse)
async def upload_pdf(file: UploadFile = File(...)):
    """Save an uploaded PDF under ``UPLOAD_DIR`` and add its text to the index.

    Args:
        file: The multipart upload; only names ending in ``.pdf`` are accepted.

    Returns:
        ``UploadResponse`` with the stored file name and the number of chunks
        added, or a JSON 500 response if saving/indexing fails.

    Raises:
        HTTPException: 400 when the upload is not a ``.pdf`` file.
    """
    # Local import: HTTPException is not part of this module's import list.
    from fastapi import HTTPException

    try:
        # Keep only the final path component so a crafted name such as
        # "../../x.pdf" cannot write outside UPLOAD_DIR. A missing filename
        # becomes "" and is rejected by the extension check below.
        safe_name = Path(file.filename or "").name
        if not safe_name.lower().endswith(".pdf"):
            http400("Yalnız PDF faylları qəbul olunur.")
        dest = UPLOAD_DIR / safe_name
        with dest.open("wb") as out:
            shutil.copyfileobj(file.file, out)
        chunks_added = rag.add_pdf(dest)
        return UploadResponse(filename=safe_name, chunks_added=chunks_added)
    except HTTPException:
        # Deliberate 4xx errors must reach the client unchanged instead of
        # being rewritten as a 500 by the generic handler below.
        raise
    except Exception as e:
        traceback.print_exc()
        return JSONResponse(status_code=500, content={"detail": f"Server xətası: {str(e)}"})
44
+
45
@app.post("/ask_question", response_model=AskResponse)
async def ask_question(payload: AskRequest):
    """Answer a question from the indexed chunks and record both turns.

    The user question is stored in the session history first, then the
    retrieved contexts are turned into an answer, which is stored as the
    assistant turn. Any failure is printed and reported as a JSON 500.
    """
    try:
        sid = ensure_session(payload.session_id)
        question = payload.question
        add_history(sid, "user", question)

        hits = rag.search(question, k=payload.top_k)
        contexts = [text for text, _score in hits]

        # Use a synthesize_answer attribute on the rag object when it has one.
        if hasattr(rag, "synthesize_answer"):
            answer = rag.synthesize_answer(question, contexts)
        else:
            answer = None
        if answer is None:
            # Otherwise fall back to the module-level function in rag_system,
            # which is not imported at the top of this file.
            from .rag_system import synthesize_answer
            answer = synthesize_answer(question, contexts)

        add_history(sid, "assistant", answer)
        return AskResponse(answer=answer, contexts=contexts, session_id=sid)
    except Exception as exc:
        traceback.print_exc()
        return JSONResponse(status_code=500, content={"detail": f"Server xətası: {str(exc)}"})
62
+
63
@app.get("/get_history", response_model=HistoryResponse)
async def get_history_endpoint(session_id: str):
    """Return the stored chat turns for ``session_id`` as a ``HistoryResponse``.

    Any failure is printed and reported as a JSON 500.
    """
    try:
        items = []
        for entry in get_history(session_id):
            items.append(HistoryItem(**entry))
        return HistoryResponse(session_id=session_id, history=items)
    except Exception as exc:
        traceback.print_exc()
        return JSONResponse(status_code=500, content={"detail": f"Server xətası: {str(exc)}"})
72
+ from fastapi.responses import RedirectResponse
73
+
74
@app.get("/")
def root():
    """Redirect the bare root URL to the ``/docs`` page."""
    docs_redirect = RedirectResponse(url="/docs")
    return docs_redirect
app/rag/__init__.py ADDED
File without changes
app/rag_system.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/rag_system.py
2
+ from pathlib import Path
3
+ from typing import List, Tuple
4
+ import os
5
+ import faiss
6
+ import numpy as np
7
+ from sentence_transformers import SentenceTransformer
8
+ from pypdf import PdfReader
9
+
10
# Data folder sits two levels above this file (beside the package directory).
DATA_DIR = Path(__file__).resolve().parents[1] / "data"
UPLOAD_DIR = DATA_DIR / "uploads"
INDEX_DIR = DATA_DIR / "index"
# Both folders are created at import time: index first, then uploads.
for _folder in (INDEX_DIR, UPLOAD_DIR):
    _folder.mkdir(parents=True, exist_ok=True)

# Embedding model name; the EMBED_MODEL environment variable overrides it.
MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
17
+
18
class SimpleRAG:
    """Small retrieval store: sentence embeddings in a flat FAISS index.

    ``chunks[i]`` is the text whose embedding is row ``i`` of ``index``. The
    two are saved side by side and must always have the same length.
    Embeddings are L2-normalised and searched with an inner-product index.
    """

    def __init__(self, index_path: Path = INDEX_DIR / "faiss.index", meta_path: Path = INDEX_DIR / "meta.npy"):
        """Load the embedding model and restore any persisted index/chunks.

        Args:
            index_path: File holding the serialized FAISS index.
            meta_path: File holding the chunk texts (NumPy object array).
        """
        self.model = SentenceTransformer(MODEL_NAME)
        self.index_path = index_path
        self.meta_path = meta_path
        self.index = None
        self.chunks: List[str] = []
        self._load()

    def _load(self):
        """Restore chunks and index from disk, or start with an empty store.

        If the stored index does not match the model's embedding dimension, or
        its row count differs from the number of stored chunks, BOTH are
        discarded in memory. Keeping only one of them would make search
        results point at the wrong texts.
        """
        dim = self.model.get_sentence_embedding_dimension()

        if self.meta_path.exists():
            # NOTE(security): allow_pickle=True unpickles the file contents.
            # This is acceptable only because the file is produced by
            # _persist(); never point meta_path at untrusted data.
            self.chunks = np.load(self.meta_path, allow_pickle=True).tolist()

        if self.index_path.exists():
            self.index = faiss.read_index(str(self.index_path))
        else:
            self.index = faiss.IndexFlatIP(dim)

        # Safety check: index and chunk list must describe the same rows.
        if self.index.d != dim or self.index.ntotal != len(self.chunks):
            self.index = faiss.IndexFlatIP(dim)
            self.chunks = []

    def _persist(self):
        """Write the index and the chunk texts to their configured paths."""
        faiss.write_index(self.index, str(self.index_path))
        # Save through an open handle: given a path, np.save appends ".npy"
        # when it is missing, which would write to a file _load never reads.
        with self.meta_path.open("wb") as fh:
            np.save(fh, np.array(self.chunks, dtype=object))

    @staticmethod
    def _pdf_to_texts(pdf_path: Path, chunk_size: int = 800) -> List[str]:
        """Extract text page by page and cut it into fixed-size pieces.

        Args:
            pdf_path: PDF file to read.
            chunk_size: Maximum number of characters per chunk.

        Returns:
            Chunks in page order; pages without extractable text are skipped.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")
        reader = PdfReader(str(pdf_path))
        chunks: List[str] = []
        for page in reader.pages:
            text = page.extract_text() or ""
            if not text.strip():
                continue
            # Simple splitting: consecutive, non-overlapping character windows.
            chunks.extend(text[i:i + chunk_size] for i in range(0, len(text), chunk_size))
        return chunks

    def add_pdf(self, pdf_path: Path) -> int:
        """Embed and index every chunk of a PDF, then persist the store.

        Returns:
            Number of chunks added (0 when no text could be extracted).
        """
        texts = self._pdf_to_texts(pdf_path)
        if not texts:
            return 0
        emb = self.model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
        self.index.add(emb)
        self.chunks.extend(texts)
        self._persist()
        return len(texts)

    def search(self, query: str, k: int = 5) -> List[Tuple[str, float]]:
        """Return up to ``k`` ``(chunk_text, score)`` pairs for ``query``.

        An empty store or a non-positive ``k`` yields an empty list instead of
        passing an invalid request to the index.
        """
        if k < 1 or not self.chunks or self.index.ntotal == 0:
            return []
        q = self.model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
        scores, ids = self.index.search(q, min(k, self.index.ntotal))
        # Ids outside the chunk list (e.g. -1 for unfilled slots) are skipped.
        return [
            (self.chunks[int(idx)], float(score))
            for idx, score in zip(ids[0], scores[0])
            if 0 <= idx < len(self.chunks)
        ]
83
+
84
# Simple answer builder (no LLM: the answer is just the top contexts quoted).
def synthesize_answer(question: str, contexts: List[str], max_contexts: int = 3) -> str:
    """Build a demo answer by quoting the first few retrieved contexts.

    Args:
        question: The user's question, echoed at the top of the answer.
        contexts: Retrieved text chunks, best match first.
        max_contexts: How many leading contexts to quote (default 3, the
            previously hard-coded value). Values below 1 quote nothing.

    Returns:
        The formatted answer, or a fixed "no context" message when there is
        nothing to quote.
    """
    selected = contexts[:max(max_contexts, 0)]
    if not selected:
        return "Kontekst tapılmadı. Sualı daha dəqiq verin və ya PDF yükləyin."
    joined = "\n---\n".join(selected)
    return (
        f"Sual: {question}\n\n"
        f"Cavab (kontekstdən çıxarış):\n{joined}\n\n"
        "(Qeyd: Demo rejimi — LLM inteqrasiyası üçün / later: OpenAI/Groq və s.)"
    )
app/schemas.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/schemas.py
2
+ from pydantic import BaseModel, Field
3
+ from typing import Optional, List
4
+
5
class AskRequest(BaseModel):
    """Request body for the ``/ask_question`` endpoint."""

    # The question text; at least two characters.
    question: str = Field(..., min_length=2)
    # Existing session to continue; when omitted a new id is generated.
    session_id: Optional[str] = None
    # Number of chunks to retrieve. Must be positive: zero or negative values
    # are rejected at validation time instead of reaching the index search.
    top_k: int = Field(5, ge=1)
9
+
10
class AskResponse(BaseModel):
    # Response body of the /ask_question endpoint.
    # answer: the generated answer text.
    answer: str
    # contexts: the retrieved chunk texts the answer was built from.
    contexts: List[str]
    # session_id: the session the exchange was recorded under.
    session_id: str
14
+
15
class UploadResponse(BaseModel):
    # Response body of the /upload_pdf endpoint.
    # filename: name of the uploaded file.
    filename: str
    # chunks_added: number of text chunks added to the index for this file.
    chunks_added: int
18
+
19
class HistoryItem(BaseModel):
    # One chat turn in a session history.
    # role: who produced the turn (the API records "user" and "assistant").
    role: str
    # content: the text of the turn.
    content: str
22
+
23
class HistoryResponse(BaseModel):
    # Response body of the /get_history endpoint.
    # session_id: the session that was queried.
    session_id: str
    # history: the stored turns for that session, as HistoryItem objects.
    history: List[HistoryItem]
app/store.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/store.py
2
+ from collections import defaultdict
3
+ from typing import List, Dict
4
+
5
# In-memory chat history keyed by session id (Redis/Postgres is advisable for
# production; this dict is lost when the process exits).
_history: Dict[str, List[dict]] = defaultdict(list)


def add_history(session_id: str, role: str, content: str):
    """Append one ``{"role": ..., "content": ...}`` turn to a session's history."""
    _history[session_id].append({"role": role, "content": content})


def get_history(session_id: str) -> List[dict]:
    """Return a copy of the turns stored for ``session_id``.

    Unknown sessions yield an empty list. ``.get`` is used so that a lookup
    does not create an entry in the defaultdict. Both the list and each turn
    dict are copied, so callers cannot modify the stored history by mutating
    the result.
    """
    return [dict(turn) for turn in _history.get(session_id, ())]
app/utils.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # app/utils.py
2
+ import uuid
3
+ from fastapi import HTTPException
4
+
5
+ def ensure_session(session_id: str | None) -> str:
6
+ return session_id or str(uuid.uuid4())
7
+
8
def http400(msg: str):
    """Abort the current request with HTTP 400, using ``msg`` as the detail."""
    bad_request = HTTPException(status_code=400, detail=msg)
    raise bad_request
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ pydantic==2.*
5
+ pydantic-settings
6
+ sentence-transformers
7
+ faiss-cpu
8
+ pypdf
9
+ python-dotenv
saa ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff.astextplain.textconv=astextplain
2
+ filter.lfs.clean=git-lfs clean -- %f
3
+ filter.lfs.smudge=git-lfs smudge -- %f
4
+ filter.lfs.process=git-lfs filter-process
5
+ filter.lfs.required=true
6
+ http.sslbackend=schannel
7
+ core.autocrlf=true
8
+ core.fscache=true
9
+ core.symlinks=false
10
+ pull.rebase=false
11
+ credential.helper=manager
12
+ credential.https://dev.azure.com.usehttppath=true
13
+ init.defaultbranch=master
14
+ user.name=HamidOmarov
15
16
+ core.repositoryformatversion=0
17
+ core.filemode=false
18
+ core.bare=false
19
+ core.logallrefupdates=true
20
+ core.symlinks=false
21
+ core.ignorecase=true
tests/test_requests.http ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # PDF yüklə
2
+ curl -X POST "http://127.0.0.1:8000/upload_pdf" \
3
+ -H "accept: application/json" -H "Content-Type: multipart/form-data" \
4
+ -F "file=@/path/to/file.pdf"
5
+
6
+ # Sual ver
7
+ curl -X POST "http://127.0.0.1:8000/ask_question" \
8
+ -H "Content-Type: application/json" \
9
+ -d '{"question":"Bu sənəd nədən bəhs edir?","top_k":5}'