File size: 9,796 Bytes
64fd9b7
7715973
a037cf8
7715973
 
 
 
 
 
 
 
40a908e
64fd9b7
a037cf8
7715973
a7ef914
40a908e
78bd110
7715973
 
 
40a908e
78bd110
 
 
 
 
 
 
 
 
 
 
7715973
 
 
a037cf8
 
 
 
 
7715973
 
a037cf8
 
 
 
 
7715973
 
 
 
a037cf8
 
7715973
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a037cf8
7715973
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40a908e
 
 
 
 
 
 
 
a7ef914
 
 
 
78bd110
a7ef914
78bd110
a7ef914
 
 
64fd9b7
 
7715973
 
 
a037cf8
 
 
 
 
 
 
7715973
a037cf8
40a908e
 
7715973
 
a037cf8
64fd9b7
 
a037cf8
7715973
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64fd9b7
 
a037cf8
7715973
 
 
 
40a908e
 
7715973
 
40a908e
7715973
 
 
 
40a908e
 
 
7715973
40a908e
 
 
 
 
 
 
 
 
 
 
 
 
 
7715973
 
 
 
 
 
40a908e
 
26ad320
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# app/api.py
from __future__ import annotations

import logging
import os
import re
from collections import deque
from datetime import datetime, timezone
from time import perf_counter
from typing import List, Optional

import faiss
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import BaseModel, Field

from .rag_system import SimpleRAG, UPLOAD_DIR, INDEX_DIR

# ------------------------------------------------------------------------------
# App setup
# ------------------------------------------------------------------------------
# Application instance; its `version` is echoed by the /health and /stats endpoints.
app = FastAPI(title="RAG API", version="1.3.0")

# CORS is fully open: any origin, method and header, with credentials enabled.
# NOTE(review): the CORS spec does not allow a literal "*" origin on credentialed
# requests -- confirm how the middleware handles this combination and whether an
# explicit origin allow-list is wanted before exposing the API publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single shared retrieval/answering engine used by the routes below.
rag = SimpleRAG()

# ------------------------------------------------------------------------------
# Models
# ------------------------------------------------------------------------------
class UploadResponse(BaseModel):
    """Response body of POST /upload_pdf."""

    filename: str  # file name of the uploaded PDF
    chunks_added: int  # value returned by rag.add_pdf for this upload

class AskRequest(BaseModel):
    """Request body of POST /ask_question."""

    # the question text; must contain at least one character
    question: str = Field(..., min_length=1)
    # passed to rag.search as `k`; defaults to 5, allowed range 1..20
    top_k: int = Field(5, ge=1, le=20)

class AskResponse(BaseModel):
    """Response body of POST /ask_question."""

    answer: str  # final answer text
    contexts: List[str]  # context strings the answer was built from (empty when none were found)

class HistoryItem(BaseModel):
    """One previously asked question, as listed by GET /get_history."""

    question: str  # the question text as it was recorded
    timestamp: str  # ISO-8601 string written when the question was recorded

class HistoryResponse(BaseModel):
    """Response body of GET /get_history."""

    total_chunks: int  # len(rag.chunks) at the time of the request
    history: List[HistoryItem] = []  # recorded questions; defaults to an empty list

# ------------------------------------------------------------------------------
# Lightweight stats store (in-memory)
# ------------------------------------------------------------------------------
class StatsStore:
    """
    In-memory usage counters behind the stats and history endpoints.

    Nothing is persisted; all counters start from zero with the process.

    ``last7_questions`` is a rolling window of per-day question counts:
    index 0 is the current UTC day, index 6 is six days ago. The window is
    shifted lazily, whenever it is read or a question is recorded, so that
    counts from previous days move to higher indexes instead of piling up
    in the "today" bucket forever.
    """

    def __init__(self):
        self.documents_indexed = 0
        self.questions_answered = 0
        self.latencies_ms = deque(maxlen=500)  # most recent latencies, in ms
        self._bucket_day = None  # UTC date that index 0 of the window stands for
        self.last7_questions = deque([0] * 7, maxlen=7)
        self.history = deque(maxlen=50)  # recent questions, newest first

    @property
    def last7_questions(self):
        """Per-day question counts (newest first), rolled forward to the current UTC day."""
        self._roll_days(datetime.now(timezone.utc).date())
        return self._last7

    @last7_questions.setter
    def last7_questions(self, window):
        # Assigning a new window (e.g. to reset the counters) starts over:
        # its first bucket is re-anchored on the next read or recorded question.
        self._last7 = window
        self._bucket_day = None

    def _roll_days(self, today):
        """Shift the window so that index 0 stands for ``today`` (a ``date``)."""
        if self._bucket_day is None:
            self._bucket_day = today
            return
        elapsed = (today - self._bucket_day).days
        if elapsed <= 0:
            # same day, or the clock went backwards: keep the current bucket
            return
        # every elapsed day puts an empty bucket in front; maxlen drops the oldest
        for _ in range(min(elapsed, 7)):
            self._last7.appendleft(0)
        self._bucket_day = today

    def add_docs(self, n: int):
        """Increase ``documents_indexed`` by ``n``; non-positive values are ignored."""
        if n > 0:
            self.documents_indexed += n

    def add_question(
        self,
        latency_ms: Optional[int] = None,
        q: Optional[str] = None,
        now: Optional[datetime] = None,
    ):
        """
        Record one answered question.

        Args:
            latency_ms: Handling time in milliseconds; skipped when ``None``.
            q: Question text to add to ``history``; skipped when empty.
            now: Moment the question was handled. Defaults to the current
                time in UTC; pass a UTC datetime when overriding it.
        """
        if now is None:
            # timezone-aware, so the stored ISO timestamp carries "+00:00"
            # and cannot be mistaken for local time by clients
            now = datetime.now(timezone.utc)
        self.questions_answered += 1
        if latency_ms is not None:
            self.latencies_ms.append(int(latency_ms))
        self._roll_days(now.date())
        if self._last7:
            self._last7[0] += 1
        else:
            self._last7.appendleft(1)
        if q:
            self.history.appendleft({"question": q, "timestamp": now.isoformat()})

    @property
    def avg_ms(self) -> int:
        """Mean of the stored latencies, truncated to an int; 0 when none are stored."""
        return int(sum(self.latencies_ms) / len(self.latencies_ms)) if self.latencies_ms else 0


stats = StatsStore()

# ------------------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------------------
# Regex patterns that mark a synthesized answer as boilerplate.
# is_generic_answer() searches for them in the lower-cased answer text.
_GENERIC_PATTERNS = [
    r"\bbased on document context\b",
    r"\bappears to be\b",
    r"\bgeneral (?:summary|overview)\b",
]

# Lower-case English function words that tokenize() drops, so they do not
# count as keywords when sentences are scored against a question.
_STOPWORDS = {
    "the","a","an","of","for","and","or","in","on","to","from","with","by","is","are",
    "was","were","be","been","being","at","as","that","this","these","those","it",
    "its","into","than","then","so","such","about","over","per","via","vs","within"
}

def is_generic_answer(text: str) -> bool:
    """
    Tell whether a synthesized answer is too empty or too boilerplate to use.

    An answer counts as generic when it is empty, shorter than 15 characters
    once trimmed, or when its lower-cased form matches any pattern in
    ``_GENERIC_PATTERNS``.
    """
    normalized = text.strip().lower() if text else ""
    if len(normalized) < 15:
        # covers the empty case as well as very short answers
        return True
    return any(re.search(pattern, normalized) for pattern in _GENERIC_PATTERNS)

def tokenize(s: str) -> List[str]:
    """
    Split text into lower-cased keyword tokens.

    A token is a maximal run of letters and digits of any script, so words
    containing non-ASCII letters (e.g. Azerbaijani "sənəd") stay whole
    instead of being broken apart at every such letter. Stopwords and
    tokens of one or two characters are dropped.
    """
    # [^\W_]+ is \w+ without the underscore: Unicode letters and digits only
    return [
        w
        for w in re.findall(r"[^\W_]+", s.lower())
        if len(w) > 2 and w not in _STOPWORDS
    ]

def extractive_answer(question: str, contexts: List[str], max_chars: int = 500) -> str:
    """
    Simple keyword-based extractive fallback.

    Splits the contexts into sentences, scores each sentence by the number
    of distinct question keywords it contains, and joins the best-scoring
    sentences (highest score first, longer sentence first on ties) for as
    long as they fit into ``max_chars``.

    Args:
        question: The user's question.
        contexts: Context chunks to extract from; ``None`` or empty entries
            are skipped.
        max_chars: Upper bound on the length of the extracted text.

    Returns:
        The selected sentences joined by single spaces. If no sentence
        shares a keyword with the question, the first ``max_chars``
        characters of the first chunk. If there is no usable text at all,
        a fixed "couldn't find relevant information" message.
    """
    not_found = "I couldn't find relevant information in the indexed documents for this question."
    if not contexts:
        return not_found

    first_chunk = contexts[0] or ""
    # used whenever no sentence can be selected by keyword overlap
    fallback = first_chunk[:max_chars].strip() or not_found

    q_tokens = set(tokenize(question))
    if not q_tokens:
        # the question has no usable keywords (only stopwords or very short
        # words): score against the vocabulary of the first chunk instead
        q_tokens = set(tokenize(first_chunk))

    sentences: List[str] = []
    for chunk in contexts:
        # rough sentence split: after . ! ? followed by whitespace, or at newlines
        for part in re.split(r"(?<=[\.!\?])\s+|\n+", (chunk or "").strip()):
            part = part.strip()
            if part:
                sentences.append(part)
    if not sentences:
        return fallback

    # keep only sentences that share at least one keyword with the question
    matches = []
    for sent in sentences:
        score = len(q_tokens & set(tokenize(sent)))
        if score > 0:
            matches.append((score, sent))
    if not matches:
        return fallback

    matches.sort(key=lambda item: (item[0], len(item[1])), reverse=True)

    picked: List[str] = []
    used = 0
    for _, sent in matches:
        cost = len(sent) + (1 if picked else 0)  # +1 for the joining space
        if used + cost > max_chars:
            # does not fit; a shorter match further down may still fit
            continue
        picked.append(sent)
        used += cost

    if not picked:
        # even the best match alone exceeds the budget: return its beginning
        return matches[0][1][:max_chars]

    return " ".join(picked)

# ------------------------------------------------------------------------------
# Routes
# ------------------------------------------------------------------------------
@app.get("/")
def root():
    """Redirect requests for the bare root URL to /docs."""
    docs_url = "/docs"
    return RedirectResponse(url=docs_url)

@app.get("/health")
def health():
    """Report a fixed "ok" status together with the app version and the summarizer label."""
    payload = dict(
        status="ok",
        version=app.version,
        summarizer="extractive_en + translate + fallback",
    )
    return payload

@app.get("/debug/translate")
def debug_translate():
    """
    Smoke-test the translation pipeline.

    Builds a "translation" pipeline for the Helsinki-NLP/opus-mt-az-en model
    and runs one fixed sample sentence through it. On success the translated
    text is returned; any exception is turned into a 500 JSON response that
    carries the error message.
    """
    sample = "Sənəd təmiri və quraşdırılması ilə bağlı işlər görülüb."
    try:
        # imported here so a missing/broken transformers install is reported
        # by this endpoint instead of failing at module import time
        from transformers import pipeline

        translator = pipeline(
            "translation",
            model="Helsinki-NLP/opus-mt-az-en",
            cache_dir=str(rag.cache_dir),
            device=-1,
        )
        results = translator(sample, max_length=80)
        translated = results[0]["translation_text"]
    except Exception as exc:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(exc)})
    return {"ok": True, "example_out": translated}

@app.post("/upload_pdf", response_model=UploadResponse)
async def upload_pdf(file: UploadFile = File(...)):
    """
    Store an uploaded PDF under ``UPLOAD_DIR`` and index it with ``rag.add_pdf``.

    Returns:
        UploadResponse with the stored file name and the number of chunks added.

    Raises:
        HTTPException(400): the upload has no file name ending in ``.pdf``,
            or ``rag.add_pdf`` reported zero chunks for it.
    """
    # The client controls the file name: keep only its last path component so
    # names such as "../../x.pdf" or "/abs/x.pdf" cannot escape UPLOAD_DIR.
    # (filename may also be missing altogether.)
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not safe_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are allowed.")

    dest = UPLOAD_DIR / safe_name
    with open(dest, "wb") as out:
        # copy in 1 MiB pieces so the whole upload is never held in memory
        while True:
            chunk = await file.read(1024 * 1024)
            if not chunk:
                break
            out.write(chunk)

    added = rag.add_pdf(dest)
    if added == 0:
        raise HTTPException(status_code=400, detail="No extractable text found (likely a scanned image PDF).")

    # One successful upload is one indexed document; the chunk count is
    # returned below and must not inflate the document counter.
    stats.add_docs(1)
    return UploadResponse(filename=safe_name, chunks_added=added)

@app.post("/ask_question", response_model=AskResponse)
def ask_question(payload: AskRequest):
    """
    Answer a question from the indexed documents.

    Retrieves up to ``top_k`` context strings via ``rag.search`` (falling back
    to the most recently added chunks when the search returns nothing), asks
    ``rag.synthesize_answer`` for an answer and, when that answer is missing
    or generic, substitutes a keyword-based extractive answer. Each answered
    question is recorded in ``stats`` together with its latency.

    Raises:
        HTTPException(400): the question is blank after trimming.
        HTTPException(500): ``rag.search`` raised.
    """
    no_info = "I couldn't find relevant information in the indexed documents for this question."

    q = (payload.question or "").strip()
    if not q:
        raise HTTPException(status_code=400, detail="Missing 'question'.")

    k = max(1, int(payload.top_k))
    t0 = perf_counter()

    # retrieval
    try:
        hits = rag.search(q, k=k)  # expected: List[Tuple[str, float]]
    except Exception as e:
        # keep the original traceback attached to the HTTP error
        raise HTTPException(status_code=500, detail=f"Search failed: {e}") from e

    contexts = [c for c, _ in (hits or []) if c]
    if not contexts:
        # nothing retrieved: fall back to the most recently added chunks, if any
        recent = getattr(rag, "last_added", None)
        contexts = recent[:k] if recent else []

    if not contexts:
        stats.add_question(int((perf_counter() - t0) * 1000), q=q)
        return AskResponse(answer=no_info, contexts=[])

    # synthesis (LLM or rule-based inside rag)
    try:
        synthesized = rag.synthesize_answer(q, contexts) or ""
    except Exception:
        # Best effort: a failing synthesizer must not fail the request (the
        # extractive fallback below takes over), but leave a trace in the log.
        logging.getLogger(__name__).exception(
            "Answer synthesis failed; falling back to extractive answer"
        )
        synthesized = ""

    # guard against generic/unchanging answers
    if is_generic_answer(synthesized):
        synthesized = extractive_answer(q, contexts, max_chars=600)

    latency_ms = int((perf_counter() - t0) * 1000)
    stats.add_question(latency_ms, q=q)
    return AskResponse(answer=synthesized.strip(), contexts=contexts)

@app.get("/get_history", response_model=HistoryResponse)
def get_history():
    """Return the recorded questions together with the current number of chunks."""
    # copy the deque first, then turn each stored dict into a response item
    recorded = list(stats.history)
    items = [HistoryItem(**entry) for entry in recorded]
    return HistoryResponse(total_chunks=len(rag.chunks), history=items)

@app.get("/stats")
def stats_endpoint():
    """
    Return usage counters plus index metrics as one flat JSON object.

    Combines the values kept in ``stats`` with figures read from ``rag``
    (chunk count, index size and dimension, size of the last added batch).
    """
    index = rag.index
    recent_chunks = getattr(rag, "last_added", [])
    # the older fields come first, the dashboard-oriented metrics after them
    return {
        "documents_indexed": stats.documents_indexed,
        "questions_answered": stats.questions_answered,
        "avg_ms": stats.avg_ms,
        "last7_questions": list(stats.last7_questions),
        "total_chunks": len(rag.chunks),
        "faiss_ntotal": int(getattr(index, "ntotal", 0)),
        # fall back to the configured embedding size when the index has no `d`
        "model_dim": int(getattr(index, "d", rag.embed_dim)),
        "last_added_chunks": len(recent_chunks),
        "version": app.version,
    }

@app.post("/reset_index")
def reset_index():
    """
    Drop the whole index and clear the usage statistics.

    Replaces ``rag.index`` with an empty inner-product index, empties the
    chunk lists, removes the index files under ``INDEX_DIR`` if they exist
    and zeroes every counter in ``stats``.

    Raises:
        HTTPException(500): any step failed; the original exception is kept
            as the cause.
    """
    try:
        rag.index = faiss.IndexFlatIP(rag.embed_dim)
        rag.chunks = []
        rag.last_added = []
        for p in (INDEX_DIR / "faiss.index", INDEX_DIR / "meta.npy"):
            try:
                os.remove(p)
            except FileNotFoundError:
                # nothing was persisted for this file; that is fine
                pass
        # also reset stats counters to avoid stale analytics
        stats.documents_indexed = 0
        stats.questions_answered = 0
        stats.latencies_ms.clear()
        stats.last7_questions = deque([0] * 7, maxlen=7)
        stats.history.clear()
        return {"ok": True}
    except Exception as e:
        # chain the cause so the server log shows where the reset failed
        raise HTTPException(status_code=500, detail=str(e)) from e