HamidOmarov committed on
Commit
1cc2b5b
·
verified ·
1 Parent(s): cf92f2c

Update app/rag_system.py

Browse files
Files changed (1) hide show
  1. app/rag_system.py +12 -0
app/rag_system.py CHANGED
@@ -9,6 +9,18 @@ from typing import List, Tuple, Optional
9
  import faiss
10
  import numpy as np
11
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Prefer pypdf; fallback to PyPDF2 if needed
13
  try:
14
  from pypdf import PdfReader
 
9
  import faiss
10
  import numpy as np
11
 
12
+ # -- add near other helpers --
13
+ import re
14
+
15
+ AZ_LATIN = "A-Za-zƏəĞğİıÖöŞşÇç"
16
+ _SINGLE_LETTER_RUN = re.compile(rf"\b(?:[{AZ_LATIN}]\s+){{2,}}[{AZ_LATIN}]\b")
17
+
18
+ def _fix_intra_word_spaces(s: str) -> str:
19
+ """Join sequences like 'H Ə F T Ə' -> 'HƏFTƏ' without touching normal words."""
20
+ if not s:
21
+ return s
22
+ return _SINGLE_LETTER_RUN.sub(lambda m: re.sub(r"\s+", "", m.group(0)), s)
23
+
24
  # Prefer pypdf; fallback to PyPDF2 if needed
25
  try:
26
  from pypdf import PdfReader